When a new session starts in the gateway (via /new, /reset, or auto-reset), send the user a summary of the detected configuration: ✨ Session reset! Starting fresh. ◆ Model: qwen3.5:27b-q4_K_M ◆ Provider: custom ◆ Context: 8K tokens (config) ◆ Endpoint: http://localhost:11434/v1 This makes misconfigured context length immediately visible — a user running a local 8K model that falls to the 128K default will see: ◆ Context: 128K tokens (default — set model.context_length in config to override) Instead of silently getting no compression and degrading responses. - _format_session_info() resolves model, provider, context length, and endpoint from config + runtime, matching the hygiene code's resolution chain - Local/custom endpoints shown; cloud endpoints hidden (not useful) - Context source annotated: config, detected, or default with hint - Appended to /new and /reset responses, and auto-reset notifications - 9 tests covering all formatting paths and failure resilience Addresses the user-facing side of #2708 — instead of trying to fix every edge case in context detection, surface the values so users can immediately see when something is wrong.
5890 lines
262 KiB
Python
5890 lines
262 KiB
Python
"""
|
||
Gateway runner - entry point for messaging platform integrations.
|
||
|
||
This module provides:
|
||
- start_gateway(): Start all configured platform adapters
|
||
- GatewayRunner: Main class managing the gateway lifecycle
|
||
|
||
Usage:
|
||
# Start the gateway
|
||
python -m gateway.run
|
||
|
||
# Or from CLI
|
||
python cli.py --gateway
|
||
"""
|
||
|
||
import asyncio
|
||
import json
|
||
import logging
|
||
import os
|
||
import re
|
||
import shlex
|
||
import sys
|
||
import signal
|
||
import tempfile
|
||
import threading
|
||
import time
|
||
from logging.handlers import RotatingFileHandler
|
||
from pathlib import Path
|
||
from datetime import datetime
|
||
from typing import Dict, Optional, Any, List
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# SSL certificate auto-detection for NixOS and other non-standard systems.
|
||
# Must run BEFORE any HTTP library (discord, aiohttp, etc.) is imported.
|
||
# ---------------------------------------------------------------------------
|
||
def _ensure_ssl_certs() -> None:
|
||
"""Set SSL_CERT_FILE if the system doesn't expose CA certs to Python."""
|
||
if "SSL_CERT_FILE" in os.environ:
|
||
return # user already configured it
|
||
|
||
import ssl
|
||
|
||
# 1. Python's compiled-in defaults
|
||
paths = ssl.get_default_verify_paths()
|
||
for candidate in (paths.cafile, paths.openssl_cafile):
|
||
if candidate and os.path.exists(candidate):
|
||
os.environ["SSL_CERT_FILE"] = candidate
|
||
return
|
||
|
||
# 2. certifi (ships its own Mozilla bundle)
|
||
try:
|
||
import certifi
|
||
os.environ["SSL_CERT_FILE"] = certifi.where()
|
||
return
|
||
except ImportError:
|
||
pass
|
||
|
||
# 3. Common distro / macOS locations
|
||
for candidate in (
|
||
"/etc/ssl/certs/ca-certificates.crt", # Debian/Ubuntu/Gentoo
|
||
"/etc/pki/tls/certs/ca-bundle.crt", # RHEL/CentOS 7
|
||
"/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", # RHEL/CentOS 8+
|
||
"/etc/ssl/ca-bundle.pem", # SUSE/OpenSUSE
|
||
"/etc/ssl/cert.pem", # Alpine / macOS
|
||
"/etc/pki/tls/cert.pem", # Fedora
|
||
"/usr/local/etc/openssl@1.1/cert.pem", # macOS Homebrew Intel
|
||
"/opt/homebrew/etc/openssl@1.1/cert.pem", # macOS Homebrew ARM
|
||
):
|
||
if os.path.exists(candidate):
|
||
os.environ["SSL_CERT_FILE"] = candidate
|
||
return
|
||
|
||
_ensure_ssl_certs()
|
||
|
||
# Add parent directory to path
|
||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||
|
||
# Resolve Hermes home directory (respects HERMES_HOME override)
|
||
from hermes_constants import get_hermes_home
|
||
_hermes_home = get_hermes_home()
|
||
|
||
# Load environment variables from ~/.hermes/.env first.
|
||
# User-managed env files should override stale shell exports on restart.
|
||
from dotenv import load_dotenv # backward-compat for tests that monkeypatch this symbol
|
||
from hermes_cli.env_loader import load_hermes_dotenv
|
||
_env_path = _hermes_home / '.env'
|
||
load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')
|
||
|
||
# Bridge config.yaml values into the environment so os.getenv() picks them up.
|
||
# config.yaml is authoritative for terminal settings — overrides .env.
|
||
_config_path = _hermes_home / 'config.yaml'
|
||
if _config_path.exists():
|
||
try:
|
||
import yaml as _yaml
|
||
with open(_config_path, encoding="utf-8") as _f:
|
||
_cfg = _yaml.safe_load(_f) or {}
|
||
# Expand ${ENV_VAR} references before bridging to env vars.
|
||
from hermes_cli.config import _expand_env_vars
|
||
_cfg = _expand_env_vars(_cfg)
|
||
# Top-level simple values (fallback only — don't override .env)
|
||
for _key, _val in _cfg.items():
|
||
if isinstance(_val, (str, int, float, bool)) and _key not in os.environ:
|
||
os.environ[_key] = str(_val)
|
||
# Terminal config is nested — bridge to TERMINAL_* env vars.
|
||
# config.yaml overrides .env for these since it's the documented config path.
|
||
_terminal_cfg = _cfg.get("terminal", {})
|
||
if _terminal_cfg and isinstance(_terminal_cfg, dict):
|
||
_terminal_env_map = {
|
||
"backend": "TERMINAL_ENV",
|
||
"cwd": "TERMINAL_CWD",
|
||
"timeout": "TERMINAL_TIMEOUT",
|
||
"lifetime_seconds": "TERMINAL_LIFETIME_SECONDS",
|
||
"docker_image": "TERMINAL_DOCKER_IMAGE",
|
||
"docker_forward_env": "TERMINAL_DOCKER_FORWARD_ENV",
|
||
"singularity_image": "TERMINAL_SINGULARITY_IMAGE",
|
||
"modal_image": "TERMINAL_MODAL_IMAGE",
|
||
"daytona_image": "TERMINAL_DAYTONA_IMAGE",
|
||
"ssh_host": "TERMINAL_SSH_HOST",
|
||
"ssh_user": "TERMINAL_SSH_USER",
|
||
"ssh_port": "TERMINAL_SSH_PORT",
|
||
"ssh_key": "TERMINAL_SSH_KEY",
|
||
"container_cpu": "TERMINAL_CONTAINER_CPU",
|
||
"container_memory": "TERMINAL_CONTAINER_MEMORY",
|
||
"container_disk": "TERMINAL_CONTAINER_DISK",
|
||
"container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
|
||
"docker_volumes": "TERMINAL_DOCKER_VOLUMES",
|
||
"sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||
"persistent_shell": "TERMINAL_PERSISTENT_SHELL",
|
||
}
|
||
for _cfg_key, _env_var in _terminal_env_map.items():
|
||
if _cfg_key in _terminal_cfg:
|
||
_val = _terminal_cfg[_cfg_key]
|
||
if isinstance(_val, list):
|
||
os.environ[_env_var] = json.dumps(_val)
|
||
else:
|
||
os.environ[_env_var] = str(_val)
|
||
# Compression config is read directly from config.yaml by run_agent.py
|
||
# and auxiliary_client.py — no env var bridging needed.
|
||
# Auxiliary model/direct-endpoint overrides (vision, web_extract).
|
||
# Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
|
||
_auxiliary_cfg = _cfg.get("auxiliary", {})
|
||
if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
|
||
_aux_task_env = {
|
||
"vision": {
|
||
"provider": "AUXILIARY_VISION_PROVIDER",
|
||
"model": "AUXILIARY_VISION_MODEL",
|
||
"base_url": "AUXILIARY_VISION_BASE_URL",
|
||
"api_key": "AUXILIARY_VISION_API_KEY",
|
||
},
|
||
"web_extract": {
|
||
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
|
||
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
|
||
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
|
||
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||
},
|
||
"approval": {
|
||
"provider": "AUXILIARY_APPROVAL_PROVIDER",
|
||
"model": "AUXILIARY_APPROVAL_MODEL",
|
||
"base_url": "AUXILIARY_APPROVAL_BASE_URL",
|
||
"api_key": "AUXILIARY_APPROVAL_API_KEY",
|
||
},
|
||
}
|
||
for _task_key, _env_map in _aux_task_env.items():
|
||
_task_cfg = _auxiliary_cfg.get(_task_key, {})
|
||
if not isinstance(_task_cfg, dict):
|
||
continue
|
||
_prov = str(_task_cfg.get("provider", "")).strip()
|
||
_model = str(_task_cfg.get("model", "")).strip()
|
||
_base_url = str(_task_cfg.get("base_url", "")).strip()
|
||
_api_key = str(_task_cfg.get("api_key", "")).strip()
|
||
if _prov and _prov != "auto":
|
||
os.environ[_env_map["provider"]] = _prov
|
||
if _model:
|
||
os.environ[_env_map["model"]] = _model
|
||
if _base_url:
|
||
os.environ[_env_map["base_url"]] = _base_url
|
||
if _api_key:
|
||
os.environ[_env_map["api_key"]] = _api_key
|
||
_agent_cfg = _cfg.get("agent", {})
|
||
if _agent_cfg and isinstance(_agent_cfg, dict):
|
||
if "max_turns" in _agent_cfg:
|
||
os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
|
||
# Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
|
||
# HERMES_TIMEZONE from .env takes precedence (already in os.environ).
|
||
_tz_cfg = _cfg.get("timezone", "")
|
||
if _tz_cfg and isinstance(_tz_cfg, str) and "HERMES_TIMEZONE" not in os.environ:
|
||
os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip()
|
||
# Security settings
|
||
_security_cfg = _cfg.get("security", {})
|
||
if isinstance(_security_cfg, dict):
|
||
_redact = _security_cfg.get("redact_secrets")
|
||
if _redact is not None:
|
||
os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
|
||
except Exception:
|
||
pass # Non-fatal; gateway can still run with .env values
|
||
|
||
# Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs)
|
||
os.environ["HERMES_QUIET"] = "1"
|
||
|
||
# Enable interactive exec approval for dangerous commands on messaging platforms
|
||
os.environ["HERMES_EXEC_ASK"] = "1"
|
||
|
||
# Set terminal working directory for messaging platforms.
|
||
# If the user set an explicit path in config.yaml (not "." or "auto"),
|
||
# respect it. Otherwise use MESSAGING_CWD or default to home directory.
|
||
_configured_cwd = os.environ.get("TERMINAL_CWD", "")
|
||
if not _configured_cwd or _configured_cwd in (".", "auto", "cwd"):
|
||
messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home())
|
||
os.environ["TERMINAL_CWD"] = messaging_cwd
|
||
|
||
from gateway.config import (
|
||
Platform,
|
||
GatewayConfig,
|
||
load_gateway_config,
|
||
)
|
||
from gateway.session import (
|
||
SessionStore,
|
||
SessionSource,
|
||
SessionContext,
|
||
build_session_context,
|
||
build_session_context_prompt,
|
||
build_session_key,
|
||
)
|
||
from gateway.delivery import DeliveryRouter
|
||
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# Sentinel placed into _running_agents immediately when a session starts
|
||
# processing, *before* any await. Prevents a second message for the same
|
||
# session from bypassing the "already running" guard during the async gap
|
||
# between the guard check and actual agent creation.
|
||
_AGENT_PENDING_SENTINEL = object()
|
||
|
||
|
||
def _resolve_runtime_agent_kwargs() -> dict:
|
||
"""Resolve provider credentials for gateway-created AIAgent instances."""
|
||
from hermes_cli.runtime_provider import (
|
||
resolve_runtime_provider,
|
||
format_runtime_provider_error,
|
||
)
|
||
|
||
try:
|
||
runtime = resolve_runtime_provider(
|
||
requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
|
||
)
|
||
except Exception as exc:
|
||
raise RuntimeError(format_runtime_provider_error(exc)) from exc
|
||
|
||
return {
|
||
"api_key": runtime.get("api_key"),
|
||
"base_url": runtime.get("base_url"),
|
||
"provider": runtime.get("provider"),
|
||
"api_mode": runtime.get("api_mode"),
|
||
"command": runtime.get("command"),
|
||
"args": list(runtime.get("args") or []),
|
||
}
|
||
|
||
|
||
def _platform_config_key(platform: "Platform") -> str:
|
||
"""Map a Platform enum to its config.yaml key (LOCAL→"cli", rest→enum value)."""
|
||
return "cli" if platform == Platform.LOCAL else platform.value
|
||
|
||
|
||
def _load_gateway_config() -> dict:
|
||
"""Load and parse ~/.hermes/config.yaml, returning {} on any error."""
|
||
try:
|
||
config_path = _hermes_home / 'config.yaml'
|
||
if config_path.exists():
|
||
import yaml
|
||
with open(config_path, 'r', encoding='utf-8') as f:
|
||
return yaml.safe_load(f) or {}
|
||
except Exception:
|
||
logger.debug("Could not load gateway config from %s", _hermes_home / 'config.yaml')
|
||
return {}
|
||
|
||
|
||
def _resolve_gateway_model(config: dict | None = None) -> str:
|
||
"""Read model from env/config — mirrors the resolution in _run_agent_sync.
|
||
|
||
Without this, temporary AIAgent instances (memory flush, /compress) fall
|
||
back to the hardcoded default ("anthropic/claude-opus-4.6") which fails
|
||
when the active provider is openai-codex.
|
||
"""
|
||
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
|
||
cfg = config if config is not None else _load_gateway_config()
|
||
model_cfg = cfg.get("model", {})
|
||
if isinstance(model_cfg, str):
|
||
model = model_cfg
|
||
elif isinstance(model_cfg, dict):
|
||
model = model_cfg.get("default", model)
|
||
return model
|
||
|
||
|
||
def _resolve_hermes_bin() -> Optional[list[str]]:
|
||
"""Resolve the Hermes update command as argv parts.
|
||
|
||
Tries in order:
|
||
1. ``shutil.which("hermes")`` — standard PATH lookup
|
||
2. ``sys.executable -m hermes_cli.main`` — fallback when Hermes is running
|
||
from a venv/module invocation and the ``hermes`` shim is not on PATH
|
||
|
||
Returns argv parts ready for quoting/joining, or ``None`` if neither works.
|
||
"""
|
||
import shutil
|
||
|
||
hermes_bin = shutil.which("hermes")
|
||
if hermes_bin:
|
||
return [hermes_bin]
|
||
|
||
try:
|
||
import importlib.util
|
||
|
||
if importlib.util.find_spec("hermes_cli") is not None:
|
||
return [sys.executable, "-m", "hermes_cli.main"]
|
||
except Exception:
|
||
pass
|
||
|
||
return None
|
||
|
||
|
||
class GatewayRunner:
|
||
"""
|
||
Main gateway controller.
|
||
|
||
Manages the lifecycle of all platform adapters and routes
|
||
messages to/from the agent.
|
||
"""
|
||
|
||
def __init__(self, config: Optional[GatewayConfig] = None):
|
||
self.config = config or load_gateway_config()
|
||
self.adapters: Dict[Platform, BasePlatformAdapter] = {}
|
||
|
||
# Load ephemeral config from config.yaml / env vars.
|
||
# Both are injected at API-call time only and never persisted.
|
||
self._prefill_messages = self._load_prefill_messages()
|
||
self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
|
||
self._reasoning_config = self._load_reasoning_config()
|
||
self._show_reasoning = self._load_show_reasoning()
|
||
self._provider_routing = self._load_provider_routing()
|
||
self._fallback_model = self._load_fallback_model()
|
||
self._smart_model_routing = self._load_smart_model_routing()
|
||
|
||
# Wire process registry into session store for reset protection
|
||
from tools.process_registry import process_registry
|
||
self.session_store = SessionStore(
|
||
self.config.sessions_dir, self.config,
|
||
has_active_processes_fn=lambda key: process_registry.has_active_for_session(key),
|
||
)
|
||
self.delivery_router = DeliveryRouter(self.config)
|
||
self._running = False
|
||
self._shutdown_event = asyncio.Event()
|
||
self._exit_cleanly = False
|
||
self._exit_with_failure = False
|
||
self._exit_reason: Optional[str] = None
|
||
|
||
# Track running agents per session for interrupt support
|
||
# Key: session_key, Value: AIAgent instance
|
||
self._running_agents: Dict[str, Any] = {}
|
||
self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt
|
||
|
||
# Cache AIAgent instances per session to preserve prompt caching.
|
||
# Without this, a new AIAgent is created per message, rebuilding the
|
||
# system prompt (including memory) every turn — breaking prefix cache
|
||
# and costing ~10x more on providers with prompt caching (Anthropic).
|
||
# Key: session_key, Value: (AIAgent, config_signature_str)
|
||
import threading as _threading
|
||
self._agent_cache: Dict[str, tuple] = {}
|
||
self._agent_cache_lock = _threading.Lock()
|
||
|
||
# Track active fallback model/provider when primary is rate-limited.
|
||
# Set after an agent run where fallback was activated; cleared when
|
||
# the primary model succeeds again or the user switches via /model.
|
||
self._effective_model: Optional[str] = None
|
||
self._effective_provider: Optional[str] = None
|
||
|
||
# Track pending exec approvals per session
|
||
# Key: session_key, Value: {"command": str, "pattern_key": str, ...}
|
||
self._pending_approvals: Dict[str, Dict[str, Any]] = {}
|
||
|
||
# Track platforms that failed to connect for background reconnection.
|
||
# Key: Platform enum, Value: {"config": platform_config, "attempts": int, "next_retry": float}
|
||
self._failed_platforms: Dict[Platform, Dict[str, Any]] = {}
|
||
|
||
# Persistent Honcho managers keyed by gateway session key.
|
||
# This preserves write_frequency="session" semantics across short-lived
|
||
# per-message AIAgent instances.
|
||
self._honcho_managers: Dict[str, Any] = {}
|
||
self._honcho_configs: Dict[str, Any] = {}
|
||
|
||
# Ensure tirith security scanner is available (downloads if needed)
|
||
try:
|
||
from tools.tirith_security import ensure_installed
|
||
ensure_installed(log_failures=False)
|
||
except Exception:
|
||
pass # Non-fatal — fail-open at scan time if unavailable
|
||
|
||
# Initialize session database for session_search tool support
|
||
self._session_db = None
|
||
try:
|
||
from hermes_state import SessionDB
|
||
self._session_db = SessionDB()
|
||
except Exception as e:
|
||
logger.debug("SQLite session store not available: %s", e)
|
||
|
||
# DM pairing store for code-based user authorization
|
||
from gateway.pairing import PairingStore
|
||
self.pairing_store = PairingStore()
|
||
|
||
# Event hook system
|
||
from gateway.hooks import HookRegistry
|
||
self.hooks = HookRegistry()
|
||
|
||
# Per-chat voice reply mode: "off" | "voice_only" | "all"
|
||
self._voice_mode: Dict[str, str] = self._load_voice_modes()
|
||
|
||
# Track background tasks to prevent garbage collection mid-execution
|
||
self._background_tasks: set = set()
|
||
|
||
def _get_or_create_gateway_honcho(self, session_key: str):
|
||
"""Return a persistent Honcho manager/config pair for this gateway session."""
|
||
if not hasattr(self, "_honcho_managers"):
|
||
self._honcho_managers = {}
|
||
if not hasattr(self, "_honcho_configs"):
|
||
self._honcho_configs = {}
|
||
|
||
if session_key in self._honcho_managers:
|
||
return self._honcho_managers[session_key], self._honcho_configs.get(session_key)
|
||
|
||
try:
|
||
from honcho_integration.client import HonchoClientConfig, get_honcho_client
|
||
from honcho_integration.session import HonchoSessionManager
|
||
|
||
hcfg = HonchoClientConfig.from_global_config()
|
||
if not hcfg.enabled or not hcfg.api_key:
|
||
return None, hcfg
|
||
|
||
client = get_honcho_client(hcfg)
|
||
manager = HonchoSessionManager(
|
||
honcho=client,
|
||
config=hcfg,
|
||
context_tokens=hcfg.context_tokens,
|
||
)
|
||
self._honcho_managers[session_key] = manager
|
||
self._honcho_configs[session_key] = hcfg
|
||
return manager, hcfg
|
||
except Exception as e:
|
||
logger.debug("Gateway Honcho init failed for %s: %s", session_key, e)
|
||
return None, None
|
||
|
||
def _shutdown_gateway_honcho(self, session_key: str) -> None:
|
||
"""Flush and close the persistent Honcho manager for a gateway session."""
|
||
managers = getattr(self, "_honcho_managers", None)
|
||
configs = getattr(self, "_honcho_configs", None)
|
||
if managers is None or configs is None:
|
||
return
|
||
|
||
manager = managers.pop(session_key, None)
|
||
configs.pop(session_key, None)
|
||
if not manager:
|
||
return
|
||
try:
|
||
manager.shutdown()
|
||
except Exception as e:
|
||
logger.debug("Gateway Honcho shutdown failed for %s: %s", session_key, e)
|
||
|
||
def _shutdown_all_gateway_honcho(self) -> None:
|
||
"""Flush and close all persistent Honcho managers."""
|
||
managers = getattr(self, "_honcho_managers", None)
|
||
if not managers:
|
||
return
|
||
for session_key in list(managers.keys()):
|
||
self._shutdown_gateway_honcho(session_key)
|
||
|
||
# -- Setup skill availability ----------------------------------------
|
||
|
||
def _has_setup_skill(self) -> bool:
|
||
"""Check if the hermes-agent-setup skill is installed."""
|
||
try:
|
||
from tools.skill_manager_tool import _find_skill
|
||
return _find_skill("hermes-agent-setup") is not None
|
||
except Exception:
|
||
return False
|
||
|
||
# -- Voice mode persistence ------------------------------------------
|
||
|
||
_VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json"
|
||
|
||
def _load_voice_modes(self) -> Dict[str, str]:
|
||
try:
|
||
data = json.loads(self._VOICE_MODE_PATH.read_text())
|
||
except (FileNotFoundError, json.JSONDecodeError, OSError):
|
||
return {}
|
||
|
||
if not isinstance(data, dict):
|
||
return {}
|
||
|
||
valid_modes = {"off", "voice_only", "all"}
|
||
return {
|
||
str(chat_id): mode
|
||
for chat_id, mode in data.items()
|
||
if mode in valid_modes
|
||
}
|
||
|
||
def _save_voice_modes(self) -> None:
|
||
try:
|
||
self._VOICE_MODE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||
self._VOICE_MODE_PATH.write_text(
|
||
json.dumps(self._voice_mode, indent=2)
|
||
)
|
||
except OSError as e:
|
||
logger.warning("Failed to save voice modes: %s", e)
|
||
|
||
def _set_adapter_auto_tts_disabled(self, adapter, chat_id: str, disabled: bool) -> None:
|
||
"""Update an adapter's in-memory auto-TTS suppression set if present."""
|
||
disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
|
||
if not isinstance(disabled_chats, set):
|
||
return
|
||
if disabled:
|
||
disabled_chats.add(chat_id)
|
||
else:
|
||
disabled_chats.discard(chat_id)
|
||
|
||
def _sync_voice_mode_state_to_adapter(self, adapter) -> None:
|
||
"""Restore persisted /voice off state into a live platform adapter."""
|
||
disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
|
||
if not isinstance(disabled_chats, set):
|
||
return
|
||
disabled_chats.clear()
|
||
disabled_chats.update(
|
||
chat_id for chat_id, mode in self._voice_mode.items() if mode == "off"
|
||
)
|
||
|
||
# -----------------------------------------------------------------
|
||
|
||
def _flush_memories_for_session(
|
||
self,
|
||
old_session_id: str,
|
||
honcho_session_key: Optional[str] = None,
|
||
):
|
||
"""Prompt the agent to save memories/skills before context is lost.
|
||
|
||
Synchronous worker — meant to be called via run_in_executor from
|
||
an async context so it doesn't block the event loop.
|
||
"""
|
||
# Skip cron sessions — they run headless with no meaningful user
|
||
# conversation to extract memories from.
|
||
if old_session_id and old_session_id.startswith("cron_"):
|
||
logger.debug("Skipping memory flush for cron session: %s", old_session_id)
|
||
return
|
||
|
||
try:
|
||
history = self.session_store.load_transcript(old_session_id)
|
||
if not history or len(history) < 4:
|
||
return
|
||
|
||
from run_agent import AIAgent
|
||
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
||
if not runtime_kwargs.get("api_key"):
|
||
return
|
||
|
||
# Resolve model from config — AIAgent's default is OpenRouter-
|
||
# formatted ("anthropic/claude-opus-4.6") which fails when the
|
||
# active provider is openai-codex.
|
||
model = _resolve_gateway_model()
|
||
|
||
tmp_agent = AIAgent(
|
||
**runtime_kwargs,
|
||
model=model,
|
||
max_iterations=8,
|
||
quiet_mode=True,
|
||
enabled_toolsets=["memory", "skills"],
|
||
session_id=old_session_id,
|
||
honcho_session_key=honcho_session_key,
|
||
)
|
||
# Fully silence the flush agent — quiet_mode only suppresses init
|
||
# messages; tool call output still leaks to the terminal through
|
||
# _safe_print → _print_fn. Set a no-op to prevent that.
|
||
tmp_agent._print_fn = lambda *a, **kw: None
|
||
|
||
# Build conversation history from transcript
|
||
msgs = [
|
||
{"role": m.get("role"), "content": m.get("content")}
|
||
for m in history
|
||
if m.get("role") in ("user", "assistant") and m.get("content")
|
||
]
|
||
|
||
# Read live memory state from disk so the flush agent can see
|
||
# what's already saved and avoid overwriting newer entries.
|
||
_current_memory = ""
|
||
try:
|
||
from tools.memory_tool import MEMORY_DIR
|
||
for fname, label in [
|
||
("MEMORY.md", "MEMORY (your personal notes)"),
|
||
("USER.md", "USER PROFILE (who the user is)"),
|
||
]:
|
||
fpath = MEMORY_DIR / fname
|
||
if fpath.exists():
|
||
content = fpath.read_text(encoding="utf-8").strip()
|
||
if content:
|
||
_current_memory += f"\n\n## Current {label}:\n{content}"
|
||
except Exception:
|
||
pass # Non-fatal — flush still works, just without the guard
|
||
|
||
# Give the agent a real turn to think about what to save
|
||
flush_prompt = (
|
||
"[System: This session is about to be automatically reset due to "
|
||
"inactivity or a scheduled daily reset. The conversation context "
|
||
"will be cleared after this turn.\n\n"
|
||
"Review the conversation above and:\n"
|
||
"1. Save any important facts, preferences, or decisions to memory "
|
||
"(user profile or your notes) that would be useful in future sessions.\n"
|
||
"2. If you discovered a reusable workflow or solved a non-trivial "
|
||
"problem, consider saving it as a skill.\n"
|
||
"3. If nothing is worth saving, that's fine — just skip.\n\n"
|
||
)
|
||
|
||
if _current_memory:
|
||
flush_prompt += (
|
||
"IMPORTANT — here is the current live state of memory. Other "
|
||
"sessions, cron jobs, or the user may have updated it since this "
|
||
"conversation ended. Do NOT overwrite or remove entries unless "
|
||
"the conversation above reveals something that genuinely "
|
||
"supersedes them. Only add new information that is not already "
|
||
"captured below."
|
||
f"{_current_memory}\n\n"
|
||
)
|
||
|
||
flush_prompt += (
|
||
"Do NOT respond to the user. Just use the memory and skill_manage "
|
||
"tools if needed, then stop.]"
|
||
)
|
||
|
||
tmp_agent.run_conversation(
|
||
user_message=flush_prompt,
|
||
conversation_history=msgs,
|
||
sync_honcho=False,
|
||
)
|
||
logger.info("Pre-reset memory flush completed for session %s", old_session_id)
|
||
# Flush any queued Honcho writes before the session is dropped
|
||
if getattr(tmp_agent, '_honcho', None):
|
||
try:
|
||
tmp_agent._honcho.shutdown()
|
||
except Exception:
|
||
pass
|
||
except Exception as e:
|
||
logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
|
||
|
||
async def _async_flush_memories(
|
||
self,
|
||
old_session_id: str,
|
||
honcho_session_key: Optional[str] = None,
|
||
):
|
||
"""Run the sync memory flush in a thread pool so it won't block the event loop."""
|
||
loop = asyncio.get_event_loop()
|
||
await loop.run_in_executor(
|
||
None,
|
||
self._flush_memories_for_session,
|
||
old_session_id,
|
||
honcho_session_key,
|
||
)
|
||
|
||
@property
|
||
def should_exit_cleanly(self) -> bool:
|
||
return self._exit_cleanly
|
||
|
||
@property
|
||
def should_exit_with_failure(self) -> bool:
|
||
return self._exit_with_failure
|
||
|
||
@property
|
||
def exit_reason(self) -> Optional[str]:
|
||
return self._exit_reason
|
||
|
||
def _session_key_for_source(self, source: SessionSource) -> str:
|
||
"""Resolve the current session key for a source, honoring gateway config when available."""
|
||
if hasattr(self, "session_store") and self.session_store is not None:
|
||
try:
|
||
session_key = self.session_store._generate_session_key(source)
|
||
if isinstance(session_key, str) and session_key:
|
||
return session_key
|
||
except Exception:
|
||
pass
|
||
config = getattr(self, "config", None)
|
||
return build_session_key(
|
||
source,
|
||
group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
|
||
)
|
||
|
||
def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
|
||
from agent.smart_model_routing import resolve_turn_route
|
||
|
||
primary = {
|
||
"model": model,
|
||
"api_key": runtime_kwargs.get("api_key"),
|
||
"base_url": runtime_kwargs.get("base_url"),
|
||
"provider": runtime_kwargs.get("provider"),
|
||
"api_mode": runtime_kwargs.get("api_mode"),
|
||
"command": runtime_kwargs.get("command"),
|
||
"args": list(runtime_kwargs.get("args") or []),
|
||
}
|
||
return resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary)
|
||
|
||
async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None:
|
||
"""React to an adapter failure after startup.
|
||
|
||
If the error is retryable (e.g. network blip, DNS failure), queue the
|
||
platform for background reconnection instead of giving up permanently.
|
||
"""
|
||
logger.error(
|
||
"Fatal %s adapter error (%s): %s",
|
||
adapter.platform.value,
|
||
adapter.fatal_error_code or "unknown",
|
||
adapter.fatal_error_message or "unknown error",
|
||
)
|
||
|
||
existing = self.adapters.get(adapter.platform)
|
||
if existing is adapter:
|
||
try:
|
||
await adapter.disconnect()
|
||
finally:
|
||
self.adapters.pop(adapter.platform, None)
|
||
self.delivery_router.adapters = self.adapters
|
||
|
||
# Queue retryable failures for background reconnection
|
||
if adapter.fatal_error_retryable:
|
||
platform_config = self.config.platforms.get(adapter.platform)
|
||
if platform_config and adapter.platform not in self._failed_platforms:
|
||
self._failed_platforms[adapter.platform] = {
|
||
"config": platform_config,
|
||
"attempts": 0,
|
||
"next_retry": time.monotonic() + 30,
|
||
}
|
||
logger.info(
|
||
"%s queued for background reconnection",
|
||
adapter.platform.value,
|
||
)
|
||
|
||
if not self.adapters and not self._failed_platforms:
|
||
self._exit_reason = adapter.fatal_error_message or "All messaging adapters disconnected"
|
||
if adapter.fatal_error_retryable:
|
||
self._exit_with_failure = True
|
||
logger.error("No connected messaging platforms remain. Shutting down gateway for service restart.")
|
||
else:
|
||
logger.error("No connected messaging platforms remain. Shutting down gateway cleanly.")
|
||
await self.stop()
|
||
elif not self.adapters and self._failed_platforms:
|
||
logger.warning(
|
||
"No connected messaging platforms remain, but %d platform(s) queued for reconnection",
|
||
len(self._failed_platforms),
|
||
)
|
||
|
||
def _request_clean_exit(self, reason: str) -> None:
|
||
self._exit_cleanly = True
|
||
self._exit_reason = reason
|
||
self._shutdown_event.set()
|
||
|
||
@staticmethod
|
||
def _load_prefill_messages() -> List[Dict[str, Any]]:
|
||
"""Load ephemeral prefill messages from config or env var.
|
||
|
||
Checks HERMES_PREFILL_MESSAGES_FILE env var first, then falls back to
|
||
the prefill_messages_file key in ~/.hermes/config.yaml.
|
||
Relative paths are resolved from ~/.hermes/.
|
||
"""
|
||
import json as _json
|
||
file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
|
||
if not file_path:
|
||
try:
|
||
import yaml as _y
|
||
cfg_path = _hermes_home / "config.yaml"
|
||
if cfg_path.exists():
|
||
with open(cfg_path, encoding="utf-8") as _f:
|
||
cfg = _y.safe_load(_f) or {}
|
||
file_path = cfg.get("prefill_messages_file", "")
|
||
except Exception:
|
||
pass
|
||
if not file_path:
|
||
return []
|
||
path = Path(file_path).expanduser()
|
||
if not path.is_absolute():
|
||
path = _hermes_home / path
|
||
if not path.exists():
|
||
logger.warning("Prefill messages file not found: %s", path)
|
||
return []
|
||
try:
|
||
with open(path, "r", encoding="utf-8") as f:
|
||
data = _json.load(f)
|
||
if not isinstance(data, list):
|
||
logger.warning("Prefill messages file must contain a JSON array: %s", path)
|
||
return []
|
||
return data
|
||
except Exception as e:
|
||
logger.warning("Failed to load prefill messages from %s: %s", path, e)
|
||
return []
|
||
|
||
@staticmethod
|
||
def _load_ephemeral_system_prompt() -> str:
|
||
"""Load ephemeral system prompt from config or env var.
|
||
|
||
Checks HERMES_EPHEMERAL_SYSTEM_PROMPT env var first, then falls back to
|
||
agent.system_prompt in ~/.hermes/config.yaml.
|
||
"""
|
||
prompt = os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "")
|
||
if prompt:
|
||
return prompt
|
||
try:
|
||
import yaml as _y
|
||
cfg_path = _hermes_home / "config.yaml"
|
||
if cfg_path.exists():
|
||
with open(cfg_path, encoding="utf-8") as _f:
|
||
cfg = _y.safe_load(_f) or {}
|
||
return (cfg.get("agent", {}).get("system_prompt", "") or "").strip()
|
||
except Exception:
|
||
pass
|
||
return ""
|
||
|
||
@staticmethod
|
||
def _load_reasoning_config() -> dict | None:
|
||
"""Load reasoning effort from config with env fallback.
|
||
|
||
Checks agent.reasoning_effort in config.yaml first, then
|
||
HERMES_REASONING_EFFORT as a fallback. Valid: "xhigh", "high",
|
||
"medium", "low", "minimal", "none". Returns None to use default
|
||
(medium).
|
||
"""
|
||
from hermes_constants import parse_reasoning_effort
|
||
effort = ""
|
||
try:
|
||
import yaml as _y
|
||
cfg_path = _hermes_home / "config.yaml"
|
||
if cfg_path.exists():
|
||
with open(cfg_path, encoding="utf-8") as _f:
|
||
cfg = _y.safe_load(_f) or {}
|
||
effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
|
||
except Exception:
|
||
pass
|
||
if not effort:
|
||
effort = os.getenv("HERMES_REASONING_EFFORT", "")
|
||
result = parse_reasoning_effort(effort)
|
||
if effort and effort.strip() and result is None:
|
||
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
|
||
return result
|
||
|
||
@staticmethod
|
||
def _load_show_reasoning() -> bool:
|
||
"""Load show_reasoning toggle from config.yaml display section."""
|
||
try:
|
||
import yaml as _y
|
||
cfg_path = _hermes_home / "config.yaml"
|
||
if cfg_path.exists():
|
||
with open(cfg_path, encoding="utf-8") as _f:
|
||
cfg = _y.safe_load(_f) or {}
|
||
return bool(cfg.get("display", {}).get("show_reasoning", False))
|
||
except Exception:
|
||
pass
|
||
return False
|
||
|
||
@staticmethod
|
||
def _load_background_notifications_mode() -> str:
|
||
"""Load background process notification mode from config or env var.
|
||
|
||
Modes:
|
||
- ``all`` — push running-output updates *and* the final message (default)
|
||
- ``result`` — only the final completion message (regardless of exit code)
|
||
- ``error`` — only the final message when exit code is non-zero
|
||
- ``off`` — no watcher messages at all
|
||
"""
|
||
mode = os.getenv("HERMES_BACKGROUND_NOTIFICATIONS", "")
|
||
if not mode:
|
||
try:
|
||
import yaml as _y
|
||
cfg_path = _hermes_home / "config.yaml"
|
||
if cfg_path.exists():
|
||
with open(cfg_path, encoding="utf-8") as _f:
|
||
cfg = _y.safe_load(_f) or {}
|
||
raw = cfg.get("display", {}).get("background_process_notifications")
|
||
if raw is False:
|
||
mode = "off"
|
||
elif raw not in (None, ""):
|
||
mode = str(raw)
|
||
except Exception:
|
||
pass
|
||
mode = (mode or "all").strip().lower()
|
||
valid = {"all", "result", "error", "off"}
|
||
if mode not in valid:
|
||
logger.warning(
|
||
"Unknown background_process_notifications '%s', defaulting to 'all'",
|
||
mode,
|
||
)
|
||
return "all"
|
||
return mode
|
||
|
||
@staticmethod
|
||
def _load_provider_routing() -> dict:
|
||
"""Load OpenRouter provider routing preferences from config.yaml."""
|
||
try:
|
||
import yaml as _y
|
||
cfg_path = _hermes_home / "config.yaml"
|
||
if cfg_path.exists():
|
||
with open(cfg_path, encoding="utf-8") as _f:
|
||
cfg = _y.safe_load(_f) or {}
|
||
return cfg.get("provider_routing", {}) or {}
|
||
except Exception:
|
||
pass
|
||
return {}
|
||
|
||
@staticmethod
|
||
def _load_fallback_model() -> dict | None:
|
||
"""Load fallback model config from config.yaml.
|
||
|
||
Returns a dict with 'provider' and 'model' keys, or None if
|
||
not configured / both fields empty.
|
||
"""
|
||
try:
|
||
import yaml as _y
|
||
cfg_path = _hermes_home / "config.yaml"
|
||
if cfg_path.exists():
|
||
with open(cfg_path, encoding="utf-8") as _f:
|
||
cfg = _y.safe_load(_f) or {}
|
||
fb = cfg.get("fallback_model", {}) or {}
|
||
if fb.get("provider") and fb.get("model"):
|
||
return fb
|
||
except Exception:
|
||
pass
|
||
return None
|
||
|
||
@staticmethod
|
||
def _load_smart_model_routing() -> dict:
|
||
"""Load optional smart cheap-vs-strong model routing config."""
|
||
try:
|
||
import yaml as _y
|
||
cfg_path = _hermes_home / "config.yaml"
|
||
if cfg_path.exists():
|
||
with open(cfg_path, encoding="utf-8") as _f:
|
||
cfg = _y.safe_load(_f) or {}
|
||
return cfg.get("smart_model_routing", {}) or {}
|
||
except Exception:
|
||
pass
|
||
return {}
|
||
|
||
async def start(self) -> bool:
|
||
"""
|
||
Start the gateway and all configured platform adapters.
|
||
|
||
Returns True if at least one adapter connected successfully.
|
||
"""
|
||
logger.info("Starting Hermes Gateway...")
|
||
logger.info("Session storage: %s", self.config.sessions_dir)
|
||
try:
|
||
from gateway.status import write_runtime_status
|
||
write_runtime_status(gateway_state="starting", exit_reason=None)
|
||
except Exception:
|
||
pass
|
||
|
||
# Warn if no user allowlists are configured and open access is not opted in
|
||
_any_allowlist = any(
|
||
os.getenv(v)
|
||
for v in ("TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
|
||
"WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
|
||
"SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
|
||
"EMAIL_ALLOWED_USERS",
|
||
"SMS_ALLOWED_USERS", "MATTERMOST_ALLOWED_USERS",
|
||
"MATRIX_ALLOWED_USERS", "DINGTALK_ALLOWED_USERS",
|
||
"GATEWAY_ALLOWED_USERS")
|
||
)
|
||
_allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes") or any(
|
||
os.getenv(v, "").lower() in ("true", "1", "yes")
|
||
for v in ("TELEGRAM_ALLOW_ALL_USERS", "DISCORD_ALLOW_ALL_USERS",
|
||
"WHATSAPP_ALLOW_ALL_USERS", "SLACK_ALLOW_ALL_USERS",
|
||
"SIGNAL_ALLOW_ALL_USERS", "EMAIL_ALLOW_ALL_USERS",
|
||
"SMS_ALLOW_ALL_USERS", "MATTERMOST_ALLOW_ALL_USERS",
|
||
"MATRIX_ALLOW_ALL_USERS", "DINGTALK_ALLOW_ALL_USERS")
|
||
)
|
||
if not _any_allowlist and not _allow_all:
|
||
logger.warning(
|
||
"No user allowlists configured. All unauthorized users will be denied. "
|
||
"Set GATEWAY_ALLOW_ALL_USERS=true in ~/.hermes/.env to allow open access, "
|
||
"or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)."
|
||
)
|
||
|
||
# Discover and load event hooks
|
||
self.hooks.discover_and_load()
|
||
|
||
# Recover background processes from checkpoint (crash recovery)
|
||
try:
|
||
from tools.process_registry import process_registry
|
||
recovered = process_registry.recover_from_checkpoint()
|
||
if recovered:
|
||
logger.info("Recovered %s background process(es) from previous run", recovered)
|
||
except Exception as e:
|
||
logger.warning("Process checkpoint recovery: %s", e)
|
||
|
||
connected_count = 0
|
||
enabled_platform_count = 0
|
||
startup_nonretryable_errors: list[str] = []
|
||
startup_retryable_errors: list[str] = []
|
||
|
||
# Initialize and connect each configured platform
|
||
for platform, platform_config in self.config.platforms.items():
|
||
if not platform_config.enabled:
|
||
continue
|
||
enabled_platform_count += 1
|
||
|
||
adapter = self._create_adapter(platform, platform_config)
|
||
if not adapter:
|
||
logger.warning("No adapter available for %s", platform.value)
|
||
continue
|
||
|
||
# Set up message + fatal error handlers
|
||
adapter.set_message_handler(self._handle_message)
|
||
adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
|
||
|
||
# Try to connect
|
||
logger.info("Connecting to %s...", platform.value)
|
||
try:
|
||
success = await adapter.connect()
|
||
if success:
|
||
self.adapters[platform] = adapter
|
||
self._sync_voice_mode_state_to_adapter(adapter)
|
||
connected_count += 1
|
||
logger.info("✓ %s connected", platform.value)
|
||
else:
|
||
logger.warning("✗ %s failed to connect", platform.value)
|
||
if adapter.has_fatal_error:
|
||
target = (
|
||
startup_retryable_errors
|
||
if adapter.fatal_error_retryable
|
||
else startup_nonretryable_errors
|
||
)
|
||
target.append(
|
||
f"{platform.value}: {adapter.fatal_error_message}"
|
||
)
|
||
# Queue for reconnection if the error is retryable
|
||
if adapter.fatal_error_retryable:
|
||
self._failed_platforms[platform] = {
|
||
"config": platform_config,
|
||
"attempts": 1,
|
||
"next_retry": time.monotonic() + 30,
|
||
}
|
||
else:
|
||
startup_retryable_errors.append(
|
||
f"{platform.value}: failed to connect"
|
||
)
|
||
# No fatal error info means likely a transient issue — queue for retry
|
||
self._failed_platforms[platform] = {
|
||
"config": platform_config,
|
||
"attempts": 1,
|
||
"next_retry": time.monotonic() + 30,
|
||
}
|
||
except Exception as e:
|
||
logger.error("✗ %s error: %s", platform.value, e)
|
||
startup_retryable_errors.append(f"{platform.value}: {e}")
|
||
# Unexpected exceptions are typically transient — queue for retry
|
||
self._failed_platforms[platform] = {
|
||
"config": platform_config,
|
||
"attempts": 1,
|
||
"next_retry": time.monotonic() + 30,
|
||
}
|
||
|
||
if connected_count == 0:
|
||
if startup_nonretryable_errors:
|
||
reason = "; ".join(startup_nonretryable_errors)
|
||
logger.error("Gateway hit a non-retryable startup conflict: %s", reason)
|
||
try:
|
||
from gateway.status import write_runtime_status
|
||
write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
|
||
except Exception:
|
||
pass
|
||
self._request_clean_exit(reason)
|
||
return True
|
||
if enabled_platform_count > 0:
|
||
reason = "; ".join(startup_retryable_errors) or "all configured messaging platforms failed to connect"
|
||
logger.error("Gateway failed to connect any configured messaging platform: %s", reason)
|
||
try:
|
||
from gateway.status import write_runtime_status
|
||
write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
|
||
except Exception:
|
||
pass
|
||
return False
|
||
logger.warning("No messaging platforms enabled.")
|
||
logger.info("Gateway will continue running for cron job execution.")
|
||
|
||
# Update delivery router with adapters
|
||
self.delivery_router.adapters = self.adapters
|
||
|
||
self._running = True
|
||
try:
|
||
from gateway.status import write_runtime_status
|
||
write_runtime_status(gateway_state="running", exit_reason=None)
|
||
except Exception:
|
||
pass
|
||
|
||
# Emit gateway:startup hook
|
||
hook_count = len(self.hooks.loaded_hooks)
|
||
if hook_count:
|
||
logger.info("%s hook(s) loaded", hook_count)
|
||
await self.hooks.emit("gateway:startup", {
|
||
"platforms": [p.value for p in self.adapters.keys()],
|
||
})
|
||
|
||
if connected_count > 0:
|
||
logger.info("Gateway running with %s platform(s)", connected_count)
|
||
|
||
# Build initial channel directory for send_message name resolution
|
||
try:
|
||
from gateway.channel_directory import build_channel_directory
|
||
directory = build_channel_directory(self.adapters)
|
||
ch_count = sum(len(chs) for chs in directory.get("platforms", {}).values())
|
||
logger.info("Channel directory built: %d target(s)", ch_count)
|
||
except Exception as e:
|
||
logger.warning("Channel directory build failed: %s", e)
|
||
|
||
# Check if we're restarting after a /update command. If the update is
|
||
# still running, keep watching so we notify once it actually finishes.
|
||
notified = await self._send_update_notification()
|
||
if not notified and any(
|
||
path.exists()
|
||
for path in (
|
||
_hermes_home / ".update_pending.json",
|
||
_hermes_home / ".update_pending.claimed.json",
|
||
)
|
||
):
|
||
self._schedule_update_notification_watch()
|
||
|
||
# Drain any recovered process watchers (from crash recovery checkpoint)
|
||
try:
|
||
from tools.process_registry import process_registry
|
||
while process_registry.pending_watchers:
|
||
watcher = process_registry.pending_watchers.pop(0)
|
||
asyncio.create_task(self._run_process_watcher(watcher))
|
||
logger.info("Resumed watcher for recovered process %s", watcher.get("session_id"))
|
||
except Exception as e:
|
||
logger.error("Recovered watcher setup error: %s", e)
|
||
|
||
# Start background session expiry watcher for proactive memory flushing
|
||
asyncio.create_task(self._session_expiry_watcher())
|
||
|
||
# Start background reconnection watcher for platforms that failed at startup
|
||
if self._failed_platforms:
|
||
logger.info(
|
||
"Starting reconnection watcher for %d failed platform(s): %s",
|
||
len(self._failed_platforms),
|
||
", ".join(p.value for p in self._failed_platforms),
|
||
)
|
||
asyncio.create_task(self._platform_reconnect_watcher())
|
||
|
||
logger.info("Press Ctrl+C to stop")
|
||
|
||
return True
|
||
|
||
async def _session_expiry_watcher(self, interval: int = 300):
|
||
"""Background task that proactively flushes memories for expired sessions.
|
||
|
||
Runs every `interval` seconds (default 5 min). For each session that
|
||
has expired according to its reset policy, flushes memories in a thread
|
||
pool and marks the session so it won't be flushed again.
|
||
|
||
This means memories are already saved by the time the user sends their
|
||
next message, so there's no blocking delay.
|
||
"""
|
||
await asyncio.sleep(60) # initial delay — let the gateway fully start
|
||
while self._running:
|
||
try:
|
||
self.session_store._ensure_loaded()
|
||
for key, entry in list(self.session_store._entries.items()):
|
||
if entry.session_id in self.session_store._pre_flushed_sessions:
|
||
continue # already flushed this session
|
||
if not self.session_store._is_session_expired(entry):
|
||
continue # session still active
|
||
# Session has expired — flush memories in the background
|
||
logger.info(
|
||
"Session %s expired (key=%s), flushing memories proactively",
|
||
entry.session_id, key,
|
||
)
|
||
try:
|
||
await self._async_flush_memories(entry.session_id, key)
|
||
self._shutdown_gateway_honcho(key)
|
||
self.session_store._pre_flushed_sessions.add(entry.session_id)
|
||
except Exception as e:
|
||
logger.debug("Proactive memory flush failed for %s: %s", entry.session_id, e)
|
||
except Exception as e:
|
||
logger.debug("Session expiry watcher error: %s", e)
|
||
# Sleep in small increments so we can stop quickly
|
||
for _ in range(interval):
|
||
if not self._running:
|
||
break
|
||
await asyncio.sleep(1)
|
||
|
||
async def _platform_reconnect_watcher(self) -> None:
|
||
"""Background task that periodically retries connecting failed platforms.
|
||
|
||
Uses exponential backoff: 30s → 60s → 120s → 240s → 300s (cap).
|
||
Stops retrying a platform after 20 failed attempts or if the error
|
||
is non-retryable (e.g. bad auth token).
|
||
"""
|
||
_MAX_ATTEMPTS = 20
|
||
_BACKOFF_CAP = 300 # 5 minutes max between retries
|
||
|
||
await asyncio.sleep(10) # initial delay — let startup finish
|
||
while self._running:
|
||
if not self._failed_platforms:
|
||
# Nothing to reconnect — sleep and check again
|
||
for _ in range(30):
|
||
if not self._running:
|
||
return
|
||
await asyncio.sleep(1)
|
||
continue
|
||
|
||
now = time.monotonic()
|
||
for platform in list(self._failed_platforms.keys()):
|
||
if not self._running:
|
||
return
|
||
info = self._failed_platforms[platform]
|
||
if now < info["next_retry"]:
|
||
continue # not time yet
|
||
|
||
if info["attempts"] >= _MAX_ATTEMPTS:
|
||
logger.warning(
|
||
"Giving up reconnecting %s after %d attempts",
|
||
platform.value, info["attempts"],
|
||
)
|
||
del self._failed_platforms[platform]
|
||
continue
|
||
|
||
platform_config = info["config"]
|
||
attempt = info["attempts"] + 1
|
||
logger.info(
|
||
"Reconnecting %s (attempt %d/%d)...",
|
||
platform.value, attempt, _MAX_ATTEMPTS,
|
||
)
|
||
|
||
try:
|
||
adapter = self._create_adapter(platform, platform_config)
|
||
if not adapter:
|
||
logger.warning(
|
||
"Reconnect %s: adapter creation returned None, removing from retry queue",
|
||
platform.value,
|
||
)
|
||
del self._failed_platforms[platform]
|
||
continue
|
||
|
||
adapter.set_message_handler(self._handle_message)
|
||
adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
|
||
|
||
success = await adapter.connect()
|
||
if success:
|
||
self.adapters[platform] = adapter
|
||
self._sync_voice_mode_state_to_adapter(adapter)
|
||
self.delivery_router.adapters = self.adapters
|
||
del self._failed_platforms[platform]
|
||
logger.info("✓ %s reconnected successfully", platform.value)
|
||
|
||
# Rebuild channel directory with the new adapter
|
||
try:
|
||
from gateway.channel_directory import build_channel_directory
|
||
build_channel_directory(self.adapters)
|
||
except Exception:
|
||
pass
|
||
else:
|
||
# Check if the failure is non-retryable
|
||
if adapter.has_fatal_error and not adapter.fatal_error_retryable:
|
||
logger.warning(
|
||
"Reconnect %s: non-retryable error (%s), removing from retry queue",
|
||
platform.value, adapter.fatal_error_message,
|
||
)
|
||
del self._failed_platforms[platform]
|
||
else:
|
||
backoff = min(30 * (2 ** (attempt - 1)), _BACKOFF_CAP)
|
||
info["attempts"] = attempt
|
||
info["next_retry"] = time.monotonic() + backoff
|
||
logger.info(
|
||
"Reconnect %s failed, next retry in %ds",
|
||
platform.value, backoff,
|
||
)
|
||
except Exception as e:
|
||
backoff = min(30 * (2 ** (attempt - 1)), _BACKOFF_CAP)
|
||
info["attempts"] = attempt
|
||
info["next_retry"] = time.monotonic() + backoff
|
||
logger.warning(
|
||
"Reconnect %s error: %s, next retry in %ds",
|
||
platform.value, e, backoff,
|
||
)
|
||
|
||
# Check every 10 seconds for platforms that need reconnection
|
||
for _ in range(10):
|
||
if not self._running:
|
||
return
|
||
await asyncio.sleep(1)
|
||
|
||
async def stop(self) -> None:
|
||
"""Stop the gateway and disconnect all adapters."""
|
||
logger.info("Stopping gateway...")
|
||
self._running = False
|
||
|
||
for session_key, agent in list(self._running_agents.items()):
|
||
if agent is _AGENT_PENDING_SENTINEL:
|
||
continue
|
||
try:
|
||
agent.interrupt("Gateway shutting down")
|
||
logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
|
||
except Exception as e:
|
||
logger.debug("Failed interrupting agent during shutdown: %s", e)
|
||
|
||
for platform, adapter in list(self.adapters.items()):
|
||
try:
|
||
await adapter.cancel_background_tasks()
|
||
except Exception as e:
|
||
logger.debug("✗ %s background-task cancel error: %s", platform.value, e)
|
||
try:
|
||
await adapter.disconnect()
|
||
logger.info("✓ %s disconnected", platform.value)
|
||
except Exception as e:
|
||
logger.error("✗ %s disconnect error: %s", platform.value, e)
|
||
|
||
# Cancel any pending background tasks
|
||
for _task in list(self._background_tasks):
|
||
_task.cancel()
|
||
self._background_tasks.clear()
|
||
|
||
self.adapters.clear()
|
||
self._running_agents.clear()
|
||
self._pending_messages.clear()
|
||
self._pending_approvals.clear()
|
||
self._shutdown_all_gateway_honcho()
|
||
self._shutdown_event.set()
|
||
|
||
from gateway.status import remove_pid_file, write_runtime_status
|
||
remove_pid_file()
|
||
try:
|
||
write_runtime_status(gateway_state="stopped", exit_reason=self._exit_reason)
|
||
except Exception:
|
||
pass
|
||
|
||
logger.info("Gateway stopped")
|
||
|
||
async def wait_for_shutdown(self) -> None:
|
||
"""Wait for shutdown signal."""
|
||
await self._shutdown_event.wait()
|
||
|
||
def _create_adapter(
|
||
self,
|
||
platform: Platform,
|
||
config: Any
|
||
) -> Optional[BasePlatformAdapter]:
|
||
"""Create the appropriate adapter for a platform."""
|
||
if hasattr(config, "extra") and isinstance(config.extra, dict):
|
||
config.extra.setdefault(
|
||
"group_sessions_per_user",
|
||
self.config.group_sessions_per_user,
|
||
)
|
||
|
||
if platform == Platform.TELEGRAM:
|
||
from gateway.platforms.telegram import TelegramAdapter, check_telegram_requirements
|
||
if not check_telegram_requirements():
|
||
logger.warning("Telegram: python-telegram-bot not installed")
|
||
return None
|
||
return TelegramAdapter(config)
|
||
|
||
elif platform == Platform.DISCORD:
|
||
from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
|
||
if not check_discord_requirements():
|
||
logger.warning("Discord: discord.py not installed")
|
||
return None
|
||
return DiscordAdapter(config)
|
||
|
||
elif platform == Platform.WHATSAPP:
|
||
from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
|
||
if not check_whatsapp_requirements():
|
||
logger.warning("WhatsApp: Node.js not installed or bridge not configured")
|
||
return None
|
||
return WhatsAppAdapter(config)
|
||
|
||
elif platform == Platform.SLACK:
|
||
from gateway.platforms.slack import SlackAdapter, check_slack_requirements
|
||
if not check_slack_requirements():
|
||
logger.warning("Slack: slack-bolt not installed. Run: pip install 'hermes-agent[slack]'")
|
||
return None
|
||
return SlackAdapter(config)
|
||
|
||
elif platform == Platform.SIGNAL:
|
||
from gateway.platforms.signal import SignalAdapter, check_signal_requirements
|
||
if not check_signal_requirements():
|
||
logger.warning("Signal: SIGNAL_HTTP_URL or SIGNAL_ACCOUNT not configured")
|
||
return None
|
||
return SignalAdapter(config)
|
||
|
||
elif platform == Platform.HOMEASSISTANT:
|
||
from gateway.platforms.homeassistant import HomeAssistantAdapter, check_ha_requirements
|
||
if not check_ha_requirements():
|
||
logger.warning("HomeAssistant: aiohttp not installed or HASS_TOKEN not set")
|
||
return None
|
||
return HomeAssistantAdapter(config)
|
||
|
||
elif platform == Platform.EMAIL:
|
||
from gateway.platforms.email import EmailAdapter, check_email_requirements
|
||
if not check_email_requirements():
|
||
logger.warning("Email: EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_IMAP_HOST, or EMAIL_SMTP_HOST not set")
|
||
return None
|
||
return EmailAdapter(config)
|
||
|
||
elif platform == Platform.SMS:
|
||
from gateway.platforms.sms import SmsAdapter, check_sms_requirements
|
||
if not check_sms_requirements():
|
||
logger.warning("SMS: aiohttp not installed or TWILIO_ACCOUNT_SID/TWILIO_AUTH_TOKEN not set")
|
||
return None
|
||
return SmsAdapter(config)
|
||
|
||
elif platform == Platform.DINGTALK:
|
||
from gateway.platforms.dingtalk import DingTalkAdapter, check_dingtalk_requirements
|
||
if not check_dingtalk_requirements():
|
||
logger.warning("DingTalk: dingtalk-stream not installed or DINGTALK_CLIENT_ID/SECRET not set")
|
||
return None
|
||
return DingTalkAdapter(config)
|
||
|
||
elif platform == Platform.MATTERMOST:
|
||
from gateway.platforms.mattermost import MattermostAdapter, check_mattermost_requirements
|
||
if not check_mattermost_requirements():
|
||
logger.warning("Mattermost: MATTERMOST_TOKEN or MATTERMOST_URL not set, or aiohttp missing")
|
||
return None
|
||
return MattermostAdapter(config)
|
||
|
||
elif platform == Platform.MATRIX:
|
||
from gateway.platforms.matrix import MatrixAdapter, check_matrix_requirements
|
||
if not check_matrix_requirements():
|
||
logger.warning("Matrix: matrix-nio not installed or credentials not set. Run: pip install 'matrix-nio[e2e]'")
|
||
return None
|
||
return MatrixAdapter(config)
|
||
|
||
elif platform == Platform.API_SERVER:
|
||
from gateway.platforms.api_server import APIServerAdapter, check_api_server_requirements
|
||
if not check_api_server_requirements():
|
||
logger.warning("API Server: aiohttp not installed")
|
||
return None
|
||
return APIServerAdapter(config)
|
||
|
||
elif platform == Platform.WEBHOOK:
|
||
from gateway.platforms.webhook import WebhookAdapter, check_webhook_requirements
|
||
if not check_webhook_requirements():
|
||
logger.warning("Webhook: aiohttp not installed")
|
||
return None
|
||
adapter = WebhookAdapter(config)
|
||
adapter.gateway_runner = self # For cross-platform delivery
|
||
return adapter
|
||
|
||
return None
|
||
|
||
def _is_user_authorized(self, source: SessionSource) -> bool:
|
||
"""
|
||
Check if a user is authorized to use the bot.
|
||
|
||
Checks in order:
|
||
1. Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
|
||
2. Environment variable allowlists (TELEGRAM_ALLOWED_USERS, etc.)
|
||
3. DM pairing approved list
|
||
4. Global allow-all (GATEWAY_ALLOW_ALL_USERS=true)
|
||
5. Default: deny
|
||
"""
|
||
# Home Assistant events are system-generated (state changes), not
|
||
# user-initiated messages. The HASS_TOKEN already authenticates the
|
||
# connection, so HA events are always authorized.
|
||
# Webhook events are authenticated via HMAC signature validation in
|
||
# the adapter itself — no user allowlist applies.
|
||
if source.platform in (Platform.HOMEASSISTANT, Platform.WEBHOOK):
|
||
return True
|
||
|
||
user_id = source.user_id
|
||
if not user_id:
|
||
return False
|
||
|
||
platform_env_map = {
|
||
Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
|
||
Platform.DISCORD: "DISCORD_ALLOWED_USERS",
|
||
Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
|
||
Platform.SLACK: "SLACK_ALLOWED_USERS",
|
||
Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
|
||
Platform.EMAIL: "EMAIL_ALLOWED_USERS",
|
||
Platform.SMS: "SMS_ALLOWED_USERS",
|
||
Platform.MATTERMOST: "MATTERMOST_ALLOWED_USERS",
|
||
Platform.MATRIX: "MATRIX_ALLOWED_USERS",
|
||
Platform.DINGTALK: "DINGTALK_ALLOWED_USERS",
|
||
}
|
||
platform_allow_all_map = {
|
||
Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
|
||
Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS",
|
||
Platform.WHATSAPP: "WHATSAPP_ALLOW_ALL_USERS",
|
||
Platform.SLACK: "SLACK_ALLOW_ALL_USERS",
|
||
Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS",
|
||
Platform.EMAIL: "EMAIL_ALLOW_ALL_USERS",
|
||
Platform.SMS: "SMS_ALLOW_ALL_USERS",
|
||
Platform.MATTERMOST: "MATTERMOST_ALLOW_ALL_USERS",
|
||
Platform.MATRIX: "MATRIX_ALLOW_ALL_USERS",
|
||
Platform.DINGTALK: "DINGTALK_ALLOW_ALL_USERS",
|
||
}
|
||
|
||
# Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
|
||
platform_allow_all_var = platform_allow_all_map.get(source.platform, "")
|
||
if platform_allow_all_var and os.getenv(platform_allow_all_var, "").lower() in ("true", "1", "yes"):
|
||
return True
|
||
|
||
# Check pairing store (always checked, regardless of allowlists)
|
||
platform_name = source.platform.value if source.platform else ""
|
||
if self.pairing_store.is_approved(platform_name, user_id):
|
||
return True
|
||
|
||
# Check platform-specific and global allowlists
|
||
platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
|
||
global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()
|
||
|
||
if not platform_allowlist and not global_allowlist:
|
||
# No allowlists configured -- check global allow-all flag
|
||
return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")
|
||
|
||
# Check if user is in any allowlist
|
||
allowed_ids = set()
|
||
if platform_allowlist:
|
||
allowed_ids.update(uid.strip() for uid in platform_allowlist.split(",") if uid.strip())
|
||
if global_allowlist:
|
||
allowed_ids.update(uid.strip() for uid in global_allowlist.split(",") if uid.strip())
|
||
|
||
# WhatsApp JIDs have @s.whatsapp.net suffix — strip it for comparison
|
||
check_ids = {user_id}
|
||
if "@" in user_id:
|
||
check_ids.add(user_id.split("@")[0])
|
||
return bool(check_ids & allowed_ids)
|
||
|
||
def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str:
|
||
"""Return how unauthorized DMs should be handled for a platform."""
|
||
config = getattr(self, "config", None)
|
||
if config and hasattr(config, "get_unauthorized_dm_behavior"):
|
||
return config.get_unauthorized_dm_behavior(platform)
|
||
return "pair"
|
||
|
||
async def _handle_message(self, event: MessageEvent) -> Optional[str]:
|
||
"""
|
||
Handle an incoming message from any platform.
|
||
|
||
This is the core message processing pipeline:
|
||
1. Check user authorization
|
||
2. Check for commands (/new, /reset, etc.)
|
||
3. Check for running agent and interrupt if needed
|
||
4. Get or create session
|
||
5. Build context for agent
|
||
6. Run agent conversation
|
||
7. Return response
|
||
"""
|
||
source = event.source
|
||
|
||
# Check if user is authorized
|
||
if not self._is_user_authorized(source):
|
||
logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
|
||
# In DMs: offer pairing code. In groups: silently ignore.
|
||
if source.chat_type == "dm" and self._get_unauthorized_dm_behavior(source.platform) == "pair":
|
||
platform_name = source.platform.value if source.platform else "unknown"
|
||
code = self.pairing_store.generate_code(
|
||
platform_name, source.user_id, source.user_name or ""
|
||
)
|
||
if code:
|
||
adapter = self.adapters.get(source.platform)
|
||
if adapter:
|
||
await adapter.send(
|
||
source.chat_id,
|
||
f"Hi~ I don't recognize you yet!\n\n"
|
||
f"Here's your pairing code: `{code}`\n\n"
|
||
f"Ask the bot owner to run:\n"
|
||
f"`hermes pairing approve {platform_name} {code}`"
|
||
)
|
||
else:
|
||
adapter = self.adapters.get(source.platform)
|
||
if adapter:
|
||
await adapter.send(
|
||
source.chat_id,
|
||
"Too many pairing requests right now~ "
|
||
"Please try again later!"
|
||
)
|
||
return None
|
||
|
||
# PRIORITY handling when an agent is already running for this session.
|
||
# Default behavior is to interrupt immediately so user text/stop messages
|
||
# are handled with minimal latency.
|
||
#
|
||
# Special case: Telegram/photo bursts often arrive as multiple near-
|
||
# simultaneous updates. Do NOT interrupt for photo-only follow-ups here;
|
||
# let the adapter-level batching/queueing logic absorb them.
|
||
_quick_key = self._session_key_for_source(source)
|
||
if _quick_key in self._running_agents:
|
||
if event.get_command() == "status":
|
||
return await self._handle_status_command(event)
|
||
|
||
# Resolve the command once for all early-intercept checks below.
|
||
from hermes_cli.commands import resolve_command as _resolve_cmd_inner
|
||
_evt_cmd = event.get_command()
|
||
_cmd_def_inner = _resolve_cmd_inner(_evt_cmd) if _evt_cmd else None
|
||
|
||
# /stop must hard-kill the session when an agent is running.
|
||
# A soft interrupt (agent.interrupt()) doesn't help when the agent
|
||
# is truly hung — the executor thread is blocked and never checks
|
||
# _interrupt_requested. Force-clean _running_agents so the session
|
||
# is unlocked and subsequent messages are processed normally.
|
||
if _cmd_def_inner and _cmd_def_inner.name == "stop":
|
||
running_agent = self._running_agents.get(_quick_key)
|
||
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
|
||
running_agent.interrupt("Stop requested")
|
||
# Force-clean: remove the session lock regardless of agent state
|
||
adapter = self.adapters.get(source.platform)
|
||
if adapter and hasattr(adapter, 'get_pending_message'):
|
||
adapter.get_pending_message(_quick_key) # consume and discard
|
||
self._pending_messages.pop(_quick_key, None)
|
||
if _quick_key in self._running_agents:
|
||
del self._running_agents[_quick_key]
|
||
logger.info("HARD STOP for session %s — session lock released", _quick_key[:20])
|
||
return "⚡ Force-stopped. The session is unlocked — you can send a new message."
|
||
|
||
# /reset and /new must bypass the running-agent guard so they
|
||
# actually dispatch as commands instead of being queued as user
|
||
# text (which would be fed back to the agent with the same
|
||
# broken history — #2170). Interrupt the agent first, then
|
||
# clear the adapter's pending queue so the stale "/reset" text
|
||
# doesn't get re-processed as a user message after the
|
||
# interrupt completes.
|
||
if _cmd_def_inner and _cmd_def_inner.name == "new":
|
||
running_agent = self._running_agents.get(_quick_key)
|
||
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
|
||
running_agent.interrupt("Session reset requested")
|
||
# Clear any pending messages so the old text doesn't replay
|
||
adapter = self.adapters.get(source.platform)
|
||
if adapter and hasattr(adapter, 'get_pending_message'):
|
||
adapter.get_pending_message(_quick_key) # consume and discard
|
||
self._pending_messages.pop(_quick_key, None)
|
||
# Clean up the running agent entry so the reset handler
|
||
# doesn't think an agent is still active.
|
||
if _quick_key in self._running_agents:
|
||
del self._running_agents[_quick_key]
|
||
return await self._handle_reset_command(event)
|
||
|
||
# /queue <prompt> — queue without interrupting
|
||
if event.get_command() in ("queue", "q"):
|
||
queued_text = event.get_command_args().strip()
|
||
if not queued_text:
|
||
return "Usage: /queue <prompt>"
|
||
adapter = self.adapters.get(source.platform)
|
||
if adapter:
|
||
from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
|
||
queued_event = _ME(
|
||
text=queued_text,
|
||
message_type=_MT.TEXT,
|
||
source=event.source,
|
||
message_id=event.message_id,
|
||
)
|
||
adapter._pending_messages[_quick_key] = queued_event
|
||
return "Queued for the next turn."
|
||
|
||
if event.message_type == MessageType.PHOTO:
|
||
logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
|
||
adapter = self.adapters.get(source.platform)
|
||
if adapter:
|
||
# Reuse adapter queue semantics so photo bursts merge cleanly.
|
||
if _quick_key in adapter._pending_messages:
|
||
existing = adapter._pending_messages[_quick_key]
|
||
if getattr(existing, "message_type", None) == MessageType.PHOTO:
|
||
existing.media_urls.extend(event.media_urls)
|
||
existing.media_types.extend(event.media_types)
|
||
if event.text:
|
||
if not existing.text:
|
||
existing.text = event.text
|
||
elif event.text not in existing.text:
|
||
existing.text = f"{existing.text}\n\n{event.text}".strip()
|
||
else:
|
||
adapter._pending_messages[_quick_key] = event
|
||
else:
|
||
adapter._pending_messages[_quick_key] = event
|
||
return None
|
||
|
||
running_agent = self._running_agents.get(_quick_key)
|
||
if running_agent is _AGENT_PENDING_SENTINEL:
|
||
# Agent is being set up but not ready yet.
|
||
if event.get_command() == "stop":
|
||
# Force-clean the sentinel so the session is unlocked.
|
||
if _quick_key in self._running_agents:
|
||
del self._running_agents[_quick_key]
|
||
logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20])
|
||
return "⚡ Force-stopped. The agent was still starting — session unlocked."
|
||
# Queue the message so it will be picked up after the
|
||
# agent starts.
|
||
adapter = self.adapters.get(source.platform)
|
||
if adapter:
|
||
adapter._pending_messages[_quick_key] = event
|
||
return None
|
||
logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
|
||
running_agent.interrupt(event.text)
|
||
if _quick_key in self._pending_messages:
|
||
self._pending_messages[_quick_key] += "\n" + event.text
|
||
else:
|
||
self._pending_messages[_quick_key] = event.text
|
||
return None
|
||
|
||
# Check for commands
|
||
command = event.get_command()
|
||
|
||
# Emit command:* hook for any recognized slash command.
|
||
# GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
|
||
# in hermes_cli/commands.py — no hardcoded set to maintain here.
|
||
from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd
|
||
if command and command in GATEWAY_KNOWN_COMMANDS:
|
||
await self.hooks.emit(f"command:{command}", {
|
||
"platform": source.platform.value if source.platform else "",
|
||
"user_id": source.user_id,
|
||
"command": command,
|
||
"args": event.get_command_args().strip(),
|
||
})
|
||
|
||
# Resolve aliases to canonical name so dispatch only checks canonicals.
|
||
_cmd_def = _resolve_cmd(command) if command else None
|
||
canonical = _cmd_def.name if _cmd_def else command
|
||
|
||
if canonical == "new":
|
||
return await self._handle_reset_command(event)
|
||
|
||
if canonical == "help":
|
||
return await self._handle_help_command(event)
|
||
|
||
if canonical == "status":
|
||
return await self._handle_status_command(event)
|
||
|
||
if canonical == "stop":
|
||
return await self._handle_stop_command(event)
|
||
|
||
if canonical == "reasoning":
|
||
return await self._handle_reasoning_command(event)
|
||
|
||
if canonical == "verbose":
|
||
return await self._handle_verbose_command(event)
|
||
|
||
if canonical == "provider":
|
||
return await self._handle_provider_command(event)
|
||
|
||
if canonical == "personality":
|
||
return await self._handle_personality_command(event)
|
||
|
||
if canonical == "plan":
|
||
try:
|
||
from agent.skill_commands import build_plan_path, build_skill_invocation_message
|
||
|
||
user_instruction = event.get_command_args().strip()
|
||
plan_path = build_plan_path(user_instruction)
|
||
event.text = build_skill_invocation_message(
|
||
"/plan",
|
||
user_instruction,
|
||
task_id=_quick_key,
|
||
runtime_note=(
|
||
"Save the markdown plan with write_file to this exact relative path "
|
||
f"inside the active workspace/backend cwd: {plan_path}"
|
||
),
|
||
)
|
||
if not event.text:
|
||
return "Failed to load the bundled /plan skill."
|
||
canonical = None
|
||
except Exception as e:
|
||
logger.exception("Failed to prepare /plan command")
|
||
return f"Failed to enter plan mode: {e}"
|
||
|
||
if canonical == "retry":
|
||
return await self._handle_retry_command(event)
|
||
|
||
if canonical == "undo":
|
||
return await self._handle_undo_command(event)
|
||
|
||
if canonical == "sethome":
|
||
return await self._handle_set_home_command(event)
|
||
|
||
if canonical == "compress":
|
||
return await self._handle_compress_command(event)
|
||
|
||
if canonical == "usage":
|
||
return await self._handle_usage_command(event)
|
||
|
||
if canonical == "insights":
|
||
return await self._handle_insights_command(event)
|
||
|
||
if canonical == "reload-mcp":
|
||
return await self._handle_reload_mcp_command(event)
|
||
|
||
if canonical == "approve":
|
||
return await self._handle_approve_command(event)
|
||
|
||
if canonical == "deny":
|
||
return await self._handle_deny_command(event)
|
||
|
||
if canonical == "update":
|
||
return await self._handle_update_command(event)
|
||
|
||
if canonical == "title":
|
||
return await self._handle_title_command(event)
|
||
|
||
if canonical == "resume":
|
||
return await self._handle_resume_command(event)
|
||
|
||
if canonical == "rollback":
|
||
return await self._handle_rollback_command(event)
|
||
|
||
if canonical == "background":
|
||
return await self._handle_background_command(event)
|
||
|
||
if canonical == "voice":
|
||
return await self._handle_voice_command(event)
|
||
|
||
# User-defined quick commands (bypass agent loop, no LLM call)
|
||
if command:
|
||
if isinstance(self.config, dict):
|
||
quick_commands = self.config.get("quick_commands", {}) or {}
|
||
else:
|
||
quick_commands = getattr(self.config, "quick_commands", {}) or {}
|
||
if not isinstance(quick_commands, dict):
|
||
quick_commands = {}
|
||
if command in quick_commands:
|
||
qcmd = quick_commands[command]
|
||
if qcmd.get("type") == "exec":
|
||
exec_cmd = qcmd.get("command", "")
|
||
if exec_cmd:
|
||
try:
|
||
proc = await asyncio.create_subprocess_shell(
|
||
exec_cmd,
|
||
stdout=asyncio.subprocess.PIPE,
|
||
stderr=asyncio.subprocess.PIPE,
|
||
)
|
||
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
|
||
output = (stdout or stderr).decode().strip()
|
||
return output if output else "Command returned no output."
|
||
except asyncio.TimeoutError:
|
||
return "Quick command timed out (30s)."
|
||
except Exception as e:
|
||
return f"Quick command error: {e}"
|
||
else:
|
||
return f"Quick command '/{command}' has no command defined."
|
||
elif qcmd.get("type") == "alias":
|
||
target = qcmd.get("target", "").strip()
|
||
if target:
|
||
target = target if target.startswith("/") else f"/{target}"
|
||
target_command = target.lstrip("/")
|
||
user_args = event.get_command_args().strip()
|
||
event.text = f"{target} {user_args}".strip()
|
||
command = target_command
|
||
# Fall through to normal command dispatch below
|
||
else:
|
||
return f"Quick command '/{command}' has no target defined."
|
||
else:
|
||
return f"Quick command '/{command}' has unsupported type (supported: 'exec', 'alias')."
|
||
|
||
# Plugin-registered slash commands
|
||
if command:
|
||
try:
|
||
from hermes_cli.plugins import get_plugin_command_handler
|
||
plugin_handler = get_plugin_command_handler(command)
|
||
if plugin_handler:
|
||
user_args = event.get_command_args().strip()
|
||
import asyncio as _aio
|
||
result = plugin_handler(user_args)
|
||
if _aio.iscoroutine(result):
|
||
result = await result
|
||
return str(result) if result else None
|
||
except Exception as e:
|
||
logger.debug("Plugin command dispatch failed (non-fatal): %s", e)
|
||
|
||
# Skill slash commands: /skill-name loads the skill and sends to agent
|
||
if command:
|
||
try:
|
||
from agent.skill_commands import get_skill_commands, build_skill_invocation_message
|
||
skill_cmds = get_skill_commands()
|
||
cmd_key = f"/{command}"
|
||
if cmd_key in skill_cmds:
|
||
user_instruction = event.get_command_args().strip()
|
||
msg = build_skill_invocation_message(
|
||
cmd_key, user_instruction, task_id=_quick_key
|
||
)
|
||
if msg:
|
||
event.text = msg
|
||
# Fall through to normal message processing with skill content
|
||
except Exception as e:
|
||
logger.debug("Skill command check failed (non-fatal): %s", e)
|
||
|
||
# Pending exec approvals are handled by /approve and /deny commands above.
|
||
# No bare text matching — "yes" in normal conversation must not trigger
|
||
# execution of a dangerous command.
|
||
|
||
# ── Claim this session before any await ───────────────────────
|
||
# Between here and _run_agent registering the real AIAgent, there
|
||
# are numerous await points (hooks, vision enrichment, STT,
|
||
# session hygiene compression). Without this sentinel a second
|
||
# message arriving during any of those yields would pass the
|
||
# "already running" guard and spin up a duplicate agent for the
|
||
# same session — corrupting the transcript.
|
||
self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL
|
||
|
||
try:
|
||
return await self._handle_message_with_agent(event, source, _quick_key)
|
||
finally:
|
||
# If _run_agent replaced the sentinel with a real agent and
|
||
# then cleaned it up, this is a no-op. If we exited early
|
||
# (exception, command fallthrough, etc.) the sentinel must
|
||
# not linger or the session would be permanently locked out.
|
||
if self._running_agents.get(_quick_key) is _AGENT_PENDING_SENTINEL:
|
||
del self._running_agents[_quick_key]
|
||
|
||
async def _handle_message_with_agent(self, event, source, _quick_key: str):
|
||
"""Inner handler that runs under the _running_agents sentinel guard."""
|
||
|
||
# Get or create session
|
||
session_entry = self.session_store.get_or_create_session(source)
|
||
session_key = session_entry.session_key
|
||
|
||
# Emit session:start for new or auto-reset sessions
|
||
_is_new_session = (
|
||
session_entry.created_at == session_entry.updated_at
|
||
or getattr(session_entry, "was_auto_reset", False)
|
||
)
|
||
if _is_new_session:
|
||
await self.hooks.emit("session:start", {
|
||
"platform": source.platform.value if source.platform else "",
|
||
"user_id": source.user_id,
|
||
"session_id": session_entry.session_id,
|
||
"session_key": session_key,
|
||
})
|
||
|
||
# Build session context
|
||
context = build_session_context(source, self.config, session_entry)
|
||
|
||
# Set environment variables for tools
|
||
self._set_session_env(context)
|
||
|
||
# Read privacy.redact_pii from config (re-read per message)
|
||
_redact_pii = False
|
||
try:
|
||
import yaml as _pii_yaml
|
||
with open(_config_path, encoding="utf-8") as _pf:
|
||
_pcfg = _pii_yaml.safe_load(_pf) or {}
|
||
_redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False))
|
||
except Exception:
|
||
pass
|
||
|
||
# Build the context prompt to inject
|
||
context_prompt = build_session_context_prompt(context, redact_pii=_redact_pii)
|
||
|
||
# If the previous session expired and was auto-reset, prepend a notice
|
||
# so the agent knows this is a fresh conversation (not an intentional /reset).
|
||
if getattr(session_entry, 'was_auto_reset', False):
|
||
reset_reason = getattr(session_entry, 'auto_reset_reason', None) or 'idle'
|
||
if reset_reason == "daily":
|
||
context_note = "[System note: The user's session was automatically reset by the daily schedule. This is a fresh conversation with no prior context.]"
|
||
else:
|
||
context_note = "[System note: The user's previous session expired due to inactivity. This is a fresh conversation with no prior context.]"
|
||
context_prompt = context_note + "\n\n" + context_prompt
|
||
|
||
# Send a user-facing notification explaining the reset, unless:
|
||
# - notifications are disabled in config
|
||
# - the platform is excluded (e.g. api_server, webhook)
|
||
# - the expired session had no activity (nothing was cleared)
|
||
try:
|
||
policy = self.session_store.config.get_reset_policy(
|
||
platform=source.platform,
|
||
session_type=getattr(source, 'chat_type', 'dm'),
|
||
)
|
||
platform_name = source.platform.value if source.platform else ""
|
||
had_activity = getattr(session_entry, 'reset_had_activity', False)
|
||
should_notify = (
|
||
policy.notify
|
||
and had_activity
|
||
and platform_name not in policy.notify_exclude_platforms
|
||
)
|
||
if should_notify:
|
||
adapter = self.adapters.get(source.platform)
|
||
if adapter:
|
||
if reset_reason == "daily":
|
||
reason_text = f"daily schedule at {policy.at_hour}:00"
|
||
else:
|
||
hours = policy.idle_minutes // 60
|
||
mins = policy.idle_minutes % 60
|
||
duration = f"{hours}h" if not mins else f"{hours}h {mins}m" if hours else f"{mins}m"
|
||
reason_text = f"inactive for {duration}"
|
||
notice = (
|
||
f"◐ Session automatically reset ({reason_text}). "
|
||
f"Conversation history cleared.\n"
|
||
f"Use /resume to browse and restore a previous session.\n"
|
||
f"Adjust reset timing in config.yaml under session_reset."
|
||
)
|
||
try:
|
||
session_info = self._format_session_info()
|
||
if session_info:
|
||
notice = f"{notice}\n\n{session_info}"
|
||
except Exception:
|
||
pass
|
||
await adapter.send(
|
||
source.chat_id, notice,
|
||
metadata=getattr(event, 'metadata', None),
|
||
)
|
||
except Exception as e:
|
||
logger.debug("Auto-reset notification failed (non-fatal): %s", e)
|
||
|
||
session_entry.was_auto_reset = False
|
||
session_entry.auto_reset_reason = None
|
||
|
||
# Auto-load skill for DM topic bindings (e.g., Telegram Private Chat Topics)
|
||
# Only inject on NEW sessions — for ongoing conversations the skill content
|
||
# is already in the conversation history from the first message.
|
||
if _is_new_session and getattr(event, "auto_skill", None):
|
||
try:
|
||
from agent.skill_commands import _load_skill_payload, _build_skill_message
|
||
_skill_name = event.auto_skill
|
||
_loaded = _load_skill_payload(_skill_name, task_id=_quick_key)
|
||
if _loaded:
|
||
_loaded_skill, _skill_dir, _display_name = _loaded
|
||
_activation_note = (
|
||
f'[SYSTEM: This conversation is in a topic with the "{_display_name}" skill '
|
||
f"auto-loaded. Follow its instructions for the duration of this session.]"
|
||
)
|
||
_skill_msg = _build_skill_message(
|
||
_loaded_skill, _skill_dir, _activation_note,
|
||
user_instruction=event.text,
|
||
)
|
||
if _skill_msg:
|
||
event.text = _skill_msg
|
||
logger.info(
|
||
"[Gateway] Auto-loaded skill '%s' for DM topic session %s",
|
||
_skill_name, session_key,
|
||
)
|
||
else:
|
||
logger.warning(
|
||
"[Gateway] DM topic skill '%s' not found in available skills",
|
||
_skill_name,
|
||
)
|
||
except Exception as e:
|
||
logger.warning("[Gateway] Failed to auto-load topic skill '%s': %s", event.auto_skill, e)
|
||
|
||
# Load conversation history from transcript
|
||
history = self.session_store.load_transcript(session_entry.session_id)
|
||
|
||
# -----------------------------------------------------------------
|
||
# Session hygiene: auto-compress pathologically large transcripts
|
||
#
|
||
# Long-lived gateway sessions can accumulate enough history that
|
||
# every new message rehydrates an oversized transcript, causing
|
||
# repeated truncation/context failures. Detect this early and
|
||
# compress proactively — before the agent even starts. (#628)
|
||
#
|
||
# Token source priority:
|
||
# 1. Actual API-reported prompt_tokens from the last turn
|
||
# (stored in session_entry.last_prompt_tokens)
|
||
# 2. Rough char-based estimate (str(msg)//4). Overestimates
|
||
# by 30-50% on code/JSON-heavy sessions, but that just
|
||
# means hygiene fires a bit early — safe and harmless.
|
||
# -----------------------------------------------------------------
|
||
if history and len(history) >= 4:
|
||
from agent.model_metadata import (
|
||
estimate_messages_tokens_rough,
|
||
get_model_context_length,
|
||
)
|
||
|
||
# Read model + compression config from config.yaml.
|
||
# NOTE: hygiene threshold is intentionally HIGHER than the agent's
|
||
# own compressor (0.85 vs 0.50). Hygiene is a safety net for
|
||
# sessions that grew too large between turns — it fires pre-agent
|
||
# to prevent API failures. The agent's own compressor handles
|
||
# normal context management during its tool loop with accurate
|
||
# real token counts. Having hygiene at 0.50 caused premature
|
||
# compression on every turn in long gateway sessions.
|
||
_hyg_model = "anthropic/claude-sonnet-4.6"
|
||
_hyg_threshold_pct = 0.85
|
||
_hyg_compression_enabled = True
|
||
_hyg_config_context_length = None
|
||
_hyg_provider = None
|
||
_hyg_base_url = None
|
||
_hyg_api_key = None
|
||
try:
|
||
_hyg_cfg_path = _hermes_home / "config.yaml"
|
||
if _hyg_cfg_path.exists():
|
||
import yaml as _hyg_yaml
|
||
with open(_hyg_cfg_path, encoding="utf-8") as _hyg_f:
|
||
_hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}
|
||
|
||
# Resolve model name (same logic as run_sync)
|
||
_model_cfg = _hyg_data.get("model", {})
|
||
if isinstance(_model_cfg, str):
|
||
_hyg_model = _model_cfg
|
||
elif isinstance(_model_cfg, dict):
|
||
_hyg_model = _model_cfg.get("default", _hyg_model)
|
||
# Read explicit context_length override from model config
|
||
# (same as run_agent.py lines 995-1005)
|
||
_raw_ctx = _model_cfg.get("context_length")
|
||
if _raw_ctx is not None:
|
||
try:
|
||
_hyg_config_context_length = int(_raw_ctx)
|
||
except (TypeError, ValueError):
|
||
pass
|
||
# Read provider for accurate context detection
|
||
_hyg_provider = _model_cfg.get("provider") or None
|
||
_hyg_base_url = _model_cfg.get("base_url") or None
|
||
|
||
# Read compression settings — only use enabled flag.
|
||
# The threshold is intentionally separate from the agent's
|
||
# compression.threshold (hygiene runs higher).
|
||
_comp_cfg = _hyg_data.get("compression", {})
|
||
if isinstance(_comp_cfg, dict):
|
||
_hyg_compression_enabled = str(
|
||
_comp_cfg.get("enabled", True)
|
||
).lower() in ("true", "1", "yes")
|
||
|
||
# Resolve provider/base_url from runtime if not in config
|
||
if not _hyg_provider or not _hyg_base_url:
|
||
try:
|
||
_hyg_runtime = _resolve_runtime_agent_kwargs()
|
||
_hyg_provider = _hyg_provider or _hyg_runtime.get("provider")
|
||
_hyg_base_url = _hyg_base_url or _hyg_runtime.get("base_url")
|
||
_hyg_api_key = _hyg_runtime.get("api_key")
|
||
except Exception:
|
||
pass
|
||
except Exception:
|
||
pass
|
||
|
||
if _hyg_compression_enabled:
|
||
_hyg_context_length = get_model_context_length(
|
||
_hyg_model,
|
||
base_url=_hyg_base_url or "",
|
||
api_key=_hyg_api_key or "",
|
||
config_context_length=_hyg_config_context_length,
|
||
provider=_hyg_provider or "",
|
||
)
|
||
_compress_token_threshold = int(
|
||
_hyg_context_length * _hyg_threshold_pct
|
||
)
|
||
_warn_token_threshold = int(_hyg_context_length * 0.95)
|
||
|
||
_msg_count = len(history)
|
||
|
||
# Prefer actual API-reported tokens from the last turn
|
||
# (stored in session entry) over the rough char-based estimate.
|
||
_stored_tokens = session_entry.last_prompt_tokens
|
||
if _stored_tokens > 0:
|
||
_approx_tokens = _stored_tokens
|
||
_token_source = "actual"
|
||
else:
|
||
_approx_tokens = estimate_messages_tokens_rough(history)
|
||
_token_source = "estimated"
|
||
# Note: rough estimates overestimate by 30-50% for code/JSON-heavy
|
||
# sessions, but that just means hygiene fires a bit early — which
|
||
# is safe and harmless. The 85% threshold already provides ample
|
||
# headroom (agent's own compressor runs at 50%). A previous 1.4x
|
||
# multiplier tried to compensate by inflating the threshold, but
|
||
# 85% * 1.4 = 119% of context — which exceeds the model's limit
|
||
# and prevented hygiene from ever firing for ~200K models (GLM-5).
|
||
|
||
_needs_compress = _approx_tokens >= _compress_token_threshold
|
||
|
||
if _needs_compress:
|
||
logger.info(
|
||
"Session hygiene: %s messages, ~%s tokens (%s) — auto-compressing "
|
||
"(threshold: %s%% of %s = %s tokens)",
|
||
_msg_count, f"{_approx_tokens:,}", _token_source,
|
||
int(_hyg_threshold_pct * 100),
|
||
f"{_hyg_context_length:,}",
|
||
f"{_compress_token_threshold:,}",
|
||
)
|
||
|
||
_hyg_adapter = self.adapters.get(source.platform)
|
||
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
|
||
if _hyg_adapter:
|
||
try:
|
||
await _hyg_adapter.send(
|
||
source.chat_id,
|
||
f"🗜️ Session is large ({_msg_count} messages, "
|
||
f"~{_approx_tokens:,} tokens). Auto-compressing...",
|
||
metadata=_hyg_meta,
|
||
)
|
||
except Exception:
|
||
pass
|
||
|
||
try:
|
||
from run_agent import AIAgent
|
||
|
||
_hyg_runtime = _resolve_runtime_agent_kwargs()
|
||
if _hyg_runtime.get("api_key"):
|
||
_hyg_msgs = [
|
||
{"role": m.get("role"), "content": m.get("content")}
|
||
for m in history
|
||
if m.get("role") in ("user", "assistant")
|
||
and m.get("content")
|
||
]
|
||
|
||
if len(_hyg_msgs) >= 4:
|
||
_hyg_agent = AIAgent(
|
||
**_hyg_runtime,
|
||
model=_hyg_model,
|
||
max_iterations=4,
|
||
quiet_mode=True,
|
||
enabled_toolsets=["memory"],
|
||
session_id=session_entry.session_id,
|
||
)
|
||
_hyg_agent._print_fn = lambda *a, **kw: None
|
||
|
||
loop = asyncio.get_event_loop()
|
||
_compressed, _ = await loop.run_in_executor(
|
||
None,
|
||
lambda: _hyg_agent._compress_context(
|
||
_hyg_msgs, "",
|
||
approx_tokens=_approx_tokens,
|
||
),
|
||
)
|
||
|
||
self.session_store.rewrite_transcript(
|
||
session_entry.session_id, _compressed
|
||
)
|
||
# Reset stored token count — transcript was rewritten
|
||
session_entry.last_prompt_tokens = 0
|
||
history = _compressed
|
||
_new_count = len(_compressed)
|
||
_new_tokens = estimate_messages_tokens_rough(
|
||
_compressed
|
||
)
|
||
|
||
logger.info(
|
||
"Session hygiene: compressed %s → %s msgs, "
|
||
"~%s → ~%s tokens",
|
||
_msg_count, _new_count,
|
||
f"{_approx_tokens:,}", f"{_new_tokens:,}",
|
||
)
|
||
|
||
if _hyg_adapter:
|
||
try:
|
||
await _hyg_adapter.send(
|
||
source.chat_id,
|
||
f"🗜️ Compressed: {_msg_count} → "
|
||
f"{_new_count} messages, "
|
||
f"~{_approx_tokens:,} → "
|
||
f"~{_new_tokens:,} tokens",
|
||
metadata=_hyg_meta,
|
||
)
|
||
except Exception:
|
||
pass
|
||
|
||
# Still too large after compression — warn user
|
||
if _new_tokens >= _warn_token_threshold:
|
||
logger.warning(
|
||
"Session hygiene: still ~%s tokens after "
|
||
"compression — suggesting /reset",
|
||
f"{_new_tokens:,}",
|
||
)
|
||
if _hyg_adapter:
|
||
try:
|
||
await _hyg_adapter.send(
|
||
source.chat_id,
|
||
"⚠️ Session is still very large "
|
||
"after compression "
|
||
f"(~{_new_tokens:,} tokens). "
|
||
"Consider using /reset to start "
|
||
"fresh if you experience issues.",
|
||
metadata=_hyg_meta,
|
||
)
|
||
except Exception:
|
||
pass
|
||
|
||
except Exception as e:
|
||
logger.warning(
|
||
"Session hygiene auto-compress failed: %s", e
|
||
)
|
||
# Compression failed and session is dangerously large
|
||
if _approx_tokens >= _warn_token_threshold:
|
||
_hyg_adapter = self.adapters.get(source.platform)
|
||
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
|
||
if _hyg_adapter:
|
||
try:
|
||
await _hyg_adapter.send(
|
||
source.chat_id,
|
||
f"⚠️ Session is very large "
|
||
f"({_msg_count} messages, "
|
||
f"~{_approx_tokens:,} tokens) and "
|
||
"auto-compression failed. Consider "
|
||
"using /compress or /reset to avoid "
|
||
"issues.",
|
||
metadata=_hyg_meta,
|
||
)
|
||
except Exception:
|
||
pass
|
||
|
||
# First-message onboarding -- only on the very first interaction ever
|
||
if not history and not self.session_store.has_any_sessions():
|
||
context_prompt += (
|
||
"\n\n[System note: This is the user's very first message ever. "
|
||
"Briefly introduce yourself and mention that /help shows available commands. "
|
||
"Keep the introduction concise -- one or two sentences max.]"
|
||
)
|
||
|
||
# One-time prompt if no home channel is set for this platform
|
||
if not history and source.platform and source.platform != Platform.LOCAL:
|
||
platform_name = source.platform.value
|
||
env_key = f"{platform_name.upper()}_HOME_CHANNEL"
|
||
if not os.getenv(env_key):
|
||
adapter = self.adapters.get(source.platform)
|
||
if adapter:
|
||
await adapter.send(
|
||
source.chat_id,
|
||
f"📬 No home channel is set for {platform_name.title()}. "
|
||
f"A home channel is where Hermes delivers cron job results "
|
||
f"and cross-platform messages.\n\n"
|
||
f"Type /sethome to make this chat your home channel, "
|
||
f"or ignore to skip."
|
||
)
|
||
|
||
# -----------------------------------------------------------------
|
||
# Voice channel awareness — inject current voice channel state
|
||
# into context so the agent knows who is in the channel and who
|
||
# is speaking, without needing a separate tool call.
|
||
# -----------------------------------------------------------------
|
||
if source.platform == Platform.DISCORD:
|
||
adapter = self.adapters.get(Platform.DISCORD)
|
||
guild_id = self._get_guild_id(event)
|
||
if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
|
||
vc_context = adapter.get_voice_channel_context(guild_id)
|
||
if vc_context:
|
||
context_prompt += f"\n\n{vc_context}"
|
||
|
||
# -----------------------------------------------------------------
|
||
# Auto-analyze images sent by the user
|
||
#
|
||
# If the user attached image(s), we run the vision tool eagerly so
|
||
# the conversation model always receives a text description. The
|
||
# local file path is also included so the model can re-examine the
|
||
# image later with a more targeted question via vision_analyze.
|
||
#
|
||
# We filter to image paths only (by media_type) so that non-image
|
||
# attachments (documents, audio, etc.) are not sent to the vision
|
||
# tool even when they appear in the same message.
|
||
# -----------------------------------------------------------------
|
||
message_text = event.text or ""
|
||
if event.media_urls:
|
||
image_paths = []
|
||
for i, path in enumerate(event.media_urls):
|
||
# Check media_types if available; otherwise infer from message type
|
||
mtype = event.media_types[i] if i < len(event.media_types) else ""
|
||
is_image = (
|
||
mtype.startswith("image/")
|
||
or event.message_type == MessageType.PHOTO
|
||
)
|
||
if is_image:
|
||
image_paths.append(path)
|
||
if image_paths:
|
||
message_text = await self._enrich_message_with_vision(
|
||
message_text, image_paths
|
||
)
|
||
|
||
# -----------------------------------------------------------------
|
||
# Auto-transcribe voice/audio messages sent by the user
|
||
# -----------------------------------------------------------------
|
||
if event.media_urls:
|
||
audio_paths = []
|
||
for i, path in enumerate(event.media_urls):
|
||
mtype = event.media_types[i] if i < len(event.media_types) else ""
|
||
is_audio = (
|
||
mtype.startswith("audio/")
|
||
or event.message_type in (MessageType.VOICE, MessageType.AUDIO)
|
||
)
|
||
if is_audio:
|
||
audio_paths.append(path)
|
||
if audio_paths:
|
||
message_text = await self._enrich_message_with_transcription(
|
||
message_text, audio_paths
|
||
)
|
||
# If STT failed, send a direct message to the user so they
|
||
# know voice isn't configured — don't rely on the agent to
|
||
# relay the error clearly.
|
||
_stt_fail_markers = (
|
||
"No STT provider",
|
||
"STT is disabled",
|
||
"can't listen",
|
||
"VOICE_TOOLS_OPENAI_KEY",
|
||
)
|
||
if any(m in message_text for m in _stt_fail_markers):
|
||
_stt_adapter = self.adapters.get(source.platform)
|
||
_stt_meta = {"thread_id": source.thread_id} if source.thread_id else None
|
||
if _stt_adapter:
|
||
try:
|
||
_stt_msg = (
|
||
"🎤 I received your voice message but can't transcribe it — "
|
||
"no speech-to-text provider is configured.\n\n"
|
||
"To enable voice: install faster-whisper "
|
||
"(`pip install faster-whisper` in the Hermes venv) "
|
||
"and set `stt.enabled: true` in config.yaml, "
|
||
"then /restart the gateway."
|
||
)
|
||
# Point to setup skill if it's installed
|
||
if self._has_setup_skill():
|
||
_stt_msg += "\n\nFor full setup instructions, type: `/skill hermes-agent-setup`"
|
||
await _stt_adapter.send(
|
||
source.chat_id, _stt_msg,
|
||
metadata=_stt_meta,
|
||
)
|
||
except Exception:
|
||
pass
|
||
|
||
# -----------------------------------------------------------------
|
||
# Enrich document messages with context notes for the agent
|
||
# -----------------------------------------------------------------
|
||
if event.media_urls and event.message_type == MessageType.DOCUMENT:
|
||
for i, path in enumerate(event.media_urls):
|
||
mtype = event.media_types[i] if i < len(event.media_types) else ""
|
||
if not (mtype.startswith("application/") or mtype.startswith("text/")):
|
||
continue
|
||
# Extract display filename by stripping the doc_{uuid12}_ prefix
|
||
import os as _os
|
||
basename = _os.path.basename(path)
|
||
# Format: doc_<12hex>_<original_filename>
|
||
parts = basename.split("_", 2)
|
||
display_name = parts[2] if len(parts) >= 3 else basename
|
||
# Sanitize to prevent prompt injection via filenames
|
||
import re as _re
|
||
display_name = _re.sub(r'[^\w.\- ]', '_', display_name)
|
||
|
||
if mtype.startswith("text/"):
|
||
context_note = (
|
||
f"[The user sent a text document: '{display_name}'. "
|
||
f"Its content has been included below. "
|
||
f"The file is also saved at: {path}]"
|
||
)
|
||
else:
|
||
context_note = (
|
||
f"[The user sent a document: '{display_name}'. "
|
||
f"The file is saved at: {path}. "
|
||
f"Ask the user what they'd like you to do with it.]"
|
||
)
|
||
message_text = f"{context_note}\n\n{message_text}"
|
||
|
||
# -----------------------------------------------------------------
|
||
# Inject reply context when user replies to a message not in history.
|
||
# Telegram (and other platforms) let users reply to specific messages,
|
||
# but if the quoted message is from a previous session, cron delivery,
|
||
# or background task, the agent has no context about what's being
|
||
# referenced. Prepend the quoted text so the agent understands. (#1594)
|
||
# -----------------------------------------------------------------
|
||
if getattr(event, 'reply_to_text', None) and event.reply_to_message_id:
|
||
reply_snippet = event.reply_to_text[:500]
|
||
found_in_history = any(
|
||
reply_snippet[:200] in (msg.get("content") or "")
|
||
for msg in history
|
||
if msg.get("role") in ("assistant", "user", "tool")
|
||
)
|
||
if not found_in_history:
|
||
message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'
|
||
|
||
try:
|
||
# Emit agent:start hook
|
||
hook_ctx = {
|
||
"platform": source.platform.value if source.platform else "",
|
||
"user_id": source.user_id,
|
||
"session_id": session_entry.session_id,
|
||
"message": message_text[:500],
|
||
}
|
||
await self.hooks.emit("agent:start", hook_ctx)
|
||
|
||
# Expand @ context references (@file:, @folder:, @diff, etc.)
|
||
if "@" in message_text:
|
||
try:
|
||
from agent.context_references import preprocess_context_references_async
|
||
from agent.model_metadata import get_model_context_length
|
||
_msg_cwd = os.environ.get("MESSAGING_CWD", os.path.expanduser("~"))
|
||
_msg_ctx_len = get_model_context_length(
|
||
self._model, base_url=self._base_url or "")
|
||
_ctx_result = await preprocess_context_references_async(
|
||
message_text, cwd=_msg_cwd,
|
||
context_length=_msg_ctx_len, allowed_root=_msg_cwd)
|
||
if _ctx_result.blocked:
|
||
_adapter = self.adapters.get(source.platform)
|
||
if _adapter:
|
||
await _adapter.send(
|
||
source.chat_id,
|
||
"\n".join(_ctx_result.warnings) or "Context injection refused.",
|
||
)
|
||
return
|
||
if _ctx_result.expanded:
|
||
message_text = _ctx_result.message
|
||
except Exception as exc:
|
||
logger.debug("@ context reference expansion failed: %s", exc)
|
||
|
||
# Run the agent
|
||
agent_result = await self._run_agent(
|
||
message=message_text,
|
||
context_prompt=context_prompt,
|
||
history=history,
|
||
source=source,
|
||
session_id=session_entry.session_id,
|
||
session_key=session_key,
|
||
event_message_id=event.message_id,
|
||
)
|
||
|
||
# Stop persistent typing indicator now that the agent is done
|
||
try:
|
||
_typing_adapter = self.adapters.get(source.platform)
|
||
if _typing_adapter and hasattr(_typing_adapter, "stop_typing"):
|
||
await _typing_adapter.stop_typing(source.chat_id)
|
||
except Exception:
|
||
pass
|
||
|
||
response = agent_result.get("final_response") or ""
|
||
agent_messages = agent_result.get("messages", [])
|
||
|
||
# Surface error details when the agent failed silently (final_response=None)
|
||
if not response and agent_result.get("failed"):
|
||
error_detail = agent_result.get("error", "unknown error")
|
||
error_str = str(error_detail).lower()
|
||
|
||
# Detect context-overflow failures and give specific guidance.
|
||
# Generic 400 "Error" from Anthropic with large sessions is the
|
||
# most common cause of this (#1630).
|
||
_is_ctx_fail = any(p in error_str for p in (
|
||
"context", "token", "too large", "too long",
|
||
"exceed", "payload",
|
||
)) or (
|
||
"400" in error_str
|
||
and len(history) > 50
|
||
)
|
||
|
||
if _is_ctx_fail:
|
||
response = (
|
||
"⚠️ Session too large for the model's context window.\n"
|
||
"Use /compact to compress the conversation, or "
|
||
"/reset to start fresh."
|
||
)
|
||
else:
|
||
response = (
|
||
f"The request failed: {str(error_detail)[:300]}\n"
|
||
"Try again or use /reset to start a fresh session."
|
||
)
|
||
|
||
# If the agent's session_id changed during compression, update
|
||
# session_entry so transcript writes below go to the right session.
|
||
if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
|
||
session_entry.session_id = agent_result["session_id"]
|
||
|
||
# Prepend reasoning/thinking if display is enabled
|
||
if getattr(self, "_show_reasoning", False) and response:
|
||
last_reasoning = agent_result.get("last_reasoning")
|
||
if last_reasoning:
|
||
# Collapse long reasoning to keep messages readable
|
||
lines = last_reasoning.strip().splitlines()
|
||
if len(lines) > 15:
|
||
display_reasoning = "\n".join(lines[:15])
|
||
display_reasoning += f"\n_... ({len(lines) - 15} more lines)_"
|
||
else:
|
||
display_reasoning = last_reasoning.strip()
|
||
response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"
|
||
|
||
# Emit agent:end hook
|
||
await self.hooks.emit("agent:end", {
|
||
**hook_ctx,
|
||
"response": (response or "")[:500],
|
||
})
|
||
|
||
# Check for pending process watchers (check_interval on background processes)
|
||
try:
|
||
from tools.process_registry import process_registry
|
||
while process_registry.pending_watchers:
|
||
watcher = process_registry.pending_watchers.pop(0)
|
||
asyncio.create_task(self._run_process_watcher(watcher))
|
||
except Exception as e:
|
||
logger.error("Process watcher setup error: %s", e)
|
||
|
||
# Check if the agent encountered a dangerous command needing approval
|
||
try:
|
||
from tools.approval import pop_pending
|
||
import time as _time
|
||
pending = pop_pending(session_key)
|
||
if pending:
|
||
pending["timestamp"] = _time.time()
|
||
self._pending_approvals[session_key] = pending
|
||
# Append structured instructions so the user knows how to respond
|
||
cmd_preview = pending.get("command", "")
|
||
if len(cmd_preview) > 200:
|
||
cmd_preview = cmd_preview[:200] + "..."
|
||
approval_hint = (
|
||
f"\n\n⚠️ **Dangerous command requires approval:**\n"
|
||
f"```\n{cmd_preview}\n```\n"
|
||
f"Reply `/approve` to execute, `/approve session` to approve this pattern "
|
||
f"for the session, or `/deny` to cancel."
|
||
)
|
||
response = (response or "") + approval_hint
|
||
except Exception as e:
|
||
logger.debug("Failed to check pending approvals: %s", e)
|
||
|
||
# Save the full conversation to the transcript, including tool calls.
|
||
# This preserves the complete agent loop (tool_calls, tool results,
|
||
# intermediate reasoning) so sessions can be resumed with full context
|
||
# and transcripts are useful for debugging and training data.
|
||
#
|
||
# IMPORTANT: When the agent failed before producing any response
|
||
# (e.g. context-overflow 400), do NOT persist the user's message.
|
||
# Persisting it would make the session even larger, causing the
|
||
# same failure on the next attempt — an infinite loop. (#1630)
|
||
agent_failed_early = (
|
||
agent_result.get("failed")
|
||
and not agent_result.get("final_response")
|
||
)
|
||
if agent_failed_early:
|
||
logger.info(
|
||
"Skipping transcript persistence for failed request in "
|
||
"session %s to prevent session growth loop.",
|
||
session_entry.session_id,
|
||
)
|
||
|
||
ts = datetime.now().isoformat()
|
||
|
||
# If this is a fresh session (no history), write the full tool
|
||
# definitions as the first entry so the transcript is self-describing
|
||
# -- the same list of dicts sent as tools=[...] in the API request.
|
||
if agent_failed_early:
|
||
pass # Skip all transcript writes — don't grow a broken session
|
||
elif not history:
|
||
tool_defs = agent_result.get("tools", [])
|
||
self.session_store.append_to_transcript(
|
||
session_entry.session_id,
|
||
{
|
||
"role": "session_meta",
|
||
"tools": tool_defs or [],
|
||
"model": os.getenv("HERMES_MODEL", ""),
|
||
"platform": source.platform.value if source.platform else "",
|
||
"timestamp": ts,
|
||
}
|
||
)
|
||
|
||
# Find only the NEW messages from this turn (skip history we loaded).
|
||
# Use the filtered history length (history_offset) that was actually
|
||
# passed to the agent, not len(history) which includes session_meta
|
||
# entries that were stripped before the agent saw them.
|
||
if not agent_failed_early:
|
||
history_len = agent_result.get("history_offset", len(history))
|
||
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
|
||
|
||
# If no new messages found (edge case), fall back to simple user/assistant
|
||
if not new_messages:
|
||
self.session_store.append_to_transcript(
|
||
session_entry.session_id,
|
||
{"role": "user", "content": message_text, "timestamp": ts}
|
||
)
|
||
if response:
|
||
self.session_store.append_to_transcript(
|
||
session_entry.session_id,
|
||
{"role": "assistant", "content": response, "timestamp": ts}
|
||
)
|
||
else:
|
||
# The agent already persisted these messages to SQLite via
|
||
# _flush_messages_to_session_db(), so skip the DB write here
|
||
# to prevent the duplicate-write bug (#860). We still write
|
||
# to JSONL for backward compatibility and as a backup.
|
||
agent_persisted = self._session_db is not None
|
||
for msg in new_messages:
|
||
# Skip system messages (they're rebuilt each run)
|
||
if msg.get("role") == "system":
|
||
continue
|
||
# Add timestamp to each message for debugging
|
||
entry = {**msg, "timestamp": ts}
|
||
self.session_store.append_to_transcript(
|
||
session_entry.session_id, entry,
|
||
skip_db=agent_persisted,
|
||
)
|
||
|
||
# Update session with actual prompt token count and model from the agent
|
||
self.session_store.update_session(
|
||
session_entry.session_key,
|
||
input_tokens=agent_result.get("input_tokens", 0),
|
||
output_tokens=agent_result.get("output_tokens", 0),
|
||
cache_read_tokens=agent_result.get("cache_read_tokens", 0),
|
||
cache_write_tokens=agent_result.get("cache_write_tokens", 0),
|
||
last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
|
||
model=agent_result.get("model"),
|
||
estimated_cost_usd=agent_result.get("estimated_cost_usd"),
|
||
cost_status=agent_result.get("cost_status"),
|
||
cost_source=agent_result.get("cost_source"),
|
||
provider=agent_result.get("provider"),
|
||
base_url=agent_result.get("base_url"),
|
||
)
|
||
|
||
# Auto voice reply: send TTS audio before the text response
|
||
_already_sent = bool(agent_result.get("already_sent"))
|
||
if self._should_send_voice_reply(event, response, agent_messages, already_sent=_already_sent):
|
||
await self._send_voice_reply(event, response)
|
||
|
||
# If streaming already delivered the response, extract and
|
||
# deliver any MEDIA: files before returning None. Streaming
|
||
# sends raw text chunks that include MEDIA: tags — the normal
|
||
# post-processing in _process_message_background is skipped
|
||
# when already_sent is True, so media files would never be
|
||
# delivered without this.
|
||
if agent_result.get("already_sent"):
|
||
if response:
|
||
_media_adapter = self.adapters.get(source.platform)
|
||
if _media_adapter:
|
||
await self._deliver_media_from_response(
|
||
response, event, _media_adapter,
|
||
)
|
||
return None
|
||
|
||
return response
|
||
|
||
except Exception as e:
|
||
# Stop typing indicator on error too
|
||
try:
|
||
_err_adapter = self.adapters.get(source.platform)
|
||
if _err_adapter and hasattr(_err_adapter, "stop_typing"):
|
||
await _err_adapter.stop_typing(source.chat_id)
|
||
except Exception:
|
||
pass
|
||
logger.exception("Agent error in session %s", session_key)
|
||
error_type = type(e).__name__
|
||
error_detail = str(e)[:300] if str(e) else "no details available"
|
||
status_hint = ""
|
||
status_code = getattr(e, "status_code", None)
|
||
_hist_len = len(history) if 'history' in locals() else 0
|
||
if status_code == 401:
|
||
status_hint = " Check your API key or run `claude /login` to refresh OAuth credentials."
|
||
elif status_code == 429:
|
||
# Check if this is a plan usage limit (resets on a schedule) vs a transient rate limit
|
||
_err_body = getattr(e, "response", None)
|
||
_err_json = {}
|
||
try:
|
||
if _err_body is not None:
|
||
_err_json = _err_body.json().get("error", {})
|
||
except Exception:
|
||
pass
|
||
if _err_json.get("type") == "usage_limit_reached":
|
||
_resets_in = _err_json.get("resets_in_seconds")
|
||
if _resets_in and _resets_in > 0:
|
||
import math
|
||
_hours = math.ceil(_resets_in / 3600)
|
||
status_hint = f" Your plan's usage limit has been reached. It resets in ~{_hours}h."
|
||
else:
|
||
status_hint = " Your plan's usage limit has been reached. Please wait until it resets."
|
||
else:
|
||
status_hint = " You are being rate-limited. Please wait a moment and try again."
|
||
elif status_code == 529:
|
||
status_hint = " The API is temporarily overloaded. Please try again shortly."
|
||
elif status_code in (400, 500):
|
||
# 400 with a large session is context overflow.
|
||
# 500 with a large session often means the payload is too large
|
||
# for the API to process — treat it the same way.
|
||
if _hist_len > 50:
|
||
return (
|
||
"⚠️ Session too large for the model's context window.\n"
|
||
"Use /compact to compress the conversation, or "
|
||
"/reset to start fresh."
|
||
)
|
||
elif status_code == 400:
|
||
status_hint = " The request was rejected by the API."
|
||
return (
|
||
f"Sorry, I encountered an error ({error_type}).\n"
|
||
f"{error_detail}\n"
|
||
f"{status_hint}"
|
||
"Try again or use /reset to start a fresh session."
|
||
)
|
||
finally:
|
||
# Clear session env
|
||
self._clear_session_env()
|
||
|
||
def _format_session_info(self) -> str:
|
||
"""Resolve current model config and return a formatted info block.
|
||
|
||
Surfaces model, provider, context length, and endpoint so gateway
|
||
users can immediately see if context detection went wrong (e.g.
|
||
local models falling to the 128K default).
|
||
"""
|
||
from agent.model_metadata import get_model_context_length, DEFAULT_FALLBACK_CONTEXT
|
||
|
||
model = _resolve_gateway_model()
|
||
config_context_length = None
|
||
provider = None
|
||
base_url = None
|
||
api_key = None
|
||
|
||
try:
|
||
cfg_path = _hermes_home / "config.yaml"
|
||
if cfg_path.exists():
|
||
import yaml as _info_yaml
|
||
with open(cfg_path, encoding="utf-8") as f:
|
||
data = _info_yaml.safe_load(f) or {}
|
||
model_cfg = data.get("model", {})
|
||
if isinstance(model_cfg, dict):
|
||
raw_ctx = model_cfg.get("context_length")
|
||
if raw_ctx is not None:
|
||
try:
|
||
config_context_length = int(raw_ctx)
|
||
except (TypeError, ValueError):
|
||
pass
|
||
provider = model_cfg.get("provider") or None
|
||
base_url = model_cfg.get("base_url") or None
|
||
except Exception:
|
||
pass
|
||
|
||
# Resolve runtime credentials for probing
|
||
try:
|
||
runtime = _resolve_runtime_agent_kwargs()
|
||
provider = provider or runtime.get("provider")
|
||
base_url = base_url or runtime.get("base_url")
|
||
api_key = runtime.get("api_key")
|
||
except Exception:
|
||
pass
|
||
|
||
context_length = get_model_context_length(
|
||
model,
|
||
base_url=base_url or "",
|
||
api_key=api_key or "",
|
||
config_context_length=config_context_length,
|
||
provider=provider or "",
|
||
)
|
||
|
||
# Format context source hint
|
||
if config_context_length is not None:
|
||
ctx_source = "config"
|
||
elif context_length == DEFAULT_FALLBACK_CONTEXT:
|
||
ctx_source = "default — set model.context_length in config to override"
|
||
else:
|
||
ctx_source = "detected"
|
||
|
||
# Format context length for display
|
||
if context_length >= 1_000_000:
|
||
ctx_display = f"{context_length / 1_000_000:.1f}M"
|
||
elif context_length >= 1_000:
|
||
ctx_display = f"{context_length // 1_000}K"
|
||
else:
|
||
ctx_display = str(context_length)
|
||
|
||
lines = [
|
||
f"◆ Model: `{model}`",
|
||
f"◆ Provider: {provider or 'openrouter'}",
|
||
f"◆ Context: {ctx_display} tokens ({ctx_source})",
|
||
]
|
||
|
||
# Show endpoint for local/custom setups
|
||
if base_url and ("localhost" in base_url or "127.0.0.1" in base_url or "0.0.0.0" in base_url):
|
||
lines.append(f"◆ Endpoint: {base_url}")
|
||
|
||
return "\n".join(lines)
|
||
|
||
async def _handle_reset_command(self, event: MessageEvent) -> str:
|
||
"""Handle /new or /reset command."""
|
||
source = event.source
|
||
|
||
# Get existing session key
|
||
session_key = self._session_key_for_source(source)
|
||
|
||
# Flush memories in the background (fire-and-forget) so the user
|
||
# gets the "Session reset!" response immediately.
|
||
try:
|
||
old_entry = self.session_store._entries.get(session_key)
|
||
if old_entry:
|
||
_flush_task = asyncio.create_task(
|
||
self._async_flush_memories(old_entry.session_id, session_key)
|
||
)
|
||
self._background_tasks.add(_flush_task)
|
||
_flush_task.add_done_callback(self._background_tasks.discard)
|
||
except Exception as e:
|
||
logger.debug("Gateway memory flush on reset failed: %s", e)
|
||
|
||
self._shutdown_gateway_honcho(session_key)
|
||
self._evict_cached_agent(session_key)
|
||
|
||
# Reset the session
|
||
new_entry = self.session_store.reset_session(session_key)
|
||
|
||
# Emit session:end hook (session is ending)
|
||
await self.hooks.emit("session:end", {
|
||
"platform": source.platform.value if source.platform else "",
|
||
"user_id": source.user_id,
|
||
"session_key": session_key,
|
||
})
|
||
|
||
# Emit session:reset hook
|
||
await self.hooks.emit("session:reset", {
|
||
"platform": source.platform.value if source.platform else "",
|
||
"user_id": source.user_id,
|
||
"session_key": session_key,
|
||
})
|
||
|
||
# Resolve session config info to surface to the user
|
||
try:
|
||
session_info = self._format_session_info()
|
||
except Exception:
|
||
session_info = ""
|
||
|
||
if new_entry:
|
||
header = "✨ Session reset! Starting fresh."
|
||
else:
|
||
# No existing session, just create one
|
||
self.session_store.get_or_create_session(source, force_new=True)
|
||
header = "✨ New session started!"
|
||
|
||
if session_info:
|
||
return f"{header}\n\n{session_info}"
|
||
return header
|
||
|
||
async def _handle_status_command(self, event: MessageEvent) -> str:
|
||
"""Handle /status command."""
|
||
source = event.source
|
||
session_entry = self.session_store.get_or_create_session(source)
|
||
|
||
connected_platforms = [p.value for p in self.adapters.keys()]
|
||
|
||
# Check if there's an active agent
|
||
session_key = session_entry.session_key
|
||
is_running = session_key in self._running_agents
|
||
|
||
lines = [
|
||
"📊 **Hermes Gateway Status**",
|
||
"",
|
||
f"**Session ID:** `{session_entry.session_id[:12]}...`",
|
||
f"**Created:** {session_entry.created_at.strftime('%Y-%m-%d %H:%M')}",
|
||
f"**Last Activity:** {session_entry.updated_at.strftime('%Y-%m-%d %H:%M')}",
|
||
f"**Tokens:** {session_entry.total_tokens:,}",
|
||
f"**Agent Running:** {'Yes ⚡' if is_running else 'No'}",
|
||
"",
|
||
f"**Connected Platforms:** {', '.join(connected_platforms)}",
|
||
]
|
||
|
||
return "\n".join(lines)
|
||
|
||
async def _handle_stop_command(self, event: MessageEvent) -> str:
|
||
"""Handle /stop command - interrupt a running agent.
|
||
|
||
When an agent is truly hung (blocked thread that never checks
|
||
_interrupt_requested), the early intercept in _handle_message()
|
||
handles /stop before this method is reached. This handler fires
|
||
only through normal command dispatch (no running agent) or as a
|
||
fallback. Force-clean the session lock in all cases for safety.
|
||
"""
|
||
source = event.source
|
||
session_entry = self.session_store.get_or_create_session(source)
|
||
session_key = session_entry.session_key
|
||
|
||
agent = self._running_agents.get(session_key)
|
||
if agent is _AGENT_PENDING_SENTINEL:
|
||
# Force-clean the sentinel so the session is unlocked.
|
||
if session_key in self._running_agents:
|
||
del self._running_agents[session_key]
|
||
logger.info("HARD STOP (pending) for session %s — sentinel cleared", session_key[:20])
|
||
return "⚡ Force-stopped. The agent was still starting — session unlocked."
|
||
if agent:
|
||
agent.interrupt("Stop requested")
|
||
# Force-clean the session lock so a truly hung agent doesn't
|
||
# keep it locked forever.
|
||
if session_key in self._running_agents:
|
||
del self._running_agents[session_key]
|
||
return "⚡ Force-stopped. The session is unlocked — you can send a new message."
|
||
else:
|
||
return "No active task to stop."
|
||
|
||
async def _handle_help_command(self, event: MessageEvent) -> str:
|
||
"""Handle /help command - list available commands."""
|
||
from hermes_cli.commands import gateway_help_lines
|
||
lines = [
|
||
"📖 **Hermes Commands**\n",
|
||
*gateway_help_lines(),
|
||
]
|
||
try:
|
||
from agent.skill_commands import get_skill_commands
|
||
skill_cmds = get_skill_commands()
|
||
if skill_cmds:
|
||
lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):")
|
||
for cmd in sorted(skill_cmds):
|
||
lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}")
|
||
except Exception:
|
||
pass
|
||
return "\n".join(lines)
|
||
|
||
async def _handle_provider_command(self, event: MessageEvent) -> str:
|
||
"""Handle /provider command - show available providers."""
|
||
import yaml
|
||
from hermes_cli.models import (
|
||
list_available_providers,
|
||
normalize_provider,
|
||
_PROVIDER_LABELS,
|
||
)
|
||
|
||
# Resolve current provider from config
|
||
current_provider = "openrouter"
|
||
config_path = _hermes_home / 'config.yaml'
|
||
try:
|
||
if config_path.exists():
|
||
with open(config_path, encoding="utf-8") as f:
|
||
cfg = yaml.safe_load(f) or {}
|
||
model_cfg = cfg.get("model", {})
|
||
if isinstance(model_cfg, dict):
|
||
current_provider = model_cfg.get("provider", current_provider)
|
||
except Exception:
|
||
pass
|
||
|
||
current_provider = normalize_provider(current_provider)
|
||
if current_provider == "auto":
|
||
try:
|
||
from hermes_cli.auth import resolve_provider as _resolve_provider
|
||
current_provider = _resolve_provider(current_provider)
|
||
except Exception:
|
||
current_provider = "openrouter"
|
||
|
||
# Detect custom endpoint
|
||
if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
|
||
current_provider = "custom"
|
||
|
||
current_label = _PROVIDER_LABELS.get(current_provider, current_provider)
|
||
|
||
lines = [
|
||
f"🔌 **Current provider:** {current_label} (`{current_provider}`)",
|
||
"",
|
||
"**Available providers:**",
|
||
]
|
||
|
||
providers = list_available_providers()
|
||
for p in providers:
|
||
marker = " ← active" if p["id"] == current_provider else ""
|
||
auth = "✅" if p["authenticated"] else "❌"
|
||
aliases = f" _(also: {', '.join(p['aliases'])})_" if p["aliases"] else ""
|
||
lines.append(f"{auth} `{p['id']}` — {p['label']}{aliases}{marker}")
|
||
|
||
lines.append("")
|
||
lines.append("Switch: `/model provider:model-name`")
|
||
lines.append("Setup: `hermes setup`")
|
||
return "\n".join(lines)
|
||
|
||
async def _handle_personality_command(self, event: MessageEvent) -> str:
|
||
"""Handle /personality command - list or set a personality."""
|
||
import yaml
|
||
|
||
args = event.get_command_args().strip().lower()
|
||
config_path = _hermes_home / 'config.yaml'
|
||
|
||
try:
|
||
if config_path.exists():
|
||
with open(config_path, 'r', encoding="utf-8") as f:
|
||
config = yaml.safe_load(f) or {}
|
||
personalities = config.get("agent", {}).get("personalities", {})
|
||
else:
|
||
config = {}
|
||
personalities = {}
|
||
except Exception:
|
||
config = {}
|
||
personalities = {}
|
||
|
||
if not personalities:
|
||
return "No personalities configured in `~/.hermes/config.yaml`"
|
||
|
||
if not args:
|
||
lines = ["🎭 **Available Personalities**\n"]
|
||
lines.append("• `none` — (no personality overlay)")
|
||
for name, prompt in personalities.items():
|
||
if isinstance(prompt, dict):
|
||
preview = prompt.get("description") or prompt.get("system_prompt", "")[:50]
|
||
else:
|
||
preview = prompt[:50] + "..." if len(prompt) > 50 else prompt
|
||
lines.append(f"• `{name}` — {preview}")
|
||
lines.append("\nUsage: `/personality <name>`")
|
||
return "\n".join(lines)
|
||
|
||
def _resolve_prompt(value):
|
||
if isinstance(value, dict):
|
||
parts = [value.get("system_prompt", "")]
|
||
if value.get("tone"):
|
||
parts.append(f'Tone: {value["tone"]}')
|
||
if value.get("style"):
|
||
parts.append(f'Style: {value["style"]}')
|
||
return "\n".join(p for p in parts if p)
|
||
return str(value)
|
||
|
||
if args in ("none", "default", "neutral"):
|
||
try:
|
||
if "agent" not in config or not isinstance(config.get("agent"), dict):
|
||
config["agent"] = {}
|
||
config["agent"]["system_prompt"] = ""
|
||
with open(config_path, "w") as f:
|
||
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
|
||
except Exception as e:
|
||
return f"⚠️ Failed to save personality change: {e}"
|
||
self._ephemeral_system_prompt = ""
|
||
return "🎭 Personality cleared — using base agent behavior.\n_(takes effect on next message)_"
|
||
elif args in personalities:
|
||
new_prompt = _resolve_prompt(personalities[args])
|
||
|
||
# Write to config.yaml, same pattern as CLI save_config_value.
|
||
try:
|
||
if "agent" not in config or not isinstance(config.get("agent"), dict):
|
||
config["agent"] = {}
|
||
config["agent"]["system_prompt"] = new_prompt
|
||
with open(config_path, 'w', encoding="utf-8") as f:
|
||
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
|
||
except Exception as e:
|
||
return f"⚠️ Failed to save personality change: {e}"
|
||
|
||
# Update in-memory so it takes effect on the very next message.
|
||
self._ephemeral_system_prompt = new_prompt
|
||
|
||
return f"🎭 Personality set to **{args}**\n_(takes effect on next message)_"
|
||
|
||
available = "`none`, " + ", ".join(f"`{n}`" for n in personalities.keys())
|
||
return f"Unknown personality: `{args}`\n\nAvailable: {available}"
|
||
|
||
async def _handle_retry_command(self, event: MessageEvent) -> str:
|
||
"""Handle /retry command - re-send the last user message."""
|
||
source = event.source
|
||
session_entry = self.session_store.get_or_create_session(source)
|
||
history = self.session_store.load_transcript(session_entry.session_id)
|
||
|
||
# Find the last user message
|
||
last_user_msg = None
|
||
last_user_idx = None
|
||
for i in range(len(history) - 1, -1, -1):
|
||
if history[i].get("role") == "user":
|
||
last_user_msg = history[i].get("content", "")
|
||
last_user_idx = i
|
||
break
|
||
|
||
if not last_user_msg:
|
||
return "No previous message to retry."
|
||
|
||
# Truncate history to before the last user message and persist
|
||
truncated = history[:last_user_idx]
|
||
self.session_store.rewrite_transcript(session_entry.session_id, truncated)
|
||
# Reset stored token count — transcript was truncated
|
||
session_entry.last_prompt_tokens = 0
|
||
|
||
# Re-send by creating a fake text event with the old message
|
||
retry_event = MessageEvent(
|
||
text=last_user_msg,
|
||
message_type=MessageType.TEXT,
|
||
source=source,
|
||
raw_message=event.raw_message,
|
||
)
|
||
|
||
# Let the normal message handler process it
|
||
return await self._handle_message(retry_event)
|
||
|
||
async def _handle_undo_command(self, event: MessageEvent) -> str:
|
||
"""Handle /undo command - remove the last user/assistant exchange."""
|
||
source = event.source
|
||
session_entry = self.session_store.get_or_create_session(source)
|
||
history = self.session_store.load_transcript(session_entry.session_id)
|
||
|
||
# Find the last user message and remove everything from it onward
|
||
last_user_idx = None
|
||
for i in range(len(history) - 1, -1, -1):
|
||
if history[i].get("role") == "user":
|
||
last_user_idx = i
|
||
break
|
||
|
||
if last_user_idx is None:
|
||
return "Nothing to undo."
|
||
|
||
removed_msg = history[last_user_idx].get("content", "")
|
||
removed_count = len(history) - last_user_idx
|
||
self.session_store.rewrite_transcript(session_entry.session_id, history[:last_user_idx])
|
||
# Reset stored token count — transcript was truncated
|
||
session_entry.last_prompt_tokens = 0
|
||
|
||
preview = removed_msg[:40] + "..." if len(removed_msg) > 40 else removed_msg
|
||
return f"↩️ Undid {removed_count} message(s).\nRemoved: \"{preview}\""
|
||
|
||
async def _handle_set_home_command(self, event: MessageEvent) -> str:
|
||
"""Handle /sethome command -- set the current chat as the platform's home channel."""
|
||
source = event.source
|
||
platform_name = source.platform.value if source.platform else "unknown"
|
||
chat_id = source.chat_id
|
||
chat_name = source.chat_name or chat_id
|
||
|
||
env_key = f"{platform_name.upper()}_HOME_CHANNEL"
|
||
|
||
# Save to config.yaml
|
||
try:
|
||
import yaml
|
||
config_path = _hermes_home / 'config.yaml'
|
||
user_config = {}
|
||
if config_path.exists():
|
||
with open(config_path, encoding="utf-8") as f:
|
||
user_config = yaml.safe_load(f) or {}
|
||
user_config[env_key] = chat_id
|
||
with open(config_path, 'w', encoding="utf-8") as f:
|
||
yaml.dump(user_config, f, default_flow_style=False)
|
||
# Also set in the current environment so it takes effect immediately
|
||
os.environ[env_key] = str(chat_id)
|
||
except Exception as e:
|
||
return f"Failed to save home channel: {e}"
|
||
|
||
return (
|
||
f"✅ Home channel set to **{chat_name}** (ID: {chat_id}).\n"
|
||
f"Cron jobs and cross-platform messages will be delivered here."
|
||
)
|
||
|
||
@staticmethod
|
||
def _get_guild_id(event: MessageEvent) -> Optional[int]:
|
||
"""Extract Discord guild_id from the raw message object."""
|
||
raw = getattr(event, "raw_message", None)
|
||
if raw is None:
|
||
return None
|
||
# Slash command interaction
|
||
if hasattr(raw, "guild_id") and raw.guild_id:
|
||
return int(raw.guild_id)
|
||
# Regular message
|
||
if hasattr(raw, "guild") and raw.guild:
|
||
return raw.guild.id
|
||
return None
|
||
|
||
async def _handle_voice_command(self, event: MessageEvent) -> str:
|
||
"""Handle /voice [on|off|tts|channel|leave|status] command."""
|
||
args = event.get_command_args().strip().lower()
|
||
chat_id = event.source.chat_id
|
||
|
||
adapter = self.adapters.get(event.source.platform)
|
||
|
||
if args in ("on", "enable"):
|
||
self._voice_mode[chat_id] = "voice_only"
|
||
self._save_voice_modes()
|
||
if adapter:
|
||
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
|
||
return (
|
||
"Voice mode enabled.\n"
|
||
"I'll reply with voice when you send voice messages.\n"
|
||
"Use /voice tts to get voice replies for all messages."
|
||
)
|
||
elif args in ("off", "disable"):
|
||
self._voice_mode[chat_id] = "off"
|
||
self._save_voice_modes()
|
||
if adapter:
|
||
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)
|
||
return "Voice mode disabled. Text-only replies."
|
||
elif args == "tts":
|
||
self._voice_mode[chat_id] = "all"
|
||
self._save_voice_modes()
|
||
if adapter:
|
||
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
|
||
return (
|
||
"Auto-TTS enabled.\n"
|
||
"All replies will include a voice message."
|
||
)
|
||
elif args in ("channel", "join"):
|
||
return await self._handle_voice_channel_join(event)
|
||
elif args == "leave":
|
||
return await self._handle_voice_channel_leave(event)
|
||
elif args == "status":
|
||
mode = self._voice_mode.get(chat_id, "off")
|
||
labels = {
|
||
"off": "Off (text only)",
|
||
"voice_only": "On (voice reply to voice messages)",
|
||
"all": "TTS (voice reply to all messages)",
|
||
}
|
||
# Append voice channel info if connected
|
||
adapter = self.adapters.get(event.source.platform)
|
||
guild_id = self._get_guild_id(event)
|
||
if guild_id and hasattr(adapter, "get_voice_channel_info"):
|
||
info = adapter.get_voice_channel_info(guild_id)
|
||
if info:
|
||
lines = [
|
||
f"Voice mode: {labels.get(mode, mode)}",
|
||
f"Voice channel: #{info['channel_name']}",
|
||
f"Participants: {info['member_count']}",
|
||
]
|
||
for m in info["members"]:
|
||
status = " (speaking)" if m.get("is_speaking") else ""
|
||
lines.append(f" - {m['display_name']}{status}")
|
||
return "\n".join(lines)
|
||
return f"Voice mode: {labels.get(mode, mode)}"
|
||
else:
|
||
# Toggle: off → on, on/all → off
|
||
current = self._voice_mode.get(chat_id, "off")
|
||
if current == "off":
|
||
self._voice_mode[chat_id] = "voice_only"
|
||
self._save_voice_modes()
|
||
if adapter:
|
||
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
|
||
return "Voice mode enabled."
|
||
else:
|
||
self._voice_mode[chat_id] = "off"
|
||
self._save_voice_modes()
|
||
if adapter:
|
||
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)
|
||
return "Voice mode disabled."
|
||
|
||
async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
|
||
"""Join the user's current Discord voice channel."""
|
||
adapter = self.adapters.get(event.source.platform)
|
||
if not hasattr(adapter, "join_voice_channel"):
|
||
return "Voice channels are not supported on this platform."
|
||
|
||
guild_id = self._get_guild_id(event)
|
||
if not guild_id:
|
||
return "This command only works in a Discord server."
|
||
|
||
voice_channel = await adapter.get_user_voice_channel(
|
||
guild_id, event.source.user_id
|
||
)
|
||
if not voice_channel:
|
||
return "You need to be in a voice channel first."
|
||
|
||
# Wire callbacks BEFORE join so voice input arriving immediately
|
||
# after connection is not lost.
|
||
if hasattr(adapter, "_voice_input_callback"):
|
||
adapter._voice_input_callback = self._handle_voice_channel_input
|
||
if hasattr(adapter, "_on_voice_disconnect"):
|
||
adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
|
||
|
||
try:
|
||
success = await adapter.join_voice_channel(voice_channel)
|
||
except Exception as e:
|
||
logger.warning("Failed to join voice channel: %s", e)
|
||
adapter._voice_input_callback = None
|
||
err_lower = str(e).lower()
|
||
if "pynacl" in err_lower or "nacl" in err_lower or "davey" in err_lower:
|
||
return (
|
||
"Voice dependencies are missing (PyNaCl / davey). "
|
||
"Install or reinstall Hermes with the messaging extra, e.g. "
|
||
"`pip install hermes-agent[messaging]`."
|
||
)
|
||
return f"Failed to join voice channel: {e}"
|
||
|
||
if success:
|
||
adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
|
||
self._voice_mode[event.source.chat_id] = "all"
|
||
self._save_voice_modes()
|
||
self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
|
||
return (
|
||
f"Joined voice channel **{voice_channel.name}**.\n"
|
||
f"I'll speak my replies and listen to you. Use /voice leave to disconnect."
|
||
)
|
||
# Join failed — clear callback
|
||
adapter._voice_input_callback = None
|
||
return "Failed to join voice channel. Check bot permissions (Connect + Speak)."
|
||
|
||
async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
|
||
"""Leave the Discord voice channel."""
|
||
adapter = self.adapters.get(event.source.platform)
|
||
guild_id = self._get_guild_id(event)
|
||
|
||
if not guild_id or not hasattr(adapter, "leave_voice_channel"):
|
||
return "Not in a voice channel."
|
||
|
||
if not hasattr(adapter, "is_in_voice_channel") or not adapter.is_in_voice_channel(guild_id):
|
||
return "Not in a voice channel."
|
||
|
||
try:
|
||
await adapter.leave_voice_channel(guild_id)
|
||
except Exception as e:
|
||
logger.warning("Error leaving voice channel: %s", e)
|
||
# Always clean up state even if leave raised an exception
|
||
self._voice_mode[event.source.chat_id] = "off"
|
||
self._save_voice_modes()
|
||
self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=True)
|
||
if hasattr(adapter, "_voice_input_callback"):
|
||
adapter._voice_input_callback = None
|
||
return "Left voice channel."
|
||
|
||
def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
|
||
"""Called by the adapter when a voice channel times out.
|
||
|
||
Cleans up runner-side voice_mode state that the adapter cannot reach.
|
||
"""
|
||
self._voice_mode[chat_id] = "off"
|
||
self._save_voice_modes()
|
||
adapter = self.adapters.get(Platform.DISCORD)
|
||
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)
|
||
|
||
async def _handle_voice_channel_input(
|
||
self, guild_id: int, user_id: int, transcript: str
|
||
):
|
||
"""Handle transcribed voice from a user in a voice channel.
|
||
|
||
Creates a synthetic MessageEvent and processes it through the
|
||
adapter's full message pipeline (session, typing, agent, TTS reply).
|
||
"""
|
||
adapter = self.adapters.get(Platform.DISCORD)
|
||
if not adapter:
|
||
return
|
||
|
||
text_ch_id = adapter._voice_text_channels.get(guild_id)
|
||
if not text_ch_id:
|
||
return
|
||
|
||
# Check authorization before processing voice input
|
||
source = SessionSource(
|
||
platform=Platform.DISCORD,
|
||
chat_id=str(text_ch_id),
|
||
user_id=str(user_id),
|
||
user_name=str(user_id),
|
||
chat_type="channel",
|
||
)
|
||
if not self._is_user_authorized(source):
|
||
logger.debug("Unauthorized voice input from user %d, ignoring", user_id)
|
||
return
|
||
|
||
# Show transcript in text channel (after auth, with mention sanitization)
|
||
try:
|
||
channel = adapter._client.get_channel(text_ch_id)
|
||
if channel:
|
||
safe_text = transcript[:2000].replace("@everyone", "@\u200beveryone").replace("@here", "@\u200bhere")
|
||
await channel.send(f"**[Voice]** <@{user_id}>: {safe_text}")
|
||
except Exception:
|
||
pass
|
||
|
||
# Build a synthetic MessageEvent and feed through the normal pipeline
|
||
# Use SimpleNamespace as raw_message so _get_guild_id() can extract
|
||
# guild_id and _send_voice_reply() plays audio in the voice channel.
|
||
from types import SimpleNamespace
|
||
event = MessageEvent(
|
||
source=source,
|
||
text=transcript,
|
||
message_type=MessageType.VOICE,
|
||
raw_message=SimpleNamespace(guild_id=guild_id, guild=None),
|
||
)
|
||
|
||
await adapter.handle_message(event)
|
||
|
||
def _should_send_voice_reply(
|
||
self,
|
||
event: MessageEvent,
|
||
response: str,
|
||
agent_messages: list,
|
||
already_sent: bool = False,
|
||
) -> bool:
|
||
"""Decide whether the runner should send a TTS voice reply.
|
||
|
||
Returns False when:
|
||
- voice_mode is off for this chat
|
||
- response is empty or an error
|
||
- agent already called text_to_speech tool (dedup)
|
||
- voice input and base adapter auto-TTS already handled it (skip_double)
|
||
UNLESS streaming already consumed the response (already_sent=True),
|
||
in which case the base adapter won't have text for auto-TTS so the
|
||
runner must handle it.
|
||
"""
|
||
if not response or response.startswith("Error:"):
|
||
return False
|
||
|
||
chat_id = event.source.chat_id
|
||
voice_mode = self._voice_mode.get(chat_id, "off")
|
||
is_voice_input = (event.message_type == MessageType.VOICE)
|
||
|
||
should = (
|
||
(voice_mode == "all")
|
||
or (voice_mode == "voice_only" and is_voice_input)
|
||
)
|
||
if not should:
|
||
return False
|
||
|
||
# Dedup: agent already called TTS tool
|
||
has_agent_tts = any(
|
||
msg.get("role") == "assistant"
|
||
and any(
|
||
tc.get("function", {}).get("name") == "text_to_speech"
|
||
for tc in (msg.get("tool_calls") or [])
|
||
)
|
||
for msg in agent_messages
|
||
)
|
||
if has_agent_tts:
|
||
return False
|
||
|
||
# Dedup: base adapter auto-TTS already handles voice input
|
||
# (play_tts plays in VC when connected, so runner can skip).
|
||
# When streaming already delivered the text (already_sent=True),
|
||
# the base adapter will receive None and can't run auto-TTS,
|
||
# so the runner must take over.
|
||
if is_voice_input and not already_sent:
|
||
return False
|
||
|
||
return True
|
||
|
||
async def _send_voice_reply(self, event: MessageEvent, text: str) -> None:
|
||
"""Generate TTS audio and send as a voice message before the text reply."""
|
||
import uuid as _uuid
|
||
audio_path = None
|
||
actual_path = None
|
||
try:
|
||
from tools.tts_tool import text_to_speech_tool, _strip_markdown_for_tts
|
||
|
||
tts_text = _strip_markdown_for_tts(text[:4000])
|
||
if not tts_text:
|
||
return
|
||
|
||
# Use .mp3 extension so edge-tts conversion to opus works correctly.
|
||
# The TTS tool may convert to .ogg — use file_path from result.
|
||
audio_path = os.path.join(
|
||
tempfile.gettempdir(), "hermes_voice",
|
||
f"tts_reply_{_uuid.uuid4().hex[:12]}.mp3",
|
||
)
|
||
os.makedirs(os.path.dirname(audio_path), exist_ok=True)
|
||
|
||
result_json = await asyncio.to_thread(
|
||
text_to_speech_tool, text=tts_text, output_path=audio_path
|
||
)
|
||
result = json.loads(result_json)
|
||
|
||
# Use the actual file path from result (may differ after opus conversion)
|
||
actual_path = result.get("file_path", audio_path)
|
||
if not result.get("success") or not os.path.isfile(actual_path):
|
||
logger.warning("Auto voice reply TTS failed: %s", result.get("error"))
|
||
return
|
||
|
||
adapter = self.adapters.get(event.source.platform)
|
||
|
||
# If connected to a voice channel, play there instead of sending a file
|
||
guild_id = self._get_guild_id(event)
|
||
if (guild_id
|
||
and hasattr(adapter, "play_in_voice_channel")
|
||
and hasattr(adapter, "is_in_voice_channel")
|
||
and adapter.is_in_voice_channel(guild_id)):
|
||
await adapter.play_in_voice_channel(guild_id, actual_path)
|
||
elif adapter and hasattr(adapter, "send_voice"):
|
||
send_kwargs: Dict[str, Any] = {
|
||
"chat_id": event.source.chat_id,
|
||
"audio_path": actual_path,
|
||
"reply_to": event.message_id,
|
||
}
|
||
if event.source.thread_id:
|
||
send_kwargs["metadata"] = {"thread_id": event.source.thread_id}
|
||
await adapter.send_voice(**send_kwargs)
|
||
except Exception as e:
|
||
logger.warning("Auto voice reply failed: %s", e, exc_info=True)
|
||
finally:
|
||
for p in {audio_path, actual_path} - {None}:
|
||
try:
|
||
os.unlink(p)
|
||
except OSError:
|
||
pass
|
||
|
||
async def _deliver_media_from_response(
|
||
self,
|
||
response: str,
|
||
event: MessageEvent,
|
||
adapter,
|
||
) -> None:
|
||
"""Extract MEDIA: tags and local file paths from a response and deliver them.
|
||
|
||
Called after streaming has already sent the text to the user, so the
|
||
text itself is already delivered — this only handles file attachments
|
||
that the normal _process_message_background path would have caught.
|
||
"""
|
||
from pathlib import Path
|
||
|
||
try:
|
||
media_files, _ = adapter.extract_media(response)
|
||
_, cleaned = adapter.extract_images(response)
|
||
local_files, _ = adapter.extract_local_files(cleaned)
|
||
|
||
_thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||
|
||
_AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
|
||
_VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
|
||
_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
|
||
|
||
for media_path, is_voice in media_files:
|
||
try:
|
||
ext = Path(media_path).suffix.lower()
|
||
if ext in _AUDIO_EXTS:
|
||
await adapter.send_voice(
|
||
chat_id=event.source.chat_id,
|
||
audio_path=media_path,
|
||
metadata=_thread_meta,
|
||
)
|
||
elif ext in _VIDEO_EXTS:
|
||
await adapter.send_video(
|
||
chat_id=event.source.chat_id,
|
||
video_path=media_path,
|
||
metadata=_thread_meta,
|
||
)
|
||
elif ext in _IMAGE_EXTS:
|
||
await adapter.send_image_file(
|
||
chat_id=event.source.chat_id,
|
||
image_path=media_path,
|
||
metadata=_thread_meta,
|
||
)
|
||
else:
|
||
await adapter.send_document(
|
||
chat_id=event.source.chat_id,
|
||
file_path=media_path,
|
||
metadata=_thread_meta,
|
||
)
|
||
except Exception as e:
|
||
logger.warning("[%s] Post-stream media delivery failed: %s", adapter.name, e)
|
||
|
||
for file_path in local_files:
|
||
try:
|
||
ext = Path(file_path).suffix.lower()
|
||
if ext in _IMAGE_EXTS:
|
||
await adapter.send_image_file(
|
||
chat_id=event.source.chat_id,
|
||
image_path=file_path,
|
||
metadata=_thread_meta,
|
||
)
|
||
else:
|
||
await adapter.send_document(
|
||
chat_id=event.source.chat_id,
|
||
file_path=file_path,
|
||
metadata=_thread_meta,
|
||
)
|
||
except Exception as e:
|
||
logger.warning("[%s] Post-stream file delivery failed: %s", adapter.name, e)
|
||
|
||
except Exception as e:
|
||
logger.warning("Post-stream media extraction failed: %s", e)
|
||
|
||
async def _handle_rollback_command(self, event: MessageEvent) -> str:
|
||
"""Handle /rollback command — list or restore filesystem checkpoints."""
|
||
from tools.checkpoint_manager import CheckpointManager, format_checkpoint_list
|
||
|
||
# Read checkpoint config from config.yaml
|
||
cp_cfg = {}
|
||
try:
|
||
import yaml as _y
|
||
_cfg_path = _hermes_home / "config.yaml"
|
||
if _cfg_path.exists():
|
||
with open(_cfg_path, encoding="utf-8") as _f:
|
||
_data = _y.safe_load(_f) or {}
|
||
cp_cfg = _data.get("checkpoints", {})
|
||
if isinstance(cp_cfg, bool):
|
||
cp_cfg = {"enabled": cp_cfg}
|
||
except Exception:
|
||
pass
|
||
|
||
if not cp_cfg.get("enabled", False):
|
||
return (
|
||
"Checkpoints are not enabled.\n"
|
||
"Enable in config.yaml:\n```\ncheckpoints:\n enabled: true\n```"
|
||
)
|
||
|
||
mgr = CheckpointManager(
|
||
enabled=True,
|
||
max_snapshots=cp_cfg.get("max_snapshots", 50),
|
||
)
|
||
|
||
cwd = os.getenv("MESSAGING_CWD", str(Path.home()))
|
||
arg = event.get_command_args().strip()
|
||
|
||
if not arg:
|
||
checkpoints = mgr.list_checkpoints(cwd)
|
||
return format_checkpoint_list(checkpoints, cwd)
|
||
|
||
# Restore by number or hash
|
||
checkpoints = mgr.list_checkpoints(cwd)
|
||
if not checkpoints:
|
||
return f"No checkpoints found for {cwd}"
|
||
|
||
target_hash = None
|
||
try:
|
||
idx = int(arg) - 1
|
||
if 0 <= idx < len(checkpoints):
|
||
target_hash = checkpoints[idx]["hash"]
|
||
else:
|
||
return f"Invalid checkpoint number. Use 1-{len(checkpoints)}."
|
||
except ValueError:
|
||
target_hash = arg
|
||
|
||
result = mgr.restore(cwd, target_hash)
|
||
if result["success"]:
|
||
return (
|
||
f"✅ Restored to checkpoint {result['restored_to']}: {result['reason']}\n"
|
||
f"A pre-rollback snapshot was saved automatically."
|
||
)
|
||
return f"❌ {result['error']}"
|
||
|
||
async def _handle_background_command(self, event: MessageEvent) -> str:
|
||
"""Handle /background <prompt> — run a prompt in a separate background session.
|
||
|
||
Spawns a new AIAgent in a background thread with its own session.
|
||
When it completes, sends the result back to the same chat without
|
||
modifying the active session's conversation history.
|
||
"""
|
||
prompt = event.get_command_args().strip()
|
||
if not prompt:
|
||
return (
|
||
"Usage: /background <prompt>\n"
|
||
"Example: /background Summarize the top HN stories today\n\n"
|
||
"Runs the prompt in a separate session. "
|
||
"You can keep chatting — the result will appear here when done."
|
||
)
|
||
|
||
source = event.source
|
||
task_id = f"bg_{datetime.now().strftime('%H%M%S')}_{os.urandom(3).hex()}"
|
||
|
||
# Fire-and-forget the background task
|
||
_task = asyncio.create_task(
|
||
self._run_background_task(prompt, source, task_id)
|
||
)
|
||
self._background_tasks.add(_task)
|
||
_task.add_done_callback(self._background_tasks.discard)
|
||
|
||
preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
|
||
return f'🔄 Background task started: "{preview}"\nTask ID: {task_id}\nYou can keep chatting — results will appear when done.'
|
||
|
||
async def _run_background_task(
|
||
self, prompt: str, source: "SessionSource", task_id: str
|
||
) -> None:
|
||
"""Execute a background agent task and deliver the result to the chat."""
|
||
from run_agent import AIAgent
|
||
|
||
adapter = self.adapters.get(source.platform)
|
||
if not adapter:
|
||
logger.warning("No adapter for platform %s in background task %s", source.platform, task_id)
|
||
return
|
||
|
||
_thread_metadata = {"thread_id": source.thread_id} if source.thread_id else None
|
||
|
||
try:
|
||
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
||
if not runtime_kwargs.get("api_key"):
|
||
await adapter.send(
|
||
source.chat_id,
|
||
f"❌ Background task {task_id} failed: no provider credentials configured.",
|
||
metadata=_thread_metadata,
|
||
)
|
||
return
|
||
|
||
user_config = _load_gateway_config()
|
||
model = _resolve_gateway_model(user_config)
|
||
platform_key = _platform_config_key(source.platform)
|
||
|
||
from hermes_cli.tools_config import _get_platform_tools
|
||
enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))
|
||
|
||
pr = self._provider_routing
|
||
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
|
||
reasoning_config = self._load_reasoning_config()
|
||
self._reasoning_config = reasoning_config
|
||
turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs)
|
||
|
||
def run_sync():
|
||
agent = AIAgent(
|
||
model=turn_route["model"],
|
||
**turn_route["runtime"],
|
||
max_iterations=max_iterations,
|
||
quiet_mode=True,
|
||
verbose_logging=False,
|
||
enabled_toolsets=enabled_toolsets,
|
||
reasoning_config=reasoning_config,
|
||
providers_allowed=pr.get("only"),
|
||
providers_ignored=pr.get("ignore"),
|
||
providers_order=pr.get("order"),
|
||
provider_sort=pr.get("sort"),
|
||
provider_require_parameters=pr.get("require_parameters", False),
|
||
provider_data_collection=pr.get("data_collection"),
|
||
session_id=task_id,
|
||
platform=platform_key,
|
||
session_db=self._session_db,
|
||
fallback_model=self._fallback_model,
|
||
)
|
||
|
||
return agent.run_conversation(
|
||
user_message=prompt,
|
||
task_id=task_id,
|
||
)
|
||
|
||
loop = asyncio.get_event_loop()
|
||
result = await loop.run_in_executor(None, run_sync)
|
||
|
||
response = result.get("final_response", "") if result else ""
|
||
if not response and result and result.get("error"):
|
||
response = f"Error: {result['error']}"
|
||
|
||
# Extract media files from the response
|
||
if response:
|
||
media_files, response = adapter.extract_media(response)
|
||
images, text_content = adapter.extract_images(response)
|
||
|
||
preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
|
||
header = f'✅ Background task complete\nPrompt: "{preview}"\n\n'
|
||
|
||
if text_content:
|
||
await adapter.send(
|
||
chat_id=source.chat_id,
|
||
content=header + text_content,
|
||
metadata=_thread_metadata,
|
||
)
|
||
elif not images and not media_files:
|
||
await adapter.send(
|
||
chat_id=source.chat_id,
|
||
content=header + "(No response generated)",
|
||
metadata=_thread_metadata,
|
||
)
|
||
|
||
# Send extracted images
|
||
for image_url, alt_text in (images or []):
|
||
try:
|
||
await adapter.send_image(
|
||
chat_id=source.chat_id,
|
||
image_url=image_url,
|
||
caption=alt_text,
|
||
)
|
||
except Exception:
|
||
pass
|
||
|
||
# Send media files
|
||
for media_path in (media_files or []):
|
||
try:
|
||
await adapter.send_file(
|
||
chat_id=source.chat_id,
|
||
file_path=media_path,
|
||
)
|
||
except Exception:
|
||
pass
|
||
else:
|
||
preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
|
||
await adapter.send(
|
||
chat_id=source.chat_id,
|
||
content=f'✅ Background task complete\nPrompt: "{preview}"\n\n(No response generated)',
|
||
metadata=_thread_metadata,
|
||
)
|
||
|
||
except Exception as e:
|
||
logger.exception("Background task %s failed", task_id)
|
||
try:
|
||
await adapter.send(
|
||
chat_id=source.chat_id,
|
||
content=f"❌ Background task {task_id} failed: {e}",
|
||
metadata=_thread_metadata,
|
||
)
|
||
except Exception:
|
||
pass
|
||
|
||
async def _handle_reasoning_command(self, event: MessageEvent) -> str:
|
||
"""Handle /reasoning command — manage reasoning effort and display toggle.
|
||
|
||
Usage:
|
||
/reasoning Show current effort level and display state
|
||
/reasoning <level> Set reasoning effort (none, low, medium, high, xhigh)
|
||
/reasoning show|on Show model reasoning in responses
|
||
/reasoning hide|off Hide model reasoning from responses
|
||
"""
|
||
import yaml
|
||
|
||
args = event.get_command_args().strip().lower()
|
||
config_path = _hermes_home / "config.yaml"
|
||
self._reasoning_config = self._load_reasoning_config()
|
||
self._show_reasoning = self._load_show_reasoning()
|
||
|
||
def _save_config_key(key_path: str, value):
|
||
"""Save a dot-separated key to config.yaml."""
|
||
try:
|
||
user_config = {}
|
||
if config_path.exists():
|
||
with open(config_path, encoding="utf-8") as f:
|
||
user_config = yaml.safe_load(f) or {}
|
||
keys = key_path.split(".")
|
||
current = user_config
|
||
for k in keys[:-1]:
|
||
if k not in current or not isinstance(current[k], dict):
|
||
current[k] = {}
|
||
current = current[k]
|
||
current[keys[-1]] = value
|
||
with open(config_path, "w", encoding="utf-8") as f:
|
||
yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
|
||
return True
|
||
except Exception as e:
|
||
logger.error("Failed to save config key %s: %s", key_path, e)
|
||
return False
|
||
|
||
if not args:
|
||
# Show current state
|
||
rc = self._reasoning_config
|
||
if rc is None:
|
||
level = "medium (default)"
|
||
elif rc.get("enabled") is False:
|
||
level = "none (disabled)"
|
||
else:
|
||
level = rc.get("effort", "medium")
|
||
display_state = "on ✓" if self._show_reasoning else "off"
|
||
return (
|
||
"🧠 **Reasoning Settings**\n\n"
|
||
f"**Effort:** `{level}`\n"
|
||
f"**Display:** {display_state}\n\n"
|
||
"_Usage:_ `/reasoning <none|low|medium|high|xhigh|show|hide>`"
|
||
)
|
||
|
||
# Display toggle
|
||
if args in ("show", "on"):
|
||
self._show_reasoning = True
|
||
_save_config_key("display.show_reasoning", True)
|
||
return "🧠 ✓ Reasoning display: **ON**\nModel thinking will be shown before each response."
|
||
|
||
if args in ("hide", "off"):
|
||
self._show_reasoning = False
|
||
_save_config_key("display.show_reasoning", False)
|
||
return "🧠 ✓ Reasoning display: **OFF**"
|
||
|
||
# Effort level change
|
||
effort = args.strip()
|
||
if effort == "none":
|
||
parsed = {"enabled": False}
|
||
elif effort in ("xhigh", "high", "medium", "low", "minimal"):
|
||
parsed = {"enabled": True, "effort": effort}
|
||
else:
|
||
return (
|
||
f"⚠️ Unknown argument: `{effort}`\n\n"
|
||
"**Valid levels:** none, low, minimal, medium, high, xhigh\n"
|
||
"**Display:** show, hide"
|
||
)
|
||
|
||
self._reasoning_config = parsed
|
||
if _save_config_key("agent.reasoning_effort", effort):
|
||
return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_"
|
||
else:
|
||
return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
|
||
|
||
async def _handle_verbose_command(self, event: MessageEvent) -> str:
|
||
"""Handle /verbose command — cycle tool progress display mode.
|
||
|
||
Gated by ``display.tool_progress_command`` in config.yaml (default off).
|
||
When enabled, cycles the tool progress mode through off → new → all →
|
||
verbose → off, same as the CLI.
|
||
"""
|
||
import yaml
|
||
|
||
config_path = _hermes_home / "config.yaml"
|
||
|
||
# --- check config gate ------------------------------------------------
|
||
try:
|
||
user_config = {}
|
||
if config_path.exists():
|
||
with open(config_path, encoding="utf-8") as f:
|
||
user_config = yaml.safe_load(f) or {}
|
||
gate_enabled = user_config.get("display", {}).get("tool_progress_command", False)
|
||
except Exception:
|
||
gate_enabled = False
|
||
|
||
if not gate_enabled:
|
||
return (
|
||
"The `/verbose` command is not enabled for messaging platforms.\n\n"
|
||
"Enable it in `config.yaml`:\n```yaml\n"
|
||
"display:\n tool_progress_command: true\n```"
|
||
)
|
||
|
||
# --- cycle mode -------------------------------------------------------
|
||
cycle = ["off", "new", "all", "verbose"]
|
||
descriptions = {
|
||
"off": "⚙️ Tool progress: **OFF** — no tool activity shown.",
|
||
"new": "⚙️ Tool progress: **NEW** — shown when tool changes.",
|
||
"all": "⚙️ Tool progress: **ALL** — every tool call shown.",
|
||
"verbose": "⚙️ Tool progress: **VERBOSE** — full args and results.",
|
||
}
|
||
|
||
raw_progress = user_config.get("display", {}).get("tool_progress", "all")
|
||
# YAML 1.1 parses bare "off" as boolean False — normalise back
|
||
if raw_progress is False:
|
||
current = "off"
|
||
elif raw_progress is True:
|
||
current = "all"
|
||
else:
|
||
current = str(raw_progress).lower()
|
||
if current not in cycle:
|
||
current = "all"
|
||
idx = (cycle.index(current) + 1) % len(cycle)
|
||
new_mode = cycle[idx]
|
||
|
||
# Save to config.yaml
|
||
try:
|
||
if "display" not in user_config or not isinstance(user_config.get("display"), dict):
|
||
user_config["display"] = {}
|
||
user_config["display"]["tool_progress"] = new_mode
|
||
with open(config_path, "w", encoding="utf-8") as f:
|
||
yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
|
||
return f"{descriptions[new_mode]}\n_(saved to config — takes effect on next message)_"
|
||
except Exception as e:
|
||
logger.warning("Failed to save tool_progress mode: %s", e)
|
||
return f"{descriptions[new_mode]}\n_(could not save to config: {e})_"
|
||
|
||
async def _handle_compress_command(self, event: MessageEvent) -> str:
|
||
"""Handle /compress command -- manually compress conversation context."""
|
||
source = event.source
|
||
session_entry = self.session_store.get_or_create_session(source)
|
||
history = self.session_store.load_transcript(session_entry.session_id)
|
||
|
||
if not history or len(history) < 4:
|
||
return "Not enough conversation to compress (need at least 4 messages)."
|
||
|
||
try:
|
||
from run_agent import AIAgent
|
||
from agent.model_metadata import estimate_messages_tokens_rough
|
||
|
||
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
||
if not runtime_kwargs.get("api_key"):
|
||
return "No provider configured -- cannot compress."
|
||
|
||
# Resolve model from config (same reason as memory flush above).
|
||
model = _resolve_gateway_model()
|
||
|
||
msgs = [
|
||
{"role": m.get("role"), "content": m.get("content")}
|
||
for m in history
|
||
if m.get("role") in ("user", "assistant") and m.get("content")
|
||
]
|
||
original_count = len(msgs)
|
||
approx_tokens = estimate_messages_tokens_rough(msgs)
|
||
|
||
tmp_agent = AIAgent(
|
||
**runtime_kwargs,
|
||
model=model,
|
||
max_iterations=4,
|
||
quiet_mode=True,
|
||
enabled_toolsets=["memory"],
|
||
session_id=session_entry.session_id,
|
||
)
|
||
tmp_agent._print_fn = lambda *a, **kw: None
|
||
|
||
loop = asyncio.get_event_loop()
|
||
compressed, _ = await loop.run_in_executor(
|
||
None,
|
||
lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens),
|
||
)
|
||
|
||
self.session_store.rewrite_transcript(session_entry.session_id, compressed)
|
||
# Reset stored token count — transcript changed, old value is stale
|
||
self.session_store.update_session(
|
||
session_entry.session_key, last_prompt_tokens=0,
|
||
)
|
||
new_count = len(compressed)
|
||
new_tokens = estimate_messages_tokens_rough(compressed)
|
||
|
||
return (
|
||
f"🗜️ Compressed: {original_count} → {new_count} messages\n"
|
||
f"~{approx_tokens:,} → ~{new_tokens:,} tokens"
|
||
)
|
||
except Exception as e:
|
||
logger.warning("Manual compress failed: %s", e)
|
||
return f"Compression failed: {e}"
|
||
|
||
async def _handle_title_command(self, event: MessageEvent) -> str:
|
||
"""Handle /title command — set or show the current session's title."""
|
||
source = event.source
|
||
session_entry = self.session_store.get_or_create_session(source)
|
||
session_id = session_entry.session_id
|
||
|
||
if not self._session_db:
|
||
return "Session database not available."
|
||
|
||
# Ensure session exists in SQLite DB (it may only exist in session_store
|
||
# if this is the first command in a new session)
|
||
existing_title = self._session_db.get_session_title(session_id)
|
||
if existing_title is None:
|
||
# Session doesn't exist in DB yet — create it
|
||
try:
|
||
self._session_db.create_session(
|
||
session_id=session_id,
|
||
source=source.platform.value if source.platform else "unknown",
|
||
user_id=source.user_id,
|
||
)
|
||
except Exception:
|
||
pass # Session might already exist, ignore errors
|
||
|
||
title_arg = event.get_command_args().strip()
|
||
if title_arg:
|
||
# Sanitize the title before setting
|
||
try:
|
||
sanitized = self._session_db.sanitize_title(title_arg)
|
||
except ValueError as e:
|
||
return f"⚠️ {e}"
|
||
if not sanitized:
|
||
return "⚠️ Title is empty after cleanup. Please use printable characters."
|
||
# Set the title
|
||
try:
|
||
if self._session_db.set_session_title(session_id, sanitized):
|
||
return f"✏️ Session title set: **{sanitized}**"
|
||
else:
|
||
return "Session not found in database."
|
||
except ValueError as e:
|
||
return f"⚠️ {e}"
|
||
else:
|
||
# Show the current title and session ID
|
||
title = self._session_db.get_session_title(session_id)
|
||
if title:
|
||
return f"📌 Session: `{session_id}`\nTitle: **{title}**"
|
||
else:
|
||
return f"📌 Session: `{session_id}`\nNo title set. Usage: `/title My Session Name`"
|
||
|
||
async def _handle_resume_command(self, event: MessageEvent) -> str:
|
||
"""Handle /resume command — switch to a previously-named session."""
|
||
if not self._session_db:
|
||
return "Session database not available."
|
||
|
||
source = event.source
|
||
session_key = self._session_key_for_source(source)
|
||
name = event.get_command_args().strip()
|
||
|
||
if not name:
|
||
# List recent titled sessions for this user/platform
|
||
try:
|
||
user_source = source.platform.value if source.platform else None
|
||
sessions = self._session_db.list_sessions_rich(
|
||
source=user_source, limit=10
|
||
)
|
||
titled = [s for s in sessions if s.get("title")]
|
||
if not titled:
|
||
return (
|
||
"No named sessions found.\n"
|
||
"Use `/title My Session` to name your current session, "
|
||
"then `/resume My Session` to return to it later."
|
||
)
|
||
lines = ["📋 **Named Sessions**\n"]
|
||
for s in titled[:10]:
|
||
title = s["title"]
|
||
preview = s.get("preview", "")[:40]
|
||
preview_part = f" — _{preview}_" if preview else ""
|
||
lines.append(f"• **{title}**{preview_part}")
|
||
lines.append("\nUsage: `/resume <session name>`")
|
||
return "\n".join(lines)
|
||
except Exception as e:
|
||
logger.debug("Failed to list titled sessions: %s", e)
|
||
return f"Could not list sessions: {e}"
|
||
|
||
# Resolve the name to a session ID
|
||
target_id = self._session_db.resolve_session_by_title(name)
|
||
if not target_id:
|
||
return (
|
||
f"No session found matching '**{name}**'.\n"
|
||
"Use `/resume` with no arguments to see available sessions."
|
||
)
|
||
|
||
# Check if already on that session
|
||
current_entry = self.session_store.get_or_create_session(source)
|
||
if current_entry.session_id == target_id:
|
||
return f"📌 Already on session **{name}**."
|
||
|
||
# Flush memories for current session before switching
|
||
try:
|
||
_flush_task = asyncio.create_task(
|
||
self._async_flush_memories(current_entry.session_id, session_key)
|
||
)
|
||
self._background_tasks.add(_flush_task)
|
||
_flush_task.add_done_callback(self._background_tasks.discard)
|
||
except Exception as e:
|
||
logger.debug("Memory flush on resume failed: %s", e)
|
||
|
||
self._shutdown_gateway_honcho(session_key)
|
||
|
||
# Clear any running agent for this session key
|
||
if session_key in self._running_agents:
|
||
del self._running_agents[session_key]
|
||
|
||
# Switch the session entry to point at the old session
|
||
new_entry = self.session_store.switch_session(session_key, target_id)
|
||
if not new_entry:
|
||
return "Failed to switch session."
|
||
|
||
# Get the title for confirmation
|
||
title = self._session_db.get_session_title(target_id) or name
|
||
|
||
# Count messages for context
|
||
history = self.session_store.load_transcript(target_id)
|
||
msg_count = len([m for m in history if m.get("role") == "user"]) if history else 0
|
||
msg_part = f" ({msg_count} message{'s' if msg_count != 1 else ''})" if msg_count else ""
|
||
|
||
return f"↻ Resumed session **{title}**{msg_part}. Conversation restored."
|
||
|
||
async def _handle_usage_command(self, event: MessageEvent) -> str:
|
||
"""Handle /usage command -- show token usage for the session's last agent run."""
|
||
source = event.source
|
||
session_key = self._session_key_for_source(source)
|
||
|
||
agent = self._running_agents.get(session_key)
|
||
if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
|
||
lines = [
|
||
"📊 **Session Token Usage**",
|
||
f"Prompt (input): {agent.session_prompt_tokens:,}",
|
||
f"Completion (output): {agent.session_completion_tokens:,}",
|
||
f"Total: {agent.session_total_tokens:,}",
|
||
f"API calls: {agent.session_api_calls}",
|
||
]
|
||
ctx = agent.context_compressor
|
||
if ctx.last_prompt_tokens:
|
||
pct = ctx.last_prompt_tokens / ctx.context_length * 100 if ctx.context_length else 0
|
||
lines.append(f"Context: {ctx.last_prompt_tokens:,} / {ctx.context_length:,} ({pct:.0f}%)")
|
||
if ctx.compression_count:
|
||
lines.append(f"Compressions: {ctx.compression_count}")
|
||
return "\n".join(lines)
|
||
|
||
# No running agent -- check session history for a rough count
|
||
session_entry = self.session_store.get_or_create_session(source)
|
||
history = self.session_store.load_transcript(session_entry.session_id)
|
||
if history:
|
||
from agent.model_metadata import estimate_messages_tokens_rough
|
||
msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
|
||
approx = estimate_messages_tokens_rough(msgs)
|
||
return (
|
||
f"📊 **Session Info**\n"
|
||
f"Messages: {len(msgs)}\n"
|
||
f"Estimated context: ~{approx:,} tokens\n"
|
||
f"_(Detailed usage available during active conversations)_"
|
||
)
|
||
return "No usage data available for this session."
|
||
|
||
async def _handle_insights_command(self, event: MessageEvent) -> str:
|
||
"""Handle /insights command -- show usage insights and analytics."""
|
||
import asyncio as _asyncio
|
||
|
||
args = event.get_command_args().strip()
|
||
days = 30
|
||
source = None
|
||
|
||
# Parse simple args: /insights 7 or /insights --days 7
|
||
if args:
|
||
parts = args.split()
|
||
i = 0
|
||
while i < len(parts):
|
||
if parts[i] == "--days" and i + 1 < len(parts):
|
||
try:
|
||
days = int(parts[i + 1])
|
||
except ValueError:
|
||
return f"Invalid --days value: {parts[i + 1]}"
|
||
i += 2
|
||
elif parts[i] == "--source" and i + 1 < len(parts):
|
||
source = parts[i + 1]
|
||
i += 2
|
||
elif parts[i].isdigit():
|
||
days = int(parts[i])
|
||
i += 1
|
||
else:
|
||
i += 1
|
||
|
||
try:
|
||
from hermes_state import SessionDB
|
||
from agent.insights import InsightsEngine
|
||
|
||
loop = _asyncio.get_event_loop()
|
||
|
||
def _run_insights():
|
||
db = SessionDB()
|
||
engine = InsightsEngine(db)
|
||
report = engine.generate(days=days, source=source)
|
||
result = engine.format_gateway(report)
|
||
db.close()
|
||
return result
|
||
|
||
return await loop.run_in_executor(None, _run_insights)
|
||
except Exception as e:
|
||
logger.error("Insights command error: %s", e, exc_info=True)
|
||
return f"Error generating insights: {e}"
|
||
|
||
async def _handle_reload_mcp_command(self, event: MessageEvent) -> str:
|
||
"""Handle /reload-mcp command -- disconnect and reconnect all MCP servers."""
|
||
loop = asyncio.get_event_loop()
|
||
try:
|
||
from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _load_mcp_config, _servers, _lock
|
||
|
||
# Capture old server names before shutdown
|
||
with _lock:
|
||
old_servers = set(_servers.keys())
|
||
|
||
# Read new config before shutting down, so we know what will be added/removed
|
||
new_config = _load_mcp_config()
|
||
new_server_names = set(new_config.keys())
|
||
|
||
# Shutdown existing connections
|
||
await loop.run_in_executor(None, shutdown_mcp_servers)
|
||
|
||
# Reconnect by discovering tools (reads config.yaml fresh)
|
||
new_tools = await loop.run_in_executor(None, discover_mcp_tools)
|
||
|
||
# Compute what changed
|
||
with _lock:
|
||
connected_servers = set(_servers.keys())
|
||
|
||
added = connected_servers - old_servers
|
||
removed = old_servers - connected_servers
|
||
reconnected = connected_servers & old_servers
|
||
|
||
lines = ["🔄 **MCP Servers Reloaded**\n"]
|
||
if reconnected:
|
||
lines.append(f"♻️ Reconnected: {', '.join(sorted(reconnected))}")
|
||
if added:
|
||
lines.append(f"➕ Added: {', '.join(sorted(added))}")
|
||
if removed:
|
||
lines.append(f"➖ Removed: {', '.join(sorted(removed))}")
|
||
if not connected_servers:
|
||
lines.append("No MCP servers connected.")
|
||
else:
|
||
lines.append(f"\n🔧 {len(new_tools)} tool(s) available from {len(connected_servers)} server(s)")
|
||
|
||
# Inject a message at the END of the session history so the
|
||
# model knows tools changed on its next turn. Appended after
|
||
# all existing messages to preserve prompt-cache for the prefix.
|
||
change_parts = []
|
||
if added:
|
||
change_parts.append(f"Added servers: {', '.join(sorted(added))}")
|
||
if removed:
|
||
change_parts.append(f"Removed servers: {', '.join(sorted(removed))}")
|
||
if reconnected:
|
||
change_parts.append(f"Reconnected servers: {', '.join(sorted(reconnected))}")
|
||
tool_summary = f"{len(new_tools)} MCP tool(s) now available" if new_tools else "No MCP tools available"
|
||
change_detail = ". ".join(change_parts) + ". " if change_parts else ""
|
||
reload_msg = {
|
||
"role": "user",
|
||
"content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
|
||
}
|
||
try:
|
||
session_entry = self.session_store.get_or_create_session(event.source)
|
||
self.session_store.append_to_transcript(
|
||
session_entry.session_id, reload_msg
|
||
)
|
||
except Exception:
|
||
pass # Best-effort; don't fail the reload over a transcript write
|
||
|
||
return "\n".join(lines)
|
||
|
||
except Exception as e:
|
||
logger.warning("MCP reload failed: %s", e)
|
||
return f"❌ MCP reload failed: {e}"
|
||
|
||
# ------------------------------------------------------------------
|
||
# /approve & /deny — explicit dangerous-command approval
|
||
# ------------------------------------------------------------------
|
||
|
||
_APPROVAL_TIMEOUT_SECONDS = 300 # 5 minutes
|
||
|
||
async def _handle_approve_command(self, event: MessageEvent) -> str:
|
||
"""Handle /approve command — execute a pending dangerous command.
|
||
|
||
Usage:
|
||
/approve — approve and execute the pending command
|
||
/approve session — approve and remember for this session
|
||
/approve always — approve this pattern permanently
|
||
"""
|
||
source = event.source
|
||
session_key = self._session_key_for_source(source)
|
||
|
||
if session_key not in self._pending_approvals:
|
||
return "No pending command to approve."
|
||
|
||
import time as _time
|
||
approval = self._pending_approvals[session_key]
|
||
|
||
# Check for timeout
|
||
ts = approval.get("timestamp", 0)
|
||
if _time.time() - ts > self._APPROVAL_TIMEOUT_SECONDS:
|
||
self._pending_approvals.pop(session_key, None)
|
||
return "⚠️ Approval expired (timed out after 5 minutes). Ask the agent to try again."
|
||
|
||
self._pending_approvals.pop(session_key)
|
||
cmd = approval["command"]
|
||
pattern_keys = approval.get("pattern_keys", [])
|
||
if not pattern_keys:
|
||
pk = approval.get("pattern_key", "")
|
||
pattern_keys = [pk] if pk else []
|
||
|
||
# Determine approval scope from args
|
||
args = event.get_command_args().strip().lower()
|
||
from tools.approval import approve_session, approve_permanent
|
||
|
||
if args in ("always", "permanent", "permanently"):
|
||
for pk in pattern_keys:
|
||
approve_permanent(pk)
|
||
scope_msg = " (pattern approved permanently)"
|
||
elif args in ("session", "ses"):
|
||
for pk in pattern_keys:
|
||
approve_session(session_key, pk)
|
||
scope_msg = " (pattern approved for this session)"
|
||
else:
|
||
# One-time approval — just approve for session so the immediate
|
||
# replay works, but don't advertise it as session-wide
|
||
for pk in pattern_keys:
|
||
approve_session(session_key, pk)
|
||
scope_msg = ""
|
||
|
||
logger.info("User approved dangerous command via /approve: %s...%s", cmd[:60], scope_msg)
|
||
from tools.terminal_tool import terminal_tool
|
||
result = terminal_tool(command=cmd, force=True)
|
||
return f"✅ Command approved and executed{scope_msg}.\n\n```\n{result[:3500]}\n```"
|
||
|
||
async def _handle_deny_command(self, event: MessageEvent) -> str:
|
||
"""Handle /deny command — reject a pending dangerous command."""
|
||
source = event.source
|
||
session_key = self._session_key_for_source(source)
|
||
|
||
if session_key not in self._pending_approvals:
|
||
return "No pending command to deny."
|
||
|
||
self._pending_approvals.pop(session_key)
|
||
logger.info("User denied dangerous command via /deny")
|
||
return "❌ Command denied."
|
||
|
||
async def _handle_update_command(self, event: MessageEvent) -> str:
|
||
"""Handle /update command — update Hermes Agent to the latest version.
|
||
|
||
Spawns ``hermes update`` in a separate systemd scope so it survives the
|
||
gateway restart that ``hermes update`` may trigger at the end. Marker
|
||
files are written so either the current gateway process or the next one
|
||
can notify the user when the update finishes.
|
||
"""
|
||
import json
|
||
import shutil
|
||
import subprocess
|
||
from datetime import datetime
|
||
|
||
project_root = Path(__file__).parent.parent.resolve()
|
||
git_dir = project_root / '.git'
|
||
|
||
if not git_dir.exists():
|
||
return "✗ Not a git repository — cannot update."
|
||
|
||
hermes_cmd = _resolve_hermes_bin()
|
||
if not hermes_cmd:
|
||
return (
|
||
"✗ Could not locate the `hermes` command. "
|
||
"Hermes is running, but the update command could not find the "
|
||
"executable on PATH or via the current Python interpreter. "
|
||
"Try running `hermes update` manually in your terminal."
|
||
)
|
||
|
||
pending_path = _hermes_home / ".update_pending.json"
|
||
output_path = _hermes_home / ".update_output.txt"
|
||
exit_code_path = _hermes_home / ".update_exit_code"
|
||
pending = {
|
||
"platform": event.source.platform.value,
|
||
"chat_id": event.source.chat_id,
|
||
"user_id": event.source.user_id,
|
||
"timestamp": datetime.now().isoformat(),
|
||
}
|
||
pending_path.write_text(json.dumps(pending))
|
||
exit_code_path.unlink(missing_ok=True)
|
||
|
||
# Spawn `hermes update` in a separate cgroup so it survives gateway
|
||
# restart. systemd-run --user --scope creates a transient scope unit.
|
||
hermes_cmd_str = " ".join(shlex.quote(part) for part in hermes_cmd)
|
||
update_cmd = (
|
||
f"{hermes_cmd_str} update > {shlex.quote(str(output_path))} 2>&1; "
|
||
f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
|
||
)
|
||
try:
|
||
systemd_run = shutil.which("systemd-run")
|
||
if systemd_run:
|
||
subprocess.Popen(
|
||
[systemd_run, "--user", "--scope",
|
||
"--unit=hermes-update", "--",
|
||
"bash", "-c", update_cmd],
|
||
stdout=subprocess.DEVNULL,
|
||
stderr=subprocess.DEVNULL,
|
||
start_new_session=True,
|
||
)
|
||
else:
|
||
# Fallback: best-effort detach with start_new_session
|
||
subprocess.Popen(
|
||
["bash", "-c", f"nohup {update_cmd} &"],
|
||
stdout=subprocess.DEVNULL,
|
||
stderr=subprocess.DEVNULL,
|
||
start_new_session=True,
|
||
)
|
||
except Exception as e:
|
||
pending_path.unlink(missing_ok=True)
|
||
exit_code_path.unlink(missing_ok=True)
|
||
return f"✗ Failed to start update: {e}"
|
||
|
||
self._schedule_update_notification_watch()
|
||
return "⚕ Starting Hermes update… I'll notify you when it's done."
|
||
|
||
def _schedule_update_notification_watch(self) -> None:
|
||
"""Ensure a background task is watching for update completion."""
|
||
existing_task = getattr(self, "_update_notification_task", None)
|
||
if existing_task and not existing_task.done():
|
||
return
|
||
|
||
try:
|
||
self._update_notification_task = asyncio.create_task(
|
||
self._watch_for_update_completion()
|
||
)
|
||
except RuntimeError:
|
||
logger.debug("Skipping update notification watcher: no running event loop")
|
||
|
||
async def _watch_for_update_completion(
|
||
self,
|
||
poll_interval: float = 2.0,
|
||
timeout: float = 1800.0,
|
||
) -> None:
|
||
"""Wait for ``hermes update`` to finish, then send its notification."""
|
||
pending_path = _hermes_home / ".update_pending.json"
|
||
claimed_path = _hermes_home / ".update_pending.claimed.json"
|
||
exit_code_path = _hermes_home / ".update_exit_code"
|
||
loop = asyncio.get_running_loop()
|
||
deadline = loop.time() + timeout
|
||
|
||
while (pending_path.exists() or claimed_path.exists()) and loop.time() < deadline:
|
||
if exit_code_path.exists():
|
||
await self._send_update_notification()
|
||
return
|
||
await asyncio.sleep(poll_interval)
|
||
|
||
if (pending_path.exists() or claimed_path.exists()) and not exit_code_path.exists():
|
||
logger.warning("Update watcher timed out waiting for completion marker")
|
||
exit_code_path.write_text("124")
|
||
await self._send_update_notification()
|
||
|
||
async def _send_update_notification(self) -> bool:
|
||
"""If an update finished, notify the user.
|
||
|
||
Returns False when the update is still running so a caller can retry
|
||
later. Returns True after a definitive send/skip decision.
|
||
"""
|
||
import json
|
||
import re as _re
|
||
|
||
pending_path = _hermes_home / ".update_pending.json"
|
||
claimed_path = _hermes_home / ".update_pending.claimed.json"
|
||
output_path = _hermes_home / ".update_output.txt"
|
||
exit_code_path = _hermes_home / ".update_exit_code"
|
||
|
||
if not pending_path.exists() and not claimed_path.exists():
|
||
return False
|
||
|
||
cleanup = True
|
||
active_pending_path = claimed_path
|
||
try:
|
||
if pending_path.exists():
|
||
try:
|
||
pending_path.replace(claimed_path)
|
||
except FileNotFoundError:
|
||
if not claimed_path.exists():
|
||
return True
|
||
elif not claimed_path.exists():
|
||
return True
|
||
|
||
pending = json.loads(claimed_path.read_text())
|
||
platform_str = pending.get("platform")
|
||
chat_id = pending.get("chat_id")
|
||
|
||
if not exit_code_path.exists():
|
||
logger.info("Update notification deferred: update still running")
|
||
cleanup = False
|
||
active_pending_path = pending_path
|
||
claimed_path.replace(pending_path)
|
||
return False
|
||
|
||
exit_code_raw = exit_code_path.read_text().strip() or "1"
|
||
exit_code = int(exit_code_raw)
|
||
|
||
# Read the captured update output
|
||
output = ""
|
||
if output_path.exists():
|
||
output = output_path.read_text()
|
||
|
||
# Resolve adapter
|
||
platform = Platform(platform_str)
|
||
adapter = self.adapters.get(platform)
|
||
|
||
if adapter and chat_id:
|
||
# Strip ANSI escape codes for clean display
|
||
output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
|
||
if output:
|
||
if len(output) > 3500:
|
||
output = "…" + output[-3500:]
|
||
if exit_code == 0:
|
||
msg = f"✅ Hermes update finished.\n\n```\n{output}\n```"
|
||
else:
|
||
msg = f"❌ Hermes update failed.\n\n```\n{output}\n```"
|
||
else:
|
||
if exit_code == 0:
|
||
msg = "✅ Hermes update finished successfully."
|
||
else:
|
||
msg = "❌ Hermes update failed. Check the gateway logs or run `hermes update` manually for details."
|
||
await adapter.send(chat_id, msg)
|
||
logger.info(
|
||
"Sent post-update notification to %s:%s (exit=%s)",
|
||
platform_str,
|
||
chat_id,
|
||
exit_code,
|
||
)
|
||
except Exception as e:
|
||
logger.warning("Post-update notification failed: %s", e)
|
||
finally:
|
||
if cleanup:
|
||
active_pending_path.unlink(missing_ok=True)
|
||
claimed_path.unlink(missing_ok=True)
|
||
output_path.unlink(missing_ok=True)
|
||
exit_code_path.unlink(missing_ok=True)
|
||
|
||
return True
|
||
|
||
def _set_session_env(self, context: SessionContext) -> None:
|
||
"""Set environment variables for the current session."""
|
||
os.environ["HERMES_SESSION_PLATFORM"] = context.source.platform.value
|
||
os.environ["HERMES_SESSION_CHAT_ID"] = context.source.chat_id
|
||
if context.source.chat_name:
|
||
os.environ["HERMES_SESSION_CHAT_NAME"] = context.source.chat_name
|
||
if context.source.thread_id:
|
||
os.environ["HERMES_SESSION_THREAD_ID"] = str(context.source.thread_id)
|
||
|
||
def _clear_session_env(self) -> None:
|
||
"""Clear session environment variables."""
|
||
for var in ["HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME", "HERMES_SESSION_THREAD_ID"]:
|
||
if var in os.environ:
|
||
del os.environ[var]
|
||
|
||
async def _enrich_message_with_vision(
|
||
self,
|
||
user_text: str,
|
||
image_paths: List[str],
|
||
) -> str:
|
||
"""
|
||
Auto-analyze user-attached images with the vision tool and prepend
|
||
the descriptions to the message text.
|
||
|
||
Each image is analyzed with a general-purpose prompt. The resulting
|
||
description *and* the local cache path are injected so the model can:
|
||
1. Immediately understand what the user sent (no extra tool call).
|
||
2. Re-examine the image with vision_analyze if it needs more detail.
|
||
|
||
Args:
|
||
user_text: The user's original caption / message text.
|
||
image_paths: List of local file paths to cached images.
|
||
|
||
Returns:
|
||
The enriched message string with vision descriptions prepended.
|
||
"""
|
||
from tools.vision_tools import vision_analyze_tool
|
||
import json as _json
|
||
|
||
analysis_prompt = (
|
||
"Describe everything visible in this image in thorough detail. "
|
||
"Include any text, code, data, objects, people, layout, colors, "
|
||
"and any other notable visual information."
|
||
)
|
||
|
||
enriched_parts = []
|
||
for path in image_paths:
|
||
try:
|
||
logger.debug("Auto-analyzing user image: %s", path)
|
||
result_json = await vision_analyze_tool(
|
||
image_url=path,
|
||
user_prompt=analysis_prompt,
|
||
)
|
||
result = _json.loads(result_json)
|
||
if result.get("success"):
|
||
description = result.get("analysis", "")
|
||
enriched_parts.append(
|
||
f"[The user sent an image~ Here's what I can see:\n{description}]\n"
|
||
f"[If you need a closer look, use vision_analyze with "
|
||
f"image_url: {path} ~]"
|
||
)
|
||
else:
|
||
enriched_parts.append(
|
||
"[The user sent an image but I couldn't quite see it "
|
||
"this time (>_<) You can try looking at it yourself "
|
||
f"with vision_analyze using image_url: {path}]"
|
||
)
|
||
except Exception as e:
|
||
logger.error("Vision auto-analysis error: %s", e)
|
||
enriched_parts.append(
|
||
f"[The user sent an image but something went wrong when I "
|
||
f"tried to look at it~ You can try examining it yourself "
|
||
f"with vision_analyze using image_url: {path}]"
|
||
)
|
||
|
||
# Combine: vision descriptions first, then the user's original text
|
||
if enriched_parts:
|
||
prefix = "\n\n".join(enriched_parts)
|
||
if user_text:
|
||
return f"{prefix}\n\n{user_text}"
|
||
return prefix
|
||
return user_text
|
||
|
||
async def _enrich_message_with_transcription(
|
||
self,
|
||
user_text: str,
|
||
audio_paths: List[str],
|
||
) -> str:
|
||
"""
|
||
Auto-transcribe user voice/audio messages using the configured STT provider
|
||
and prepend the transcript to the message text.
|
||
|
||
Args:
|
||
user_text: The user's original caption / message text.
|
||
audio_paths: List of local file paths to cached audio files.
|
||
|
||
Returns:
|
||
The enriched message string with transcriptions prepended.
|
||
"""
|
||
if not getattr(self.config, "stt_enabled", True):
|
||
disabled_note = "[The user sent voice message(s), but transcription is disabled in config."
|
||
if self._has_setup_skill():
|
||
disabled_note += (
|
||
" You have a skill called hermes-agent-setup that can help "
|
||
"users configure Hermes features including voice, tools, and more."
|
||
)
|
||
disabled_note += "]"
|
||
if user_text:
|
||
return f"{disabled_note}\n\n{user_text}"
|
||
return disabled_note
|
||
|
||
from tools.transcription_tools import transcribe_audio, get_stt_model_from_config
|
||
import asyncio
|
||
|
||
stt_model = get_stt_model_from_config()
|
||
|
||
enriched_parts = []
|
||
for path in audio_paths:
|
||
try:
|
||
logger.debug("Transcribing user voice: %s", path)
|
||
result = await asyncio.to_thread(transcribe_audio, path, model=stt_model)
|
||
if result["success"]:
|
||
transcript = result["transcript"]
|
||
enriched_parts.append(
|
||
f'[The user sent a voice message~ '
|
||
f'Here\'s what they said: "{transcript}"]'
|
||
)
|
||
else:
|
||
error = result.get("error", "unknown error")
|
||
if (
|
||
"No STT provider" in error
|
||
or error.startswith("Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set")
|
||
):
|
||
_no_stt_note = (
|
||
"[The user sent a voice message but I can't listen "
|
||
"to it right now — no STT provider is configured. "
|
||
"A direct message has already been sent to the user "
|
||
"with setup instructions."
|
||
)
|
||
if self._has_setup_skill():
|
||
_no_stt_note += (
|
||
" You have a skill called hermes-agent-setup "
|
||
"that can help users configure Hermes features "
|
||
"including voice, tools, and more."
|
||
)
|
||
_no_stt_note += "]"
|
||
enriched_parts.append(_no_stt_note)
|
||
else:
|
||
enriched_parts.append(
|
||
"[The user sent a voice message but I had trouble "
|
||
f"transcribing it~ ({error})]"
|
||
)
|
||
except Exception as e:
|
||
logger.error("Transcription error: %s", e)
|
||
enriched_parts.append(
|
||
"[The user sent a voice message but something went wrong "
|
||
"when I tried to listen to it~ Let them know!]"
|
||
)
|
||
|
||
if enriched_parts:
|
||
prefix = "\n\n".join(enriched_parts)
|
||
if user_text:
|
||
return f"{prefix}\n\n{user_text}"
|
||
return prefix
|
||
return user_text
|
||
|
||
async def _run_process_watcher(self, watcher: dict) -> None:
|
||
"""
|
||
Periodically check a background process and push updates to the user.
|
||
|
||
Runs as an asyncio task. Stays silent when nothing changed.
|
||
Auto-removes when the process exits or is killed.
|
||
|
||
Notification mode (from ``display.background_process_notifications``):
|
||
- ``all`` — running-output updates + final message
|
||
- ``result`` — final completion message only
|
||
- ``error`` — final message only when exit code != 0
|
||
- ``off`` — no messages at all
|
||
"""
|
||
from tools.process_registry import process_registry
|
||
|
||
session_id = watcher["session_id"]
|
||
interval = watcher["check_interval"]
|
||
session_key = watcher.get("session_key", "")
|
||
platform_name = watcher.get("platform", "")
|
||
chat_id = watcher.get("chat_id", "")
|
||
thread_id = watcher.get("thread_id", "")
|
||
notify_mode = self._load_background_notifications_mode()
|
||
|
||
logger.debug("Process watcher started: %s (every %ss, notify=%s)",
|
||
session_id, interval, notify_mode)
|
||
|
||
if notify_mode == "off":
|
||
# Still wait for the process to exit so we can log it, but don't
|
||
# push any messages to the user.
|
||
while True:
|
||
await asyncio.sleep(interval)
|
||
session = process_registry.get(session_id)
|
||
if session is None or session.exited:
|
||
break
|
||
logger.debug("Process watcher ended (silent): %s", session_id)
|
||
return
|
||
|
||
last_output_len = 0
|
||
while True:
|
||
await asyncio.sleep(interval)
|
||
|
||
session = process_registry.get(session_id)
|
||
if session is None:
|
||
break
|
||
|
||
current_output_len = len(session.output_buffer)
|
||
has_new_output = current_output_len > last_output_len
|
||
last_output_len = current_output_len
|
||
|
||
if session.exited:
|
||
# Decide whether to notify based on mode
|
||
should_notify = (
|
||
notify_mode in ("all", "result")
|
||
or (notify_mode == "error" and session.exit_code not in (0, None))
|
||
)
|
||
if should_notify:
|
||
new_output = session.output_buffer[-1000:] if session.output_buffer else ""
|
||
message_text = (
|
||
f"[Background process {session_id} finished with exit code {session.exit_code}~ "
|
||
f"Here's the final output:\n{new_output}]"
|
||
)
|
||
adapter = None
|
||
for p, a in self.adapters.items():
|
||
if p.value == platform_name:
|
||
adapter = a
|
||
break
|
||
if adapter and chat_id:
|
||
try:
|
||
send_meta = {"thread_id": thread_id} if thread_id else None
|
||
await adapter.send(chat_id, message_text, metadata=send_meta)
|
||
except Exception as e:
|
||
logger.error("Watcher delivery error: %s", e)
|
||
break
|
||
|
||
elif has_new_output and notify_mode == "all":
|
||
# New output available -- deliver status update (only in "all" mode)
|
||
new_output = session.output_buffer[-500:] if session.output_buffer else ""
|
||
message_text = (
|
||
f"[Background process {session_id} is still running~ "
|
||
f"New output:\n{new_output}]"
|
||
)
|
||
adapter = None
|
||
for p, a in self.adapters.items():
|
||
if p.value == platform_name:
|
||
adapter = a
|
||
break
|
||
if adapter and chat_id:
|
||
try:
|
||
send_meta = {"thread_id": thread_id} if thread_id else None
|
||
await adapter.send(chat_id, message_text, metadata=send_meta)
|
||
except Exception as e:
|
||
logger.error("Watcher delivery error: %s", e)
|
||
|
||
logger.debug("Process watcher ended: %s", session_id)
|
||
|
||
_MAX_INTERRUPT_DEPTH = 3 # Cap recursive interrupt handling (#816)
|
||
|
||
@staticmethod
|
||
def _agent_config_signature(
|
||
model: str,
|
||
runtime: dict,
|
||
enabled_toolsets: list,
|
||
ephemeral_prompt: str,
|
||
) -> str:
|
||
"""Compute a stable string key from agent config values.
|
||
|
||
When this signature changes between messages, the cached AIAgent is
|
||
discarded and rebuilt. When it stays the same, the cached agent is
|
||
reused — preserving the frozen system prompt and tool schemas for
|
||
prompt cache hits.
|
||
"""
|
||
import hashlib, json as _j
|
||
|
||
# Fingerprint the FULL credential string instead of using a short
|
||
# prefix. OAuth/JWT-style tokens frequently share a common prefix
|
||
# (e.g. "eyJhbGci"), which can cause false cache hits across auth
|
||
# switches if only the first few characters are considered.
|
||
_api_key = str(runtime.get("api_key", "") or "")
|
||
_api_key_fingerprint = hashlib.sha256(_api_key.encode()).hexdigest() if _api_key else ""
|
||
|
||
blob = _j.dumps(
|
||
[
|
||
model,
|
||
_api_key_fingerprint,
|
||
runtime.get("base_url", ""),
|
||
runtime.get("provider", ""),
|
||
runtime.get("api_mode", ""),
|
||
sorted(enabled_toolsets) if enabled_toolsets else [],
|
||
# reasoning_config excluded — it's set per-message on the
|
||
# cached agent and doesn't affect system prompt or tools.
|
||
ephemeral_prompt or "",
|
||
],
|
||
sort_keys=True,
|
||
default=str,
|
||
)
|
||
return hashlib.sha256(blob.encode()).hexdigest()[:16]
|
||
|
||
def _evict_cached_agent(self, session_key: str) -> None:
|
||
"""Remove a cached agent for a session (called on /new, /model, etc)."""
|
||
_lock = getattr(self, "_agent_cache_lock", None)
|
||
if _lock:
|
||
with _lock:
|
||
self._agent_cache.pop(session_key, None)
|
||
|
||
async def _run_agent(
|
||
self,
|
||
message: str,
|
||
context_prompt: str,
|
||
history: List[Dict[str, Any]],
|
||
source: SessionSource,
|
||
session_id: str,
|
||
session_key: str = None,
|
||
_interrupt_depth: int = 0,
|
||
event_message_id: Optional[str] = None,
|
||
) -> Dict[str, Any]:
|
||
"""
|
||
Run the agent with the given message and context.
|
||
|
||
Returns the full result dict from run_conversation, including:
|
||
- "final_response": str (the text to send back)
|
||
- "messages": list (full conversation including tool calls)
|
||
- "api_calls": int
|
||
- "completed": bool
|
||
|
||
This is run in a thread pool to not block the event loop.
|
||
Supports interruption via new messages.
|
||
"""
|
||
from run_agent import AIAgent
|
||
import queue
|
||
|
||
user_config = _load_gateway_config()
|
||
platform_key = _platform_config_key(source.platform)
|
||
|
||
from hermes_cli.tools_config import _get_platform_tools
|
||
enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))
|
||
|
||
# Tool progress mode from config.yaml: "all", "new", "verbose", "off"
|
||
# Falls back to env vars for backward compatibility.
|
||
# YAML 1.1 parses bare `off` as boolean False — normalise before
|
||
# the `or` chain so it doesn't silently fall through to "all".
|
||
_raw_tp = user_config.get("display", {}).get("tool_progress")
|
||
if _raw_tp is False:
|
||
_raw_tp = "off"
|
||
progress_mode = (
|
||
_raw_tp
|
||
or os.getenv("HERMES_TOOL_PROGRESS_MODE")
|
||
or "all"
|
||
)
|
||
tool_progress_enabled = progress_mode != "off"
|
||
|
||
# Queue for progress messages (thread-safe)
|
||
progress_queue = queue.Queue() if tool_progress_enabled else None
|
||
last_tool = [None] # Mutable container for tracking in closure
|
||
last_progress_msg = [None] # Track last message for dedup
|
||
repeat_count = [0] # How many times the same message repeated
|
||
|
||
def progress_callback(tool_name: str, preview: str = None, args: dict = None):
|
||
"""Callback invoked by agent when a tool is called."""
|
||
if not progress_queue:
|
||
return
|
||
|
||
# "new" mode: only report when tool changes
|
||
if progress_mode == "new" and tool_name == last_tool[0]:
|
||
return
|
||
last_tool[0] = tool_name
|
||
|
||
# Build progress message with primary argument preview
|
||
from agent.display import get_tool_emoji
|
||
emoji = get_tool_emoji(tool_name, default="⚙️")
|
||
|
||
# Verbose mode: show detailed arguments
|
||
if progress_mode == "verbose" and args:
|
||
import json as _json
|
||
args_str = _json.dumps(args, ensure_ascii=False, default=str)
|
||
if len(args_str) > 200:
|
||
args_str = args_str[:197] + "..."
|
||
msg = f"{emoji} {tool_name}({list(args.keys())})\n{args_str}"
|
||
progress_queue.put(msg)
|
||
return
|
||
|
||
if preview:
|
||
# Truncate preview to keep messages clean
|
||
if len(preview) > 80:
|
||
preview = preview[:77] + "..."
|
||
msg = f"{emoji} {tool_name}: \"{preview}\""
|
||
else:
|
||
msg = f"{emoji} {tool_name}..."
|
||
|
||
# Dedup: collapse consecutive identical progress messages.
|
||
# Common with execute_code where models iterate with the same
|
||
# code (same boilerplate imports → identical previews).
|
||
if msg == last_progress_msg[0]:
|
||
repeat_count[0] += 1
|
||
# Update the last line in progress_lines with a counter
|
||
# via a special "dedup" queue message.
|
||
progress_queue.put(("__dedup__", msg, repeat_count[0]))
|
||
return
|
||
last_progress_msg[0] = msg
|
||
repeat_count[0] = 0
|
||
|
||
progress_queue.put(msg)
|
||
|
||
# Background task to send progress messages
|
||
# Accumulates tool lines into a single message that gets edited
|
||
# For DM top-level Slack messages, source.thread_id is None but the
|
||
# final reply will be threaded under the original message via reply_to.
|
||
# Use event_message_id as fallback so progress messages land in the
|
||
# same thread as the final response instead of going to the DM root.
|
||
_progress_thread_id = source.thread_id or event_message_id
|
||
_progress_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None
|
||
|
||
async def send_progress_messages():
|
||
if not progress_queue:
|
||
return
|
||
|
||
adapter = self.adapters.get(source.platform)
|
||
if not adapter:
|
||
return
|
||
|
||
progress_lines = [] # Accumulated tool lines
|
||
progress_msg_id = None # ID of the progress message to edit
|
||
can_edit = True # False once an edit fails (platform doesn't support it)
|
||
|
||
while True:
|
||
try:
|
||
raw = progress_queue.get_nowait()
|
||
|
||
# Handle dedup messages: update last line with repeat counter
|
||
if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
|
||
_, base_msg, count = raw
|
||
if progress_lines:
|
||
progress_lines[-1] = f"{base_msg} (×{count + 1})"
|
||
msg = progress_lines[-1] if progress_lines else base_msg
|
||
else:
|
||
msg = raw
|
||
progress_lines.append(msg)
|
||
|
||
if can_edit and progress_msg_id is not None:
|
||
# Try to edit the existing progress message
|
||
full_text = "\n".join(progress_lines)
|
||
result = await adapter.edit_message(
|
||
chat_id=source.chat_id,
|
||
message_id=progress_msg_id,
|
||
content=full_text,
|
||
)
|
||
if not result.success:
|
||
# Platform doesn't support editing — stop trying,
|
||
# send just this new line as a separate message
|
||
can_edit = False
|
||
await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
|
||
else:
|
||
if can_edit:
|
||
# First tool: send all accumulated text as new message
|
||
full_text = "\n".join(progress_lines)
|
||
result = await adapter.send(chat_id=source.chat_id, content=full_text, metadata=_progress_metadata)
|
||
else:
|
||
# Editing unsupported: send just this line
|
||
result = await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
|
||
if result.success and result.message_id:
|
||
progress_msg_id = result.message_id
|
||
|
||
# Restore typing indicator
|
||
await asyncio.sleep(0.3)
|
||
await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
|
||
|
||
except queue.Empty:
|
||
await asyncio.sleep(0.3)
|
||
except asyncio.CancelledError:
|
||
# Drain remaining queued messages
|
||
while not progress_queue.empty():
|
||
try:
|
||
raw = progress_queue.get_nowait()
|
||
if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
|
||
_, base_msg, count = raw
|
||
if progress_lines:
|
||
progress_lines[-1] = f"{base_msg} (×{count + 1})"
|
||
else:
|
||
progress_lines.append(raw)
|
||
except Exception:
|
||
break
|
||
# Final edit with all remaining tools (only if editing works)
|
||
if can_edit and progress_lines and progress_msg_id:
|
||
full_text = "\n".join(progress_lines)
|
||
try:
|
||
await adapter.edit_message(
|
||
chat_id=source.chat_id,
|
||
message_id=progress_msg_id,
|
||
content=full_text,
|
||
)
|
||
except Exception:
|
||
pass
|
||
return
|
||
except Exception as e:
|
||
logger.error("Progress message error: %s", e)
|
||
await asyncio.sleep(1)
|
||
|
||
# We need to share the agent instance for interrupt support
|
||
agent_holder = [None] # Mutable container for the agent instance
|
||
result_holder = [None] # Mutable container for the result
|
||
tools_holder = [None] # Mutable container for the tool definitions
|
||
stream_consumer_holder = [None] # Mutable container for stream consumer
|
||
|
||
# Bridge sync step_callback → async hooks.emit for agent:step events
|
||
_loop_for_step = asyncio.get_event_loop()
|
||
_hooks_ref = self.hooks
|
||
|
||
def _step_callback_sync(iteration: int, tool_names: list) -> None:
|
||
try:
|
||
asyncio.run_coroutine_threadsafe(
|
||
_hooks_ref.emit("agent:step", {
|
||
"platform": source.platform.value if source.platform else "",
|
||
"user_id": source.user_id,
|
||
"session_id": session_id,
|
||
"iteration": iteration,
|
||
"tool_names": tool_names,
|
||
}),
|
||
_loop_for_step,
|
||
)
|
||
except Exception as _e:
|
||
logger.debug("agent:step hook error: %s", _e)
|
||
|
||
# Bridge sync status_callback → async adapter.send for context pressure
|
||
_status_adapter = self.adapters.get(source.platform)
|
||
_status_chat_id = source.chat_id
|
||
_status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None
|
||
|
||
def _status_callback_sync(event_type: str, message: str) -> None:
|
||
if not _status_adapter:
|
||
return
|
||
try:
|
||
asyncio.run_coroutine_threadsafe(
|
||
_status_adapter.send(
|
||
_status_chat_id,
|
||
message,
|
||
metadata=_status_thread_metadata,
|
||
),
|
||
_loop_for_step,
|
||
)
|
||
except Exception as _e:
|
||
logger.debug("status_callback error (%s): %s", event_type, _e)
|
||
|
||
def run_sync():
|
||
# Pass session_key to process registry via env var so background
|
||
# processes can be mapped back to this gateway session
|
||
os.environ["HERMES_SESSION_KEY"] = session_key or ""
|
||
|
||
# Read from env var or use default (same as CLI)
|
||
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
|
||
|
||
# Map platform enum to the platform hint key the agent understands.
|
||
# Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
|
||
platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
|
||
|
||
# Combine platform context with user-configured ephemeral system prompt
|
||
combined_ephemeral = context_prompt or ""
|
||
if self._ephemeral_system_prompt:
|
||
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
|
||
|
||
# Re-read .env and config for fresh credentials (gateway is long-lived,
|
||
# keys may change without restart).
|
||
try:
|
||
load_dotenv(_env_path, override=True, encoding="utf-8")
|
||
except UnicodeDecodeError:
|
||
load_dotenv(_env_path, override=True, encoding="latin-1")
|
||
except Exception:
|
||
pass
|
||
|
||
model = _resolve_gateway_model(user_config)
|
||
|
||
try:
|
||
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
||
except Exception as exc:
|
||
return {
|
||
"final_response": f"⚠️ Provider authentication failed: {exc}",
|
||
"messages": [],
|
||
"api_calls": 0,
|
||
"tools": [],
|
||
}
|
||
|
||
pr = self._provider_routing
|
||
honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
|
||
reasoning_config = self._load_reasoning_config()
|
||
self._reasoning_config = reasoning_config
|
||
# Set up streaming consumer if enabled
|
||
_stream_consumer = None
|
||
_stream_delta_cb = None
|
||
_scfg = getattr(getattr(self, 'config', None), 'streaming', None)
|
||
if _scfg is None:
|
||
from gateway.config import StreamingConfig
|
||
_scfg = StreamingConfig()
|
||
|
||
if _scfg.enabled and _scfg.transport != "off":
|
||
try:
|
||
from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
|
||
_adapter = self.adapters.get(source.platform)
|
||
if _adapter:
|
||
_consumer_cfg = StreamConsumerConfig(
|
||
edit_interval=_scfg.edit_interval,
|
||
buffer_threshold=_scfg.buffer_threshold,
|
||
cursor=_scfg.cursor,
|
||
)
|
||
_stream_consumer = GatewayStreamConsumer(
|
||
adapter=_adapter,
|
||
chat_id=source.chat_id,
|
||
config=_consumer_cfg,
|
||
metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None,
|
||
)
|
||
_stream_delta_cb = _stream_consumer.on_delta
|
||
stream_consumer_holder[0] = _stream_consumer
|
||
except Exception as _sc_err:
|
||
logger.debug("Could not set up stream consumer: %s", _sc_err)
|
||
|
||
turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)
|
||
|
||
# Check agent cache — reuse the AIAgent from the previous message
|
||
# in this session to preserve the frozen system prompt and tool
|
||
# schemas for prompt cache hits.
|
||
_sig = self._agent_config_signature(
|
||
turn_route["model"],
|
||
turn_route["runtime"],
|
||
enabled_toolsets,
|
||
combined_ephemeral,
|
||
)
|
||
agent = None
|
||
_cache_lock = getattr(self, "_agent_cache_lock", None)
|
||
_cache = getattr(self, "_agent_cache", None)
|
||
if _cache_lock and _cache is not None:
|
||
with _cache_lock:
|
||
cached = _cache.get(session_key)
|
||
if cached and cached[1] == _sig:
|
||
agent = cached[0]
|
||
logger.debug("Reusing cached agent for session %s", session_key)
|
||
|
||
if agent is None:
|
||
# Config changed or first message — create fresh agent
|
||
agent = AIAgent(
|
||
model=turn_route["model"],
|
||
**turn_route["runtime"],
|
||
max_iterations=max_iterations,
|
||
quiet_mode=True,
|
||
verbose_logging=False,
|
||
enabled_toolsets=enabled_toolsets,
|
||
ephemeral_system_prompt=combined_ephemeral or None,
|
||
prefill_messages=self._prefill_messages or None,
|
||
reasoning_config=reasoning_config,
|
||
providers_allowed=pr.get("only"),
|
||
providers_ignored=pr.get("ignore"),
|
||
providers_order=pr.get("order"),
|
||
provider_sort=pr.get("sort"),
|
||
provider_require_parameters=pr.get("require_parameters", False),
|
||
provider_data_collection=pr.get("data_collection"),
|
||
session_id=session_id,
|
||
platform=platform_key,
|
||
honcho_session_key=session_key,
|
||
honcho_manager=honcho_manager,
|
||
honcho_config=honcho_config,
|
||
session_db=self._session_db,
|
||
fallback_model=self._fallback_model,
|
||
)
|
||
if _cache_lock and _cache is not None:
|
||
with _cache_lock:
|
||
_cache[session_key] = (agent, _sig)
|
||
logger.debug("Created new agent for session %s (sig=%s)", session_key, _sig)
|
||
|
||
# Per-message state — callbacks and reasoning config change every
|
||
# turn and must not be baked into the cached agent constructor.
|
||
agent.tool_progress_callback = progress_callback if tool_progress_enabled else None
|
||
agent.step_callback = _step_callback_sync if _hooks_ref.loaded_hooks else None
|
||
agent.stream_delta_callback = _stream_delta_cb
|
||
agent.status_callback = _status_callback_sync
|
||
agent.reasoning_config = reasoning_config
|
||
|
||
# Background review delivery — send "💾 Memory updated" etc. to user
|
||
def _bg_review_send(message: str) -> None:
|
||
if not _status_adapter:
|
||
return
|
||
try:
|
||
asyncio.run_coroutine_threadsafe(
|
||
_status_adapter.send(
|
||
_status_chat_id,
|
||
message,
|
||
metadata=_status_thread_metadata,
|
||
),
|
||
_loop_for_step,
|
||
)
|
||
except Exception as _e:
|
||
logger.debug("background_review_callback error: %s", _e)
|
||
|
||
agent.background_review_callback = _bg_review_send
|
||
|
||
# Store agent reference for interrupt support
|
||
agent_holder[0] = agent
|
||
# Capture the full tool definitions for transcript logging
|
||
tools_holder[0] = agent.tools if hasattr(agent, 'tools') else None
|
||
|
||
# Convert history to agent format.
|
||
# Two cases:
|
||
# 1. Normal path (from transcript): simple {role, content, timestamp} dicts
|
||
# - Strip timestamps, keep role+content
|
||
# 2. Interrupt path (from agent result["messages"]): full agent messages
|
||
# that may include tool_calls, tool_call_id, reasoning, etc.
|
||
# - These must be passed through intact so the API sees valid
|
||
# assistant→tool sequences (dropping tool_calls causes 500 errors)
|
||
agent_history = []
|
||
for msg in history:
|
||
role = msg.get("role")
|
||
if not role:
|
||
continue
|
||
|
||
# Skip metadata entries (tool definitions, session info)
|
||
# -- these are for transcript logging, not for the LLM
|
||
if role in ("session_meta",):
|
||
continue
|
||
|
||
# Skip system messages -- the agent rebuilds its own system prompt
|
||
if role == "system":
|
||
continue
|
||
|
||
# Rich agent messages (tool_calls, tool results) must be passed
|
||
# through intact so the API sees valid assistant→tool sequences
|
||
has_tool_calls = "tool_calls" in msg
|
||
has_tool_call_id = "tool_call_id" in msg
|
||
is_tool_message = role == "tool"
|
||
|
||
if has_tool_calls or has_tool_call_id or is_tool_message:
|
||
clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}
|
||
agent_history.append(clean_msg)
|
||
else:
|
||
# Simple text message - just need role and content
|
||
content = msg.get("content")
|
||
if content:
|
||
# Tag cross-platform mirror messages so the agent knows their origin
|
||
if msg.get("mirror"):
|
||
mirror_src = msg.get("mirror_source", "another session")
|
||
content = f"[Delivered from {mirror_src}] {content}"
|
||
entry = {"role": role, "content": content}
|
||
# Preserve reasoning fields on assistant messages so
|
||
# multi-turn reasoning context survives session reload.
|
||
# The agent's _build_api_kwargs converts these to the
|
||
# provider-specific format (reasoning_content, etc.).
|
||
if role == "assistant":
|
||
for _rkey in ("reasoning", "reasoning_details",
|
||
"codex_reasoning_items"):
|
||
_rval = msg.get(_rkey)
|
||
if _rval:
|
||
entry[_rkey] = _rval
|
||
agent_history.append(entry)
|
||
|
||
# Collect MEDIA paths already in history so we can exclude them
|
||
# from the current turn's extraction. This is compression-safe:
|
||
# even if the message list shrinks, we know which paths are old.
|
||
_history_media_paths: set = set()
|
||
for _hm in agent_history:
|
||
if _hm.get("role") in ("tool", "function"):
|
||
_hc = _hm.get("content", "")
|
||
if "MEDIA:" in _hc:
|
||
for _match in re.finditer(r'MEDIA:(\S+)', _hc):
|
||
_p = _match.group(1).strip().rstrip('",}')
|
||
if _p:
|
||
_history_media_paths.add(_p)
|
||
|
||
result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
|
||
result_holder[0] = result
|
||
|
||
# Signal the stream consumer that the agent is done
|
||
if _stream_consumer is not None:
|
||
_stream_consumer.finish()
|
||
|
||
# Return final response, or a message if something went wrong
|
||
final_response = result.get("final_response")
|
||
|
||
# Extract actual token counts from the agent instance used for this run
|
||
_last_prompt_toks = 0
|
||
_input_toks = 0
|
||
_output_toks = 0
|
||
_agent = agent_holder[0]
|
||
if _agent and hasattr(_agent, "context_compressor"):
|
||
_last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)
|
||
_input_toks = getattr(_agent, "session_prompt_tokens", 0)
|
||
_output_toks = getattr(_agent, "session_completion_tokens", 0)
|
||
_resolved_model = getattr(_agent, "model", None) if _agent else None
|
||
|
||
if not final_response:
|
||
error_msg = f"⚠️ {result['error']}" if result.get("error") else "(No response generated)"
|
||
return {
|
||
"final_response": error_msg,
|
||
"messages": result.get("messages", []),
|
||
"api_calls": result.get("api_calls", 0),
|
||
"tools": tools_holder[0] or [],
|
||
"history_offset": len(agent_history),
|
||
"last_prompt_tokens": _last_prompt_toks,
|
||
"input_tokens": _input_toks,
|
||
"output_tokens": _output_toks,
|
||
"model": _resolved_model,
|
||
}
|
||
|
||
# Scan tool results for MEDIA:<path> tags that need to be delivered
|
||
# as native audio/file attachments. The TTS tool embeds MEDIA: tags
|
||
# in its JSON response, but the model's final text reply usually
|
||
# doesn't include them. We collect unique tags from tool results and
|
||
# append any that aren't already present in the final response, so the
|
||
# adapter's extract_media() can find and deliver the files exactly once.
|
||
#
|
||
# Uses path-based deduplication against _history_media_paths (collected
|
||
# before run_conversation) instead of index slicing. This is safe even
|
||
# when context compression shrinks the message list. (Fixes #160)
|
||
if "MEDIA:" not in final_response:
|
||
media_tags = []
|
||
has_voice_directive = False
|
||
for msg in result.get("messages", []):
|
||
if msg.get("role") in ("tool", "function"):
|
||
content = msg.get("content", "")
|
||
if "MEDIA:" in content:
|
||
for match in re.finditer(r'MEDIA:(\S+)', content):
|
||
path = match.group(1).strip().rstrip('",}')
|
||
if path and path not in _history_media_paths:
|
||
media_tags.append(f"MEDIA:{path}")
|
||
if "[[audio_as_voice]]" in content:
|
||
has_voice_directive = True
|
||
|
||
if media_tags:
|
||
seen = set()
|
||
unique_tags = []
|
||
for tag in media_tags:
|
||
if tag not in seen:
|
||
seen.add(tag)
|
||
unique_tags.append(tag)
|
||
if has_voice_directive:
|
||
unique_tags.insert(0, "[[audio_as_voice]]")
|
||
final_response = final_response + "\n" + "\n".join(unique_tags)
|
||
|
||
# Sync session_id: the agent may have created a new session during
|
||
# mid-run context compression (_compress_context splits sessions).
|
||
# If so, update the session store entry so the NEXT message loads
|
||
# the compressed transcript, not the stale pre-compression one.
|
||
agent = agent_holder[0]
|
||
if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
|
||
logger.info(
|
||
"Session split detected: %s → %s (compression)",
|
||
session_id, agent.session_id,
|
||
)
|
||
entry = self.session_store._entries.get(session_key)
|
||
if entry:
|
||
entry.session_id = agent.session_id
|
||
self.session_store._save()
|
||
|
||
effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id
|
||
|
||
# Auto-generate session title after first exchange (non-blocking)
|
||
if final_response and self._session_db:
|
||
try:
|
||
from agent.title_generator import maybe_auto_title
|
||
all_msgs = result_holder[0].get("messages", []) if result_holder[0] else []
|
||
maybe_auto_title(
|
||
self._session_db,
|
||
effective_session_id,
|
||
message,
|
||
final_response,
|
||
all_msgs,
|
||
)
|
||
except Exception:
|
||
pass
|
||
|
||
return {
|
||
"final_response": final_response,
|
||
"last_reasoning": result.get("last_reasoning"),
|
||
"messages": result_holder[0].get("messages", []) if result_holder[0] else [],
|
||
"api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
|
||
"tools": tools_holder[0] or [],
|
||
"history_offset": len(agent_history),
|
||
"last_prompt_tokens": _last_prompt_toks,
|
||
"input_tokens": _input_toks,
|
||
"output_tokens": _output_toks,
|
||
"model": _resolved_model,
|
||
"session_id": effective_session_id,
|
||
}
|
||
|
||
# Start progress message sender if enabled
|
||
progress_task = None
|
||
if tool_progress_enabled:
|
||
progress_task = asyncio.create_task(send_progress_messages())
|
||
|
||
# Start stream consumer task — polls for consumer creation since it
|
||
# happens inside run_sync (thread pool) after the agent is constructed.
|
||
stream_task = None
|
||
|
||
async def _start_stream_consumer():
|
||
"""Wait for the stream consumer to be created, then run it."""
|
||
for _ in range(200): # Up to 10s wait
|
||
if stream_consumer_holder[0] is not None:
|
||
await stream_consumer_holder[0].run()
|
||
return
|
||
await asyncio.sleep(0.05)
|
||
|
||
stream_task = asyncio.create_task(_start_stream_consumer())
|
||
|
||
# Track this agent as running for this session (for interrupt support)
|
||
# We do this in a callback after the agent is created
|
||
async def track_agent():
|
||
# Wait for agent to be created
|
||
while agent_holder[0] is None:
|
||
await asyncio.sleep(0.05)
|
||
if session_key:
|
||
self._running_agents[session_key] = agent_holder[0]
|
||
|
||
tracking_task = asyncio.create_task(track_agent())
|
||
|
||
# Monitor for interrupts from the adapter (new messages arriving)
|
||
async def monitor_for_interrupt():
|
||
adapter = self.adapters.get(source.platform)
|
||
if not adapter or not session_key:
|
||
return
|
||
|
||
while True:
|
||
await asyncio.sleep(0.2) # Check every 200ms
|
||
# Check if adapter has a pending interrupt for this session.
|
||
# Must use session_key (build_session_key output) — NOT
|
||
# source.chat_id — because the adapter stores interrupt events
|
||
# under the full session key.
|
||
if hasattr(adapter, 'has_pending_interrupt') and adapter.has_pending_interrupt(session_key):
|
||
agent = agent_holder[0]
|
||
if agent:
|
||
pending_event = adapter.get_pending_message(session_key)
|
||
pending_text = pending_event.text if pending_event else None
|
||
logger.debug("Interrupt detected from adapter, signaling agent...")
|
||
agent.interrupt(pending_text)
|
||
break
|
||
|
||
interrupt_monitor = asyncio.create_task(monitor_for_interrupt())
|
||
|
||
try:
|
||
# Run in thread pool to not block
|
||
loop = asyncio.get_event_loop()
|
||
response = await loop.run_in_executor(None, run_sync)
|
||
|
||
# Track fallback model state: if the agent switched to a
|
||
# fallback model during this run, persist it so /model shows
|
||
# the actually-active model instead of the config default.
|
||
_agent = agent_holder[0]
|
||
if _agent is not None and hasattr(_agent, 'model'):
|
||
_cfg_model = _resolve_gateway_model()
|
||
if _agent.model != _cfg_model:
|
||
self._effective_model = _agent.model
|
||
self._effective_provider = getattr(_agent, 'provider', None)
|
||
# Fallback activated — evict cached agent so the next
|
||
# message starts fresh and retries the primary model.
|
||
self._evict_cached_agent(session_key)
|
||
else:
|
||
# Primary model worked — clear any stale fallback state
|
||
self._effective_model = None
|
||
self._effective_provider = None
|
||
|
||
# Check if we were interrupted OR have a queued message (/queue).
|
||
result = result_holder[0]
|
||
adapter = self.adapters.get(source.platform)
|
||
|
||
# Get pending message from adapter.
|
||
# Use session_key (not source.chat_id) to match adapter's storage keys.
|
||
pending = None
|
||
if result and adapter and session_key:
|
||
if result.get("interrupted"):
|
||
# Interrupted — consume the interrupt message
|
||
pending_event = adapter.get_pending_message(session_key)
|
||
if pending_event:
|
||
pending = pending_event.text
|
||
elif result.get("interrupt_message"):
|
||
pending = result.get("interrupt_message")
|
||
else:
|
||
# Normal completion — check for /queue'd messages that were
|
||
# stored without triggering an interrupt.
|
||
pending_event = adapter.get_pending_message(session_key)
|
||
if pending_event:
|
||
pending = pending_event.text
|
||
logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
|
||
|
||
if pending:
|
||
logger.debug("Processing pending message: '%s...'", pending[:40])
|
||
|
||
# Clear the adapter's interrupt event so the next _run_agent call
|
||
# doesn't immediately re-trigger the interrupt before the new agent
|
||
# even makes its first API call (this was causing an infinite loop).
|
||
if adapter and hasattr(adapter, '_active_sessions') and session_key and session_key in adapter._active_sessions:
|
||
adapter._active_sessions[session_key].clear()
|
||
|
||
# Cap recursion depth to prevent resource exhaustion when the
|
||
# user sends multiple messages while the agent keeps failing. (#816)
|
||
if _interrupt_depth >= self._MAX_INTERRUPT_DEPTH:
|
||
logger.warning(
|
||
"Interrupt recursion depth %d reached for session %s — "
|
||
"queueing message instead of recursing.",
|
||
_interrupt_depth, session_key,
|
||
)
|
||
# Queue the pending message for normal processing on next turn
|
||
adapter = self.adapters.get(source.platform)
|
||
if adapter and hasattr(adapter, 'queue_message'):
|
||
adapter.queue_message(session_key, pending)
|
||
return result_holder[0] or {"final_response": response, "messages": history}
|
||
|
||
was_interrupted = result.get("interrupted")
|
||
if not was_interrupted:
|
||
# Queued message after normal completion — deliver the first
|
||
# response before processing the queued follow-up.
|
||
# Skip if streaming already delivered it.
|
||
_sc = stream_consumer_holder[0]
|
||
_already_streamed = _sc and getattr(_sc, "already_sent", False)
|
||
first_response = result.get("final_response", "")
|
||
if first_response and not _already_streamed:
|
||
try:
|
||
await adapter.send(source.chat_id, first_response,
|
||
metadata=getattr(event, "metadata", None))
|
||
except Exception as e:
|
||
logger.warning("Failed to send first response before queued message: %s", e)
|
||
# else: interrupted — discard the interrupted response ("Operation
|
||
# interrupted." is just noise; the user already knows they sent a
|
||
# new message).
|
||
|
||
# Process the pending message with updated history
|
||
updated_history = result.get("messages", history)
|
||
return await self._run_agent(
|
||
message=pending,
|
||
context_prompt=context_prompt,
|
||
history=updated_history,
|
||
source=source,
|
||
session_id=session_id,
|
||
session_key=session_key,
|
||
_interrupt_depth=_interrupt_depth + 1,
|
||
)
|
||
finally:
|
||
# Stop progress sender and interrupt monitor
|
||
if progress_task:
|
||
progress_task.cancel()
|
||
interrupt_monitor.cancel()
|
||
|
||
# Wait for stream consumer to finish its final edit
|
||
if stream_task:
|
||
try:
|
||
await asyncio.wait_for(stream_task, timeout=5.0)
|
||
except (asyncio.TimeoutError, asyncio.CancelledError):
|
||
stream_task.cancel()
|
||
try:
|
||
await stream_task
|
||
except asyncio.CancelledError:
|
||
pass
|
||
|
||
# Clean up tracking
|
||
tracking_task.cancel()
|
||
if session_key and session_key in self._running_agents:
|
||
del self._running_agents[session_key]
|
||
|
||
# Wait for cancelled tasks
|
||
for task in [progress_task, interrupt_monitor, tracking_task]:
|
||
if task:
|
||
try:
|
||
await task
|
||
except asyncio.CancelledError:
|
||
pass
|
||
|
||
# If streaming already delivered the response, mark it so the
|
||
# caller's send() is skipped (avoiding duplicate messages).
|
||
_sc = stream_consumer_holder[0]
|
||
if _sc and _sc.already_sent and isinstance(response, dict):
|
||
response["already_sent"] = True
|
||
|
||
return response
|
||
|
||
|
||
def _start_cron_ticker(stop_event: threading.Event, adapters=None, interval: int = 60):
|
||
"""
|
||
Background thread that ticks the cron scheduler at a regular interval.
|
||
|
||
Runs inside the gateway process so cronjobs fire automatically without
|
||
needing a separate `hermes cron daemon` or system cron entry.
|
||
|
||
Also refreshes the channel directory every 5 minutes and prunes the
|
||
image/audio/document cache once per hour.
|
||
"""
|
||
from cron.scheduler import tick as cron_tick
|
||
from gateway.platforms.base import cleanup_image_cache, cleanup_document_cache
|
||
|
||
IMAGE_CACHE_EVERY = 60 # ticks — once per hour at default 60s interval
|
||
CHANNEL_DIR_EVERY = 5 # ticks — every 5 minutes
|
||
|
||
logger.info("Cron ticker started (interval=%ds)", interval)
|
||
tick_count = 0
|
||
while not stop_event.is_set():
|
||
try:
|
||
cron_tick(verbose=False)
|
||
except Exception as e:
|
||
logger.debug("Cron tick error: %s", e)
|
||
|
||
tick_count += 1
|
||
|
||
if tick_count % CHANNEL_DIR_EVERY == 0 and adapters:
|
||
try:
|
||
from gateway.channel_directory import build_channel_directory
|
||
build_channel_directory(adapters)
|
||
except Exception as e:
|
||
logger.debug("Channel directory refresh error: %s", e)
|
||
|
||
if tick_count % IMAGE_CACHE_EVERY == 0:
|
||
try:
|
||
removed = cleanup_image_cache(max_age_hours=24)
|
||
if removed:
|
||
logger.info("Image cache cleanup: removed %d stale file(s)", removed)
|
||
except Exception as e:
|
||
logger.debug("Image cache cleanup error: %s", e)
|
||
try:
|
||
removed = cleanup_document_cache(max_age_hours=24)
|
||
if removed:
|
||
logger.info("Document cache cleanup: removed %d stale file(s)", removed)
|
||
except Exception as e:
|
||
logger.debug("Document cache cleanup error: %s", e)
|
||
|
||
stop_event.wait(timeout=interval)
|
||
logger.info("Cron ticker stopped")
|
||
|
||
|
||
async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False) -> bool:
|
||
"""
|
||
Start the gateway and run until interrupted.
|
||
|
||
This is the main entry point for running the gateway.
|
||
Returns True if the gateway ran successfully, False if it failed to start.
|
||
A False return causes a non-zero exit code so systemd can auto-restart.
|
||
|
||
Args:
|
||
config: Optional gateway configuration override.
|
||
replace: If True, kill any existing gateway instance before starting.
|
||
Useful for systemd services to avoid restart-loop deadlocks
|
||
when the previous process hasn't fully exited yet.
|
||
"""
|
||
# ── Duplicate-instance guard ──────────────────────────────────────
|
||
# Prevent two gateways from running under the same HERMES_HOME.
|
||
# The PID file is scoped to HERMES_HOME, so future multi-profile
|
||
# setups (each profile using a distinct HERMES_HOME) will naturally
|
||
# allow concurrent instances without tripping this guard.
|
||
import time as _time
|
||
from gateway.status import get_running_pid, remove_pid_file
|
||
existing_pid = get_running_pid()
|
||
if existing_pid is not None and existing_pid != os.getpid():
|
||
if replace:
|
||
logger.info(
|
||
"Replacing existing gateway instance (PID %d) with --replace.",
|
||
existing_pid,
|
||
)
|
||
try:
|
||
os.kill(existing_pid, signal.SIGTERM)
|
||
except ProcessLookupError:
|
||
pass # Already gone
|
||
except PermissionError:
|
||
logger.error(
|
||
"Permission denied killing PID %d. Cannot replace.",
|
||
existing_pid,
|
||
)
|
||
return False
|
||
# Wait up to 10 seconds for the old process to exit
|
||
for _ in range(20):
|
||
try:
|
||
os.kill(existing_pid, 0)
|
||
_time.sleep(0.5)
|
||
except (ProcessLookupError, PermissionError):
|
||
break # Process is gone
|
||
else:
|
||
# Still alive after 10s — force kill
|
||
logger.warning(
|
||
"Old gateway (PID %d) did not exit after SIGTERM, sending SIGKILL.",
|
||
existing_pid,
|
||
)
|
||
try:
|
||
os.kill(existing_pid, signal.SIGKILL)
|
||
_time.sleep(0.5)
|
||
except (ProcessLookupError, PermissionError):
|
||
pass
|
||
remove_pid_file()
|
||
# Also release all scoped locks left by the old process.
|
||
# Stopped (Ctrl+Z) processes don't release locks on exit,
|
||
# leaving stale lock files that block the new gateway from starting.
|
||
try:
|
||
from gateway.status import release_all_scoped_locks
|
||
_released = release_all_scoped_locks()
|
||
if _released:
|
||
logger.info("Released %d stale scoped lock(s) from old gateway.", _released)
|
||
except Exception:
|
||
pass
|
||
else:
|
||
hermes_home = str(get_hermes_home())
|
||
logger.error(
|
||
"Another gateway instance is already running (PID %d, HERMES_HOME=%s). "
|
||
"Use 'hermes gateway restart' to replace it, or 'hermes gateway stop' first.",
|
||
existing_pid, hermes_home,
|
||
)
|
||
print(
|
||
f"\n❌ Gateway already running (PID {existing_pid}).\n"
|
||
f" Use 'hermes gateway restart' to replace it,\n"
|
||
f" or 'hermes gateway stop' to kill it first.\n"
|
||
f" Or use 'hermes gateway run --replace' to auto-replace.\n"
|
||
)
|
||
return False
|
||
|
||
# Sync bundled skills on gateway start (fast -- skips unchanged)
|
||
try:
|
||
from tools.skills_sync import sync_skills
|
||
sync_skills(quiet=True)
|
||
except Exception:
|
||
pass
|
||
|
||
# Configure rotating file log so gateway output is persisted for debugging
|
||
log_dir = _hermes_home / 'logs'
|
||
log_dir.mkdir(parents=True, exist_ok=True)
|
||
file_handler = RotatingFileHandler(
|
||
log_dir / 'gateway.log',
|
||
maxBytes=5 * 1024 * 1024,
|
||
backupCount=3,
|
||
)
|
||
from agent.redact import RedactingFormatter
|
||
file_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
|
||
logging.getLogger().addHandler(file_handler)
|
||
logging.getLogger().setLevel(logging.INFO)
|
||
|
||
# Separate errors-only log for easy debugging
|
||
error_handler = RotatingFileHandler(
|
||
log_dir / 'errors.log',
|
||
maxBytes=2 * 1024 * 1024,
|
||
backupCount=2,
|
||
)
|
||
error_handler.setLevel(logging.WARNING)
|
||
error_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
|
||
logging.getLogger().addHandler(error_handler)
|
||
|
||
runner = GatewayRunner(config)
|
||
|
||
# Set up signal handlers
|
||
def signal_handler():
|
||
asyncio.create_task(runner.stop())
|
||
|
||
loop = asyncio.get_event_loop()
|
||
for sig in (signal.SIGINT, signal.SIGTERM):
|
||
try:
|
||
loop.add_signal_handler(sig, signal_handler)
|
||
except NotImplementedError:
|
||
pass
|
||
|
||
# Start the gateway
|
||
success = await runner.start()
|
||
if not success:
|
||
return False
|
||
if runner.should_exit_cleanly:
|
||
if runner.exit_reason:
|
||
logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
|
||
return True
|
||
|
||
# Write PID file so CLI can detect gateway is running
|
||
import atexit
|
||
from gateway.status import write_pid_file, remove_pid_file
|
||
write_pid_file()
|
||
atexit.register(remove_pid_file)
|
||
|
||
# Start background cron ticker so scheduled jobs fire automatically
|
||
cron_stop = threading.Event()
|
||
cron_thread = threading.Thread(
|
||
target=_start_cron_ticker,
|
||
args=(cron_stop,),
|
||
kwargs={"adapters": runner.adapters},
|
||
daemon=True,
|
||
name="cron-ticker",
|
||
)
|
||
cron_thread.start()
|
||
|
||
# Wait for shutdown
|
||
await runner.wait_for_shutdown()
|
||
|
||
if runner.should_exit_with_failure:
|
||
if runner.exit_reason:
|
||
logger.error("Gateway exiting with failure: %s", runner.exit_reason)
|
||
return False
|
||
|
||
# Stop cron ticker cleanly
|
||
cron_stop.set()
|
||
cron_thread.join(timeout=5)
|
||
|
||
# Close MCP server connections
|
||
try:
|
||
from tools.mcp_tool import shutdown_mcp_servers
|
||
shutdown_mcp_servers()
|
||
except Exception:
|
||
pass
|
||
|
||
return True
|
||
|
||
|
||
def main():
|
||
"""CLI entry point for the gateway."""
|
||
import argparse
|
||
|
||
parser = argparse.ArgumentParser(description="Hermes Gateway - Multi-platform messaging")
|
||
parser.add_argument("--config", "-c", help="Path to gateway config file")
|
||
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
|
||
|
||
args = parser.parse_args()
|
||
|
||
config = None
|
||
if args.config:
|
||
import json
|
||
with open(args.config, encoding="utf-8") as f:
|
||
data = json.load(f)
|
||
config = GatewayConfig.from_dict(data)
|
||
|
||
# Run the gateway - exit with code 1 if no platforms connected,
|
||
# so systemd Restart=on-failure will retry on transient errors (e.g. DNS)
|
||
success = asyncio.run(start_gateway(config))
|
||
if not success:
|
||
sys.exit(1)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|