From 95dc9aaa75630b4875f1e0dc71698558949f2059 Mon Sep 17 00:00:00 2001 From: Robin Fernandes Date: Thu, 26 Mar 2026 15:27:27 -0700 Subject: [PATCH 001/385] feat: add managed tool gateway and Nous subscription support - add managed modal and gateway-backed tool integrations\n- improve CLI setup, auth, and configuration for subscriber flows\n- expand tests and docs for managed tool support --- .env.example | 11 + agent/prompt_builder.py | 63 +++ environments/patches.py | 15 +- hermes_cli/auth.py | 83 +++ hermes_cli/config.py | 41 +- hermes_cli/main.py | 87 +++- hermes_cli/nous_subscription.py | 437 ++++++++++++++++ hermes_cli/setup.py | 256 ++++++--- hermes_cli/status.py | 25 + hermes_cli/tools_config.py | 176 ++++++- pyproject.toml | 2 +- requirements.txt | 1 + run_agent.py | 5 + tests/agent/test_prompt_builder.py | 59 ++- tests/hermes_cli/test_setup.py | 172 ++++++ tests/hermes_cli/test_setup_noninteractive.py | 47 +- .../hermes_cli/test_status_model_provider.py | 41 ++ tests/hermes_cli/test_tools_config.py | 79 +++ tests/test_cli_provider_resolution.py | 135 ++++- tests/test_run_agent.py | 5 + .../test_managed_browserbase_and_modal.py | 418 +++++++++++++++ tests/tools/test_managed_media_gateways.py | 288 ++++++++++ tests/tools/test_managed_modal_environment.py | 213 ++++++++ tests/tools/test_managed_tool_gateway.py | 70 +++ tests/tools/test_modal_snapshot_isolation.py | 188 +++++++ tests/tools/test_terminal_requirements.py | 45 +- .../tools/test_terminal_tool_requirements.py | 27 + tests/tools/test_transcription_tools.py | 4 + tests/tools/test_web_tools_config.py | 249 ++++++++- tools/browser_providers/browserbase.py | 113 +++- tools/browser_tool.py | 40 +- tools/code_execution_tool.py | 3 +- tools/environments/managed_modal.py | 282 ++++++++++ tools/environments/modal.py | 149 ++++-- tools/image_generation_tool.py | 159 +++++- tools/managed_tool_gateway.py | 160 ++++++ tools/terminal_tool.py | 107 +++- tools/tool_backend_helpers.py | 41 ++ 
tools/transcription_tools.py | 123 +++-- tools/tts_tool.py | 62 ++- tools/web_tools.py | 490 ++++++++++++------ .../docs/reference/environment-variables.md | 5 + website/docs/user-guide/configuration.md | 7 +- website/docs/user-guide/features/tools.md | 7 + 44 files changed, 4567 insertions(+), 423 deletions(-) create mode 100644 hermes_cli/nous_subscription.py create mode 100644 tests/tools/test_managed_browserbase_and_modal.py create mode 100644 tests/tools/test_managed_media_gateways.py create mode 100644 tests/tools/test_managed_modal_environment.py create mode 100644 tests/tools/test_managed_tool_gateway.py create mode 100644 tests/tools/test_modal_snapshot_isolation.py create mode 100644 tools/environments/managed_modal.py create mode 100644 tools/managed_tool_gateway.py create mode 100644 tools/tool_backend_helpers.py diff --git a/.env.example b/.env.example index d273a6966..5567ca7ef 100644 --- a/.env.example +++ b/.env.example @@ -69,6 +69,17 @@ OPENCODE_GO_API_KEY= # Get at: https://parallel.ai PARALLEL_API_KEY= +# Tool-gateway config (Nous Subscribers only; preferred when available) +# Uses your Nous Subscriber OAuth access token from the Hermes auth store by default. +# Defaults to the Nous production gateway. Override for local dev. 
+# +# Derive vendor gateway URLs from a shared domain suffix: +# TOOL_GATEWAY_DOMAIN=nousresearch.com +# TOOL_GATEWAY_SCHEME=https +# +# Override the subscriber token (defaults to ~/.hermes/auth.json): +# TOOL_GATEWAY_USER_TOKEN= + # Firecrawl API Key - Web search, extract, and crawl # Get at: https://firecrawl.dev/ FIRECRAWL_API_KEY= diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 6ed6e90a7..7a8d6d707 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -422,6 +422,69 @@ def build_skills_system_prompt( ) +def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -> str: + """Build a compact Nous subscription capability block for the system prompt.""" + try: + from hermes_cli.nous_subscription import get_nous_subscription_features + except Exception as exc: + logger.debug("Failed to import Nous subscription helper: %s", exc) + return "" + + valid_names = set(valid_tool_names or set()) + relevant_tool_names = { + "web_search", + "web_extract", + "browser_navigate", + "browser_snapshot", + "browser_click", + "browser_type", + "browser_scroll", + "browser_console", + "browser_close", + "browser_press", + "browser_get_images", + "browser_vision", + "image_generate", + "text_to_speech", + "terminal", + "process", + "execute_code", + } + + if valid_names and not (valid_names & relevant_tool_names): + return "" + + features = get_nous_subscription_features() + + def _status_line(feature) -> str: + if feature.managed_by_nous: + return f"- {feature.label}: active via Nous subscription" + if feature.active: + current = feature.current_provider or "configured provider" + return f"- {feature.label}: currently using {current}" + if feature.included_by_default and features.nous_auth_present: + return f"- {feature.label}: included with Nous subscription, not currently selected" + if feature.key == "modal" and features.nous_auth_present: + return f"- {feature.label}: optional via Nous subscription" + return f"- 
{feature.label}: not currently available" + + lines = [ + "# Nous Subscription", + "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browserbase) by default. Modal execution is optional.", + "Current capability status:", + ] + lines.extend(_status_line(feature) for feature in features.items()) + lines.extend( + [ + "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys.", + "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.", + "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.", + "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.", + ] + ) + return "\n".join(lines) + + # ========================================================================= # Context files (SOUL.md, AGENTS.md, .cursorrules) # ========================================================================= diff --git a/environments/patches.py b/environments/patches.py index aed78da6e..a5afe751e 100644 --- a/environments/patches.py +++ b/environments/patches.py @@ -11,11 +11,11 @@ Solution: _AsyncWorker thread internally, making it safe for both CLI and Atropos use. No monkey-patching is required. - This module is kept for backward compatibility — apply_patches() is now a no-op. + This module is kept for backward compatibility. apply_patches() is a no-op. Usage: Call apply_patches() once at import time (done automatically by hermes_base_env.py). - This is idempotent — calling it multiple times is safe. + This is idempotent and safe to call multiple times. """ import logging @@ -26,17 +26,10 @@ _patches_applied = False def apply_patches(): - """Apply all monkey patches needed for Atropos compatibility. 
- - Now a no-op — Modal async safety is built directly into ModalEnvironment. - Safe to call multiple times. - """ + """Apply all monkey patches needed for Atropos compatibility.""" global _patches_applied if _patches_applied: return - # Modal async-safety is now built into tools/environments/modal.py - # via the _AsyncWorker class. No monkey-patching needed. - logger.debug("apply_patches() called — no patches needed (async safety is built-in)") - + logger.debug("apply_patches() called; no patches needed (async safety is built-in)") _patches_applied = True diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 493e5a1d8..9eb867352 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -1295,6 +1295,89 @@ def _agent_key_is_usable(state: Dict[str, Any], min_ttl_seconds: int) -> bool: return not _is_expiring(state.get("agent_key_expires_at"), min_ttl_seconds) +def resolve_nous_access_token( + *, + timeout_seconds: float = 15.0, + insecure: Optional[bool] = None, + ca_bundle: Optional[str] = None, + refresh_skew_seconds: int = ACCESS_TOKEN_REFRESH_SKEW_SECONDS, +) -> str: + """Resolve a refresh-aware Nous Portal access token for managed tool gateways.""" + with _auth_store_lock(): + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "nous") + + if not state: + raise AuthError( + "Hermes is not logged into Nous Portal.", + provider="nous", + relogin_required=True, + ) + + portal_base_url = ( + _optional_base_url(state.get("portal_base_url")) + or os.getenv("HERMES_PORTAL_BASE_URL") + or os.getenv("NOUS_PORTAL_BASE_URL") + or DEFAULT_NOUS_PORTAL_URL + ).rstrip("/") + client_id = str(state.get("client_id") or DEFAULT_NOUS_CLIENT_ID) + verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state) + + access_token = state.get("access_token") + refresh_token = state.get("refresh_token") + if not isinstance(access_token, str) or not access_token: + raise AuthError( + "No access token found for Nous Portal login.", + 
provider="nous", + relogin_required=True, + ) + + if not _is_expiring(state.get("expires_at"), refresh_skew_seconds): + return access_token + + if not isinstance(refresh_token, str) or not refresh_token: + raise AuthError( + "Session expired and no refresh token is available.", + provider="nous", + relogin_required=True, + ) + + timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0) + with httpx.Client( + timeout=timeout, + headers={"Accept": "application/json"}, + verify=verify, + ) as client: + refreshed = _refresh_access_token( + client=client, + portal_base_url=portal_base_url, + client_id=client_id, + refresh_token=refresh_token, + ) + + now = datetime.now(timezone.utc) + access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in")) + state["access_token"] = refreshed["access_token"] + state["refresh_token"] = refreshed.get("refresh_token") or refresh_token + state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer" + state["scope"] = refreshed.get("scope") or state.get("scope") + state["obtained_at"] = now.isoformat() + state["expires_in"] = access_ttl + state["expires_at"] = datetime.fromtimestamp( + now.timestamp() + access_ttl, + tz=timezone.utc, + ).isoformat() + state["portal_base_url"] = portal_base_url + state["client_id"] = client_id + state["tls"] = { + "insecure": verify is False, + "ca_bundle": verify if isinstance(verify, str) else None, + } + _save_provider_state(auth_store, "nous", state) + _save_auth_store(auth_store) + return state["access_token"] + + def resolve_nous_runtime_credentials( *, min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 826e3a8bc..af13046b0 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -142,6 +142,7 @@ DEFAULT_CONFIG = { "terminal": { "backend": "local", + "modal_mode": "auto", "cwd": ".", # Use current directory "timeout": 180, # Environment variables to pass through to sandboxed 
execution @@ -407,7 +408,7 @@ DEFAULT_CONFIG = { }, # Config schema version - bump this when adding new required fields - "_config_version": 10, + "_config_version": 11, } # ============================================================================= @@ -422,6 +423,7 @@ ENV_VARS_BY_VERSION: Dict[int, List[str]] = { 5: ["WHATSAPP_ENABLED", "WHATSAPP_MODE", "WHATSAPP_ALLOWED_USERS", "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"], 10: ["TAVILY_API_KEY"], + 11: ["TERMINAL_MODAL_MODE"], } # Required environment variables with metadata for migration prompts. @@ -617,6 +619,38 @@ OPTIONAL_ENV_VARS = { "category": "tool", "advanced": True, }, + "FIRECRAWL_GATEWAY_URL": { + "description": "Exact Firecrawl tool-gateway origin override for Nous Subscribers only (optional)", + "prompt": "Firecrawl gateway URL (leave empty to derive from domain)", + "url": None, + "password": False, + "category": "tool", + "advanced": True, + }, + "TOOL_GATEWAY_DOMAIN": { + "description": "Shared tool-gateway domain suffix for Nous Subscribers only, used to derive vendor hosts, e.g. 
nousresearch.com -> firecrawl-gateway.nousresearch.com", + "prompt": "Tool-gateway domain suffix", + "url": None, + "password": False, + "category": "tool", + "advanced": True, + }, + "TOOL_GATEWAY_SCHEME": { + "description": "Shared tool-gateway URL scheme for Nous Subscribers only, used to derive vendor hosts (`https` by default, set `http` for local gateway testing)", + "prompt": "Tool-gateway URL scheme", + "url": None, + "password": False, + "category": "tool", + "advanced": True, + }, + "TOOL_GATEWAY_USER_TOKEN": { + "description": "Explicit Nous Subscriber access token for tool-gateway requests (optional; otherwise read from the Hermes auth store)", + "prompt": "Tool-gateway user token", + "url": None, + "password": True, + "category": "tool", + "advanced": True, + }, "TAVILY_API_KEY": { "description": "Tavily API key for AI-native web search, extract, and crawl", "prompt": "Tavily API key", @@ -1808,7 +1842,9 @@ def set_config_value(key: str, value: str): # Check if it's an API key (goes to .env) api_keys = [ 'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY', - 'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'TAVILY_API_KEY', + 'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', + 'FIRECRAWL_GATEWAY_URL', 'TOOL_GATEWAY_DOMAIN', 'TOOL_GATEWAY_SCHEME', + 'TOOL_GATEWAY_USER_TOKEN', 'TAVILY_API_KEY', 'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID', 'BROWSER_USE_API_KEY', 'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN', 'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY', @@ -1864,6 +1900,7 @@ def set_config_value(key: str, value: str): # config.yaml is authoritative, but terminal_tool only reads TERMINAL_ENV etc. 
_config_to_env_sync = { "terminal.backend": "TERMINAL_ENV", + "terminal.modal_mode": "TERMINAL_MODAL_MODE", "terminal.docker_image": "TERMINAL_DOCKER_IMAGE", "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE", "terminal.modal_image": "TERMINAL_MODAL_IMAGE", diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 88fbf9cd9..a920c1c1b 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -872,7 +872,7 @@ def cmd_model(args): if selected_provider == "openrouter": _model_flow_openrouter(config, current_model) elif selected_provider == "nous": - _model_flow_nous(config, current_model) + _model_flow_nous(config, current_model, args=args) elif selected_provider == "openai-codex": _model_flow_openai_codex(config, current_model) elif selected_provider == "copilot-acp": @@ -981,7 +981,7 @@ def _model_flow_openrouter(config, current_model=""): print("No change.") -def _model_flow_nous(config, current_model=""): +def _model_flow_nous(config, current_model="", args=None): """Nous Portal provider: ensure logged in, then pick model.""" from hermes_cli.auth import ( get_provider_auth_state, _prompt_model_selection, _save_model_choice, @@ -989,7 +989,11 @@ def _model_flow_nous(config, current_model=""): fetch_nous_models, AuthError, format_auth_error, _login_nous, PROVIDER_REGISTRY, ) - from hermes_cli.config import get_env_value, save_env_value + from hermes_cli.config import get_env_value, save_config, save_env_value + from hermes_cli.nous_subscription import ( + apply_nous_provider_defaults, + get_nous_subscription_explainer_lines, + ) import argparse state = get_provider_auth_state("nous") @@ -998,11 +1002,19 @@ def _model_flow_nous(config, current_model=""): print() try: mock_args = argparse.Namespace( - portal_url=None, inference_url=None, client_id=None, - scope=None, no_browser=False, timeout=15.0, - ca_bundle=None, insecure=False, + portal_url=getattr(args, "portal_url", None), + inference_url=getattr(args, "inference_url", None), + client_id=getattr(args, 
"client_id", None), + scope=getattr(args, "scope", None), + no_browser=bool(getattr(args, "no_browser", False)), + timeout=getattr(args, "timeout", None) or 15.0, + ca_bundle=getattr(args, "ca_bundle", None), + insecure=bool(getattr(args, "insecure", False)), ) _login_nous(mock_args, PROVIDER_REGISTRY["nous"]) + print() + for line in get_nous_subscription_explainer_lines(): + print(line) except SystemExit: print("Login cancelled or failed.") return @@ -1049,11 +1061,36 @@ def _model_flow_nous(config, current_model=""): # Reactivate Nous as the provider and update config inference_url = creds.get("base_url", "") _update_config_for_provider("nous", inference_url) + current_model_cfg = config.get("model") + if isinstance(current_model_cfg, dict): + model_cfg = dict(current_model_cfg) + elif isinstance(current_model_cfg, str) and current_model_cfg.strip(): + model_cfg = {"default": current_model_cfg.strip()} + else: + model_cfg = {} + model_cfg["provider"] = "nous" + model_cfg["default"] = selected + if inference_url and inference_url.strip(): + model_cfg["base_url"] = inference_url.rstrip("/") + else: + model_cfg.pop("base_url", None) + config["model"] = model_cfg # Clear any custom endpoint that might conflict if get_env_value("OPENAI_BASE_URL"): save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") + changed_defaults = apply_nous_provider_defaults(config) + save_config(config) print(f"Default model set to: {selected} (via Nous Portal)") + if "tts" in changed_defaults: + print("TTS provider set to: OpenAI TTS via your Nous subscription") + else: + current_tts = str(config.get("tts", {}).get("provider") or "edge") + if current_tts.lower() not in {"", "edge"}: + print(f"Keeping your existing TTS provider: {current_tts}") + print() + for line in get_nous_subscription_explainer_lines(): + print(line) else: print("No change.") @@ -3174,6 +3211,44 @@ For more help on a command: help="Select default model and provider", description="Interactively select 
your inference provider and default model" ) + model_parser.add_argument( + "--portal-url", + help="Portal base URL for Nous login (default: production portal)" + ) + model_parser.add_argument( + "--inference-url", + help="Inference API base URL for Nous login (default: production inference API)" + ) + model_parser.add_argument( + "--client-id", + default=None, + help="OAuth client id to use for Nous login (default: hermes-cli)" + ) + model_parser.add_argument( + "--scope", + default=None, + help="OAuth scope to request for Nous login" + ) + model_parser.add_argument( + "--no-browser", + action="store_true", + help="Do not attempt to open the browser automatically during Nous login" + ) + model_parser.add_argument( + "--timeout", + type=float, + default=15.0, + help="HTTP request timeout in seconds for Nous login (default: 15)" + ) + model_parser.add_argument( + "--ca-bundle", + help="Path to CA bundle PEM file for Nous TLS verification" + ) + model_parser.add_argument( + "--insecure", + action="store_true", + help="Disable TLS verification for Nous login (testing only)" + ) model_parser.set_defaults(func=cmd_model) # ========================================================================= diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py new file mode 100644 index 000000000..f5f8e8615 --- /dev/null +++ b/hermes_cli/nous_subscription.py @@ -0,0 +1,437 @@ +"""Helpers for Nous subscription managed-tool capabilities.""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Iterable, Optional, Set + +from hermes_cli.auth import get_nous_auth_status +from hermes_cli.config import get_env_value, load_config +from tools.managed_tool_gateway import is_managed_tool_gateway_ready +from tools.tool_backend_helpers import ( + has_direct_modal_credentials, + normalize_browser_cloud_provider, + normalize_modal_mode, + resolve_openai_audio_api_key, +) + + 
+_DEFAULT_PLATFORM_TOOLSETS = { + "cli": "hermes-cli", +} + + +@dataclass(frozen=True) +class NousFeatureState: + key: str + label: str + included_by_default: bool + available: bool + active: bool + managed_by_nous: bool + direct_override: bool + toolset_enabled: bool + current_provider: str = "" + explicit_configured: bool = False + + +@dataclass(frozen=True) +class NousSubscriptionFeatures: + subscribed: bool + nous_auth_present: bool + provider_is_nous: bool + features: Dict[str, NousFeatureState] + + @property + def web(self) -> NousFeatureState: + return self.features["web"] + + @property + def image_gen(self) -> NousFeatureState: + return self.features["image_gen"] + + @property + def tts(self) -> NousFeatureState: + return self.features["tts"] + + @property + def browser(self) -> NousFeatureState: + return self.features["browser"] + + @property + def modal(self) -> NousFeatureState: + return self.features["modal"] + + def items(self) -> Iterable[NousFeatureState]: + ordered = ("web", "image_gen", "tts", "browser", "modal") + for key in ordered: + yield self.features[key] + + +def _model_config_dict(config: Dict[str, object]) -> Dict[str, object]: + model_cfg = config.get("model") + if isinstance(model_cfg, dict): + return dict(model_cfg) + if isinstance(model_cfg, str) and model_cfg.strip(): + return {"default": model_cfg.strip()} + return {} + + +def _toolset_enabled(config: Dict[str, object], toolset_key: str) -> bool: + from toolsets import resolve_toolset + + platform_toolsets = config.get("platform_toolsets") + if not isinstance(platform_toolsets, dict) or not platform_toolsets: + platform_toolsets = {"cli": [_DEFAULT_PLATFORM_TOOLSETS["cli"]]} + + target_tools = set(resolve_toolset(toolset_key)) + if not target_tools: + return False + + for platform, raw_toolsets in platform_toolsets.items(): + if isinstance(raw_toolsets, list): + toolset_names = list(raw_toolsets) + else: + default_toolset = _DEFAULT_PLATFORM_TOOLSETS.get(platform) + toolset_names = 
[default_toolset] if default_toolset else [] + if not toolset_names: + default_toolset = _DEFAULT_PLATFORM_TOOLSETS.get(platform) + if default_toolset: + toolset_names = [default_toolset] + + available_tools: Set[str] = set() + for toolset_name in toolset_names: + if not isinstance(toolset_name, str) or not toolset_name: + continue + try: + available_tools.update(resolve_toolset(toolset_name)) + except Exception: + continue + + if target_tools and target_tools.issubset(available_tools): + return True + + return False + + +def _has_agent_browser() -> bool: + import shutil + + agent_browser_bin = shutil.which("agent-browser") + local_bin = ( + Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser" + ) + return bool(agent_browser_bin or local_bin.exists()) + + +def _browser_label(current_provider: str) -> str: + mapping = { + "browserbase": "Browserbase", + "browser-use": "Browser Use", + "local": "Local browser", + } + return mapping.get(current_provider or "local", current_provider or "Local browser") + + +def _tts_label(current_provider: str) -> str: + mapping = { + "openai": "OpenAI TTS", + "elevenlabs": "ElevenLabs", + "edge": "Edge TTS", + "neutts": "NeuTTS", + } + return mapping.get(current_provider or "edge", current_provider or "Edge TTS") +def get_nous_subscription_features( + config: Optional[Dict[str, object]] = None, +) -> NousSubscriptionFeatures: + if config is None: + config = load_config() or {} + config = dict(config) + model_cfg = _model_config_dict(config) + provider_is_nous = str(model_cfg.get("provider") or "").strip().lower() == "nous" + + try: + nous_status = get_nous_auth_status() + except Exception: + nous_status = {} + + nous_auth_present = bool(nous_status.get("logged_in")) + subscribed = provider_is_nous or nous_auth_present + + web_tool_enabled = _toolset_enabled(config, "web") + image_tool_enabled = _toolset_enabled(config, "image_gen") + tts_tool_enabled = _toolset_enabled(config, "tts") + browser_tool_enabled = 
_toolset_enabled(config, "browser") + modal_tool_enabled = _toolset_enabled(config, "terminal") + + web_backend = str(config.get("web", {}).get("backend") or "").strip().lower() if isinstance(config.get("web"), dict) else "" + tts_provider = str(config.get("tts", {}).get("provider") or "edge").strip().lower() if isinstance(config.get("tts"), dict) else "edge" + browser_provider = normalize_browser_cloud_provider( + config.get("browser", {}).get("cloud_provider") + if isinstance(config.get("browser"), dict) + else None + ) + terminal_backend = ( + str(config.get("terminal", {}).get("backend") or "local").strip().lower() + if isinstance(config.get("terminal"), dict) + else "local" + ) + modal_mode = normalize_modal_mode( + config.get("terminal", {}).get("modal_mode") + if isinstance(config.get("terminal"), dict) + else None + ) + + direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL")) + direct_parallel = bool(get_env_value("PARALLEL_API_KEY")) + direct_tavily = bool(get_env_value("TAVILY_API_KEY")) + direct_fal = bool(get_env_value("FAL_KEY")) + direct_openai_tts = bool(resolve_openai_audio_api_key()) + direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY")) + direct_browserbase = bool(get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID")) + direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY")) + direct_modal = has_direct_modal_credentials() + + managed_web_available = nous_auth_present and is_managed_tool_gateway_ready("firecrawl") + managed_image_available = nous_auth_present and is_managed_tool_gateway_ready("fal-queue") + managed_tts_available = nous_auth_present and is_managed_tool_gateway_ready("openai-audio") + managed_browser_available = nous_auth_present and is_managed_tool_gateway_ready("browserbase") + managed_modal_available = nous_auth_present and is_managed_tool_gateway_ready("modal") + + web_managed = web_backend == "firecrawl" and managed_web_available and not 
direct_firecrawl + web_active = bool( + web_tool_enabled + and ( + web_managed + or (web_backend == "firecrawl" and direct_firecrawl) + or (web_backend == "parallel" and direct_parallel) + or (web_backend == "tavily" and direct_tavily) + ) + ) + web_available = bool( + managed_web_available or direct_firecrawl or direct_parallel or direct_tavily + ) + + image_managed = image_tool_enabled and managed_image_available and not direct_fal + image_active = bool(image_tool_enabled and (image_managed or direct_fal)) + image_available = bool(managed_image_available or direct_fal) + + tts_current_provider = tts_provider or "edge" + tts_managed = ( + tts_tool_enabled + and tts_current_provider == "openai" + and managed_tts_available + and not direct_openai_tts + ) + tts_available = bool( + tts_current_provider in {"edge", "neutts"} + or (tts_current_provider == "openai" and (managed_tts_available or direct_openai_tts)) + or (tts_current_provider == "elevenlabs" and direct_elevenlabs) + ) + tts_active = bool(tts_tool_enabled and tts_available) + + browser_current_provider = browser_provider or "local" + browser_local_available = _has_agent_browser() + browser_managed = ( + browser_tool_enabled + and browser_current_provider == "browserbase" + and managed_browser_available + and not direct_browserbase + ) + browser_available = bool( + browser_local_available + or (browser_current_provider == "browserbase" and (managed_browser_available or direct_browserbase)) + or (browser_current_provider == "browser-use" and direct_browser_use) + ) + browser_active = bool( + browser_tool_enabled + and ( + (browser_current_provider == "local" and browser_local_available) + or (browser_current_provider == "browserbase" and (managed_browser_available or direct_browserbase)) + or (browser_current_provider == "browser-use" and direct_browser_use) + ) + ) + + if terminal_backend != "modal": + modal_managed = False + modal_available = True + modal_active = bool(modal_tool_enabled) + 
modal_direct_override = False + elif modal_mode == "managed": + modal_managed = bool(modal_tool_enabled and managed_modal_available) + modal_available = bool(managed_modal_available) + modal_active = bool(modal_tool_enabled and managed_modal_available) + modal_direct_override = False + elif modal_mode == "direct": + modal_managed = False + modal_available = bool(direct_modal) + modal_active = bool(modal_tool_enabled and direct_modal) + modal_direct_override = bool(direct_modal) + else: + modal_managed = bool( + modal_tool_enabled + and managed_modal_available + and not direct_modal + ) + modal_available = bool(managed_modal_available or direct_modal) + modal_active = bool(modal_tool_enabled and (direct_modal or managed_modal_available)) + modal_direct_override = bool(direct_modal) + + tts_explicit_configured = False + raw_tts_cfg = config.get("tts") + if isinstance(raw_tts_cfg, dict) and "provider" in raw_tts_cfg: + tts_explicit_configured = tts_provider not in {"", "edge"} + + features = { + "web": NousFeatureState( + key="web", + label="Web tools", + included_by_default=True, + available=web_available, + active=web_active, + managed_by_nous=web_managed, + direct_override=web_active and not web_managed, + toolset_enabled=web_tool_enabled, + current_provider=web_backend or "", + explicit_configured=bool(web_backend), + ), + "image_gen": NousFeatureState( + key="image_gen", + label="Image generation", + included_by_default=True, + available=image_available, + active=image_active, + managed_by_nous=image_managed, + direct_override=image_active and not image_managed, + toolset_enabled=image_tool_enabled, + current_provider="FAL" if direct_fal else ("Nous Subscription" if image_managed else ""), + explicit_configured=direct_fal, + ), + "tts": NousFeatureState( + key="tts", + label="OpenAI TTS", + included_by_default=True, + available=tts_available, + active=tts_active, + managed_by_nous=tts_managed, + direct_override=tts_active and not tts_managed, + 
toolset_enabled=tts_tool_enabled, + current_provider=_tts_label(tts_current_provider), + explicit_configured=tts_explicit_configured, + ), + "browser": NousFeatureState( + key="browser", + label="Browser automation", + included_by_default=True, + available=browser_available, + active=browser_active, + managed_by_nous=browser_managed, + direct_override=browser_active and not browser_managed, + toolset_enabled=browser_tool_enabled, + current_provider=_browser_label(browser_current_provider), + explicit_configured=isinstance(config.get("browser"), dict) and "cloud_provider" in config.get("browser", {}), + ), + "modal": NousFeatureState( + key="modal", + label="Modal execution", + included_by_default=False, + available=modal_available, + active=modal_active, + managed_by_nous=modal_managed, + direct_override=terminal_backend == "modal" and modal_direct_override, + toolset_enabled=modal_tool_enabled, + current_provider="Modal" if terminal_backend == "modal" else terminal_backend or "local", + explicit_configured=terminal_backend == "modal", + ), + } + + return NousSubscriptionFeatures( + subscribed=subscribed, + nous_auth_present=nous_auth_present, + provider_is_nous=provider_is_nous, + features=features, + ) + + +def get_nous_subscription_explainer_lines() -> list[str]: + return [ + "Nous subscription enables managed web tools, image generation, OpenAI TTS, and browser automation by default.", + "Those managed tools bill to your Nous subscription. 
Modal execution is optional and can bill to your subscription too.", + "Change these later with: hermes setup tools, hermes setup terminal, or hermes status.", + ] + + +def apply_nous_provider_defaults(config: Dict[str, object]) -> set[str]: + """Apply provider-level Nous defaults shared by `hermes setup` and `hermes model`.""" + features = get_nous_subscription_features(config) + if not features.provider_is_nous: + return set() + + tts_cfg = config.get("tts") + if not isinstance(tts_cfg, dict): + tts_cfg = {} + config["tts"] = tts_cfg + + current_tts = str(tts_cfg.get("provider") or "edge").strip().lower() + if current_tts not in {"", "edge"}: + return set() + + tts_cfg["provider"] = "openai" + return {"tts"} + + +def apply_nous_managed_defaults( + config: Dict[str, object], + *, + enabled_toolsets: Optional[Iterable[str]] = None, +) -> set[str]: + features = get_nous_subscription_features(config) + if not features.provider_is_nous: + return set() + + selected_toolsets = set(enabled_toolsets or ()) + changed: set[str] = set() + + web_cfg = config.get("web") + if not isinstance(web_cfg, dict): + web_cfg = {} + config["web"] = web_cfg + + tts_cfg = config.get("tts") + if not isinstance(tts_cfg, dict): + tts_cfg = {} + config["tts"] = tts_cfg + + browser_cfg = config.get("browser") + if not isinstance(browser_cfg, dict): + browser_cfg = {} + config["browser"] = browser_cfg + + if "web" in selected_toolsets and not features.web.explicit_configured and not ( + get_env_value("PARALLEL_API_KEY") + or get_env_value("TAVILY_API_KEY") + or get_env_value("FIRECRAWL_API_KEY") + or get_env_value("FIRECRAWL_API_URL") + ): + web_cfg["backend"] = "firecrawl" + changed.add("web") + + if "tts" in selected_toolsets and not features.tts.explicit_configured and not ( + resolve_openai_audio_api_key() + or get_env_value("ELEVENLABS_API_KEY") + ): + tts_cfg["provider"] = "openai" + changed.add("tts") + + if "browser" in selected_toolsets and not features.browser.explicit_configured and 
not ( + get_env_value("BROWSERBASE_API_KEY") + or get_env_value("BROWSER_USE_API_KEY") + ): + browser_cfg["cloud_provider"] = "browserbase" + changed.add("browser") + + if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"): + changed.add("image_gen") + + return changed diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 54ecbf165..59c8d92c1 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -18,6 +18,12 @@ import sys from pathlib import Path from typing import Optional, Dict, Any +from hermes_cli.nous_subscription import ( + apply_nous_provider_defaults, + get_nous_subscription_explainer_lines, + get_nous_subscription_features, +) + logger = logging.getLogger(__name__) PROJECT_ROOT = Path(__file__).parent.parent.resolve() @@ -52,6 +58,13 @@ def _set_default_model(config: Dict[str, Any], model_name: str) -> None: config["model"] = model_cfg +def _print_nous_subscription_guidance() -> None: + print() + print_header("Nous Subscription Tools") + for line in get_nous_subscription_explainer_lines(): + print_info(line) + + # Default model lists per provider — used as fallback when the live # /models endpoint can't be reached. 
_DEFAULT_PROVIDER_MODELS = { @@ -560,6 +573,7 @@ def _print_setup_summary(config: dict, hermes_home): print_header("Tool Availability Summary") tool_status = [] + subscription_features = get_nous_subscription_features(config) # Vision — use the same runtime resolver as the actual vision tools try: @@ -581,8 +595,13 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY")) # Web tools (Parallel, Firecrawl, or Tavily) - if get_env_value("PARALLEL_API_KEY") or get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL") or get_env_value("TAVILY_API_KEY"): - tool_status.append(("Web Search & Extract", True, None)) + if subscription_features.web.managed_by_nous: + tool_status.append(("Web Search & Extract (Nous subscription)", True, None)) + elif subscription_features.web.available: + label = "Web Search & Extract" + if subscription_features.web.current_provider: + label = f"Web Search & Extract ({subscription_features.web.current_provider})" + tool_status.append((label, True, None)) else: tool_status.append(("Web Search & Extract", False, "PARALLEL_API_KEY, FIRECRAWL_API_KEY, or TAVILY_API_KEY")) @@ -595,7 +614,9 @@ def _print_setup_summary(config: dict, hermes_home): Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser" ).exists() ) - if get_env_value("BROWSERBASE_API_KEY"): + if subscription_features.browser.managed_by_nous: + tool_status.append(("Browser Automation (Nous Browserbase)", True, None)) + elif subscription_features.browser.current_provider == "Browserbase" and subscription_features.browser.available: tool_status.append(("Browser Automation (Browserbase)", True, None)) elif _ab_found: tool_status.append(("Browser Automation (local)", True, None)) @@ -605,16 +626,22 @@ def _print_setup_summary(config: dict, hermes_home): ) # FAL (image generation) - if get_env_value("FAL_KEY"): + if subscription_features.image_gen.managed_by_nous: + 
tool_status.append(("Image Generation (Nous subscription)", True, None)) + elif subscription_features.image_gen.available: tool_status.append(("Image Generation", True, None)) else: tool_status.append(("Image Generation", False, "FAL_KEY")) # TTS — show configured provider tts_provider = config.get("tts", {}).get("provider", "edge") - if tts_provider == "elevenlabs" and get_env_value("ELEVENLABS_API_KEY"): + if subscription_features.tts.managed_by_nous: + tool_status.append(("Text-to-Speech (OpenAI via Nous subscription)", True, None)) + elif tts_provider == "elevenlabs" and get_env_value("ELEVENLABS_API_KEY"): tool_status.append(("Text-to-Speech (ElevenLabs)", True, None)) - elif tts_provider == "openai" and get_env_value("VOICE_TOOLS_OPENAI_KEY"): + elif tts_provider == "openai" and ( + get_env_value("VOICE_TOOLS_OPENAI_KEY") or get_env_value("OPENAI_API_KEY") + ): tool_status.append(("Text-to-Speech (OpenAI)", True, None)) elif tts_provider == "neutts": try: @@ -629,6 +656,16 @@ def _print_setup_summary(config: dict, hermes_home): else: tool_status.append(("Text-to-Speech (Edge TTS)", True, None)) + if subscription_features.modal.managed_by_nous: + tool_status.append(("Modal Execution (Nous subscription)", True, None)) + elif config.get("terminal", {}).get("backend") == "modal": + if subscription_features.modal.direct_override: + tool_status.append(("Modal Execution (direct Modal)", True, None)) + else: + tool_status.append(("Modal Execution", False, "run 'hermes setup terminal'")) + elif subscription_features.nous_auth_present: + tool_status.append(("Modal Execution (optional via Nous subscription)", True, None)) + # Tinker + WandB (RL training) if get_env_value("TINKER_API_KEY") and get_env_value("WANDB_API_KEY"): tool_status.append(("RL Training (Tinker)", True, None)) @@ -905,6 +942,7 @@ def setup_model_provider(config: dict): ) selected_base_url = None # deferred until after model selection nous_models = [] # populated if Nous login succeeds + 
nous_subscription_selected = False if provider_idx == 0: # OpenRouter selected_provider = "openrouter" @@ -1000,6 +1038,9 @@ def setup_model_provider(config: dict): except Exception as e: logger.debug("Could not fetch Nous models after login: %s", e) + nous_subscription_selected = True + _print_nous_subscription_guidance() + except SystemExit: print_warning("Nous Portal login was cancelled or failed.") print_info("You can try again later with: hermes model") @@ -1773,10 +1814,20 @@ def setup_model_provider(config: dict): if selected_provider in ("copilot-acp", "copilot", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic") and selected_base_url is not None: _update_config_for_provider(selected_provider, selected_base_url) + if selected_provider == "nous" and nous_subscription_selected: + changed_defaults = apply_nous_provider_defaults(config) + current_tts = str(config.get("tts", {}).get("provider") or "edge") + if "tts" in changed_defaults: + print_success("TTS provider set to: OpenAI TTS via your Nous subscription") + else: + print_info(f"Keeping your existing TTS provider: {current_tts}") + save_config(config) - # Offer TTS provider selection at the end of model setup - _setup_tts_provider(config) + # Offer TTS provider selection at the end of model setup, except when + # Nous subscription defaults are already being applied. 
+ if selected_provider != "nous": + _setup_tts_provider(config) # ============================================================================= @@ -1844,6 +1895,7 @@ def _setup_tts_provider(config: dict): """Interactive TTS provider selection with install flow for NeuTTS.""" tts_config = config.get("tts", {}) current_provider = tts_config.get("provider", "edge") + subscription_features = get_nous_subscription_features(config) provider_labels = { "edge": "Edge TTS", @@ -1858,20 +1910,36 @@ def _setup_tts_provider(config: dict): print_info(f"Current: {current_label}") print() - choices = [ - "Edge TTS (free, cloud-based, no setup needed)", - "ElevenLabs (premium quality, needs API key)", - "OpenAI TTS (good quality, needs API key)", - "NeuTTS (local on-device, free, ~300MB model download)", - f"Keep current ({current_label})", - ] - idx = prompt_choice("Select TTS provider:", choices, len(choices) - 1) + choices = [] + providers = [] + if subscription_features.nous_auth_present: + choices.append("Nous Subscription (managed OpenAI TTS, billed to your subscription)") + providers.append("nous-openai") + choices.extend( + [ + "Edge TTS (free, cloud-based, no setup needed)", + "ElevenLabs (premium quality, needs API key)", + "OpenAI TTS (good quality, needs API key)", + "NeuTTS (local on-device, free, ~300MB model download)", + ] + ) + providers.extend(["edge", "elevenlabs", "openai", "neutts"]) + choices.append(f"Keep current ({current_label})") + keep_current_idx = len(choices) - 1 + idx = prompt_choice("Select TTS provider:", choices, keep_current_idx) - if idx == 4: # Keep current + if idx == keep_current_idx: return - providers = ["edge", "elevenlabs", "openai", "neutts"] selected = providers[idx] + selected_via_nous = selected == "nous-openai" + if selected == "nous-openai": + selected = "openai" + print_info("OpenAI TTS will use the managed Nous gateway and bill to your subscription.") + if get_env_value("VOICE_TOOLS_OPENAI_KEY") or get_env_value("OPENAI_API_KEY"): 
+ print_warning( + "Direct OpenAI credentials are still configured and may take precedence until removed from ~/.hermes/.env." + ) if selected == "neutts": # Check if already installed @@ -1909,8 +1977,8 @@ def _setup_tts_provider(config: dict): print_warning("No API key provided. Falling back to Edge TTS.") selected = "edge" - elif selected == "openai": - existing = get_env_value("VOICE_TOOLS_OPENAI_KEY") + elif selected == "openai" and not selected_via_nous: + existing = get_env_value("VOICE_TOOLS_OPENAI_KEY") or get_env_value("OPENAI_API_KEY") if not existing: print() api_key = prompt("OpenAI API key for TTS", password=True) @@ -2065,63 +2133,99 @@ def setup_terminal_backend(config: dict): elif selected_backend == "modal": print_success("Terminal backend: Modal") print_info("Serverless cloud sandboxes. Each session gets its own container.") - print_info("Requires a Modal account: https://modal.com") + from tools.managed_tool_gateway import is_managed_tool_gateway_ready + from tools.tool_backend_helpers import normalize_modal_mode - # Check if swe-rex[modal] is installed - try: - __import__("swe_rex") - except ImportError: - print_info("Installing swe-rex[modal]...") - import subprocess - - uv_bin = shutil.which("uv") - if uv_bin: - result = subprocess.run( - [ - uv_bin, - "pip", - "install", - "--python", - sys.executable, - "swe-rex[modal]", - ], - capture_output=True, - text=True, - ) + managed_modal_available = bool( + get_nous_subscription_features(config).nous_auth_present + and is_managed_tool_gateway_ready("modal") + ) + modal_mode = normalize_modal_mode(config.get("terminal", {}).get("modal_mode")) + use_managed_modal = False + if managed_modal_available: + modal_choices = [ + "Use my Nous subscription", + "Use my own Modal account", + ] + if modal_mode == "managed": + default_modal_idx = 0 + elif modal_mode == "direct": + default_modal_idx = 1 else: - result = subprocess.run( - [sys.executable, "-m", "pip", "install", "swe-rex[modal]"], - 
capture_output=True, - text=True, - ) - if result.returncode == 0: - print_success("swe-rex[modal] installed") - else: - print_warning( - "Install failed — run manually: pip install 'swe-rex[modal]'" - ) + default_modal_idx = 1 if get_env_value("MODAL_TOKEN_ID") else 0 + modal_mode_idx = prompt_choice( + "Select how Modal execution should be billed:", + modal_choices, + default_modal_idx, + ) + use_managed_modal = modal_mode_idx == 0 - # Modal token - print() - print_info("Modal authentication:") - print_info(" Get your token at: https://modal.com/settings") - existing_token = get_env_value("MODAL_TOKEN_ID") - if existing_token: - print_info(" Modal token: already configured") - if prompt_yes_no(" Update Modal credentials?", False): + if use_managed_modal: + config["terminal"]["modal_mode"] = "managed" + print_info("Modal execution will use the managed Nous gateway and bill to your subscription.") + if get_env_value("MODAL_TOKEN_ID") or get_env_value("MODAL_TOKEN_SECRET"): + print_info( + "Direct Modal credentials are still configured, but this backend is pinned to managed mode." 
+ ) + else: + config["terminal"]["modal_mode"] = "direct" + print_info("Requires a Modal account: https://modal.com") + + # Check if swe-rex[modal] is installed + try: + __import__("swe_rex") + except ImportError: + print_info("Installing swe-rex[modal]...") + import subprocess + + uv_bin = shutil.which("uv") + if uv_bin: + result = subprocess.run( + [ + uv_bin, + "pip", + "install", + "--python", + sys.executable, + "swe-rex[modal]", + ], + capture_output=True, + text=True, + ) + else: + result = subprocess.run( + [sys.executable, "-m", "pip", "install", "swe-rex[modal]"], + capture_output=True, + text=True, + ) + if result.returncode == 0: + print_success("swe-rex[modal] installed") + else: + print_warning( + "Install failed — run manually: pip install 'swe-rex[modal]'" + ) + + # Modal token + print() + print_info("Modal authentication:") + print_info(" Get your token at: https://modal.com/settings") + existing_token = get_env_value("MODAL_TOKEN_ID") + if existing_token: + print_info(" Modal token: already configured") + if prompt_yes_no(" Update Modal credentials?", False): + token_id = prompt(" Modal Token ID", password=True) + token_secret = prompt(" Modal Token Secret", password=True) + if token_id: + save_env_value("MODAL_TOKEN_ID", token_id) + if token_secret: + save_env_value("MODAL_TOKEN_SECRET", token_secret) + else: token_id = prompt(" Modal Token ID", password=True) token_secret = prompt(" Modal Token Secret", password=True) if token_id: save_env_value("MODAL_TOKEN_ID", token_id) if token_secret: save_env_value("MODAL_TOKEN_SECRET", token_secret) - else: - token_id = prompt(" Modal Token ID", password=True) - token_secret = prompt(" Modal Token Secret", password=True) - if token_id: - save_env_value("MODAL_TOKEN_ID", token_id) - if token_secret: - save_env_value("MODAL_TOKEN_SECRET", token_secret) _prompt_container_resources(config) @@ -2235,6 +2339,8 @@ def setup_terminal_backend(config: dict): # Sync terminal backend to .env so terminal_tool picks it 
up directly. # config.yaml is the source of truth, but terminal_tool reads TERMINAL_ENV. save_env_value("TERMINAL_ENV", selected_backend) + if selected_backend == "modal": + save_env_value("TERMINAL_MODAL_MODE", config["terminal"].get("modal_mode", "auto")) save_config(config) print() print_success(f"Terminal backend set to: {selected_backend}") @@ -3089,6 +3195,17 @@ SETUP_SECTIONS = [ ("agent", "Agent Settings", setup_agent_settings), ] +# The returning-user menu intentionally omits standalone TTS because model setup +# already includes TTS selection and tools setup covers the rest of the provider +# configuration. Keep this list in the same order as the visible menu entries. +RETURNING_USER_MENU_SECTION_KEYS = [ + "model", + "terminal", + "gateway", + "tools", + "agent", +] + def run_setup_wizard(args): """Run the interactive setup wizard. @@ -3237,8 +3354,7 @@ def run_setup_wizard(args): # Individual section — map by key, not by position. # SETUP_SECTIONS includes TTS but the returning-user menu skips it, # so positional indexing (choice - 3) would dispatch the wrong section. 
- _RETURNING_USER_SECTION_KEYS = ["model", "terminal", "gateway", "tools", "agent"] - section_key = _RETURNING_USER_SECTION_KEYS[choice - 3] + section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 3] section = next((s for s in SETUP_SECTIONS if s[0] == section_key), None) if section: _, label, func = section diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 01f46b766..649d41231 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -15,6 +15,7 @@ from hermes_cli.auth import AuthError, resolve_provider from hermes_cli.colors import Colors, color from hermes_cli.config import get_env_path, get_env_value, get_hermes_home, load_config from hermes_cli.models import provider_label +from hermes_cli.nous_subscription import get_nous_subscription_features from hermes_cli.runtime_provider import resolve_requested_provider from hermes_constants import OPENROUTER_MODELS_URL @@ -186,6 +187,30 @@ def show_status(args): if codex_status.get("error") and not codex_logged_in: print(f" Error: {codex_status.get('error')}") + # ========================================================================= + # Nous Subscription Features + # ========================================================================= + features = get_nous_subscription_features(config) + print() + print(color("◆ Nous Subscription Features", Colors.CYAN, Colors.BOLD)) + if not features.nous_auth_present: + print(" Nous Portal ✗ not logged in") + else: + print(" Nous Portal ✓ managed tools available") + for feature in features.items(): + if feature.managed_by_nous: + state = "active via Nous subscription" + elif feature.active: + current = feature.current_provider or "configured provider" + state = f"active via {current}" + elif feature.included_by_default and features.nous_auth_present: + state = "included by subscription, not currently selected" + elif feature.key == "modal" and features.nous_auth_present: + state = "available via subscription (optional)" + else: + state = "not configured" 
+ print(f" {feature.label:<15} {check_mark(feature.available or feature.active or feature.managed_by_nous)} {state}") + # ========================================================================= # API-Key Providers # ========================================================================= diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index a8f349e9c..be73dfcfa 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -18,6 +18,10 @@ from hermes_cli.config import ( load_config, save_config, get_env_value, save_env_value, ) from hermes_cli.colors import Colors, color +from hermes_cli.nous_subscription import ( + apply_nous_managed_defaults, + get_nous_subscription_features, +) PROJECT_ROOT = Path(__file__).parent.parent.resolve() @@ -146,6 +150,15 @@ TOOL_CATEGORIES = { "name": "Text-to-Speech", "icon": "🔊", "providers": [ + { + "name": "Nous Subscription", + "tag": "Managed OpenAI TTS billed to your subscription", + "env_vars": [], + "tts_provider": "openai", + "requires_nous_auth": True, + "managed_nous_feature": "tts", + "override_env_vars": ["VOICE_TOOLS_OPENAI_KEY", "OPENAI_API_KEY"], + }, { "name": "Microsoft Edge TTS", "tag": "Free - no API key needed", @@ -176,6 +189,15 @@ TOOL_CATEGORIES = { "setup_note": "A free DuckDuckGo search skill is also included — skip this if you don't need a premium provider.", "icon": "🔍", "providers": [ + { + "name": "Nous Subscription", + "tag": "Managed Firecrawl billed to your subscription", + "web_backend": "firecrawl", + "env_vars": [], + "requires_nous_auth": True, + "managed_nous_feature": "web", + "override_env_vars": ["FIRECRAWL_API_KEY", "FIRECRAWL_API_URL"], + }, { "name": "Firecrawl Cloud", "tag": "Hosted service - search, extract, and crawl", @@ -214,6 +236,14 @@ TOOL_CATEGORIES = { "name": "Image Generation", "icon": "🎨", "providers": [ + { + "name": "Nous Subscription", + "tag": "Managed FAL image generation billed to your subscription", + "env_vars": [], + 
"requires_nous_auth": True, + "managed_nous_feature": "image_gen", + "override_env_vars": ["FAL_KEY"], + }, { "name": "FAL.ai", "tag": "FLUX 2 Pro with auto-upscaling", @@ -227,11 +257,21 @@ TOOL_CATEGORIES = { "name": "Browser Automation", "icon": "🌐", "providers": [ + { + "name": "Nous Subscription (Browserbase cloud)", + "tag": "Managed Browserbase billed to your subscription", + "env_vars": [], + "browser_provider": "browserbase", + "requires_nous_auth": True, + "managed_nous_feature": "browser", + "override_env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"], + "post_setup": "browserbase", + }, { "name": "Local Browser", "tag": "Free headless Chromium (no API key needed)", "env_vars": [], - "browser_provider": None, + "browser_provider": "local", "post_setup": "browserbase", # Same npm install for agent-browser }, { @@ -475,8 +515,11 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[ save_config(config) -def _toolset_has_keys(ts_key: str) -> bool: +def _toolset_has_keys(ts_key: str, config: dict = None) -> bool: """Check if a toolset's required API keys are configured.""" + if config is None: + config = load_config() + if ts_key == "vision": try: from agent.auxiliary_client import resolve_vision_provider_client @@ -486,10 +529,16 @@ def _toolset_has_keys(ts_key: str) -> bool: except Exception: return False + if ts_key in {"web", "image_gen", "tts", "browser"}: + features = get_nous_subscription_features(config) + feature = features.features.get(ts_key) + if feature and (feature.available or feature.managed_by_nous): + return True + # Check TOOL_CATEGORIES first (provider-aware) cat = TOOL_CATEGORIES.get(ts_key) if cat: - for provider in cat.get("providers", []): + for provider in _visible_providers(cat, config): env_vars = provider.get("env_vars", []) if env_vars and all(get_env_value(e["key"]) for e in env_vars): return True @@ -629,11 +678,43 @@ def _configure_toolset(ts_key: str, config: dict): 
_configure_simple_requirements(ts_key) +def _visible_providers(cat: dict, config: dict) -> list[dict]: + """Return provider entries visible for the current auth/config state.""" + features = get_nous_subscription_features(config) + visible = [] + for provider in cat.get("providers", []): + if provider.get("requires_nous_auth") and not features.nous_auth_present: + continue + visible.append(provider) + return visible + + +def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool: + """Return True when enabling this toolset should open provider setup.""" + cat = TOOL_CATEGORIES.get(ts_key) + if not cat: + return not _toolset_has_keys(ts_key, config) + + if ts_key == "tts": + tts_cfg = config.get("tts", {}) + return not isinstance(tts_cfg, dict) or "provider" not in tts_cfg + if ts_key == "web": + web_cfg = config.get("web", {}) + return not isinstance(web_cfg, dict) or "backend" not in web_cfg + if ts_key == "browser": + browser_cfg = config.get("browser", {}) + return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg + if ts_key == "image_gen": + return not get_env_value("FAL_KEY") + + return not _toolset_has_keys(ts_key, config) + + def _configure_tool_category(ts_key: str, cat: dict, config: dict): """Configure a tool category with provider selection.""" icon = cat.get("icon", "") name = cat["name"] - providers = cat["providers"] + providers = _visible_providers(cat, config) # Check Python version requirement if cat.get("requires_python"): @@ -698,6 +779,27 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict): def _is_provider_active(provider: dict, config: dict) -> bool: """Check if a provider entry matches the currently active config.""" + managed_feature = provider.get("managed_nous_feature") + if managed_feature: + features = get_nous_subscription_features(config) + feature = features.features.get(managed_feature) + if feature is None: + return False + if managed_feature == "image_gen": + return 
feature.managed_by_nous + if provider.get("tts_provider"): + return ( + feature.managed_by_nous + and config.get("tts", {}).get("provider") == provider["tts_provider"] + ) + if "browser_provider" in provider: + current = config.get("browser", {}).get("cloud_provider") + return feature.managed_by_nous and provider["browser_provider"] == current + if provider.get("web_backend"): + current = config.get("web", {}).get("backend") + return feature.managed_by_nous and current == provider["web_backend"] + return feature.managed_by_nous + if provider.get("tts_provider"): return config.get("tts", {}).get("provider") == provider["tts_provider"] if "browser_provider" in provider: @@ -724,6 +826,13 @@ def _detect_active_provider_index(providers: list, config: dict) -> int: def _configure_provider(provider: dict, config: dict): """Configure a single provider - prompt for API keys and set config.""" env_vars = provider.get("env_vars", []) + managed_feature = provider.get("managed_nous_feature") + + if provider.get("requires_nous_auth"): + features = get_nous_subscription_features(config) + if not features.nous_auth_present: + _print_warning(" Nous Subscription is only available after logging into Nous Portal.") + return # Set TTS provider in config if applicable if provider.get("tts_provider"): @@ -732,11 +841,12 @@ def _configure_provider(provider: dict, config: dict): # Set browser cloud provider in config if applicable if "browser_provider" in provider: bp = provider["browser_provider"] - if bp: + if bp == "local": + config.setdefault("browser", {})["cloud_provider"] = "local" + _print_success(" Browser set to local mode") + elif bp: config.setdefault("browser", {})["cloud_provider"] = bp _print_success(f" Browser cloud provider set to: {bp}") - else: - config.get("browser", {}).pop("cloud_provider", None) # Set web search backend in config if applicable if provider.get("web_backend"): @@ -744,7 +854,16 @@ def _configure_provider(provider: dict, config: dict): 
_print_success(f" Web backend set to: {provider['web_backend']}") if not env_vars: + if provider.get("post_setup"): + _run_post_setup(provider["post_setup"]) _print_success(f" {provider['name']} - no configuration needed!") + if managed_feature: + _print_info(" Requests for this tool will be billed to your Nous subscription.") + override_envs = provider.get("override_env_vars", []) + if any(get_env_value(env_var) for env_var in override_envs): + _print_warning( + " Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env." + ) return # Prompt for each required env var @@ -847,7 +966,7 @@ def _reconfigure_tool(config: dict): cat = TOOL_CATEGORIES.get(ts_key) reqs = TOOLSET_ENV_REQUIREMENTS.get(ts_key) if cat or reqs: - if _toolset_has_keys(ts_key): + if _toolset_has_keys(ts_key, config): configurable.append((ts_key, ts_label)) if not configurable: @@ -877,7 +996,7 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict): """Reconfigure a tool category - provider selection + API key update.""" icon = cat.get("icon", "") name = cat["name"] - providers = cat["providers"] + providers = _visible_providers(cat, config) if len(providers) == 1: provider = providers[0] @@ -912,6 +1031,13 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict): def _reconfigure_provider(provider: dict, config: dict): """Reconfigure a provider - update API keys.""" env_vars = provider.get("env_vars", []) + managed_feature = provider.get("managed_nous_feature") + + if provider.get("requires_nous_auth"): + features = get_nous_subscription_features(config) + if not features.nous_auth_present: + _print_warning(" Nous Subscription is only available after logging into Nous Portal.") + return if provider.get("tts_provider"): config.setdefault("tts", {})["provider"] = provider["tts_provider"] @@ -919,12 +1045,12 @@ def _reconfigure_provider(provider: dict, config: dict): if "browser_provider" in 
provider: bp = provider["browser_provider"] - if bp: + if bp == "local": + config.setdefault("browser", {})["cloud_provider"] = "local" + _print_success(" Browser set to local mode") + elif bp: config.setdefault("browser", {})["cloud_provider"] = bp _print_success(f" Browser cloud provider set to: {bp}") - else: - config.get("browser", {}).pop("cloud_provider", None) - _print_success(" Browser set to local mode") # Set web search backend in config if applicable if provider.get("web_backend"): @@ -932,7 +1058,16 @@ def _reconfigure_provider(provider: dict, config: dict): _print_success(f" Web backend set to: {provider['web_backend']}") if not env_vars: + if provider.get("post_setup"): + _run_post_setup(provider["post_setup"]) _print_success(f" {provider['name']} - no configuration needed!") + if managed_feature: + _print_info(" Requests for this tool will be billed to your Nous subscription.") + override_envs = provider.get("override_env_vars", []) + if any(get_env_value(env_var) for env_var in override_envs): + _print_warning( + " Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env." + ) return for var in env_vars: @@ -1041,13 +1176,22 @@ def tools_command(args=None, first_install: bool = False, config: dict = None): label = next((l for k, l, _ in _get_effective_configurable_toolsets() if k == ts), ts) print(color(f" - {label}", Colors.RED)) + auto_configured = apply_nous_managed_defaults( + config, + enabled_toolsets=new_enabled, + ) + for ts_key in sorted(auto_configured): + label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key) + print(color(f" ✓ {label}: using your Nous subscription defaults", Colors.GREEN)) + # Walk through ALL selected tools that have provider options or # need API keys. This ensures browser (Local vs Browserbase), # TTS (Edge vs OpenAI vs ElevenLabs), etc. are shown even when # a free provider exists. 
to_configure = [ ts_key for ts_key in sorted(new_enabled) - if TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key) + if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)) + and ts_key not in auto_configured ] if to_configure: @@ -1140,7 +1284,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None): # Configure API keys for newly enabled tools for ts_key in sorted(added): if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)): - if not _toolset_has_keys(ts_key): + if _toolset_needs_configuration_prompt(ts_key, config): _configure_toolset(ts_key, config) _save_platform_tools(config, pk, new_enabled) save_config(config) @@ -1180,7 +1324,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None): # Configure newly enabled toolsets that need API keys for ts_key in sorted(added): if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)): - if not _toolset_has_keys(ts_key): + if _toolset_needs_configuration_prompt(ts_key, config): _configure_toolset(ts_key, config) _save_platform_tools(config, pkey, new_enabled) diff --git a/pyproject.toml b/pyproject.toml index 8ba6d1f0c..bd5fa6481 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ dependencies = [ [project.optional-dependencies] modal = ["swe-rex[modal]>=1.4.0,<2"] daytona = ["daytona>=0.148.0,<1"] -dev = ["pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"] +dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"] messaging = ["python-telegram-bot>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"] cron = ["croniter>=6.0.0,<7"] slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"] diff --git a/requirements.txt b/requirements.txt index 6e65cc822..3709b1a63 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,6 +15,7 
@@ requests jinja2 pydantic>=2.0 PyJWT[crypto] +debugpy # Web tools firecrawl-py diff --git a/run_agent.py b/run_agent.py index 3ad5b3ec4..1a6d57876 100644 --- a/run_agent.py +++ b/run_agent.py @@ -74,6 +74,7 @@ from hermes_constants import OPENROUTER_BASE_URL from agent.prompt_builder import ( DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS, MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE, + build_nous_subscription_prompt, ) from agent.model_metadata import ( fetch_model_metadata, @@ -2388,6 +2389,10 @@ class AIAgent: if tool_guidance: prompt_parts.append(" ".join(tool_guidance)) + nous_subscription_prompt = build_nous_subscription_prompt(self.valid_tool_names) + if nous_subscription_prompt: + prompt_parts.append(nous_subscription_prompt) + # Honcho CLI awareness: tell Hermes about its own management commands # so it can refer the user to them rather than reinventing answers. if self._honcho and self._honcho_session_key: diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index 37fddcc9c..b4d038fc0 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -5,6 +5,8 @@ import importlib import logging import sys +import pytest + from agent.prompt_builder import ( _scan_context_content, _truncate_content, @@ -15,6 +17,7 @@ from agent.prompt_builder import ( _find_git_root, _strip_yaml_frontmatter, build_skills_system_prompt, + build_nous_subscription_prompt, build_context_files_prompt, CONTEXT_FILE_MAX_CHARS, DEFAULT_AGENT_IDENTITY, @@ -22,6 +25,7 @@ from agent.prompt_builder import ( SESSION_SEARCH_GUIDANCE, PLATFORM_HINTS, ) +from hermes_cli.nous_subscription import NousFeatureState, NousSubscriptionFeatures # ========================================================================= @@ -395,6 +399,53 @@ class TestBuildSkillsSystemPrompt: assert "backend-skill" in result +class TestBuildNousSubscriptionPrompt: + def test_includes_active_subscription_features(self, monkeypatch): + 
monkeypatch.setattr( + "hermes_cli.nous_subscription.get_nous_subscription_features", + lambda config=None: NousSubscriptionFeatures( + subscribed=True, + nous_auth_present=True, + provider_is_nous=True, + features={ + "web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"), + "image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"), + "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"), + "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browserbase"), + "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"), + }, + ), + ) + + prompt = build_nous_subscription_prompt({"web_search", "browser_navigate"}) + + assert "Browserbase" in prompt + assert "Modal execution is optional" in prompt + assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys" in prompt + + def test_non_subscriber_prompt_includes_relevant_upgrade_guidance(self, monkeypatch): + monkeypatch.setattr( + "hermes_cli.nous_subscription.get_nous_subscription_features", + lambda config=None: NousSubscriptionFeatures( + subscribed=False, + nous_auth_present=False, + provider_is_nous=False, + features={ + "web": NousFeatureState("web", "Web tools", True, False, False, False, False, True, ""), + "image_gen": NousFeatureState("image_gen", "Image generation", True, False, False, False, False, True, ""), + "tts": NousFeatureState("tts", "OpenAI TTS", True, False, False, False, False, True, ""), + "browser": NousFeatureState("browser", "Browser automation", True, False, False, False, False, True, ""), + "modal": NousFeatureState("modal", "Modal execution", False, False, False, False, False, True, ""), + }, + ), + ) + + prompt = build_nous_subscription_prompt({"image_generate"}) + + assert "suggest Nous subscription as one option" in prompt + assert 
"Do not mention subscription unless" in prompt + + # ========================================================================= # Context files prompt builder # ========================================================================= @@ -562,8 +613,12 @@ class TestBuildContextFilesPrompt: assert "Lowercase claude rules" in result def test_claude_md_uppercase_takes_priority(self, tmp_path): - (tmp_path / "CLAUDE.md").write_text("From uppercase.") - (tmp_path / "claude.md").write_text("From lowercase.") + uppercase = tmp_path / "CLAUDE.md" + lowercase = tmp_path / "claude.md" + uppercase.write_text("From uppercase.") + lowercase.write_text("From lowercase.") + if uppercase.samefile(lowercase): + pytest.skip("filesystem is case-insensitive") result = build_context_files_prompt(cwd=str(tmp_path)) assert "From uppercase" in result assert "From lowercase" not in result diff --git a/tests/hermes_cli/test_setup.py b/tests/hermes_cli/test_setup.py index a4c85ba2b..66af7faf0 100644 --- a/tests/hermes_cli/test_setup.py +++ b/tests/hermes_cli/test_setup.py @@ -1,4 +1,6 @@ import json +import sys +import types from hermes_cli.auth import _update_config_for_provider, get_active_provider from hermes_cli.config import load_config, save_config @@ -136,6 +138,8 @@ def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, mon def fake_prompt_choice(question, choices, default=0): if question == "Select your inference provider:": return 2 # OpenAI Codex + if question == "Configure vision:": + return len(choices) - 1 if question == "Select default model:": return 0 tts_idx = _maybe_keep_current_tts(question, choices) @@ -176,3 +180,171 @@ def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, mon assert reloaded["model"]["provider"] == "openai-codex" assert reloaded["model"]["default"] == "gpt-5.2-codex" assert reloaded["model"]["base_url"] == "https://chatgpt.com/backend-api/codex" + + +def 
test_nous_setup_sets_managed_openai_tts_when_unconfigured(tmp_path, monkeypatch, capsys): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + + config = load_config() + + def fake_prompt_choice(question, choices, default=0): + if question == "Select your inference provider:": + return 1 + if question == "Configure vision:": + return len(choices) - 1 + if question == "Select default model:": + return len(choices) - 1 + raise AssertionError(f"Unexpected prompt_choice call: {question}") + + monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) + monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "") + monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) + + def _fake_login_nous(*args, **kwargs): + auth_path = tmp_path / "auth.json" + auth_path.write_text(json.dumps({"active_provider": "nous", "providers": {"nous": {"access_token": "nous-token"}}})) + _update_config_for_provider("nous", "https://inference.example.com/v1") + + monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_login_nous) + monkeypatch.setattr( + "hermes_cli.auth.resolve_nous_runtime_credentials", + lambda *args, **kwargs: { + "base_url": "https://inference.example.com/v1", + "api_key": "nous-key", + }, + ) + monkeypatch.setattr( + "hermes_cli.auth.fetch_nous_models", + lambda *args, **kwargs: ["gemini-3-flash"], + ) + + setup_model_provider(config) + + out = capsys.readouterr().out + assert config["tts"]["provider"] == "openai" + assert "Nous subscription enables managed web tools" in out + assert "OpenAI TTS via your Nous subscription" in out + + +def test_nous_setup_preserves_existing_tts_provider(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + + config = load_config() + config["tts"] = {"provider": "elevenlabs"} + + def fake_prompt_choice(question, choices, default=0): + if question == "Select your inference provider:": + return 1 
+ if question == "Configure vision:": + return len(choices) - 1 + if question == "Select default model:": + return len(choices) - 1 + raise AssertionError(f"Unexpected prompt_choice call: {question}") + + monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) + monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "") + monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) + monkeypatch.setattr( + "hermes_cli.auth._login_nous", + lambda *args, **kwargs: (tmp_path / "auth.json").write_text( + json.dumps({"active_provider": "nous", "providers": {"nous": {"access_token": "nous-token"}}}) + ), + ) + monkeypatch.setattr( + "hermes_cli.auth.resolve_nous_runtime_credentials", + lambda *args, **kwargs: { + "base_url": "https://inference.example.com/v1", + "api_key": "nous-key", + }, + ) + monkeypatch.setattr( + "hermes_cli.auth.fetch_nous_models", + lambda *args, **kwargs: ["gemini-3-flash"], + ) + + setup_model_provider(config) + + assert config["tts"]["provider"] == "elevenlabs" + + +def test_modal_setup_can_use_nous_subscription_without_modal_creds(tmp_path, monkeypatch, capsys): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + config = load_config() + + def fake_prompt_choice(question, choices, default=0): + if question == "Select terminal backend:": + return 2 + if question == "Select how Modal execution should be billed:": + return 0 + raise AssertionError(f"Unexpected prompt_choice call: {question}") + + def fake_prompt(message, *args, **kwargs): + assert "Modal Token" not in message + raise AssertionError(f"Unexpected prompt call: {message}") + + monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) + monkeypatch.setattr("hermes_cli.setup.prompt", fake_prompt) + monkeypatch.setattr("hermes_cli.setup._prompt_container_resources", lambda config: None) + monkeypatch.setattr( + "hermes_cli.setup.get_nous_subscription_features", + lambda config: type("Features", (), 
{"nous_auth_present": True})(), + ) + monkeypatch.setitem( + sys.modules, + "tools.managed_tool_gateway", + types.SimpleNamespace( + is_managed_tool_gateway_ready=lambda vendor: vendor == "modal", + resolve_managed_tool_gateway=lambda vendor: None, + ), + ) + + from hermes_cli.setup import setup_terminal_backend + + setup_terminal_backend(config) + + out = capsys.readouterr().out + assert config["terminal"]["backend"] == "modal" + assert config["terminal"]["modal_mode"] == "managed" + assert "bill to your subscription" in out + + +def test_modal_setup_persists_direct_mode_when_user_chooses_their_own_account(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.delenv("MODAL_TOKEN_ID", raising=False) + monkeypatch.delenv("MODAL_TOKEN_SECRET", raising=False) + config = load_config() + + def fake_prompt_choice(question, choices, default=0): + if question == "Select terminal backend:": + return 2 + if question == "Select how Modal execution should be billed:": + return 1 + raise AssertionError(f"Unexpected prompt_choice call: {question}") + + prompt_values = iter(["token-id", "token-secret", ""]) + + monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) + monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(prompt_values)) + monkeypatch.setattr("hermes_cli.setup._prompt_container_resources", lambda config: None) + monkeypatch.setattr( + "hermes_cli.setup.get_nous_subscription_features", + lambda config: type("Features", (), {"nous_auth_present": True})(), + ) + monkeypatch.setitem( + sys.modules, + "tools.managed_tool_gateway", + types.SimpleNamespace( + is_managed_tool_gateway_ready=lambda vendor: vendor == "modal", + resolve_managed_tool_gateway=lambda vendor: None, + ), + ) + monkeypatch.setitem(sys.modules, "swe_rex", object()) + + from hermes_cli.setup import setup_terminal_backend + + setup_terminal_backend(config) + + assert config["terminal"]["backend"] == "modal" + assert 
config["terminal"]["modal_mode"] == "direct" diff --git a/tests/hermes_cli/test_setup_noninteractive.py b/tests/hermes_cli/test_setup_noninteractive.py index 4e76c013d..ba1514723 100644 --- a/tests/hermes_cli/test_setup_noninteractive.py +++ b/tests/hermes_cli/test_setup_noninteractive.py @@ -1,7 +1,7 @@ """Tests for non-interactive setup and first-run headless behavior.""" from argparse import Namespace -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest @@ -92,3 +92,48 @@ class TestNonInteractiveSetup: mock_setup.assert_not_called() out = capsys.readouterr().out assert "hermes config set model.provider custom" in out + + def test_returning_user_terminal_menu_choice_dispatches_terminal_section(self, tmp_path): + """Returning-user menu should map Terminal Backend to the terminal setup, not TTS.""" + from hermes_cli import setup as setup_mod + + args = _make_setup_args() + config = {} + model_section = MagicMock() + tts_section = MagicMock() + terminal_section = MagicMock() + gateway_section = MagicMock() + tools_section = MagicMock() + agent_section = MagicMock() + + with ( + patch.object(setup_mod, "ensure_hermes_home"), + patch.object(setup_mod, "load_config", return_value=config), + patch.object(setup_mod, "get_hermes_home", return_value=tmp_path), + patch.object(setup_mod, "is_interactive_stdin", return_value=True), + patch.object( + setup_mod, + "get_env_value", + side_effect=lambda key: "sk-test" if key == "OPENROUTER_API_KEY" else "", + ), + patch("hermes_cli.auth.get_active_provider", return_value=None), + patch.object(setup_mod, "prompt_choice", return_value=4), + patch.object( + setup_mod, + "SETUP_SECTIONS", + [ + ("model", "Model & Provider", model_section), + ("tts", "Text-to-Speech", tts_section), + ("terminal", "Terminal Backend", terminal_section), + ("gateway", "Messaging Platforms (Gateway)", gateway_section), + ("tools", "Tools", tools_section), + ("agent", "Agent Settings", agent_section), + ], + ), + 
patch.object(setup_mod, "save_config"), + patch.object(setup_mod, "_print_setup_summary"), + ): + setup_mod.run_setup_wizard(args) + + terminal_section.assert_called_once_with(config) + tts_section.assert_not_called() diff --git a/tests/hermes_cli/test_status_model_provider.py b/tests/hermes_cli/test_status_model_provider.py index 3a9ce17a0..2056aac4f 100644 --- a/tests/hermes_cli/test_status_model_provider.py +++ b/tests/hermes_cli/test_status_model_provider.py @@ -2,6 +2,8 @@ from types import SimpleNamespace +from hermes_cli.nous_subscription import NousFeatureState, NousSubscriptionFeatures + def _patch_common_status_deps(monkeypatch, status_mod, tmp_path, *, openai_base_url=""): import hermes_cli.auth as auth_mod @@ -59,3 +61,42 @@ def test_show_status_displays_legacy_string_model_and_custom_endpoint(monkeypatc out = capsys.readouterr().out assert "Model: qwen3:latest" in out assert "Provider: Custom endpoint" in out + + +def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path): + from hermes_cli import status as status_mod + + _patch_common_status_deps(monkeypatch, status_mod, tmp_path) + monkeypatch.setattr( + status_mod, + "load_config", + lambda: {"model": {"default": "claude-opus-4-6", "provider": "nous"}}, + raising=False, + ) + monkeypatch.setattr(status_mod, "resolve_requested_provider", lambda requested=None: "nous", raising=False) + monkeypatch.setattr(status_mod, "resolve_provider", lambda requested=None, **kwargs: "nous", raising=False) + monkeypatch.setattr(status_mod, "provider_label", lambda provider: "Nous Portal", raising=False) + monkeypatch.setattr( + status_mod, + "get_nous_subscription_features", + lambda config: NousSubscriptionFeatures( + subscribed=True, + nous_auth_present=True, + provider_is_nous=True, + features={ + "web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"), + "image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, 
"Nous Subscription"), + "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"), + "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browserbase"), + "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"), + }, + ), + raising=False, + ) + + status_mod.show_status(SimpleNamespace(all=False, deep=False)) + + out = capsys.readouterr().out + assert "Nous Subscription Features" in out + assert "Browser automation" in out + assert "active via Nous subscription" in out diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py index 676305dbd..ae3455cb8 100644 --- a/tests/hermes_cli/test_tools_config.py +++ b/tests/hermes_cli/test_tools_config.py @@ -3,10 +3,14 @@ from unittest.mock import patch from hermes_cli.tools_config import ( + _configure_provider, _get_platform_tools, _platform_toolset_summary, _save_platform_tools, _toolset_has_keys, + TOOL_CATEGORIES, + _visible_providers, + tools_command, ) @@ -45,6 +49,10 @@ def test_toolset_has_keys_for_vision_accepts_codex_auth(tmp_path, monkeypatch): monkeypatch.delenv("OPENAI_API_KEY", raising=False) monkeypatch.delenv("AUXILIARY_VISION_PROVIDER", raising=False) monkeypatch.delenv("CONTEXT_VISION_PROVIDER", raising=False) + monkeypatch.setattr( + "agent.auxiliary_client.resolve_vision_provider_client", + lambda: ("openai-codex", object(), "gpt-4.1"), + ) assert _toolset_has_keys("vision") is True @@ -204,3 +212,74 @@ def test_save_platform_tools_still_preserves_mcp_with_platform_default_present() # Deselected configurable toolset removed assert "terminal" not in saved + + +def test_visible_providers_include_nous_subscription_when_logged_in(monkeypatch): + config = {"model": {"provider": "nous"}} + + monkeypatch.setattr( + "hermes_cli.nous_subscription.get_nous_auth_status", + lambda: {"logged_in": True}, + ) + + providers = 
_visible_providers(TOOL_CATEGORIES["browser"], config) + + assert providers[0]["name"].startswith("Nous Subscription") + + +def test_local_browser_provider_is_saved_explicitly(monkeypatch): + config = {} + local_provider = next( + provider + for provider in TOOL_CATEGORIES["browser"]["providers"] + if provider.get("browser_provider") == "local" + ) + monkeypatch.setattr("hermes_cli.tools_config._run_post_setup", lambda key: None) + + _configure_provider(local_provider, config) + + assert config["browser"]["cloud_provider"] == "local" + + +def test_first_install_nous_auto_configures_managed_defaults(monkeypatch): + config = { + "model": {"provider": "nous"}, + "platform_toolsets": {"cli": []}, + } + for env_var in ( + "VOICE_TOOLS_OPENAI_KEY", + "OPENAI_API_KEY", + "ELEVENLABS_API_KEY", + "FIRECRAWL_API_KEY", + "FIRECRAWL_API_URL", + "TAVILY_API_KEY", + "PARALLEL_API_KEY", + "BROWSERBASE_API_KEY", + "BROWSERBASE_PROJECT_ID", + "BROWSER_USE_API_KEY", + "FAL_KEY", + ): + monkeypatch.delenv(env_var, raising=False) + + monkeypatch.setattr( + "hermes_cli.tools_config._prompt_toolset_checklist", + lambda *args, **kwargs: {"web", "image_gen", "tts", "browser"}, + ) + monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None) + monkeypatch.setattr( + "hermes_cli.nous_subscription.get_nous_auth_status", + lambda: {"logged_in": True}, + ) + + configured = [] + monkeypatch.setattr( + "hermes_cli.tools_config._configure_toolset", + lambda ts_key, config: configured.append(ts_key), + ) + + tools_command(first_install=True, config=config) + + assert config["web"]["backend"] == "firecrawl" + assert config["tts"]["provider"] == "openai" + assert config["browser"]["cloud_provider"] == "browserbase" + assert configured == [] diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 667cd33a6..65bcdf5c7 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -78,6 +78,13 @@ def 
_install_prompt_toolkit_stubs(): def _import_cli(): + for name in list(sys.modules): + if name == "cli" or name == "run_agent" or name == "tools" or name.startswith("tools."): + sys.modules.pop(name, None) + + if "firecrawl" not in sys.modules: + sys.modules["firecrawl"] = types.SimpleNamespace(Firecrawl=object) + try: importlib.import_module("prompt_toolkit") except ModuleNotFoundError: @@ -269,6 +276,81 @@ def test_codex_provider_replaces_incompatible_default_model(monkeypatch): assert shell.model == "gpt-5.2-codex" +def test_model_flow_nous_prints_subscription_guidance_without_mutating_explicit_tts(monkeypatch, capsys): + config = { + "model": {"provider": "nous", "default": "claude-opus-4-6"}, + "tts": {"provider": "elevenlabs"}, + "browser": {"cloud_provider": "browser-use"}, + } + + monkeypatch.setattr( + "hermes_cli.auth.get_provider_auth_state", + lambda provider: {"access_token": "nous-token"}, + ) + monkeypatch.setattr( + "hermes_cli.auth.resolve_nous_runtime_credentials", + lambda *args, **kwargs: { + "base_url": "https://inference.example.com/v1", + "api_key": "nous-key", + }, + ) + monkeypatch.setattr( + "hermes_cli.auth.fetch_nous_models", + lambda *args, **kwargs: ["claude-opus-4-6"], + ) + monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="": "claude-opus-4-6") + monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None) + monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None) + monkeypatch.setattr( + "hermes_cli.nous_subscription.get_nous_subscription_explainer_lines", + lambda: ["Nous subscription enables managed web tools."], + ) + + hermes_main._model_flow_nous(config, current_model="claude-opus-4-6") + + out = capsys.readouterr().out + assert "Nous subscription enables managed web tools." 
in out + assert config["tts"]["provider"] == "elevenlabs" + assert config["browser"]["cloud_provider"] == "browser-use" + + +def test_model_flow_nous_applies_managed_tts_default_when_unconfigured(monkeypatch, capsys): + config = { + "model": {"provider": "nous", "default": "claude-opus-4-6"}, + "tts": {"provider": "edge"}, + } + + monkeypatch.setattr( + "hermes_cli.auth.get_provider_auth_state", + lambda provider: {"access_token": "nous-token"}, + ) + monkeypatch.setattr( + "hermes_cli.auth.resolve_nous_runtime_credentials", + lambda *args, **kwargs: { + "base_url": "https://inference.example.com/v1", + "api_key": "nous-key", + }, + ) + monkeypatch.setattr( + "hermes_cli.auth.fetch_nous_models", + lambda *args, **kwargs: ["claude-opus-4-6"], + ) + monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="": "claude-opus-4-6") + monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None) + monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None) + monkeypatch.setattr( + "hermes_cli.nous_subscription.get_nous_subscription_explainer_lines", + lambda: ["Nous subscription enables managed web tools."], + ) + + hermes_main._model_flow_nous(config, current_model="claude-opus-4-6") + + out = capsys.readouterr().out + assert "Nous subscription enables managed web tools." in out + assert "OpenAI TTS via your Nous subscription" in out + assert config["tts"]["provider"] == "openai" + + def test_codex_provider_uses_config_model(monkeypatch): """Model comes from config.yaml, not LLM_MODEL env var. 
Config.yaml is the single source of truth to avoid multi-agent conflicts.""" @@ -468,4 +550,55 @@ def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys): assert "Saving the working base URL instead" in output assert saved_env["OPENAI_BASE_URL"] == "http://localhost:8000/v1" assert saved_env["OPENAI_API_KEY"] == "local-key" - assert saved_env["MODEL"] == "llm" \ No newline at end of file + assert saved_env["MODEL"] == "llm" + + +def test_cmd_model_forwards_nous_login_tls_options(monkeypatch): + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"model": {"default": "gpt-5", "provider": "nous"}}, + ) + monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None) + monkeypatch.setattr("hermes_cli.config.get_env_value", lambda key: "") + monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: None) + monkeypatch.setattr("hermes_cli.auth.resolve_provider", lambda requested, **kwargs: "nous") + monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider_id: None) + monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: 0) + + captured = {} + + def _fake_login(login_args, provider_config): + captured["portal_url"] = login_args.portal_url + captured["inference_url"] = login_args.inference_url + captured["client_id"] = login_args.client_id + captured["scope"] = login_args.scope + captured["no_browser"] = login_args.no_browser + captured["timeout"] = login_args.timeout + captured["ca_bundle"] = login_args.ca_bundle + captured["insecure"] = login_args.insecure + + monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_login) + + hermes_main.cmd_model( + SimpleNamespace( + portal_url="https://portal.nousresearch.com", + inference_url="https://inference.nousresearch.com/v1", + client_id="hermes-local", + scope="openid profile", + no_browser=True, + timeout=7.5, + ca_bundle="/tmp/local-ca.pem", + insecure=True, + ) + ) + + assert captured == { + "portal_url": 
"https://portal.nousresearch.com", + "inference_url": "https://inference.nousresearch.com/v1", + "client_id": "hermes-local", + "scope": "openid profile", + "no_browser": True, + "timeout": 7.5, + "ca_bundle": "/tmp/local-ca.pem", + "insecure": True, + } diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index d961244f3..cfed4afbc 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -584,6 +584,11 @@ class TestBuildSystemPrompt: # Should contain current date info like "Conversation started:" assert "Conversation started:" in prompt + def test_includes_nous_subscription_prompt(self, agent, monkeypatch): + monkeypatch.setattr(run_agent, "build_nous_subscription_prompt", lambda tool_names: "NOUS SUBSCRIPTION BLOCK") + prompt = agent._build_system_prompt() + assert "NOUS SUBSCRIPTION BLOCK" in prompt + class TestInvalidateSystemPrompt: def test_clears_cache(self, agent): diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browserbase_and_modal.py new file mode 100644 index 000000000..3d97a4373 --- /dev/null +++ b/tests/tools/test_managed_browserbase_and_modal.py @@ -0,0 +1,418 @@ +import os +import sys +import tempfile +import threading +import types +from importlib.util import module_from_spec, spec_from_file_location +from pathlib import Path +from unittest.mock import patch + +import pytest + + +TOOLS_DIR = Path(__file__).resolve().parents[2] / "tools" + + +def _load_tool_module(module_name: str, filename: str): + spec = spec_from_file_location(module_name, TOOLS_DIR / filename) + assert spec and spec.loader + module = module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +def _reset_modules(prefixes: tuple[str, ...]): + for name in list(sys.modules): + if name.startswith(prefixes): + sys.modules.pop(name, None) + + +@pytest.fixture(autouse=True) +def _restore_tool_and_agent_modules(): + original_modules = { + name: module + for name, module 
in sys.modules.items() + if name == "tools" + or name.startswith("tools.") + or name == "agent" + or name.startswith("agent.") + } + try: + yield + finally: + _reset_modules(("tools", "agent")) + sys.modules.update(original_modules) + + +def _install_fake_tools_package(): + _reset_modules(("tools", "agent")) + + tools_package = types.ModuleType("tools") + tools_package.__path__ = [str(TOOLS_DIR)] # type: ignore[attr-defined] + sys.modules["tools"] = tools_package + + env_package = types.ModuleType("tools.environments") + env_package.__path__ = [str(TOOLS_DIR / "environments")] # type: ignore[attr-defined] + sys.modules["tools.environments"] = env_package + + agent_package = types.ModuleType("agent") + agent_package.__path__ = [] # type: ignore[attr-defined] + sys.modules["agent"] = agent_package + sys.modules["agent.auxiliary_client"] = types.SimpleNamespace( + call_llm=lambda *args, **kwargs: "", + ) + + sys.modules["tools.managed_tool_gateway"] = _load_tool_module( + "tools.managed_tool_gateway", + "managed_tool_gateway.py", + ) + + interrupt_event = threading.Event() + sys.modules["tools.interrupt"] = types.SimpleNamespace( + set_interrupt=lambda value=True: interrupt_event.set() if value else interrupt_event.clear(), + is_interrupted=lambda: interrupt_event.is_set(), + _interrupt_event=interrupt_event, + ) + sys.modules["tools.approval"] = types.SimpleNamespace( + detect_dangerous_command=lambda *args, **kwargs: None, + check_dangerous_command=lambda *args, **kwargs: {"approved": True}, + check_all_command_guards=lambda *args, **kwargs: {"approved": True}, + load_permanent_allowlist=lambda *args, **kwargs: [], + DANGEROUS_PATTERNS=[], + ) + + class _Registry: + def register(self, **kwargs): + return None + + sys.modules["tools.registry"] = types.SimpleNamespace(registry=_Registry()) + + class _DummyEnvironment: + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + def cleanup(self): + return None + + 
sys.modules["tools.environments.base"] = types.SimpleNamespace(BaseEnvironment=_DummyEnvironment) + sys.modules["tools.environments.local"] = types.SimpleNamespace(LocalEnvironment=_DummyEnvironment) + sys.modules["tools.environments.singularity"] = types.SimpleNamespace( + _get_scratch_dir=lambda: Path(tempfile.gettempdir()), + SingularityEnvironment=_DummyEnvironment, + ) + sys.modules["tools.environments.ssh"] = types.SimpleNamespace(SSHEnvironment=_DummyEnvironment) + sys.modules["tools.environments.docker"] = types.SimpleNamespace(DockerEnvironment=_DummyEnvironment) + sys.modules["tools.environments.modal"] = types.SimpleNamespace(ModalEnvironment=_DummyEnvironment) + sys.modules["tools.environments.managed_modal"] = types.SimpleNamespace(ManagedModalEnvironment=_DummyEnvironment) + + +def test_browserbase_explicit_local_mode_stays_local_even_when_managed_gateway_is_ready(tmp_path): + _install_fake_tools_package() + (tmp_path / "config.yaml").write_text("browser:\n cloud_provider: local\n", encoding="utf-8") + env = os.environ.copy() + env.pop("BROWSERBASE_API_KEY", None) + env.pop("BROWSERBASE_PROJECT_ID", None) + env.update({ + "HERMES_HOME": str(tmp_path), + "TOOL_GATEWAY_USER_TOKEN": "nous-token", + "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", + }) + + with patch.dict(os.environ, env, clear=True): + browser_tool = _load_tool_module("tools.browser_tool", "browser_tool.py") + + local_mode = browser_tool._is_local_mode() + provider = browser_tool._get_cloud_provider() + + assert local_mode is True + assert provider is None + + +def test_browserbase_managed_gateway_adds_idempotency_key_and_persists_external_call_id(): + _install_fake_tools_package() + env = os.environ.copy() + env.pop("BROWSERBASE_API_KEY", None) + env.pop("BROWSERBASE_PROJECT_ID", None) + env.update({ + "TOOL_GATEWAY_USER_TOKEN": "nous-token", + "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", + }) + + class _Response: + status_code = 200 + ok = True + text = "" + headers = 
{"x-external-call-id": "call-browserbase-1"} + + def json(self): + return { + "id": "bb_local_session_1", + "connectUrl": "wss://connect.browserbase.example/session", + } + + with patch.dict(os.environ, env, clear=True): + browserbase_module = _load_tool_module( + "tools.browser_providers.browserbase", + "browser_providers/browserbase.py", + ) + + with patch.object(browserbase_module.requests, "post", return_value=_Response()) as post: + provider = browserbase_module.BrowserbaseProvider() + session = provider.create_session("task-browserbase-managed") + + sent_headers = post.call_args.kwargs["headers"] + assert sent_headers["X-BB-API-Key"] == "nous-token" + assert sent_headers["X-Idempotency-Key"].startswith("browserbase-session-create:") + assert session["external_call_id"] == "call-browserbase-1" + + +def test_browserbase_managed_gateway_reuses_pending_idempotency_key_after_timeout(): + _install_fake_tools_package() + env = os.environ.copy() + env.pop("BROWSERBASE_API_KEY", None) + env.pop("BROWSERBASE_PROJECT_ID", None) + env.update({ + "TOOL_GATEWAY_USER_TOKEN": "nous-token", + "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", + }) + + class _Response: + status_code = 200 + ok = True + text = "" + headers = {"x-external-call-id": "call-browserbase-2"} + + def json(self): + return { + "id": "bb_local_session_2", + "connectUrl": "wss://connect.browserbase.example/session2", + } + + with patch.dict(os.environ, env, clear=True): + browserbase_module = _load_tool_module( + "tools.browser_providers.browserbase", + "browser_providers/browserbase.py", + ) + provider = browserbase_module.BrowserbaseProvider() + timeout = browserbase_module.requests.Timeout("timed out") + + with patch.object( + browserbase_module.requests, + "post", + side_effect=[timeout, _Response()], + ) as post: + try: + provider.create_session("task-browserbase-timeout") + except browserbase_module.requests.Timeout: + pass + else: + raise AssertionError("Expected Browserbase create_session to 
propagate timeout") + + provider.create_session("task-browserbase-timeout") + + first_headers = post.call_args_list[0].kwargs["headers"] + second_headers = post.call_args_list[1].kwargs["headers"] + assert first_headers["X-Idempotency-Key"] == second_headers["X-Idempotency-Key"] + + +def test_browserbase_managed_gateway_preserves_pending_idempotency_key_for_in_progress_conflicts(): + _install_fake_tools_package() + env = os.environ.copy() + env.pop("BROWSERBASE_API_KEY", None) + env.pop("BROWSERBASE_PROJECT_ID", None) + env.update({ + "TOOL_GATEWAY_USER_TOKEN": "nous-token", + "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", + }) + + class _ConflictResponse: + status_code = 409 + ok = False + text = '{"error":{"code":"CONFLICT","message":"Managed Browserbase session creation is already in progress for this idempotency key"}}' + headers = {} + + def json(self): + return { + "error": { + "code": "CONFLICT", + "message": "Managed Browserbase session creation is already in progress for this idempotency key", + } + } + + class _SuccessResponse: + status_code = 200 + ok = True + text = "" + headers = {"x-external-call-id": "call-browserbase-4"} + + def json(self): + return { + "id": "bb_local_session_4", + "connectUrl": "wss://connect.browserbase.example/session4", + } + + with patch.dict(os.environ, env, clear=True): + browserbase_module = _load_tool_module( + "tools.browser_providers.browserbase", + "browser_providers/browserbase.py", + ) + provider = browserbase_module.BrowserbaseProvider() + + with patch.object( + browserbase_module.requests, + "post", + side_effect=[_ConflictResponse(), _SuccessResponse()], + ) as post: + try: + provider.create_session("task-browserbase-conflict") + except RuntimeError: + pass + else: + raise AssertionError("Expected Browserbase create_session to propagate the in-progress conflict") + + provider.create_session("task-browserbase-conflict") + + first_headers = post.call_args_list[0].kwargs["headers"] + second_headers = 
post.call_args_list[1].kwargs["headers"] + assert first_headers["X-Idempotency-Key"] == second_headers["X-Idempotency-Key"] + + +def test_browserbase_managed_gateway_uses_new_idempotency_key_for_a_new_session_after_success(): + _install_fake_tools_package() + env = os.environ.copy() + env.pop("BROWSERBASE_API_KEY", None) + env.pop("BROWSERBASE_PROJECT_ID", None) + env.update({ + "TOOL_GATEWAY_USER_TOKEN": "nous-token", + "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", + }) + + class _Response: + status_code = 200 + ok = True + text = "" + headers = {"x-external-call-id": "call-browserbase-3"} + + def json(self): + return { + "id": "bb_local_session_3", + "connectUrl": "wss://connect.browserbase.example/session3", + } + + with patch.dict(os.environ, env, clear=True): + browserbase_module = _load_tool_module( + "tools.browser_providers.browserbase", + "browser_providers/browserbase.py", + ) + provider = browserbase_module.BrowserbaseProvider() + + with patch.object(browserbase_module.requests, "post", side_effect=[_Response(), _Response()]) as post: + provider.create_session("task-browserbase-new") + provider.create_session("task-browserbase-new") + + first_headers = post.call_args_list[0].kwargs["headers"] + second_headers = post.call_args_list[1].kwargs["headers"] + assert first_headers["X-Idempotency-Key"] != second_headers["X-Idempotency-Key"] + + +def test_terminal_tool_prefers_managed_modal_when_gateway_ready_and_no_direct_creds(): + _install_fake_tools_package() + env = os.environ.copy() + env.pop("MODAL_TOKEN_ID", None) + env.pop("MODAL_TOKEN_SECRET", None) + + with patch.dict(os.environ, env, clear=True): + terminal_tool = _load_tool_module("tools.terminal_tool", "terminal_tool.py") + + with ( + patch.object(terminal_tool, "is_managed_tool_gateway_ready", return_value=True), + patch.object(terminal_tool, "_ManagedModalEnvironment", return_value="managed-modal-env") as managed_ctor, + patch.object(terminal_tool, "_ModalEnvironment", 
return_value="direct-modal-env") as direct_ctor, + patch.object(Path, "exists", return_value=False), + ): + result = terminal_tool._create_environment( + env_type="modal", + image="python:3.11", + cwd="/root", + timeout=60, + container_config={ + "container_cpu": 1, + "container_memory": 2048, + "container_disk": 1024, + "container_persistent": True, + "modal_mode": "auto", + }, + task_id="task-modal-managed", + ) + + assert result == "managed-modal-env" + assert managed_ctor.called + assert not direct_ctor.called + + +def test_terminal_tool_keeps_direct_modal_when_direct_credentials_exist(): + _install_fake_tools_package() + env = os.environ.copy() + env.update({ + "MODAL_TOKEN_ID": "tok-id", + "MODAL_TOKEN_SECRET": "tok-secret", + }) + + with patch.dict(os.environ, env, clear=True): + terminal_tool = _load_tool_module("tools.terminal_tool", "terminal_tool.py") + + with ( + patch.object(terminal_tool, "is_managed_tool_gateway_ready", return_value=True), + patch.object(terminal_tool, "_ManagedModalEnvironment", return_value="managed-modal-env") as managed_ctor, + patch.object(terminal_tool, "_ModalEnvironment", return_value="direct-modal-env") as direct_ctor, + ): + result = terminal_tool._create_environment( + env_type="modal", + image="python:3.11", + cwd="/root", + timeout=60, + container_config={ + "container_cpu": 1, + "container_memory": 2048, + "container_disk": 1024, + "container_persistent": True, + "modal_mode": "auto", + }, + task_id="task-modal-direct", + ) + + assert result == "direct-modal-env" + assert direct_ctor.called + assert not managed_ctor.called + + +def test_terminal_tool_respects_direct_modal_mode_without_falling_back_to_managed(): + _install_fake_tools_package() + env = os.environ.copy() + env.pop("MODAL_TOKEN_ID", None) + env.pop("MODAL_TOKEN_SECRET", None) + + with patch.dict(os.environ, env, clear=True): + terminal_tool = _load_tool_module("tools.terminal_tool", "terminal_tool.py") + + with ( + patch.object(terminal_tool, 
"is_managed_tool_gateway_ready", return_value=True), + patch.object(Path, "exists", return_value=False), + ): + with pytest.raises(ValueError, match="direct Modal credentials"): + terminal_tool._create_environment( + env_type="modal", + image="python:3.11", + cwd="/root", + timeout=60, + container_config={ + "container_cpu": 1, + "container_memory": 2048, + "container_disk": 1024, + "container_persistent": True, + "modal_mode": "direct", + }, + task_id="task-modal-direct-only", + ) diff --git a/tests/tools/test_managed_media_gateways.py b/tests/tools/test_managed_media_gateways.py new file mode 100644 index 000000000..48cd5f41f --- /dev/null +++ b/tests/tools/test_managed_media_gateways.py @@ -0,0 +1,288 @@ +import sys +import types +from importlib.util import module_from_spec, spec_from_file_location +from pathlib import Path + +import pytest + + +TOOLS_DIR = Path(__file__).resolve().parents[2] / "tools" + + +def _load_tool_module(module_name: str, filename: str): + spec = spec_from_file_location(module_name, TOOLS_DIR / filename) + assert spec and spec.loader + module = module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +@pytest.fixture(autouse=True) +def _restore_tool_and_agent_modules(): + original_modules = { + name: module + for name, module in sys.modules.items() + if name == "tools" + or name.startswith("tools.") + or name == "agent" + or name.startswith("agent.") + or name in {"fal_client", "openai"} + } + try: + yield + finally: + for name in list(sys.modules): + if ( + name == "tools" + or name.startswith("tools.") + or name == "agent" + or name.startswith("agent.") + or name in {"fal_client", "openai"} + ): + sys.modules.pop(name, None) + sys.modules.update(original_modules) + + +def _install_fake_tools_package(): + tools_package = types.ModuleType("tools") + tools_package.__path__ = [str(TOOLS_DIR)] # type: ignore[attr-defined] + sys.modules["tools"] = tools_package + 
sys.modules["tools.debug_helpers"] = types.SimpleNamespace( + DebugSession=lambda *args, **kwargs: types.SimpleNamespace( + active=False, + session_id="debug-session", + log_call=lambda *a, **k: None, + save=lambda: None, + get_session_info=lambda: {}, + ) + ) + sys.modules["tools.managed_tool_gateway"] = _load_tool_module( + "tools.managed_tool_gateway", + "managed_tool_gateway.py", + ) + + +def _install_fake_fal_client(captured): + def submit(model, arguments=None, headers=None): + raise AssertionError("managed FAL gateway mode should use fal_client.SyncClient") + + class FakeResponse: + def json(self): + return { + "request_id": "req-123", + "response_url": "http://127.0.0.1:3009/requests/req-123", + "status_url": "http://127.0.0.1:3009/requests/req-123/status", + "cancel_url": "http://127.0.0.1:3009/requests/req-123/cancel", + } + + def _maybe_retry_request(client, method, url, json=None, timeout=None, headers=None): + captured["submit_via"] = "managed_client" + captured["http_client"] = client + captured["method"] = method + captured["submit_url"] = url + captured["arguments"] = json + captured["timeout"] = timeout + captured["headers"] = headers + return FakeResponse() + + class SyncRequestHandle: + def __init__(self, request_id, response_url, status_url, cancel_url, client): + captured["request_id"] = request_id + captured["response_url"] = response_url + captured["status_url"] = status_url + captured["cancel_url"] = cancel_url + captured["handle_client"] = client + + class SyncClient: + def __init__(self, key=None, default_timeout=120.0): + captured["sync_client_inits"] = captured.get("sync_client_inits", 0) + 1 + captured["client_key"] = key + captured["client_timeout"] = default_timeout + self.default_timeout = default_timeout + self._client = object() + + fal_client_module = types.SimpleNamespace( + submit=submit, + SyncClient=SyncClient, + client=types.SimpleNamespace( + _maybe_retry_request=_maybe_retry_request, + _raise_for_status=lambda response: 
None, + SyncRequestHandle=SyncRequestHandle, + ), + ) + sys.modules["fal_client"] = fal_client_module + return fal_client_module + + +def _install_fake_openai_module(captured, transcription_response=None): + class FakeSpeechResponse: + def stream_to_file(self, output_path): + captured["stream_to_file"] = output_path + + class FakeOpenAI: + def __init__(self, api_key, base_url, **kwargs): + captured["api_key"] = api_key + captured["base_url"] = base_url + captured["client_kwargs"] = kwargs + captured["close_calls"] = captured.get("close_calls", 0) + + def create_speech(**kwargs): + captured["speech_kwargs"] = kwargs + return FakeSpeechResponse() + + def create_transcription(**kwargs): + captured["transcription_kwargs"] = kwargs + return transcription_response + + self.audio = types.SimpleNamespace( + speech=types.SimpleNamespace( + create=create_speech + ), + transcriptions=types.SimpleNamespace( + create=create_transcription + ), + ) + + def close(self): + captured["close_calls"] += 1 + + fake_module = types.SimpleNamespace( + OpenAI=FakeOpenAI, + APIError=Exception, + APIConnectionError=Exception, + APITimeoutError=Exception, + ) + sys.modules["openai"] = fake_module + + +def test_managed_fal_submit_uses_gateway_origin_and_nous_token(monkeypatch): + captured = {} + _install_fake_tools_package() + _install_fake_fal_client(captured) + monkeypatch.delenv("FAL_KEY", raising=False) + monkeypatch.setenv("FAL_QUEUE_GATEWAY_URL", "http://127.0.0.1:3009") + monkeypatch.setenv("TOOL_GATEWAY_USER_TOKEN", "nous-token") + + image_generation_tool = _load_tool_module( + "tools.image_generation_tool", + "image_generation_tool.py", + ) + monkeypatch.setattr(image_generation_tool.uuid, "uuid4", lambda: "fal-submit-123") + + image_generation_tool._submit_fal_request( + "fal-ai/flux-2-pro", + {"prompt": "test prompt", "num_images": 1}, + ) + + assert captured["submit_via"] == "managed_client" + assert captured["client_key"] == "nous-token" + assert captured["submit_url"] == 
"http://127.0.0.1:3009/fal-ai/flux-2-pro" + assert captured["method"] == "POST" + assert captured["arguments"] == {"prompt": "test prompt", "num_images": 1} + assert captured["headers"] == {"x-idempotency-key": "fal-submit-123"} + assert captured["sync_client_inits"] == 1 + + +def test_managed_fal_submit_reuses_cached_sync_client(monkeypatch): + captured = {} + _install_fake_tools_package() + _install_fake_fal_client(captured) + monkeypatch.delenv("FAL_KEY", raising=False) + monkeypatch.setenv("FAL_QUEUE_GATEWAY_URL", "http://127.0.0.1:3009") + monkeypatch.setenv("TOOL_GATEWAY_USER_TOKEN", "nous-token") + + image_generation_tool = _load_tool_module( + "tools.image_generation_tool", + "image_generation_tool.py", + ) + + image_generation_tool._submit_fal_request("fal-ai/flux-2-pro", {"prompt": "first"}) + first_client = captured["http_client"] + image_generation_tool._submit_fal_request("fal-ai/flux-2-pro", {"prompt": "second"}) + + assert captured["sync_client_inits"] == 1 + assert captured["http_client"] is first_client + + +def test_openai_tts_uses_managed_audio_gateway_when_direct_key_absent(monkeypatch, tmp_path): + captured = {} + _install_fake_tools_package() + _install_fake_openai_module(captured) + monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False) + monkeypatch.setenv("TOOL_GATEWAY_DOMAIN", "nousresearch.com") + monkeypatch.setenv("TOOL_GATEWAY_USER_TOKEN", "nous-token") + + tts_tool = _load_tool_module("tools.tts_tool", "tts_tool.py") + monkeypatch.setattr(tts_tool.uuid, "uuid4", lambda: "tts-call-123") + output_path = tmp_path / "speech.mp3" + tts_tool._generate_openai_tts("hello world", str(output_path), {"openai": {}}) + + assert captured["api_key"] == "nous-token" + assert captured["base_url"] == "https://openai-audio-gateway.nousresearch.com/v1" + assert captured["speech_kwargs"]["model"] == "gpt-4o-mini-tts" + assert captured["speech_kwargs"]["extra_headers"] == {"x-idempotency-key": "tts-call-123"} + assert captured["stream_to_file"] == 
str(output_path) + assert captured["close_calls"] == 1 + + +def test_openai_tts_accepts_openai_api_key_as_direct_fallback(monkeypatch, tmp_path): + captured = {} + _install_fake_tools_package() + _install_fake_openai_module(captured) + monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False) + monkeypatch.setenv("OPENAI_API_KEY", "openai-direct-key") + monkeypatch.setenv("TOOL_GATEWAY_DOMAIN", "nousresearch.com") + monkeypatch.setenv("TOOL_GATEWAY_USER_TOKEN", "nous-token") + + tts_tool = _load_tool_module("tools.tts_tool", "tts_tool.py") + output_path = tmp_path / "speech.mp3" + tts_tool._generate_openai_tts("hello world", str(output_path), {"openai": {}}) + + assert captured["api_key"] == "openai-direct-key" + assert captured["base_url"] == "https://api.openai.com/v1" + assert captured["close_calls"] == 1 + + +def test_transcription_uses_model_specific_response_formats(monkeypatch, tmp_path): + whisper_capture = {} + _install_fake_tools_package() + _install_fake_openai_module(whisper_capture, transcription_response="hello from whisper") + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + (tmp_path / "config.yaml").write_text("stt:\n provider: openai\n") + monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False) + monkeypatch.setenv("TOOL_GATEWAY_DOMAIN", "nousresearch.com") + monkeypatch.setenv("TOOL_GATEWAY_USER_TOKEN", "nous-token") + + transcription_tools = _load_tool_module( + "tools.transcription_tools", + "transcription_tools.py", + ) + transcription_tools._load_stt_config = lambda: {"provider": "openai"} + audio_path = tmp_path / "audio.wav" + audio_path.write_bytes(b"RIFF0000WAVEfmt ") + + whisper_result = transcription_tools.transcribe_audio(str(audio_path), model="whisper-1") + assert whisper_result["success"] is True + assert whisper_capture["base_url"] == "https://openai-audio-gateway.nousresearch.com/v1" + assert whisper_capture["transcription_kwargs"]["response_format"] == "text" + assert whisper_capture["close_calls"] == 1 + + json_capture = 
{} + _install_fake_openai_module( + json_capture, + transcription_response=types.SimpleNamespace(text="hello from gpt-4o"), + ) + transcription_tools = _load_tool_module( + "tools.transcription_tools", + "transcription_tools.py", + ) + + json_result = transcription_tools.transcribe_audio( + str(audio_path), + model="gpt-4o-mini-transcribe", + ) + assert json_result["success"] is True + assert json_result["transcript"] == "hello from gpt-4o" + assert json_capture["transcription_kwargs"]["response_format"] == "json" + assert json_capture["close_calls"] == 1 diff --git a/tests/tools/test_managed_modal_environment.py b/tests/tools/test_managed_modal_environment.py new file mode 100644 index 000000000..b52801809 --- /dev/null +++ b/tests/tools/test_managed_modal_environment.py @@ -0,0 +1,213 @@ +import json +import sys +import tempfile +import threading +import types +from importlib.util import module_from_spec, spec_from_file_location +from pathlib import Path + + +TOOLS_DIR = Path(__file__).resolve().parents[2] / "tools" + + +def _load_tool_module(module_name: str, filename: str): + spec = spec_from_file_location(module_name, TOOLS_DIR / filename) + assert spec and spec.loader + module = module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +def _reset_modules(prefixes: tuple[str, ...]): + for name in list(sys.modules): + if name.startswith(prefixes): + sys.modules.pop(name, None) + + +def _install_fake_tools_package(): + _reset_modules(("tools", "agent", "hermes_cli")) + + hermes_cli = types.ModuleType("hermes_cli") + hermes_cli.__path__ = [] # type: ignore[attr-defined] + sys.modules["hermes_cli"] = hermes_cli + sys.modules["hermes_cli.config"] = types.SimpleNamespace( + get_hermes_home=lambda: Path(tempfile.gettempdir()) / "hermes-home", + ) + + tools_package = types.ModuleType("tools") + tools_package.__path__ = [str(TOOLS_DIR)] # type: ignore[attr-defined] + sys.modules["tools"] = tools_package + + 
env_package = types.ModuleType("tools.environments") + env_package.__path__ = [str(TOOLS_DIR / "environments")] # type: ignore[attr-defined] + sys.modules["tools.environments"] = env_package + + interrupt_event = threading.Event() + sys.modules["tools.interrupt"] = types.SimpleNamespace( + set_interrupt=lambda value=True: interrupt_event.set() if value else interrupt_event.clear(), + is_interrupted=lambda: interrupt_event.is_set(), + _interrupt_event=interrupt_event, + ) + + class _DummyBaseEnvironment: + def __init__(self, cwd: str, timeout: int, env=None): + self.cwd = cwd + self.timeout = timeout + self.env = env or {} + + def _prepare_command(self, command: str): + return command, None + + sys.modules["tools.environments.base"] = types.SimpleNamespace(BaseEnvironment=_DummyBaseEnvironment) + sys.modules["tools.managed_tool_gateway"] = types.SimpleNamespace( + resolve_managed_tool_gateway=lambda vendor: types.SimpleNamespace( + vendor=vendor, + gateway_origin="https://modal-gateway.example.com", + nous_user_token="user-token", + managed_mode=True, + ) + ) + + return interrupt_event + + +class _FakeResponse: + def __init__(self, status_code: int, payload=None, text: str = ""): + self.status_code = status_code + self._payload = payload + self.text = text + + def json(self): + if isinstance(self._payload, Exception): + raise self._payload + return self._payload + + +def test_managed_modal_execute_polls_until_completed(monkeypatch): + _install_fake_tools_package() + managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py") + + calls = [] + poll_count = {"value": 0} + + def fake_request(method, url, headers=None, json=None, timeout=None): + calls.append((method, url, json, timeout)) + if method == "POST" and url.endswith("/v1/sandboxes"): + return _FakeResponse(200, {"id": "sandbox-1"}) + if method == "POST" and url.endswith("/execs"): + return _FakeResponse(202, {"execId": json["execId"], "status": "running"}) + if method 
== "GET" and "/execs/" in url: + poll_count["value"] += 1 + if poll_count["value"] == 1: + return _FakeResponse(200, {"execId": url.rsplit("/", 1)[-1], "status": "running"}) + return _FakeResponse(200, { + "execId": url.rsplit("/", 1)[-1], + "status": "completed", + "output": "hello", + "returncode": 0, + }) + if method == "POST" and url.endswith("/terminate"): + return _FakeResponse(200, {"status": "terminated"}) + raise AssertionError(f"Unexpected request: {method} {url}") + + monkeypatch.setattr(managed_modal.requests, "request", fake_request) + monkeypatch.setattr(managed_modal.time, "sleep", lambda _: None) + + env = managed_modal.ManagedModalEnvironment(image="python:3.11") + result = env.execute("echo hello") + env.cleanup() + + assert result == {"output": "hello", "returncode": 0} + assert any(call[0] == "POST" and call[1].endswith("/execs") for call in calls) + + +def test_managed_modal_create_sends_a_stable_idempotency_key(monkeypatch): + _install_fake_tools_package() + managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py") + + create_headers = [] + + def fake_request(method, url, headers=None, json=None, timeout=None): + if method == "POST" and url.endswith("/v1/sandboxes"): + create_headers.append(headers or {}) + return _FakeResponse(200, {"id": "sandbox-1"}) + if method == "POST" and url.endswith("/terminate"): + return _FakeResponse(200, {"status": "terminated"}) + raise AssertionError(f"Unexpected request: {method} {url}") + + monkeypatch.setattr(managed_modal.requests, "request", fake_request) + + env = managed_modal.ManagedModalEnvironment(image="python:3.11") + env.cleanup() + + assert len(create_headers) == 1 + assert isinstance(create_headers[0].get("x-idempotency-key"), str) + assert create_headers[0]["x-idempotency-key"] + + +def test_managed_modal_execute_cancels_on_interrupt(monkeypatch): + interrupt_event = _install_fake_tools_package() + managed_modal = 
_load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py") + + calls = [] + + def fake_request(method, url, headers=None, json=None, timeout=None): + calls.append((method, url, json, timeout)) + if method == "POST" and url.endswith("/v1/sandboxes"): + return _FakeResponse(200, {"id": "sandbox-1"}) + if method == "POST" and url.endswith("/execs"): + return _FakeResponse(202, {"execId": json["execId"], "status": "running"}) + if method == "GET" and "/execs/" in url: + return _FakeResponse(200, {"execId": url.rsplit("/", 1)[-1], "status": "running"}) + if method == "POST" and url.endswith("/cancel"): + return _FakeResponse(202, {"status": "cancelling"}) + if method == "POST" and url.endswith("/terminate"): + return _FakeResponse(200, {"status": "terminated"}) + raise AssertionError(f"Unexpected request: {method} {url}") + + def fake_sleep(_seconds): + interrupt_event.set() + + monkeypatch.setattr(managed_modal.requests, "request", fake_request) + monkeypatch.setattr(managed_modal.time, "sleep", fake_sleep) + + env = managed_modal.ManagedModalEnvironment(image="python:3.11") + result = env.execute("sleep 30") + env.cleanup() + + assert result == { + "output": "[Command interrupted - Modal sandbox exec cancelled]", + "returncode": 130, + } + assert any(call[0] == "POST" and call[1].endswith("/cancel") for call in calls) + poll_calls = [call for call in calls if call[0] == "GET" and "/execs/" in call[1]] + cancel_calls = [call for call in calls if call[0] == "POST" and call[1].endswith("/cancel")] + assert poll_calls[0][3] == (1.0, 5.0) + assert cancel_calls[0][3] == (1.0, 5.0) + + +def test_managed_modal_execute_returns_descriptive_error_on_missing_exec(monkeypatch): + _install_fake_tools_package() + managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py") + + def fake_request(method, url, headers=None, json=None, timeout=None): + if method == "POST" and url.endswith("/v1/sandboxes"): + return 
_FakeResponse(200, {"id": "sandbox-1"}) + if method == "POST" and url.endswith("/execs"): + return _FakeResponse(202, {"execId": json["execId"], "status": "running"}) + if method == "GET" and "/execs/" in url: + return _FakeResponse(404, {"error": "not found"}, text="not found") + if method == "POST" and url.endswith("/terminate"): + return _FakeResponse(200, {"status": "terminated"}) + raise AssertionError(f"Unexpected request: {method} {url}") + + monkeypatch.setattr(managed_modal.requests, "request", fake_request) + monkeypatch.setattr(managed_modal.time, "sleep", lambda _: None) + + env = managed_modal.ManagedModalEnvironment(image="python:3.11") + result = env.execute("echo hello") + env.cleanup() + + assert result["returncode"] == 1 + assert "not found" in result["output"].lower() diff --git a/tests/tools/test_managed_tool_gateway.py b/tests/tools/test_managed_tool_gateway.py new file mode 100644 index 000000000..591708345 --- /dev/null +++ b/tests/tools/test_managed_tool_gateway.py @@ -0,0 +1,70 @@ +import os +import json +from datetime import datetime, timedelta, timezone +from importlib.util import module_from_spec, spec_from_file_location +from pathlib import Path +import sys +from unittest.mock import patch + +MODULE_PATH = Path(__file__).resolve().parents[2] / "tools" / "managed_tool_gateway.py" +MODULE_SPEC = spec_from_file_location("managed_tool_gateway_test_module", MODULE_PATH) +assert MODULE_SPEC and MODULE_SPEC.loader +managed_tool_gateway = module_from_spec(MODULE_SPEC) +sys.modules[MODULE_SPEC.name] = managed_tool_gateway +MODULE_SPEC.loader.exec_module(managed_tool_gateway) +resolve_managed_tool_gateway = managed_tool_gateway.resolve_managed_tool_gateway + + +def test_resolve_managed_tool_gateway_derives_vendor_origin_from_shared_domain(): + with patch.dict(os.environ, {"TOOL_GATEWAY_DOMAIN": "nousresearch.com"}, clear=False): + result = resolve_managed_tool_gateway( + "firecrawl", + token_reader=lambda: "nous-token", + ) + + assert result is 
not None + assert result.gateway_origin == "https://firecrawl-gateway.nousresearch.com" + assert result.nous_user_token == "nous-token" + assert result.managed_mode is True + + +def test_resolve_managed_tool_gateway_uses_vendor_specific_override(): + with patch.dict(os.environ, {"BROWSERBASE_GATEWAY_URL": "http://browserbase-gateway.localhost:3009/"}, clear=False): + result = resolve_managed_tool_gateway( + "browserbase", + token_reader=lambda: "nous-token", + ) + + assert result is not None + assert result.gateway_origin == "http://browserbase-gateway.localhost:3009" + + +def test_resolve_managed_tool_gateway_is_inactive_without_nous_token(): + with patch.dict(os.environ, {"TOOL_GATEWAY_DOMAIN": "nousresearch.com"}, clear=False): + result = resolve_managed_tool_gateway( + "firecrawl", + token_reader=lambda: None, + ) + + assert result is None + + +def test_read_nous_access_token_refreshes_expiring_cached_token(tmp_path, monkeypatch): + monkeypatch.delenv("TOOL_GATEWAY_USER_TOKEN", raising=False) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + expires_at = (datetime.now(timezone.utc) + timedelta(seconds=30)).isoformat() + (tmp_path / "auth.json").write_text(json.dumps({ + "providers": { + "nous": { + "access_token": "stale-token", + "refresh_token": "refresh-token", + "expires_at": expires_at, + } + } + })) + monkeypatch.setattr( + "hermes_cli.auth.resolve_nous_access_token", + lambda refresh_skew_seconds=120: "fresh-token", + ) + + assert managed_tool_gateway.read_nous_access_token() == "fresh-token" diff --git a/tests/tools/test_modal_snapshot_isolation.py b/tests/tools/test_modal_snapshot_isolation.py new file mode 100644 index 000000000..0b4f7fc56 --- /dev/null +++ b/tests/tools/test_modal_snapshot_isolation.py @@ -0,0 +1,188 @@ +import json +import sys +import types +from importlib.util import module_from_spec, spec_from_file_location +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] +TOOLS_DIR = REPO_ROOT / "tools" + + +def 
_load_module(module_name: str, path: Path): + spec = spec_from_file_location(module_name, path) + assert spec and spec.loader + module = module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +def _reset_modules(prefixes: tuple[str, ...]): + for name in list(sys.modules): + if name.startswith(prefixes): + sys.modules.pop(name, None) + + +def _install_modal_test_modules( + tmp_path: Path, + *, + fail_on_snapshot_ids: set[str] | None = None, + snapshot_id: str = "im-fresh", +): + _reset_modules(("tools", "hermes_cli", "swerex", "modal")) + + hermes_cli = types.ModuleType("hermes_cli") + hermes_cli.__path__ = [] # type: ignore[attr-defined] + sys.modules["hermes_cli"] = hermes_cli + hermes_home = tmp_path / "hermes-home" + sys.modules["hermes_cli.config"] = types.SimpleNamespace( + get_hermes_home=lambda: hermes_home, + ) + + tools_package = types.ModuleType("tools") + tools_package.__path__ = [str(TOOLS_DIR)] # type: ignore[attr-defined] + sys.modules["tools"] = tools_package + + env_package = types.ModuleType("tools.environments") + env_package.__path__ = [str(TOOLS_DIR / "environments")] # type: ignore[attr-defined] + sys.modules["tools.environments"] = env_package + + class _DummyBaseEnvironment: + def __init__(self, cwd: str, timeout: int, env=None): + self.cwd = cwd + self.timeout = timeout + self.env = env or {} + + def _prepare_command(self, command: str): + return command, None + + sys.modules["tools.environments.base"] = types.SimpleNamespace(BaseEnvironment=_DummyBaseEnvironment) + sys.modules["tools.interrupt"] = types.SimpleNamespace(is_interrupted=lambda: False) + + from_id_calls: list[str] = [] + registry_calls: list[tuple[str, list[str] | None]] = [] + deployment_calls: list[dict] = [] + + class _FakeImage: + @staticmethod + def from_id(image_id: str): + from_id_calls.append(image_id) + return {"kind": "snapshot", "image_id": image_id} + + @staticmethod + def from_registry(image: str, 
setup_dockerfile_commands=None): + registry_calls.append((image, setup_dockerfile_commands)) + return {"kind": "registry", "image": image} + + class _FakeRuntime: + async def execute(self, _command): + return types.SimpleNamespace(stdout="ok", exit_code=0) + + class _FakeModalDeployment: + def __init__(self, **kwargs): + deployment_calls.append(dict(kwargs)) + self.image = kwargs["image"] + self.runtime = _FakeRuntime() + + async def _snapshot_aio(): + return types.SimpleNamespace(object_id=snapshot_id) + + self._sandbox = types.SimpleNamespace( + snapshot_filesystem=types.SimpleNamespace(aio=_snapshot_aio), + ) + + async def start(self): + image = self.image if isinstance(self.image, dict) else {} + image_id = image.get("image_id") + if fail_on_snapshot_ids and image_id in fail_on_snapshot_ids: + raise RuntimeError(f"cannot restore {image_id}") + + async def stop(self): + return None + + class _FakeRexCommand: + def __init__(self, **kwargs): + self.kwargs = kwargs + + sys.modules["modal"] = types.SimpleNamespace(Image=_FakeImage) + + swerex = types.ModuleType("swerex") + swerex.__path__ = [] # type: ignore[attr-defined] + sys.modules["swerex"] = swerex + swerex_deployment = types.ModuleType("swerex.deployment") + swerex_deployment.__path__ = [] # type: ignore[attr-defined] + sys.modules["swerex.deployment"] = swerex_deployment + sys.modules["swerex.deployment.modal"] = types.SimpleNamespace(ModalDeployment=_FakeModalDeployment) + swerex_runtime = types.ModuleType("swerex.runtime") + swerex_runtime.__path__ = [] # type: ignore[attr-defined] + sys.modules["swerex.runtime"] = swerex_runtime + sys.modules["swerex.runtime.abstract"] = types.SimpleNamespace(Command=_FakeRexCommand) + + return { + "snapshot_store": hermes_home / "modal_snapshots.json", + "deployment_calls": deployment_calls, + "from_id_calls": from_id_calls, + "registry_calls": registry_calls, + } + + +def test_modal_environment_migrates_legacy_snapshot_key_and_uses_snapshot_id(tmp_path): + state = 
_install_modal_test_modules(tmp_path) + snapshot_store = state["snapshot_store"] + snapshot_store.parent.mkdir(parents=True, exist_ok=True) + snapshot_store.write_text(json.dumps({"task-legacy": "im-legacy123"})) + + modal_module = _load_module("tools.environments.modal", TOOLS_DIR / "environments" / "modal.py") + env = modal_module.ModalEnvironment(image="python:3.11", task_id="task-legacy") + + try: + assert state["from_id_calls"] == ["im-legacy123"] + assert state["deployment_calls"][0]["image"] == {"kind": "snapshot", "image_id": "im-legacy123"} + assert json.loads(snapshot_store.read_text()) == {"direct:task-legacy": "im-legacy123"} + finally: + env.cleanup() + + +def test_modal_environment_prunes_stale_direct_snapshot_and_retries_base_image(tmp_path): + state = _install_modal_test_modules(tmp_path, fail_on_snapshot_ids={"im-stale123"}) + snapshot_store = state["snapshot_store"] + snapshot_store.parent.mkdir(parents=True, exist_ok=True) + snapshot_store.write_text(json.dumps({"direct:task-stale": "im-stale123"})) + + modal_module = _load_module("tools.environments.modal", TOOLS_DIR / "environments" / "modal.py") + env = modal_module.ModalEnvironment(image="python:3.11", task_id="task-stale") + + try: + assert [call["image"] for call in state["deployment_calls"]] == [ + {"kind": "snapshot", "image_id": "im-stale123"}, + {"kind": "registry", "image": "python:3.11"}, + ] + assert json.loads(snapshot_store.read_text()) == {} + finally: + env.cleanup() + + +def test_modal_environment_cleanup_writes_namespaced_snapshot_key(tmp_path): + state = _install_modal_test_modules(tmp_path, snapshot_id="im-cleanup456") + snapshot_store = state["snapshot_store"] + + modal_module = _load_module("tools.environments.modal", TOOLS_DIR / "environments" / "modal.py") + env = modal_module.ModalEnvironment(image="python:3.11", task_id="task-cleanup") + env.cleanup() + + assert json.loads(snapshot_store.read_text()) == {"direct:task-cleanup": "im-cleanup456"} + + +def 
test_resolve_modal_image_uses_snapshot_ids_and_registry_images(tmp_path): + state = _install_modal_test_modules(tmp_path) + modal_module = _load_module("tools.environments.modal", TOOLS_DIR / "environments" / "modal.py") + + snapshot_image = modal_module._resolve_modal_image("im-snapshot123") + registry_image = modal_module._resolve_modal_image("python:3.11") + + assert snapshot_image == {"kind": "snapshot", "image_id": "im-snapshot123"} + assert registry_image == {"kind": "registry", "image": "python:3.11"} + assert state["from_id_calls"] == ["im-snapshot123"] + assert state["registry_calls"][0][0] == "python:3.11" + assert "ensurepip" in state["registry_calls"][0][1][0] diff --git a/tests/tools/test_terminal_requirements.py b/tests/tools/test_terminal_requirements.py index b3bc0b194..c93d68e17 100644 --- a/tests/tools/test_terminal_requirements.py +++ b/tests/tools/test_terminal_requirements.py @@ -8,9 +8,11 @@ def _clear_terminal_env(monkeypatch): """Remove terminal env vars that could affect requirements checks.""" keys = [ "TERMINAL_ENV", + "TERMINAL_MODAL_MODE", "TERMINAL_SSH_HOST", "TERMINAL_SSH_USER", "MODAL_TOKEN_ID", + "MODAL_TOKEN_SECRET", "HOME", "USERPROFILE", ] @@ -63,7 +65,7 @@ def test_modal_backend_without_token_or_config_logs_specific_error(monkeypatch, monkeypatch.setenv("TERMINAL_ENV", "modal") monkeypatch.setenv("HOME", str(tmp_path)) monkeypatch.setenv("USERPROFILE", str(tmp_path)) - # Pretend swerex is installed + monkeypatch.setattr(terminal_tool_module, "is_managed_tool_gateway_ready", lambda _vendor: False) monkeypatch.setattr(terminal_tool_module.importlib.util, "find_spec", lambda _name: object()) with caplog.at_level(logging.ERROR): @@ -71,6 +73,45 @@ def test_modal_backend_without_token_or_config_logs_specific_error(monkeypatch, assert ok is False assert any( - "Modal backend selected but no MODAL_TOKEN_ID environment variable" in record.getMessage() + "Modal backend selected but no direct Modal credentials/config or managed tool 
gateway was found" in record.getMessage() + for record in caplog.records + ) + + +def test_modal_backend_with_managed_gateway_does_not_require_direct_creds_or_minisweagent(monkeypatch, tmp_path): + _clear_terminal_env(monkeypatch) + monkeypatch.setenv("TERMINAL_ENV", "modal") + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("USERPROFILE", str(tmp_path)) + monkeypatch.setenv("TERMINAL_MODAL_MODE", "managed") + monkeypatch.setattr(terminal_tool_module, "is_managed_tool_gateway_ready", lambda _vendor: True) + monkeypatch.setattr( + terminal_tool_module, + "ensure_minisweagent_on_path", + lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("should not be called")), + ) + monkeypatch.setattr( + terminal_tool_module.importlib.util, + "find_spec", + lambda _name: (_ for _ in ()).throw(AssertionError("should not be called")), + ) + + assert terminal_tool_module.check_terminal_requirements() is True + + +def test_modal_backend_direct_mode_does_not_fall_back_to_managed(monkeypatch, caplog, tmp_path): + _clear_terminal_env(monkeypatch) + monkeypatch.setenv("TERMINAL_ENV", "modal") + monkeypatch.setenv("TERMINAL_MODAL_MODE", "direct") + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("USERPROFILE", str(tmp_path)) + monkeypatch.setattr(terminal_tool_module, "is_managed_tool_gateway_ready", lambda _vendor: True) + + with caplog.at_level(logging.ERROR): + ok = terminal_tool_module.check_terminal_requirements() + + assert ok is False + assert any( + "TERMINAL_MODAL_MODE=direct" in record.getMessage() for record in caplog.records ) diff --git a/tests/tools/test_terminal_tool_requirements.py b/tests/tools/test_terminal_tool_requirements.py index 5a347cc6e..216284932 100644 --- a/tests/tools/test_terminal_tool_requirements.py +++ b/tests/tools/test_terminal_tool_requirements.py @@ -26,3 +26,30 @@ class TestTerminalRequirements: names = {tool["function"]["name"] for tool in tools} assert "terminal" in names assert {"read_file", "write_file", 
"patch", "search_files"}.issubset(names) + + def test_terminal_and_execute_code_tools_resolve_for_managed_modal(self, monkeypatch, tmp_path): + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("USERPROFILE", str(tmp_path)) + monkeypatch.delenv("MODAL_TOKEN_ID", raising=False) + monkeypatch.delenv("MODAL_TOKEN_SECRET", raising=False) + monkeypatch.setattr( + terminal_tool_module, + "_get_env_config", + lambda: {"env_type": "modal", "modal_mode": "managed"}, + ) + monkeypatch.setattr( + terminal_tool_module, + "is_managed_tool_gateway_ready", + lambda _vendor: True, + ) + monkeypatch.setattr( + terminal_tool_module, + "ensure_minisweagent_on_path", + lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("should not be called")), + ) + + tools = get_tool_definitions(enabled_toolsets=["terminal", "code_execution"], quiet_mode=True) + names = {tool["function"]["name"] for tool in tools} + + assert "terminal" in names + assert "execute_code" in names diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py index b5c9f9775..d43f89cf1 100644 --- a/tests/tools/test_transcription_tools.py +++ b/tests/tools/test_transcription_tools.py @@ -231,6 +231,7 @@ class TestTranscribeGroq: assert result["success"] is True assert result["transcript"] == "hello world" assert result["provider"] == "groq" + mock_client.close.assert_called_once() def test_whitespace_stripped(self, monkeypatch, sample_wav): monkeypatch.setenv("GROQ_API_KEY", "gsk-test") @@ -272,6 +273,7 @@ class TestTranscribeGroq: assert result["success"] is False assert "API error" in result["error"] + mock_client.close.assert_called_once() def test_permission_error(self, monkeypatch, sample_wav): monkeypatch.setenv("GROQ_API_KEY", "gsk-test") @@ -327,6 +329,7 @@ class TestTranscribeOpenAIExtended: result = _transcribe_openai(sample_wav, "whisper-1") assert result["transcript"] == "hello" + mock_client.close.assert_called_once() def test_permission_error(self, 
monkeypatch, sample_wav): monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test") @@ -341,6 +344,7 @@ class TestTranscribeOpenAIExtended: assert result["success"] is False assert "Permission denied" in result["error"] + mock_client.close.assert_called_once() class TestTranscribeLocalCommand: diff --git a/tests/tools/test_web_tools_config.py b/tests/tools/test_web_tools_config.py index d291a005b..1354c2431 100644 --- a/tests/tools/test_web_tools_config.py +++ b/tests/tools/test_web_tools_config.py @@ -5,12 +5,14 @@ Coverage: constructor failure recovery, return value verification, edge cases. _get_backend() — backend selection logic with env var combinations. _get_parallel_client() — Parallel client configuration, singleton caching. - check_web_api_key() — unified availability check. + check_web_api_key() — unified availability check across all web backends. """ +import importlib +import json import os import pytest -from unittest.mock import patch, MagicMock +from unittest.mock import patch, MagicMock, AsyncMock class TestFirecrawlClientConfig: @@ -20,14 +22,30 @@ class TestFirecrawlClientConfig: """Reset client and env vars before each test.""" import tools.web_tools tools.web_tools._firecrawl_client = None - for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL"): + tools.web_tools._firecrawl_client_config = None + for key in ( + "FIRECRAWL_API_KEY", + "FIRECRAWL_API_URL", + "FIRECRAWL_GATEWAY_URL", + "TOOL_GATEWAY_DOMAIN", + "TOOL_GATEWAY_SCHEME", + "TOOL_GATEWAY_USER_TOKEN", + ): os.environ.pop(key, None) def teardown_method(self): """Reset client after each test.""" import tools.web_tools tools.web_tools._firecrawl_client = None - for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL"): + tools.web_tools._firecrawl_client_config = None + for key in ( + "FIRECRAWL_API_KEY", + "FIRECRAWL_API_URL", + "FIRECRAWL_GATEWAY_URL", + "TOOL_GATEWAY_DOMAIN", + "TOOL_GATEWAY_SCHEME", + "TOOL_GATEWAY_USER_TOKEN", + ): os.environ.pop(key, None) # ── Configuration matrix 
───────────────────────────────────────── @@ -67,9 +85,152 @@ class TestFirecrawlClientConfig: def test_no_config_raises_with_helpful_message(self): """Neither key nor URL → ValueError with guidance.""" with patch("tools.web_tools.Firecrawl"): - from tools.web_tools import _get_firecrawl_client - with pytest.raises(ValueError, match="FIRECRAWL_API_KEY"): + with patch("tools.web_tools._read_nous_access_token", return_value=None): + from tools.web_tools import _get_firecrawl_client + with pytest.raises(ValueError, match="FIRECRAWL_API_KEY"): + _get_firecrawl_client() + + def test_tool_gateway_domain_builds_firecrawl_gateway_origin(self): + """Shared gateway domain should derive the Firecrawl vendor hostname.""" + with patch.dict(os.environ, {"TOOL_GATEWAY_DOMAIN": "nousresearch.com"}): + with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"): + with patch("tools.web_tools.Firecrawl") as mock_fc: + from tools.web_tools import _get_firecrawl_client + result = _get_firecrawl_client() + mock_fc.assert_called_once_with( + api_key="nous-token", + api_url="https://firecrawl-gateway.nousresearch.com", + ) + assert result is mock_fc.return_value + + def test_tool_gateway_scheme_can_switch_derived_gateway_origin_to_http(self): + """Shared gateway scheme should allow local plain-http vendor hosts.""" + with patch.dict(os.environ, { + "TOOL_GATEWAY_DOMAIN": "nousresearch.com", + "TOOL_GATEWAY_SCHEME": "http", + }): + with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"): + with patch("tools.web_tools.Firecrawl") as mock_fc: + from tools.web_tools import _get_firecrawl_client + result = _get_firecrawl_client() + mock_fc.assert_called_once_with( + api_key="nous-token", + api_url="http://firecrawl-gateway.nousresearch.com", + ) + assert result is mock_fc.return_value + + def test_invalid_tool_gateway_scheme_raises(self): + """Unexpected shared gateway schemes should fail fast.""" + with patch.dict(os.environ, { + 
"TOOL_GATEWAY_DOMAIN": "nousresearch.com", + "TOOL_GATEWAY_SCHEME": "ftp", + }): + with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"): + from tools.web_tools import _get_firecrawl_client + with pytest.raises(ValueError, match="TOOL_GATEWAY_SCHEME"): + _get_firecrawl_client() + + def test_explicit_firecrawl_gateway_url_takes_precedence(self): + """An explicit Firecrawl gateway origin should override the shared domain.""" + with patch.dict(os.environ, { + "FIRECRAWL_GATEWAY_URL": "https://firecrawl-gateway.localhost:3009/", + "TOOL_GATEWAY_DOMAIN": "nousresearch.com", + }): + with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"): + with patch("tools.web_tools.Firecrawl") as mock_fc: + from tools.web_tools import _get_firecrawl_client + _get_firecrawl_client() + mock_fc.assert_called_once_with( + api_key="nous-token", + api_url="https://firecrawl-gateway.localhost:3009", + ) + + def test_default_gateway_domain_targets_nous_production_origin(self): + """Default gateway origin should point at the Firecrawl vendor hostname.""" + with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"): + with patch("tools.web_tools.Firecrawl") as mock_fc: + from tools.web_tools import _get_firecrawl_client _get_firecrawl_client() + mock_fc.assert_called_once_with( + api_key="nous-token", + api_url="https://firecrawl-gateway.nousresearch.com", + ) + + def test_direct_mode_is_preferred_over_tool_gateway(self): + """Explicit Firecrawl config should win over the gateway fallback.""" + with patch.dict(os.environ, { + "FIRECRAWL_API_KEY": "fc-test", + "TOOL_GATEWAY_DOMAIN": "nousresearch.com", + }): + with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"): + with patch("tools.web_tools.Firecrawl") as mock_fc: + from tools.web_tools import _get_firecrawl_client + _get_firecrawl_client() + mock_fc.assert_called_once_with(api_key="fc-test") + + def 
test_nous_auth_token_respects_hermes_home_override(self, tmp_path): + """Auth lookup should read from HERMES_HOME/auth.json, not ~/.hermes/auth.json.""" + real_home = tmp_path / "real-home" + (real_home / ".hermes").mkdir(parents=True) + + hermes_home = tmp_path / "hermes-home" + hermes_home.mkdir() + (hermes_home / "auth.json").write_text(json.dumps({ + "providers": { + "nous": { + "access_token": "nous-token", + } + } + })) + + with patch.dict(os.environ, { + "HOME": str(real_home), + "HERMES_HOME": str(hermes_home), + }, clear=False): + import tools.web_tools + importlib.reload(tools.web_tools) + assert tools.web_tools._read_nous_access_token() == "nous-token" + + def test_check_auxiliary_model_re_resolves_backend_each_call(self): + """Availability checks should not be pinned to module import state.""" + import tools.web_tools + + # Simulate the pre-fix import-time cache slot for regression coverage. + tools.web_tools.__dict__["_aux_async_client"] = None + + with patch( + "tools.web_tools.get_async_text_auxiliary_client", + side_effect=[(None, None), (MagicMock(base_url="https://api.openrouter.ai/v1"), "test-model")], + ): + assert tools.web_tools.check_auxiliary_model() is False + assert tools.web_tools.check_auxiliary_model() is True + + @pytest.mark.asyncio + async def test_summarizer_re_resolves_backend_after_initial_unavailable_state(self): + """Summarization should pick up a backend that becomes available later in-process.""" + import tools.web_tools + + tools.web_tools.__dict__["_aux_async_client"] = None + + response = MagicMock() + response.choices = [MagicMock(message=MagicMock(content="summary text"))] + + fake_client = MagicMock(base_url="https://api.openrouter.ai/v1") + fake_client.chat.completions.create = AsyncMock(return_value=response) + + with patch( + "tools.web_tools.get_async_text_auxiliary_client", + side_effect=[(None, None), (fake_client, "test-model")], + ): + assert tools.web_tools.check_auxiliary_model() is False + result = await 
tools.web_tools._call_summarizer_llm( + "Some content worth summarizing", + "Source: https://example.com\n\n", + None, + ) + + assert result == "summary text" + fake_client.chat.completions.create.assert_awaited_once() # ── Singleton caching ──────────────────────────────────────────── @@ -117,9 +278,10 @@ class TestFirecrawlClientConfig: """FIRECRAWL_API_KEY='' with no URL → should raise.""" with patch.dict(os.environ, {"FIRECRAWL_API_KEY": ""}): with patch("tools.web_tools.Firecrawl"): - from tools.web_tools import _get_firecrawl_client - with pytest.raises(ValueError): - _get_firecrawl_client() + with patch("tools.web_tools._read_nous_access_token", return_value=None): + from tools.web_tools import _get_firecrawl_client + with pytest.raises(ValueError): + _get_firecrawl_client() class TestBackendSelection: @@ -130,7 +292,16 @@ class TestBackendSelection: setups. """ - _ENV_KEYS = ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "TAVILY_API_KEY") + _ENV_KEYS = ( + "PARALLEL_API_KEY", + "FIRECRAWL_API_KEY", + "FIRECRAWL_API_URL", + "FIRECRAWL_GATEWAY_URL", + "TOOL_GATEWAY_DOMAIN", + "TOOL_GATEWAY_SCHEME", + "TOOL_GATEWAY_USER_TOKEN", + "TAVILY_API_KEY", + ) def setup_method(self): for key in self._ENV_KEYS: @@ -276,10 +447,47 @@ class TestParallelClientConfig: assert client1 is client2 +class TestWebSearchErrorHandling: + """Test suite for web_search_tool() error responses.""" + + def test_search_error_response_does_not_expose_diagnostics(self): + import tools.web_tools + + firecrawl_client = MagicMock() + firecrawl_client.search.side_effect = RuntimeError("boom") + + with patch("tools.web_tools._get_backend", return_value="firecrawl"), \ + patch("tools.web_tools._get_firecrawl_client", return_value=firecrawl_client), \ + patch("tools.interrupt.is_interrupted", return_value=False), \ + patch.object(tools.web_tools._debug, "log_call") as mock_log_call, \ + patch.object(tools.web_tools._debug, "save"): + result = 
json.loads(tools.web_tools.web_search_tool("test query", limit=3)) + + assert result == {"error": "Error searching web: boom"} + + debug_payload = mock_log_call.call_args.args[1] + assert debug_payload["error"] == "Error searching web: boom" + assert "traceback" not in debug_payload["error"] + assert "exception_type" not in debug_payload["error"] + assert "config" not in result + assert "exception_type" not in result + assert "exception_chain" not in result + assert "traceback" not in result + + class TestCheckWebApiKey: """Test suite for check_web_api_key() unified availability check.""" - _ENV_KEYS = ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "TAVILY_API_KEY") + _ENV_KEYS = ( + "PARALLEL_API_KEY", + "FIRECRAWL_API_KEY", + "FIRECRAWL_API_URL", + "FIRECRAWL_GATEWAY_URL", + "TOOL_GATEWAY_DOMAIN", + "TOOL_GATEWAY_SCHEME", + "TOOL_GATEWAY_USER_TOKEN", + "TAVILY_API_KEY", + ) def setup_method(self): for key in self._ENV_KEYS: @@ -329,3 +537,22 @@ class TestCheckWebApiKey: }): from tools.web_tools import check_web_api_key assert check_web_api_key() is True + + def test_tool_gateway_returns_true(self): + with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"): + from tools.web_tools import check_web_api_key + assert check_web_api_key() is True + + def test_configured_backend_must_match_available_provider(self): + with patch("tools.web_tools._load_web_config", return_value={"backend": "parallel"}): + with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"): + with patch.dict(os.environ, {"FIRECRAWL_GATEWAY_URL": "http://127.0.0.1:3002"}, clear=False): + from tools.web_tools import check_web_api_key + assert check_web_api_key() is False + + def test_configured_firecrawl_backend_accepts_managed_gateway(self): + with patch("tools.web_tools._load_web_config", return_value={"backend": "firecrawl"}): + with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"): + with 
patch.dict(os.environ, {"FIRECRAWL_GATEWAY_URL": "http://127.0.0.1:3002"}, clear=False): + from tools.web_tools import check_web_api_key + assert check_web_api_key() is True diff --git a/tools/browser_providers/browserbase.py b/tools/browser_providers/browserbase.py index 1aad8e6e0..342b430b1 100644 --- a/tools/browser_providers/browserbase.py +++ b/tools/browser_providers/browserbase.py @@ -2,14 +2,57 @@ import logging import os +import threading import uuid -from typing import Dict +from typing import Any, Dict, Optional import requests from tools.browser_providers.base import CloudBrowserProvider +from tools.managed_tool_gateway import resolve_managed_tool_gateway logger = logging.getLogger(__name__) +_pending_create_keys: Dict[str, str] = {} +_pending_create_keys_lock = threading.Lock() + + +def _get_or_create_pending_create_key(task_id: str) -> str: + with _pending_create_keys_lock: + existing = _pending_create_keys.get(task_id) + if existing: + return existing + + created = f"browserbase-session-create:{uuid.uuid4().hex}" + _pending_create_keys[task_id] = created + return created + + +def _clear_pending_create_key(task_id: str) -> None: + with _pending_create_keys_lock: + _pending_create_keys.pop(task_id, None) + + +def _should_preserve_pending_create_key(response: requests.Response) -> bool: + if response.status_code >= 500: + return True + + if response.status_code != 409: + return False + + try: + payload = response.json() + except Exception: + return False + + if not isinstance(payload, dict): + return False + + error = payload.get("error") + if not isinstance(error, dict): + return False + + message = str(error.get("message") or "").lower() + return "already in progress" in message class BrowserbaseProvider(CloudBrowserProvider): @@ -19,28 +62,46 @@ class BrowserbaseProvider(CloudBrowserProvider): return "Browserbase" def is_configured(self) -> bool: - return bool( - os.environ.get("BROWSERBASE_API_KEY") - and os.environ.get("BROWSERBASE_PROJECT_ID") - ) 
+ return self._get_config_or_none() is not None # ------------------------------------------------------------------ # Session lifecycle # ------------------------------------------------------------------ - def _get_config(self) -> Dict[str, str]: + def _get_config_or_none(self) -> Optional[Dict[str, Any]]: api_key = os.environ.get("BROWSERBASE_API_KEY") project_id = os.environ.get("BROWSERBASE_PROJECT_ID") - if not api_key or not project_id: + if api_key and project_id: + return { + "api_key": api_key, + "project_id": project_id, + "base_url": os.environ.get("BROWSERBASE_BASE_URL", "https://api.browserbase.com").rstrip("/"), + "managed_mode": False, + } + + managed = resolve_managed_tool_gateway("browserbase") + if managed is None: + return None + + return { + "api_key": managed.nous_user_token, + "project_id": "managed", + "base_url": managed.gateway_origin.rstrip("/"), + "managed_mode": True, + } + + def _get_config(self) -> Dict[str, Any]: + config = self._get_config_or_none() + if config is None: raise ValueError( - "BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID environment " - "variables are required. Get your credentials at " - "https://browserbase.com" + "Browserbase requires either direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID credentials " + "or a managed Browserbase gateway configuration." 
) - return {"api_key": api_key, "project_id": project_id} + return config def create_session(self, task_id: str) -> Dict[str, object]: config = self._get_config() + managed_mode = bool(config.get("managed_mode")) # Optional env-var knobs enable_proxies = os.environ.get("BROWSERBASE_PROXIES", "true").lower() != "false" @@ -80,8 +141,11 @@ class BrowserbaseProvider(CloudBrowserProvider): "Content-Type": "application/json", "X-BB-API-Key": config["api_key"], } + if managed_mode: + headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id) + response = requests.post( - "https://api.browserbase.com/v1/sessions", + f"{config['base_url']}/v1/sessions", headers=headers, json=session_config, timeout=30, @@ -91,7 +155,7 @@ class BrowserbaseProvider(CloudBrowserProvider): keepalive_fallback = False # Handle 402 — paid features unavailable - if response.status_code == 402: + if response.status_code == 402 and not managed_mode: if enable_keep_alive: keepalive_fallback = True logger.warning( @@ -100,7 +164,7 @@ class BrowserbaseProvider(CloudBrowserProvider): ) session_config.pop("keepAlive", None) response = requests.post( - "https://api.browserbase.com/v1/sessions", + f"{config['base_url']}/v1/sessions", headers=headers, json=session_config, timeout=30, @@ -114,20 +178,25 @@ class BrowserbaseProvider(CloudBrowserProvider): ) session_config.pop("proxies", None) response = requests.post( - "https://api.browserbase.com/v1/sessions", + f"{config['base_url']}/v1/sessions", headers=headers, json=session_config, timeout=30, ) if not response.ok: + if managed_mode and not _should_preserve_pending_create_key(response): + _clear_pending_create_key(task_id) raise RuntimeError( f"Failed to create Browserbase session: " f"{response.status_code} {response.text}" ) session_data = response.json() + if managed_mode: + _clear_pending_create_key(task_id) session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}" + external_call_id = response.headers.get("x-external-call-id") if 
managed_mode else None if enable_proxies and not proxies_fallback: features_enabled["proxies"] = True @@ -146,6 +215,7 @@ class BrowserbaseProvider(CloudBrowserProvider): "bb_session_id": session_data["id"], "cdp_url": session_data["connectUrl"], "features": features_enabled, + "external_call_id": external_call_id, } def close_session(self, session_id: str) -> bool: @@ -157,7 +227,7 @@ class BrowserbaseProvider(CloudBrowserProvider): try: response = requests.post( - f"https://api.browserbase.com/v1/sessions/{session_id}", + f"{config['base_url']}/v1/sessions/{session_id}", headers={ "X-BB-API-Key": config["api_key"], "Content-Type": "application/json", @@ -184,20 +254,19 @@ class BrowserbaseProvider(CloudBrowserProvider): return False def emergency_cleanup(self, session_id: str) -> None: - api_key = os.environ.get("BROWSERBASE_API_KEY") - project_id = os.environ.get("BROWSERBASE_PROJECT_ID") - if not api_key or not project_id: + config = self._get_config_or_none() + if config is None: logger.warning("Cannot emergency-cleanup Browserbase session %s — missing credentials", session_id) return try: requests.post( - f"https://api.browserbase.com/v1/sessions/{session_id}", + f"{config['base_url']}/v1/sessions/{session_id}", headers={ - "X-BB-API-Key": api_key, + "X-BB-API-Key": config["api_key"], "Content-Type": "application/json", }, json={ - "projectId": project_id, + "projectId": config["project_id"], "status": "REQUEST_RELEASE", }, timeout=5, diff --git a/tools/browser_tool.py b/tools/browser_tool.py index e75025482..3018d5231 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -78,6 +78,7 @@ except Exception: from tools.browser_providers.base import CloudBrowserProvider from tools.browser_providers.browserbase import BrowserbaseProvider from tools.browser_providers.browser_use import BrowserUseProvider +from tools.tool_backend_helpers import normalize_browser_cloud_provider logger = logging.getLogger(__name__) @@ -235,7 +236,9 @@ def 
_get_cloud_provider() -> Optional[CloudBrowserProvider]: """Return the configured cloud browser provider, or None for local mode. Reads ``config["browser"]["cloud_provider"]`` once and caches the result - for the process lifetime. If unset → local mode (None). + for the process lifetime. An explicit ``local`` provider disables cloud + fallback. If unset, fall back to Browserbase when direct or managed + Browserbase credentials are available. """ global _cached_cloud_provider, _cloud_provider_resolved if _cloud_provider_resolved: @@ -249,14 +252,45 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]: import yaml with open(config_path) as f: cfg = yaml.safe_load(f) or {} - provider_key = cfg.get("browser", {}).get("cloud_provider") + browser_cfg = cfg.get("browser", {}) + provider_key = None + if isinstance(browser_cfg, dict) and "cloud_provider" in browser_cfg: + provider_key = normalize_browser_cloud_provider( + browser_cfg.get("cloud_provider") + ) + if provider_key == "local": + _cached_cloud_provider = None + return None if provider_key and provider_key in _PROVIDER_REGISTRY: _cached_cloud_provider = _PROVIDER_REGISTRY[provider_key]() except Exception as e: logger.debug("Could not read cloud_provider from config: %s", e) + + if _cached_cloud_provider is None: + fallback_provider = BrowserbaseProvider() + if fallback_provider.is_configured(): + _cached_cloud_provider = fallback_provider + return _cached_cloud_provider +def _get_browserbase_config_or_none() -> Optional[Dict[str, Any]]: + """Return Browserbase direct or managed config, or None when unavailable.""" + return BrowserbaseProvider()._get_config_or_none() + + +def _get_browserbase_config() -> Dict[str, Any]: + """Return Browserbase config or raise when neither direct nor managed mode is available.""" + return BrowserbaseProvider()._get_config() + + +def _is_local_mode() -> bool: + """Return True when the browser tool will use a local browser backend.""" + if _get_cdp_override(): + return False 
+ return _get_cloud_provider() is None + + def _socket_safe_tmpdir() -> str: """Return a short temp directory path suitable for Unix domain sockets. @@ -1845,7 +1879,7 @@ if __name__ == "__main__": print(" Install: npm install -g agent-browser && agent-browser install --with-deps") if _cp is not None and not _cp.is_configured(): print(f" - {_cp.provider_name()} credentials not configured") - print(" Tip: remove cloud_provider from config to use free local mode instead") + print(" Tip: set browser.cloud_provider to 'local' to use free local mode instead") print("\n📋 Available Browser Tools:") for schema in BROWSER_TOOL_SCHEMAS: diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 19270c6fe..dbf617444 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -757,7 +757,8 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict: f"Available via `from hermes_tools import ...`:\n\n" f"{tool_lines}\n\n" "Limits: 5-minute timeout, 50KB stdout cap, max 50 tool calls per script. " - "terminal() is foreground-only (no background or pty).\n\n" + "terminal() is foreground-only (no background or pty). " + "If the session uses a cloud sandbox backend, treat it as resumable task state rather than a durable always-on machine.\n\n" "Print your final result to stdout. Use Python stdlib (json, re, math, csv, " "datetime, collections, etc.) 
for processing between tool calls.\n\n" "Also available (no import needed — built into hermes_tools):\n" diff --git a/tools/environments/managed_modal.py b/tools/environments/managed_modal.py new file mode 100644 index 000000000..241c69094 --- /dev/null +++ b/tools/environments/managed_modal.py @@ -0,0 +1,282 @@ +"""Managed Modal environment backed by tool-gateway.""" + +from __future__ import annotations + +import json +import logging +import os +import requests +import time +import uuid +from typing import Any, Dict, Optional + +from tools.environments.base import BaseEnvironment +from tools.interrupt import is_interrupted +from tools.managed_tool_gateway import resolve_managed_tool_gateway + +logger = logging.getLogger(__name__) + + +def _request_timeout_env(name: str, default: float) -> float: + try: + value = float(os.getenv(name, str(default))) + return value if value > 0 else default + except (TypeError, ValueError): + return default + + +class ManagedModalEnvironment(BaseEnvironment): + """Gateway-owned Modal sandbox with Hermes-compatible execute/cleanup.""" + + _CONNECT_TIMEOUT_SECONDS = _request_timeout_env("TERMINAL_MANAGED_MODAL_CONNECT_TIMEOUT_SECONDS", 1.0) + _POLL_READ_TIMEOUT_SECONDS = _request_timeout_env("TERMINAL_MANAGED_MODAL_POLL_READ_TIMEOUT_SECONDS", 5.0) + _CANCEL_READ_TIMEOUT_SECONDS = _request_timeout_env("TERMINAL_MANAGED_MODAL_CANCEL_READ_TIMEOUT_SECONDS", 5.0) + + def __init__( + self, + image: str, + cwd: str = "/root", + timeout: int = 60, + modal_sandbox_kwargs: Optional[Dict[str, Any]] = None, + persistent_filesystem: bool = True, + task_id: str = "default", + ): + super().__init__(cwd=cwd, timeout=timeout) + + gateway = resolve_managed_tool_gateway("modal") + if gateway is None: + raise ValueError("Managed Modal requires a configured tool gateway and Nous user token") + + self._gateway_origin = gateway.gateway_origin.rstrip("/") + self._nous_user_token = gateway.nous_user_token + self._task_id = task_id + self._persistent = 
persistent_filesystem + self._image = image + self._sandbox_kwargs = dict(modal_sandbox_kwargs or {}) + self._create_idempotency_key = str(uuid.uuid4()) + self._sandbox_id = self._create_sandbox() + + def execute(self, command: str, cwd: str = "", *, + timeout: int | None = None, + stdin_data: str | None = None) -> dict: + exec_command, sudo_stdin = self._prepare_command(command) + + # When a sudo password is present, inject it via a shell-level pipe + # (same approach as the direct ModalEnvironment) since the gateway + # cannot pipe subprocess stdin directly. + if sudo_stdin is not None: + import shlex + exec_command = ( + f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}" + ) + + exec_cwd = cwd or self.cwd + effective_timeout = timeout or self.timeout + exec_id = str(uuid.uuid4()) + payload: Dict[str, Any] = { + "execId": exec_id, + "command": exec_command, + "cwd": exec_cwd, + "timeoutMs": int(effective_timeout * 1000), + } + if stdin_data is not None: + payload["stdinData"] = stdin_data + + try: + response = self._request( + "POST", + f"/v1/sandboxes/{self._sandbox_id}/execs", + json=payload, + timeout=10, + ) + except Exception as exc: + return { + "output": f"Managed Modal exec failed: {exc}", + "returncode": 1, + } + + if response.status_code >= 400: + return { + "output": self._format_error("Managed Modal exec failed", response), + "returncode": 1, + } + + body = response.json() + status = body.get("status") + if status in {"completed", "failed", "cancelled", "timeout"}: + return { + "output": body.get("output", ""), + "returncode": body.get("returncode", 1), + } + + if body.get("execId") != exec_id: + return { + "output": "Managed Modal exec start did not return the expected exec id", + "returncode": 1, + } + + poll_interval = 0.25 + deadline = time.monotonic() + effective_timeout + 10 + + while time.monotonic() < deadline: + if is_interrupted(): + self._cancel_exec(exec_id) + return { + "output": "[Command interrupted - Modal sandbox 
exec cancelled]", + "returncode": 130, + } + + try: + status_response = self._request( + "GET", + f"/v1/sandboxes/{self._sandbox_id}/execs/{exec_id}", + timeout=(self._CONNECT_TIMEOUT_SECONDS, self._POLL_READ_TIMEOUT_SECONDS), + ) + except Exception as exc: + return { + "output": f"Managed Modal exec poll failed: {exc}", + "returncode": 1, + } + + if status_response.status_code == 404: + return { + "output": "Managed Modal exec not found", + "returncode": 1, + } + + if status_response.status_code >= 400: + return { + "output": self._format_error("Managed Modal exec poll failed", status_response), + "returncode": 1, + } + + status_body = status_response.json() + status = status_body.get("status") + if status in {"completed", "failed", "cancelled", "timeout"}: + return { + "output": status_body.get("output", ""), + "returncode": status_body.get("returncode", 1), + } + + time.sleep(poll_interval) + + self._cancel_exec(exec_id) + return { + "output": f"Managed Modal exec timed out after {effective_timeout}s", + "returncode": 124, + } + + def cleanup(self): + if not getattr(self, "_sandbox_id", None): + return + + try: + self._request( + "POST", + f"/v1/sandboxes/{self._sandbox_id}/terminate", + json={ + "snapshotBeforeTerminate": self._persistent, + }, + timeout=60, + ) + except Exception as exc: + logger.warning("Managed Modal cleanup failed: %s", exc) + finally: + self._sandbox_id = None + + def _create_sandbox(self) -> str: + cpu = self._coerce_number(self._sandbox_kwargs.get("cpu"), 1) + memory = self._coerce_number( + self._sandbox_kwargs.get("memoryMiB", self._sandbox_kwargs.get("memory")), + 5120, + ) + disk = self._coerce_number( + self._sandbox_kwargs.get("ephemeral_disk", self._sandbox_kwargs.get("diskMiB")), + None, + ) + + create_payload = { + "image": self._image, + "cwd": self.cwd, + "cpu": cpu, + "memoryMiB": memory, + "timeoutMs": 3_600_000, + "idleTimeoutMs": max(300_000, int(self.timeout * 1000)), + "persistentFilesystem": self._persistent, + 
"logicalKey": self._task_id, + } + if disk is not None: + create_payload["diskMiB"] = disk + + response = self._request( + "POST", + "/v1/sandboxes", + json=create_payload, + timeout=60, + extra_headers={ + "x-idempotency-key": self._create_idempotency_key, + }, + ) + if response.status_code >= 400: + raise RuntimeError(self._format_error("Managed Modal create failed", response)) + + body = response.json() + sandbox_id = body.get("id") + if not isinstance(sandbox_id, str) or not sandbox_id: + raise RuntimeError("Managed Modal create did not return a sandbox id") + return sandbox_id + + def _request(self, method: str, path: str, *, + json: Dict[str, Any] | None = None, + timeout: int = 30, + extra_headers: Dict[str, str] | None = None) -> requests.Response: + headers = { + "Authorization": f"Bearer {self._nous_user_token}", + "Content-Type": "application/json", + } + if extra_headers: + headers.update(extra_headers) + + return requests.request( + method, + f"{self._gateway_origin}{path}", + headers=headers, + json=json, + timeout=timeout, + ) + + def _cancel_exec(self, exec_id: str) -> None: + try: + self._request( + "POST", + f"/v1/sandboxes/{self._sandbox_id}/execs/{exec_id}/cancel", + timeout=(self._CONNECT_TIMEOUT_SECONDS, self._CANCEL_READ_TIMEOUT_SECONDS), + ) + except Exception as exc: + logger.warning("Managed Modal exec cancel failed: %s", exc) + + @staticmethod + def _coerce_number(value: Any, default: float) -> float: + try: + if value is None: + return default + return float(value) + except (TypeError, ValueError): + return default + + @staticmethod + def _format_error(prefix: str, response: requests.Response) -> str: + try: + payload = response.json() + if isinstance(payload, dict): + message = payload.get("error") or payload.get("message") or payload.get("code") + if isinstance(message, str) and message: + return f"{prefix}: {message}" + return f"{prefix}: {json.dumps(payload, ensure_ascii=False)}" + except Exception: + pass + + text = 
response.text.strip() + if text: + return f"{prefix}: {text}" + return f"{prefix}: HTTP {response.status_code}" diff --git a/tools/environments/modal.py b/tools/environments/modal.py index f8210ba78..d499dc4a3 100644 --- a/tools/environments/modal.py +++ b/tools/environments/modal.py @@ -20,6 +20,7 @@ from tools.interrupt import is_interrupted logger = logging.getLogger(__name__) _SNAPSHOT_STORE = get_hermes_home() / "modal_snapshots.json" +_DIRECT_SNAPSHOT_NAMESPACE = "direct" def _load_snapshots() -> Dict[str, str]: @@ -38,12 +39,72 @@ def _save_snapshots(data: Dict[str, str]) -> None: _SNAPSHOT_STORE.write_text(json.dumps(data, indent=2)) -class _AsyncWorker: - """Background thread with its own event loop for async-safe swe-rex calls. +def _direct_snapshot_key(task_id: str) -> str: + return f"{_DIRECT_SNAPSHOT_NAMESPACE}:{task_id}" - Allows sync code to submit async coroutines and block for results, - even when called from inside another running event loop (e.g. Atropos). - """ + +def _get_snapshot_restore_candidate(task_id: str) -> tuple[str | None, bool]: + """Return a snapshot id for direct Modal restore and whether the key is legacy.""" + snapshots = _load_snapshots() + + namespaced_key = _direct_snapshot_key(task_id) + snapshot_id = snapshots.get(namespaced_key) + if isinstance(snapshot_id, str) and snapshot_id: + return snapshot_id, False + + legacy_snapshot_id = snapshots.get(task_id) + if isinstance(legacy_snapshot_id, str) and legacy_snapshot_id: + return legacy_snapshot_id, True + + return None, False + + +def _store_direct_snapshot(task_id: str, snapshot_id: str) -> None: + """Persist the direct Modal snapshot id under the direct namespace.""" + snapshots = _load_snapshots() + snapshots[_direct_snapshot_key(task_id)] = snapshot_id + snapshots.pop(task_id, None) + _save_snapshots(snapshots) + + +def _delete_direct_snapshot(task_id: str, snapshot_id: str | None = None) -> None: + """Remove direct Modal snapshot entries for a task, including legacy 
keys.""" + snapshots = _load_snapshots() + updated = False + + for key in (_direct_snapshot_key(task_id), task_id): + value = snapshots.get(key) + if value is None: + continue + if snapshot_id is None or value == snapshot_id: + snapshots.pop(key, None) + updated = True + + if updated: + _save_snapshots(snapshots) + + +def _resolve_modal_image(image_spec: Any) -> Any: + """Convert registry references or snapshot ids into Modal image objects.""" + import modal as _modal + + if not isinstance(image_spec, str): + return image_spec + + if image_spec.startswith("im-"): + return _modal.Image.from_id(image_spec) + + return _modal.Image.from_registry( + image_spec, + setup_dockerfile_commands=[ + "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; " + "python -m ensurepip --upgrade --default-pip 2>/dev/null || true", + ], + ) + + +class _AsyncWorker: + """Background thread with its own event loop for async-safe swe-rex calls.""" def __init__(self): self._loop: Optional[asyncio.AbstractEventLoop] = None @@ -101,42 +162,20 @@ class ModalEnvironment(BaseEnvironment): sandbox_kwargs = dict(modal_sandbox_kwargs or {}) - # If persistent, try to restore from a previous snapshot - restored_image = None + restored_snapshot_id = None + restored_from_legacy_key = False if self._persistent: - snapshot_id = _load_snapshots().get(self._task_id) - if snapshot_id: - try: - import modal - restored_image = modal.Image.from_id(snapshot_id) - logger.info("Modal: restoring from snapshot %s", snapshot_id[:20]) - except Exception as e: - logger.warning("Modal: failed to restore snapshot, using base image: %s", e) - restored_image = None + restored_snapshot_id, restored_from_legacy_key = _get_snapshot_restore_candidate(self._task_id) + if restored_snapshot_id: + logger.info("Modal: restoring from snapshot %s", restored_snapshot_id[:20]) - effective_image = restored_image if restored_image else image - - # Pre-build a modal.Image with pip fix for Modal's legacy image builder. 
- # Some task images have broken pip; fix via ensurepip before Modal uses it. - import modal as _modal - if isinstance(effective_image, str): - effective_image = _modal.Image.from_registry( - effective_image, - setup_dockerfile_commands=[ - "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; " - "python -m ensurepip --upgrade --default-pip 2>/dev/null || true", - ], - ) - - # Start the async worker thread and create the deployment on it - # so all gRPC channels are bound to the worker's event loop. self._worker.start() from swerex.deployment.modal import ModalDeployment - async def _create_and_start(): + async def _create_and_start(image_spec: Any): deployment = ModalDeployment( - image=effective_image, + image=image_spec, startup_timeout=180.0, runtime_timeout=3600.0, deployment_timeout=3600.0, @@ -146,7 +185,30 @@ class ModalEnvironment(BaseEnvironment): await deployment.start() return deployment - self._deployment = self._worker.run_coroutine(_create_and_start()) + try: + target_image_spec = restored_snapshot_id or image + try: + effective_image = _resolve_modal_image(target_image_spec) + self._deployment = self._worker.run_coroutine(_create_and_start(effective_image)) + except Exception as exc: + if not restored_snapshot_id: + raise + + logger.warning( + "Modal: failed to restore snapshot %s, retrying with base image: %s", + restored_snapshot_id[:20], + exc, + ) + _delete_direct_snapshot(self._task_id, restored_snapshot_id) + base_image = _resolve_modal_image(image) + self._deployment = self._worker.run_coroutine(_create_and_start(base_image)) + else: + if restored_snapshot_id and restored_from_legacy_key: + _store_direct_snapshot(self._task_id, restored_snapshot_id) + logger.info("Modal: migrated legacy snapshot entry for task %s", self._task_id) + except Exception: + self._worker.stop() + raise def execute(self, command: str, cwd: str = "", *, timeout: int | None = None, @@ -160,7 +222,7 @@ class ModalEnvironment(BaseEnvironment): exec_command, 
sudo_stdin = self._prepare_command(command) # Modal sandboxes execute commands via the Modal SDK and cannot pipe - # subprocess stdin directly the way a local Popen can. When a sudo + # subprocess stdin directly the way a local Popen can. When a sudo # password is present, use a shell-level pipe from printf so that the # password feeds sudo -S without appearing as an echo argument embedded # in the shell string. @@ -175,7 +237,6 @@ class ModalEnvironment(BaseEnvironment): effective_cwd = cwd or self.cwd effective_timeout = timeout or self.timeout - # Run in a background thread so we can poll for interrupts result_holder = {"value": None, "error": None} def _run(): @@ -191,6 +252,7 @@ class ModalEnvironment(BaseEnvironment): merge_output_streams=True, ) ) + output = self._worker.run_coroutine(_do_execute()) result_holder["value"] = { "output": output.stdout, @@ -227,7 +289,7 @@ class ModalEnvironment(BaseEnvironment): if self._persistent: try: - sandbox = getattr(self._deployment, '_sandbox', None) + sandbox = getattr(self._deployment, "_sandbox", None) if sandbox: async def _snapshot(): img = await sandbox.snapshot_filesystem.aio() @@ -239,11 +301,12 @@ class ModalEnvironment(BaseEnvironment): snapshot_id = None if snapshot_id: - snapshots = _load_snapshots() - snapshots[self._task_id] = snapshot_id - _save_snapshots(snapshots) - logger.info("Modal: saved filesystem snapshot %s for task %s", - snapshot_id[:20], self._task_id) + _store_direct_snapshot(self._task_id, snapshot_id) + logger.info( + "Modal: saved filesystem snapshot %s for task %s", + snapshot_id[:20], + self._task_id, + ) except Exception as e: logger.warning("Modal: filesystem snapshot failed: %s", e) diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py index 5dadf4998..84edb93fe 100644 --- a/tools/image_generation_tool.py +++ b/tools/image_generation_tool.py @@ -32,9 +32,13 @@ import json import logging import os import datetime +import threading +import uuid from typing 
import Dict, Any, Optional, Union +from urllib.parse import urlencode import fal_client from tools.debug_helpers import DebugSession +from tools.managed_tool_gateway import resolve_managed_tool_gateway logger = logging.getLogger(__name__) @@ -77,6 +81,137 @@ VALID_OUTPUT_FORMATS = ["jpeg", "png"] VALID_ACCELERATION_MODES = ["none", "regular", "high"] _debug = DebugSession("image_tools", env_var="IMAGE_TOOLS_DEBUG") +_managed_fal_client = None +_managed_fal_client_config = None +_managed_fal_client_lock = threading.Lock() + + +def _resolve_managed_fal_gateway(): + """Return managed fal-queue gateway config when direct FAL credentials are absent.""" + if os.getenv("FAL_KEY"): + return None + return resolve_managed_tool_gateway("fal-queue") + + +def _normalize_fal_queue_url_format(queue_run_origin: str) -> str: + normalized_origin = str(queue_run_origin or "").strip().rstrip("/") + if not normalized_origin: + raise ValueError("Managed FAL queue origin is required") + return f"{normalized_origin}/" + + +class _ManagedFalSyncClient: + """Small per-instance wrapper around fal_client.SyncClient for managed queue hosts.""" + + def __init__(self, *, key: str, queue_run_origin: str): + sync_client_class = getattr(fal_client, "SyncClient", None) + if sync_client_class is None: + raise RuntimeError("fal_client.SyncClient is required for managed FAL gateway mode") + + client_module = getattr(fal_client, "client", None) + if client_module is None: + raise RuntimeError("fal_client.client is required for managed FAL gateway mode") + + self._queue_url_format = _normalize_fal_queue_url_format(queue_run_origin) + self._sync_client = sync_client_class(key=key) + self._http_client = getattr(self._sync_client, "_client", None) + self._maybe_retry_request = getattr(client_module, "_maybe_retry_request", None) + self._raise_for_status = getattr(client_module, "_raise_for_status", None) + self._request_handle_class = getattr(client_module, "SyncRequestHandle", None) + self._add_hint_header 
= getattr(client_module, "add_hint_header", None) + self._add_priority_header = getattr(client_module, "add_priority_header", None) + self._add_timeout_header = getattr(client_module, "add_timeout_header", None) + + if self._http_client is None: + raise RuntimeError("fal_client.SyncClient._client is required for managed FAL gateway mode") + if self._maybe_retry_request is None or self._raise_for_status is None: + raise RuntimeError("fal_client.client request helpers are required for managed FAL gateway mode") + if self._request_handle_class is None: + raise RuntimeError("fal_client.client.SyncRequestHandle is required for managed FAL gateway mode") + + def submit( + self, + application: str, + arguments: Dict[str, Any], + *, + path: str = "", + hint: Optional[str] = None, + webhook_url: Optional[str] = None, + priority: Any = None, + headers: Optional[Dict[str, str]] = None, + start_timeout: Optional[Union[int, float]] = None, + ): + url = self._queue_url_format + application + if path: + url += "/" + path.lstrip("/") + if webhook_url is not None: + url += "?" 
+ urlencode({"fal_webhook": webhook_url}) + + request_headers = dict(headers or {}) + if hint is not None and self._add_hint_header is not None: + self._add_hint_header(hint, request_headers) + if priority is not None: + if self._add_priority_header is None: + raise RuntimeError("fal_client.client.add_priority_header is required for priority requests") + self._add_priority_header(priority, request_headers) + if start_timeout is not None: + if self._add_timeout_header is None: + raise RuntimeError("fal_client.client.add_timeout_header is required for timeout requests") + self._add_timeout_header(start_timeout, request_headers) + + response = self._maybe_retry_request( + self._http_client, + "POST", + url, + json=arguments, + timeout=getattr(self._sync_client, "default_timeout", 120.0), + headers=request_headers, + ) + self._raise_for_status(response) + + data = response.json() + return self._request_handle_class( + request_id=data["request_id"], + response_url=data["response_url"], + status_url=data["status_url"], + cancel_url=data["cancel_url"], + client=self._http_client, + ) + + +def _get_managed_fal_client(managed_gateway): + """Reuse the managed FAL client so its internal httpx.Client is not leaked per call.""" + global _managed_fal_client, _managed_fal_client_config + + client_config = ( + managed_gateway.gateway_origin.rstrip("/"), + managed_gateway.nous_user_token, + ) + with _managed_fal_client_lock: + if _managed_fal_client is not None and _managed_fal_client_config == client_config: + return _managed_fal_client + + _managed_fal_client = _ManagedFalSyncClient( + key=managed_gateway.nous_user_token, + queue_run_origin=managed_gateway.gateway_origin, + ) + _managed_fal_client_config = client_config + return _managed_fal_client + + +def _submit_fal_request(model: str, arguments: Dict[str, Any]): + """Submit a FAL request using direct credentials or the managed queue gateway.""" + request_headers = {"x-idempotency-key": str(uuid.uuid4())} + managed_gateway = 
_resolve_managed_fal_gateway() + if managed_gateway is None: + return fal_client.submit(model, arguments=arguments, headers=request_headers) + + managed_client = _get_managed_fal_client(managed_gateway) + return managed_client.submit( + model, + arguments=arguments, + headers=request_headers, + ) def _validate_parameters( @@ -186,9 +321,9 @@ def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]: # The async API (submit_async) caches a global httpx.AsyncClient via # @cached_property, which breaks when asyncio.run() destroys the loop # between calls (gateway thread-pool pattern). - handler = fal_client.submit( + handler = _submit_fal_request( UPSCALER_MODEL, - arguments=upscaler_arguments + arguments=upscaler_arguments, ) # Get the upscaled result (sync — blocks until done) @@ -280,8 +415,10 @@ def image_generate_tool( raise ValueError("Prompt is required and must be a non-empty string") # Check API key availability - if not os.getenv("FAL_KEY"): - raise ValueError("FAL_KEY environment variable not set") + if not (os.getenv("FAL_KEY") or _resolve_managed_fal_gateway()): + raise ValueError( + "FAL_KEY environment variable not set and managed FAL gateway is unavailable" + ) # Validate other parameters validated_params = _validate_parameters( @@ -312,9 +449,9 @@ def image_generate_tool( logger.info(" Guidance: %s", validated_params['guidance_scale']) # Submit request to FAL.ai using sync API (avoids cached event loop issues) - handler = fal_client.submit( + handler = _submit_fal_request( DEFAULT_MODEL, - arguments=arguments + arguments=arguments, ) # Get the result (sync — blocks until done) @@ -379,10 +516,12 @@ def image_generate_tool( error_msg = f"Error generating image: {str(e)}" logger.error("%s", error_msg, exc_info=True) - # Prepare error response - minimal format + # Include error details so callers can diagnose failures response_data = { "success": False, - "image": None + "image": None, + "error": str(e), + "error_type": type(e).__name__, 
} debug_call_data["error"] = error_msg @@ -400,7 +539,7 @@ def check_fal_api_key() -> bool: Returns: bool: True if API key is set, False otherwise """ - return bool(os.getenv("FAL_KEY")) + return bool(os.getenv("FAL_KEY") or _resolve_managed_fal_gateway()) def check_image_generation_requirements() -> bool: @@ -556,7 +695,7 @@ registry.register( schema=IMAGE_GENERATE_SCHEMA, handler=_handle_image_generate, check_fn=check_image_generation_requirements, - requires_env=["FAL_KEY"], + requires_env=[], is_async=False, # Switched to sync fal_client API to fix "Event loop is closed" in gateway emoji="🎨", ) diff --git a/tools/managed_tool_gateway.py b/tools/managed_tool_gateway.py new file mode 100644 index 000000000..96dd27b30 --- /dev/null +++ b/tools/managed_tool_gateway.py @@ -0,0 +1,160 @@ +"""Generic managed-tool gateway helpers for Nous-hosted vendor passthroughs.""" + +from __future__ import annotations + +import json +import os +from datetime import datetime, timezone +from dataclasses import dataclass +from typing import Callable, Optional + +from hermes_cli.config import get_hermes_home + +_DEFAULT_TOOL_GATEWAY_DOMAIN = "nousresearch.com" +_DEFAULT_TOOL_GATEWAY_SCHEME = "https" +_NOUS_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 + + +@dataclass(frozen=True) +class ManagedToolGatewayConfig: + vendor: str + gateway_origin: str + nous_user_token: str + managed_mode: bool + + +def auth_json_path(): + """Return the Hermes auth store path, respecting HERMES_HOME overrides.""" + return get_hermes_home() / "auth.json" + + +def _read_nous_provider_state() -> Optional[dict]: + try: + path = auth_json_path() + if not path.is_file(): + return None + data = json.loads(path.read_text()) + providers = data.get("providers", {}) + if not isinstance(providers, dict): + return None + nous_provider = providers.get("nous", {}) + if isinstance(nous_provider, dict): + return nous_provider + except Exception: + pass + return None + + +def _parse_timestamp(value: object) -> 
Optional[datetime]: + if not isinstance(value, str) or not value.strip(): + return None + normalized = value.strip() + if normalized.endswith("Z"): + normalized = normalized[:-1] + "+00:00" + try: + parsed = datetime.fromisoformat(normalized) + except ValueError: + return None + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=timezone.utc) + return parsed.astimezone(timezone.utc) + + +def _access_token_is_expiring(expires_at: object, skew_seconds: int) -> bool: + expires = _parse_timestamp(expires_at) + if expires is None: + return True + remaining = (expires - datetime.now(timezone.utc)).total_seconds() + return remaining <= max(0, int(skew_seconds)) + + +def read_nous_access_token() -> Optional[str]: + """Read a Nous Subscriber OAuth access token from auth store or env override.""" + explicit = os.getenv("TOOL_GATEWAY_USER_TOKEN") + if isinstance(explicit, str) and explicit.strip(): + return explicit.strip() + + nous_provider = _read_nous_provider_state() or {} + access_token = nous_provider.get("access_token") + cached_token = access_token.strip() if isinstance(access_token, str) and access_token.strip() else None + + if cached_token and not _access_token_is_expiring( + nous_provider.get("expires_at"), + _NOUS_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, + ): + return cached_token + + try: + from hermes_cli.auth import resolve_nous_access_token + + refreshed_token = resolve_nous_access_token( + refresh_skew_seconds=_NOUS_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, + ) + if isinstance(refreshed_token, str) and refreshed_token.strip(): + return refreshed_token.strip() + except Exception: + pass + + return cached_token + + +def get_tool_gateway_scheme() -> str: + """Return configured shared gateway URL scheme.""" + scheme = os.getenv("TOOL_GATEWAY_SCHEME", "").strip().lower() + if not scheme: + return _DEFAULT_TOOL_GATEWAY_SCHEME + + if scheme in {"http", "https"}: + return scheme + + raise ValueError("TOOL_GATEWAY_SCHEME must be 'http' or 'https'") + + +def 
build_vendor_gateway_url(vendor: str) -> str: + """Return the gateway origin for a specific vendor.""" + vendor_key = f"{vendor.upper().replace('-', '_')}_GATEWAY_URL" + explicit_vendor_url = os.getenv(vendor_key, "").strip().rstrip("/") + if explicit_vendor_url: + return explicit_vendor_url + + shared_scheme = get_tool_gateway_scheme() + shared_domain = os.getenv("TOOL_GATEWAY_DOMAIN", "").strip().strip("/") + if shared_domain: + return f"{shared_scheme}://{vendor}-gateway.{shared_domain}" + + return f"{shared_scheme}://{vendor}-gateway.{_DEFAULT_TOOL_GATEWAY_DOMAIN}" + + +def resolve_managed_tool_gateway( + vendor: str, + gateway_builder: Optional[Callable[[str], str]] = None, + token_reader: Optional[Callable[[], Optional[str]]] = None, +) -> Optional[ManagedToolGatewayConfig]: + """Resolve shared managed-tool gateway config for a vendor.""" + resolved_gateway_builder = gateway_builder or build_vendor_gateway_url + resolved_token_reader = token_reader or read_nous_access_token + + gateway_origin = resolved_gateway_builder(vendor) + nous_user_token = resolved_token_reader() + if not gateway_origin or not nous_user_token: + return None + + return ManagedToolGatewayConfig( + vendor=vendor, + gateway_origin=gateway_origin, + nous_user_token=nous_user_token, + managed_mode=True, + ) + + +def is_managed_tool_gateway_ready( + vendor: str, + gateway_builder: Optional[Callable[[str], str]] = None, + token_reader: Optional[Callable[[], Optional[str]]] = None, +) -> bool: + """Return True when gateway URL and Nous access token are available.""" + return resolve_managed_tool_gateway( + vendor, + gateway_builder=gateway_builder, + token_reader=token_reader, + ) is not None diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index aa917ab1a..13b724bf5 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -3,12 +3,12 @@ Terminal Tool Module A terminal tool that executes commands in local, Docker, Modal, SSH, Singularity, and Daytona environments. 
-Supports local execution, Docker containers, and Modal cloud sandboxes. +Supports local execution, containerized backends, and Modal cloud sandboxes, including managed gateway mode. Environment Selection (via TERMINAL_ENV environment variable): - "local": Execute directly on the host machine (default, fastest) - "docker": Execute in Docker containers (isolated, requires Docker) -- "modal": Execute in Modal cloud sandboxes (scalable, requires Modal account) +- "modal": Execute in Modal cloud sandboxes (direct Modal or managed gateway) Features: - Multiple execution backends (local, docker, modal) @@ -16,6 +16,10 @@ Features: - VM/container lifecycle management - Automatic cleanup after inactivity +Cloud sandbox note: +- Persistent filesystems preserve working state across sandbox recreation +- Persistent filesystems do NOT guarantee the same live sandbox or long-running processes survive cleanup, idle reaping, or Hermes exit + Usage: from terminal_tool import terminal_tool @@ -50,12 +54,18 @@ logger = logging.getLogger(__name__) from tools.interrupt import is_interrupted, _interrupt_event # noqa: F401 — re-exported +def ensure_minisweagent_on_path(_repo_root: Path | None = None) -> None: + """Backward-compatible no-op after minisweagent_path.py removal.""" + return + + # ============================================================================= # Custom Singularity Environment with more space # ============================================================================= # Singularity helpers (scratch dir, SIF cache) now live in tools/environments/singularity.py from tools.environments.singularity import _get_scratch_dir +from tools.tool_backend_helpers import has_direct_modal_credentials, normalize_modal_mode # Disk usage warning threshold (in GB) @@ -361,10 +371,12 @@ from tools.environments.singularity import SingularityEnvironment as _Singularit from tools.environments.ssh import SSHEnvironment as _SSHEnvironment from tools.environments.docker import 
DockerEnvironment as _DockerEnvironment from tools.environments.modal import ModalEnvironment as _ModalEnvironment +from tools.environments.managed_modal import ManagedModalEnvironment as _ManagedModalEnvironment +from tools.managed_tool_gateway import is_managed_tool_gateway_ready # Tool description for LLM -TERMINAL_TOOL_DESCRIPTION = """Execute shell commands on a Linux environment. Filesystem persists between calls. +TERMINAL_TOOL_DESCRIPTION = """Execute shell commands on a Linux environment. Filesystem usually persists between calls. Do NOT use cat/head/tail to read files — use read_file instead. Do NOT use grep/rg/find to search — use search_files instead. @@ -380,6 +392,7 @@ Working directory: Use 'workdir' for per-command cwd. PTY mode: Set pty=true for interactive CLI tools (Codex, Claude Code, Python REPL). Do NOT use vim/nano/interactive tools without pty=true — they hang without a pseudo-terminal. Pipe git output to cat if it might page. +Important: cloud sandboxes may be cleaned up, idled out, or recreated between turns. Persistent filesystem means files can resume later; it does NOT guarantee a continuously running machine or surviving background processes. Use terminal sandboxes for task work, not durable hosting. 
""" # Global state for environment lifecycle management @@ -493,6 +506,7 @@ def _get_env_config() -> Dict[str, Any]: return { "env_type": env_type, + "modal_mode": normalize_modal_mode(os.getenv("TERMINAL_MODAL_MODE", "auto")), "docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", default_image), "docker_forward_env": _parse_env_var("TERMINAL_DOCKER_FORWARD_ENV", "[]", json.loads, "valid JSON"), "singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", f"docker://{default_image}"), @@ -525,6 +539,27 @@ def _get_env_config() -> Dict[str, Any]: } +def _get_modal_backend_state(modal_mode: object | None) -> Dict[str, Any]: + """Resolve direct vs managed Modal backend selection.""" + normalized_mode = normalize_modal_mode(modal_mode) + has_direct = has_direct_modal_credentials() + managed_ready = is_managed_tool_gateway_ready("modal") + + if normalized_mode == "managed": + selected_backend = "managed" if managed_ready else None + elif normalized_mode == "direct": + selected_backend = "direct" if has_direct else None + else: + selected_backend = "direct" if has_direct else "managed" if managed_ready else None + + return { + "mode": normalized_mode, + "has_direct": has_direct, + "managed_ready": managed_ready, + "selected_backend": selected_backend, + } + + def _create_environment(env_type: str, image: str, cwd: str, timeout: int, ssh_config: dict = None, container_config: dict = None, local_config: dict = None, @@ -590,7 +625,29 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, sandbox_kwargs["ephemeral_disk"] = disk except Exception: pass - + + modal_state = _get_modal_backend_state(cc.get("modal_mode")) + + if modal_state["selected_backend"] == "managed": + return _ManagedModalEnvironment( + image=image, cwd=cwd, timeout=timeout, + modal_sandbox_kwargs=sandbox_kwargs, + persistent_filesystem=persistent, task_id=task_id, + ) + + if modal_state["selected_backend"] != "direct": + if modal_state["mode"] == "managed": + raise ValueError( + "Modal 
backend is configured for managed mode, but the managed tool gateway is unavailable." + ) + if modal_state["mode"] == "direct": + raise ValueError( + "Modal backend is configured for direct mode, but no direct Modal credentials/config were found." + ) + raise ValueError( + "Modal backend selected but no direct Modal credentials/config or managed tool gateway was found." + ) + return _ModalEnvironment( image=image, cwd=cwd, timeout=timeout, modal_sandbox_kwargs=sandbox_kwargs, @@ -956,6 +1013,7 @@ def terminal_tool( "container_memory": config.get("container_memory", 5120), "container_disk": config.get("container_disk", 51200), "container_persistent": config.get("container_persistent", True), + "modal_mode": config.get("modal_mode", "auto"), "docker_volumes": config.get("docker_volumes", []), "docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False), } @@ -1173,10 +1231,14 @@ def terminal_tool( }, ensure_ascii=False) except Exception as e: + import traceback + tb_str = traceback.format_exc() + logger.error("terminal_tool exception:\n%s", tb_str) return json.dumps({ "output": "", "exit_code": -1, "error": f"Failed to execute command: {str(e)}", + "traceback": tb_str, "status": "error" }, ensure_ascii=False) @@ -1216,18 +1278,35 @@ def check_terminal_requirements() -> bool: return True elif env_type == "modal": + modal_state = _get_modal_backend_state(config.get("modal_mode")) + if modal_state["selected_backend"] == "managed": + return True + + if modal_state["selected_backend"] != "direct": + if modal_state["mode"] == "managed": + logger.error( + "Modal backend selected with TERMINAL_MODAL_MODE=managed, but the managed " + "tool gateway is unavailable. Configure the managed gateway or choose " + "TERMINAL_MODAL_MODE=direct/auto." + ) + elif modal_state["mode"] == "direct": + logger.error( + "Modal backend selected with TERMINAL_MODAL_MODE=direct, but no direct " + "Modal credentials/config were found. 
Configure Modal or choose " + "TERMINAL_MODAL_MODE=managed/auto." + ) + else: + logger.error( + "Modal backend selected but no direct Modal credentials/config or managed " + "tool gateway was found. Configure Modal, set up the managed gateway, " + "or choose a different TERMINAL_ENV." + ) + return False + if importlib.util.find_spec("swerex") is None: - logger.error("swe-rex is required for modal terminal backend: pip install 'swe-rex[modal]'") - return False - has_token = os.getenv("MODAL_TOKEN_ID") is not None - has_config = Path.home().joinpath(".modal.toml").exists() - if not (has_token or has_config): - logger.error( - "Modal backend selected but no MODAL_TOKEN_ID environment variable " - "or ~/.modal.toml config file was found. Configure Modal or choose " - "a different TERMINAL_ENV." - ) + logger.error("swe-rex is required for direct modal terminal backend: pip install 'swe-rex[modal]'") return False + return True elif env_type == "daytona": diff --git a/tools/tool_backend_helpers.py b/tools/tool_backend_helpers.py new file mode 100644 index 000000000..bcf93e849 --- /dev/null +++ b/tools/tool_backend_helpers.py @@ -0,0 +1,41 @@ +"""Shared helpers for tool backend selection.""" + +from __future__ import annotations + +import os +from pathlib import Path + + +_DEFAULT_BROWSER_PROVIDER = "local" +_DEFAULT_MODAL_MODE = "auto" +_VALID_MODAL_MODES = {"auto", "direct", "managed"} + + +def normalize_browser_cloud_provider(value: object | None) -> str: + """Return a normalized browser provider key.""" + provider = str(value or _DEFAULT_BROWSER_PROVIDER).strip().lower() + return provider or _DEFAULT_BROWSER_PROVIDER + + +def normalize_modal_mode(value: object | None) -> str: + """Return a normalized modal execution mode.""" + mode = str(value or _DEFAULT_MODAL_MODE).strip().lower() + if mode in _VALID_MODAL_MODES: + return mode + return _DEFAULT_MODAL_MODE + + +def has_direct_modal_credentials() -> bool: + """Return True when direct Modal credentials/config are 
available.""" + return bool( + (os.getenv("MODAL_TOKEN_ID") and os.getenv("MODAL_TOKEN_SECRET")) + or (Path.home() / ".modal.toml").exists() + ) + + +def resolve_openai_audio_api_key() -> str: + """Prefer the voice-tools key, but fall back to the normal OpenAI key.""" + return ( + os.getenv("VOICE_TOOLS_OPENAI_KEY", "") + or os.getenv("OPENAI_API_KEY", "") + ).strip() diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index 0c0a1fc9f..ae05358b8 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -31,6 +31,10 @@ import subprocess import tempfile from pathlib import Path from typing import Optional, Dict, Any +from urllib.parse import urljoin + +from tools.managed_tool_gateway import resolve_managed_tool_gateway +from tools.tool_backend_helpers import resolve_openai_audio_api_key from hermes_constants import get_hermes_home @@ -41,8 +45,17 @@ logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- import importlib.util as _ilu -_HAS_FASTER_WHISPER = _ilu.find_spec("faster_whisper") is not None -_HAS_OPENAI = _ilu.find_spec("openai") is not None + + +def _safe_find_spec(module_name: str) -> bool: + try: + return _ilu.find_spec(module_name) is not None + except (ImportError, ValueError): + return module_name in globals() or module_name in os.sys.modules + + +_HAS_FASTER_WHISPER = _safe_find_spec("faster_whisper") +_HAS_OPENAI = _safe_find_spec("openai") # --------------------------------------------------------------------------- # Constants @@ -116,9 +129,9 @@ def is_stt_enabled(stt_config: Optional[dict] = None) -> bool: return bool(enabled) -def _resolve_openai_api_key() -> str: - """Prefer the voice-tools key, but fall back to the normal OpenAI key.""" - return os.getenv("VOICE_TOOLS_OPENAI_KEY", "") or os.getenv("OPENAI_API_KEY", "") +def _has_openai_audio_backend() -> bool: + """Return True when OpenAI audio can use direct credentials or the managed 
gateway.""" + return bool(resolve_openai_audio_api_key() or resolve_managed_tool_gateway("openai-audio")) def _find_binary(binary_name: str) -> Optional[str]: @@ -210,7 +223,7 @@ def _get_provider(stt_config: dict) -> str: return "none" if provider == "openai": - if _HAS_OPENAI and _resolve_openai_api_key(): + if _HAS_OPENAI and _has_openai_audio_backend(): return "openai" logger.warning( "STT provider 'openai' configured but no API key available" @@ -228,7 +241,7 @@ def _get_provider(stt_config: dict) -> str: if _HAS_OPENAI and os.getenv("GROQ_API_KEY"): logger.info("No local STT available, using Groq Whisper API") return "groq" - if _HAS_OPENAI and _resolve_openai_api_key(): + if _HAS_OPENAI and _has_openai_audio_backend(): logger.info("No local STT available, using OpenAI Whisper API") return "openai" return "none" @@ -404,19 +417,23 @@ def _transcribe_groq(file_path: str, model_name: str) -> Dict[str, Any]: try: from openai import OpenAI, APIError, APIConnectionError, APITimeoutError client = OpenAI(api_key=api_key, base_url=GROQ_BASE_URL, timeout=30, max_retries=0) + try: + with open(file_path, "rb") as audio_file: + transcription = client.audio.transcriptions.create( + model=model_name, + file=audio_file, + response_format="text", + ) - with open(file_path, "rb") as audio_file: - transcription = client.audio.transcriptions.create( - model=model_name, - file=audio_file, - response_format="text", - ) + transcript_text = str(transcription).strip() + logger.info("Transcribed %s via Groq API (%s, %d chars)", + Path(file_path).name, model_name, len(transcript_text)) - transcript_text = str(transcription).strip() - logger.info("Transcribed %s via Groq API (%s, %d chars)", - Path(file_path).name, model_name, len(transcript_text)) - - return {"success": True, "transcript": transcript_text, "provider": "groq"} + return {"success": True, "transcript": transcript_text, "provider": "groq"} + finally: + close = getattr(client, "close", None) + if callable(close): + close() 
except PermissionError: return {"success": False, "transcript": "", "error": f"Permission denied: {file_path}"} @@ -437,12 +454,13 @@ def _transcribe_groq(file_path: str, model_name: str) -> Dict[str, Any]: def _transcribe_openai(file_path: str, model_name: str) -> Dict[str, Any]: """Transcribe using OpenAI Whisper API (paid).""" - api_key = _resolve_openai_api_key() - if not api_key: + try: + api_key, base_url = _resolve_openai_audio_client_config() + except ValueError as exc: return { "success": False, "transcript": "", - "error": "Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set", + "error": str(exc), } if not _HAS_OPENAI: @@ -455,20 +473,24 @@ def _transcribe_openai(file_path: str, model_name: str) -> Dict[str, Any]: try: from openai import OpenAI, APIError, APIConnectionError, APITimeoutError - client = OpenAI(api_key=api_key, base_url=OPENAI_BASE_URL, timeout=30, max_retries=0) + client = OpenAI(api_key=api_key, base_url=base_url, timeout=30, max_retries=0) + try: + with open(file_path, "rb") as audio_file: + transcription = client.audio.transcriptions.create( + model=model_name, + file=audio_file, + response_format="text" if model_name == "whisper-1" else "json", + ) - with open(file_path, "rb") as audio_file: - transcription = client.audio.transcriptions.create( - model=model_name, - file=audio_file, - response_format="text", - ) + transcript_text = _extract_transcript_text(transcription) + logger.info("Transcribed %s via OpenAI API (%s, %d chars)", + Path(file_path).name, model_name, len(transcript_text)) - transcript_text = str(transcription).strip() - logger.info("Transcribed %s via OpenAI API (%s, %d chars)", - Path(file_path).name, model_name, len(transcript_text)) - - return {"success": True, "transcript": transcript_text, "provider": "openai"} + return {"success": True, "transcript": transcript_text, "provider": "openai"} + finally: + close = getattr(client, "close", None) + if callable(close): + close() except PermissionError: return 
{"success": False, "transcript": "", "error": f"Permission denied: {file_path}"} @@ -554,3 +576,38 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A "or OPENAI_API_KEY for the OpenAI Whisper API." ), } + + +def _resolve_openai_audio_client_config() -> tuple[str, str]: + """Return direct OpenAI audio config or a managed gateway fallback.""" + direct_api_key = resolve_openai_audio_api_key() + if direct_api_key: + return direct_api_key, OPENAI_BASE_URL + + managed_gateway = resolve_managed_tool_gateway("openai-audio") + if managed_gateway is None: + raise ValueError( + "Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set, and the managed OpenAI audio gateway is unavailable" + ) + + return managed_gateway.nous_user_token, urljoin( + f"{managed_gateway.gateway_origin.rstrip('/')}/", "v1" + ) + + +def _extract_transcript_text(transcription: Any) -> str: + """Normalize text and JSON transcription responses to a plain string.""" + if isinstance(transcription, str): + return transcription.strip() + + if hasattr(transcription, "text"): + value = getattr(transcription, "text") + if isinstance(value, str): + return value.strip() + + if isinstance(transcription, dict): + value = transcription.get("text") + if isinstance(value, str): + return value.strip() + + return str(transcription).strip() diff --git a/tools/tts_tool.py b/tools/tts_tool.py index eed3961df..c71cdb1e8 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -32,11 +32,15 @@ import shutil import subprocess import tempfile import threading +import uuid from pathlib import Path from hermes_constants import get_hermes_home from typing import Callable, Dict, Any, Optional +from urllib.parse import urljoin logger = logging.getLogger(__name__) +from tools.managed_tool_gateway import resolve_managed_tool_gateway +from tools.tool_backend_helpers import resolve_openai_audio_api_key # --------------------------------------------------------------------------- # Lazy imports -- 
providers are imported only when actually used to avoid @@ -74,6 +78,7 @@ DEFAULT_ELEVENLABS_MODEL_ID = "eleven_multilingual_v2" DEFAULT_ELEVENLABS_STREAMING_MODEL_ID = "eleven_flash_v2_5" DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts" DEFAULT_OPENAI_VOICE = "alloy" +DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1" DEFAULT_OUTPUT_DIR = str(get_hermes_home() / "audio_cache") MAX_TEXT_LENGTH = 4000 @@ -233,14 +238,12 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any] Returns: Path to the saved audio file. """ - api_key = os.getenv("VOICE_TOOLS_OPENAI_KEY", "") - if not api_key: - raise ValueError("VOICE_TOOLS_OPENAI_KEY not set. Get one at https://platform.openai.com/api-keys") + api_key, base_url = _resolve_openai_audio_client_config() oai_config = tts_config.get("openai", {}) model = oai_config.get("model", DEFAULT_OPENAI_MODEL) voice = oai_config.get("voice", DEFAULT_OPENAI_VOICE) - base_url = oai_config.get("base_url", "https://api.openai.com/v1") + base_url = oai_config.get("base_url", base_url) # Determine response format from extension if output_path.endswith(".ogg"): @@ -250,15 +253,21 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any] OpenAIClient = _import_openai_client() client = OpenAIClient(api_key=api_key, base_url=base_url) - response = client.audio.speech.create( - model=model, - voice=voice, - input=text, - response_format=response_format, - ) + try: + response = client.audio.speech.create( + model=model, + voice=voice, + input=text, + response_format=response_format, + extra_headers={"x-idempotency-key": str(uuid.uuid4())}, + ) - response.stream_to_file(output_path) - return output_path + response.stream_to_file(output_path) + return output_path + finally: + close = getattr(client, "close", None) + if callable(close): + close() # =========================================================================== @@ -539,7 +548,7 @@ def check_tts_requirements() -> bool: pass try: 
_import_openai_client() - if os.getenv("VOICE_TOOLS_OPENAI_KEY"): + if _has_openai_audio_backend(): return True except ImportError: pass @@ -548,6 +557,28 @@ def check_tts_requirements() -> bool: return False +def _resolve_openai_audio_client_config() -> tuple[str, str]: + """Return direct OpenAI audio config or a managed gateway fallback.""" + direct_api_key = resolve_openai_audio_api_key() + if direct_api_key: + return direct_api_key, DEFAULT_OPENAI_BASE_URL + + managed_gateway = resolve_managed_tool_gateway("openai-audio") + if managed_gateway is None: + raise ValueError( + "Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set, and the managed OpenAI audio gateway is unavailable" + ) + + return managed_gateway.nous_user_token, urljoin( + f"{managed_gateway.gateway_origin.rstrip('/')}/", "v1" + ) + + +def _has_openai_audio_backend() -> bool: + """Return True when OpenAI audio can use direct credentials or the managed gateway.""" + return bool(resolve_openai_audio_api_key() or resolve_managed_tool_gateway("openai-audio")) + + # =========================================================================== # Streaming TTS: sentence-by-sentence pipeline for ElevenLabs # =========================================================================== @@ -802,7 +833,10 @@ if __name__ == "__main__": print(f" ElevenLabs: {'installed' if _check(_import_elevenlabs, 'el') else 'not installed (pip install elevenlabs)'}") print(f" API Key: {'set' if os.getenv('ELEVENLABS_API_KEY') else 'not set'}") print(f" OpenAI: {'installed' if _check(_import_openai_client, 'oai') else 'not installed'}") - print(f" API Key: {'set' if os.getenv('VOICE_TOOLS_OPENAI_KEY') else 'not set (VOICE_TOOLS_OPENAI_KEY)'}") + print( + " API Key: " + f"{'set' if resolve_openai_audio_api_key() else 'not set (VOICE_TOOLS_OPENAI_KEY or OPENAI_API_KEY)'}" + ) print(f" ffmpeg: {'✅ found' if _has_ffmpeg() else '❌ not found (needed for Telegram Opus)'}") print(f"\n Output dir: {DEFAULT_OUTPUT_DIR}") diff --git 
a/tools/web_tools.py b/tools/web_tools.py index d4afc06ae..1ebf36d77 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -4,15 +4,18 @@ Standalone Web Tools Module This module provides generic web tools that work with multiple backend providers. Backend is selected during ``hermes tools`` setup (web.backend in config.yaml). +When available, Hermes can route Firecrawl calls through a Nous-hosted tool-gateway +for Nous Subscribers only. Available tools: - web_search_tool: Search the web for information - web_extract_tool: Extract content from specific web pages -- web_crawl_tool: Crawl websites with specific instructions (Firecrawl only) +- web_crawl_tool: Crawl websites with specific instructions Backend compatibility: -- Firecrawl: https://docs.firecrawl.dev/introduction (search, extract, crawl) +- Firecrawl: https://docs.firecrawl.dev/introduction (search, extract, crawl; direct or derived firecrawl-gateway. for Nous Subscribers) - Parallel: https://docs.parallel.ai (search, extract) +- Tavily: https://tavily.com (search, extract, crawl) LLM Processing: - Uses OpenRouter API with Gemini 3 Flash Preview for intelligent content extraction @@ -44,8 +47,13 @@ import asyncio from typing import List, Dict, Any, Optional import httpx from firecrawl import Firecrawl -from agent.auxiliary_client import async_call_llm +from agent.auxiliary_client import get_async_text_auxiliary_client from tools.debug_helpers import DebugSession +from tools.managed_tool_gateway import ( + build_vendor_gateway_url, + read_nous_access_token as _read_nous_access_token, + resolve_managed_tool_gateway, +) from tools.url_safety import is_safe_url from tools.website_policy import check_website_access @@ -78,10 +86,13 @@ def _get_backend() -> str: return configured # Fallback for manual / legacy config — use whichever key is present. 
- has_firecrawl = _has_env("FIRECRAWL_API_KEY") or _has_env("FIRECRAWL_API_URL") + has_firecrawl = ( + _has_env("FIRECRAWL_API_KEY") + or _has_env("FIRECRAWL_API_URL") + or _is_tool_gateway_ready() + ) has_parallel = _has_env("PARALLEL_API_KEY") has_tavily = _has_env("TAVILY_API_KEY") - if has_tavily and not has_firecrawl and not has_parallel: return "tavily" if has_parallel and not has_firecrawl: @@ -90,35 +101,100 @@ def _get_backend() -> str: # Default to firecrawl (backward compat, or when both are set) return "firecrawl" + +def _is_backend_available(backend: str) -> bool: + """Return True when the selected backend is currently usable.""" + if backend == "parallel": + return _has_env("PARALLEL_API_KEY") + if backend == "firecrawl": + return check_firecrawl_api_key() + if backend == "tavily": + return _has_env("TAVILY_API_KEY") + return False + # ─── Firecrawl Client ──────────────────────────────────────────────────────── _firecrawl_client = None +_firecrawl_client_config = None + + +def _get_direct_firecrawl_config() -> Optional[tuple[Dict[str, str], tuple[str, Optional[str], Optional[str]]]]: + """Return explicit direct Firecrawl kwargs + cache key, or None when unset.""" + api_key = os.getenv("FIRECRAWL_API_KEY", "").strip() + api_url = os.getenv("FIRECRAWL_API_URL", "").strip().rstrip("/") + + if not api_key and not api_url: + return None + + kwargs: Dict[str, str] = {} + if api_key: + kwargs["api_key"] = api_key + if api_url: + kwargs["api_url"] = api_url + + return kwargs, ("direct", api_url or None, api_key or None) + + +def _get_firecrawl_gateway_url() -> str: + """Return configured Firecrawl gateway URL.""" + return build_vendor_gateway_url("firecrawl") + + +def _is_tool_gateway_ready() -> bool: + """Return True when gateway URL and a Nous Subscriber token are available.""" + return resolve_managed_tool_gateway("firecrawl", token_reader=_read_nous_access_token) is not None + + +def _has_direct_firecrawl_config() -> bool: + """Return True when direct 
Firecrawl config is explicitly configured.""" + return _get_direct_firecrawl_config() is not None + + +def _raise_web_backend_configuration_error() -> None: + """Raise a clear error for unsupported web backend configuration.""" + raise ValueError( + "Web tools are not configured. " + "Set FIRECRAWL_API_KEY for cloud Firecrawl, set FIRECRAWL_API_URL for a self-hosted Firecrawl instance, " + "or, if you are a Nous Subscriber, login to Nous (`hermes model`) and provide " + "FIRECRAWL_GATEWAY_URL or TOOL_GATEWAY_DOMAIN." + ) + def _get_firecrawl_client(): - """Get or create the Firecrawl client (lazy initialization). + """Get or create Firecrawl client. - Uses the cloud API by default (requires FIRECRAWL_API_KEY). - Set FIRECRAWL_API_URL to point at a self-hosted instance instead — - in that case the API key is optional (set USE_DB_AUTHENTICATION=false - on your Firecrawl server to disable auth entirely). + Direct Firecrawl takes precedence when explicitly configured. Otherwise + Hermes falls back to the Firecrawl tool-gateway for logged-in Nous Subscribers. """ - global _firecrawl_client - if _firecrawl_client is None: - api_key = os.getenv("FIRECRAWL_API_KEY") - api_url = os.getenv("FIRECRAWL_API_URL") - if not api_key and not api_url: - logger.error("Firecrawl client initialization failed: missing configuration.") - raise ValueError( - "Firecrawl client not configured. " - "Set FIRECRAWL_API_KEY (cloud) or FIRECRAWL_API_URL (self-hosted). " - "This tool requires Firecrawl to be available." 
- ) - kwargs = {} - if api_key: - kwargs["api_key"] = api_key - if api_url: - kwargs["api_url"] = api_url - _firecrawl_client = Firecrawl(**kwargs) + global _firecrawl_client, _firecrawl_client_config + + direct_config = _get_direct_firecrawl_config() + if direct_config is not None: + kwargs, client_config = direct_config + else: + managed_gateway = resolve_managed_tool_gateway( + "firecrawl", + token_reader=_read_nous_access_token, + ) + if managed_gateway is None: + logger.error("Firecrawl client initialization failed: missing direct config and tool-gateway auth.") + _raise_web_backend_configuration_error() + + kwargs = { + "api_key": managed_gateway.nous_user_token, + "api_url": managed_gateway.gateway_origin, + } + client_config = ( + "tool-gateway", + kwargs["api_url"], + managed_gateway.nous_user_token, + ) + + if _firecrawl_client is not None and _firecrawl_client_config == client_config: + return _firecrawl_client + + _firecrawl_client = Firecrawl(**kwargs) + _firecrawl_client_config = client_config return _firecrawl_client # ─── Parallel Client ───────────────────────────────────────────────────────── @@ -243,10 +319,112 @@ def _normalize_tavily_documents(response: dict, fallback_url: str = "") -> List[ return documents +def _to_plain_object(value: Any) -> Any: + """Convert SDK objects to plain python data structures when possible.""" + if value is None: + return None + + if isinstance(value, (dict, list, str, int, float, bool)): + return value + + if hasattr(value, "model_dump"): + try: + return value.model_dump() + except Exception: + pass + + if hasattr(value, "__dict__"): + try: + return {k: v for k, v in value.__dict__.items() if not k.startswith("_")} + except Exception: + pass + + return value + + +def _normalize_result_list(values: Any) -> List[Dict[str, Any]]: + """Normalize mixed SDK/list payloads into a list of dicts.""" + if not isinstance(values, list): + return [] + + normalized: List[Dict[str, Any]] = [] + for item in values: + plain = 
_to_plain_object(item) + if isinstance(plain, dict): + normalized.append(plain) + return normalized + + +def _extract_web_search_results(response: Any) -> List[Dict[str, Any]]: + """Extract Firecrawl search results across SDK/direct/gateway response shapes.""" + response_plain = _to_plain_object(response) + + if isinstance(response_plain, dict): + data = response_plain.get("data") + if isinstance(data, list): + return _normalize_result_list(data) + + if isinstance(data, dict): + data_web = _normalize_result_list(data.get("web")) + if data_web: + return data_web + data_results = _normalize_result_list(data.get("results")) + if data_results: + return data_results + + top_web = _normalize_result_list(response_plain.get("web")) + if top_web: + return top_web + + top_results = _normalize_result_list(response_plain.get("results")) + if top_results: + return top_results + + if hasattr(response, "web"): + return _normalize_result_list(getattr(response, "web", [])) + + return [] + + +def _extract_scrape_payload(scrape_result: Any) -> Dict[str, Any]: + """Normalize Firecrawl scrape payload shape across SDK and gateway variants.""" + result_plain = _to_plain_object(scrape_result) + if not isinstance(result_plain, dict): + return {} + + nested = result_plain.get("data") + if isinstance(nested, dict): + return nested + + return result_plain + + DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000 -# Allow per-task override via env var -DEFAULT_SUMMARIZER_MODEL = os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None +def _is_nous_auxiliary_client(client: Any) -> bool: + """Return True when the resolved auxiliary backend is Nous Portal.""" + base_url = str(getattr(client, "base_url", "") or "").lower() + return "nousresearch.com" in base_url + + +def _resolve_web_extract_auxiliary(model: Optional[str] = None) -> tuple[Optional[Any], Optional[str], Dict[str, Any]]: + """Resolve the current web-extract auxiliary client, model, and extra body.""" + client, default_model = 
get_async_text_auxiliary_client("web_extract") + configured_model = os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() + effective_model = model or configured_model or default_model + + extra_body: Dict[str, Any] = {} + if client is not None and _is_nous_auxiliary_client(client): + from agent.auxiliary_client import get_auxiliary_extra_body + extra_body = get_auxiliary_extra_body() or {"tags": ["product=hermes-agent"]} + + return client, effective_model, extra_body + + +def _get_default_summarizer_model() -> Optional[str]: + """Return the current default model for web extraction summarization.""" + _, model, _ = _resolve_web_extract_auxiliary() + return model _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG") @@ -255,7 +433,7 @@ async def process_content_with_llm( content: str, url: str = "", title: str = "", - model: str = DEFAULT_SUMMARIZER_MODEL, + model: Optional[str] = None, min_length: int = DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION ) -> Optional[str]: """ @@ -338,7 +516,7 @@ async def process_content_with_llm( async def _call_summarizer_llm( content: str, context_str: str, - model: str, + model: Optional[str], max_tokens: int = 20000, is_chunk: bool = False, chunk_info: str = "" @@ -404,22 +582,22 @@ Create a markdown summary that captures all key information in a well-organized, for attempt in range(max_retries): try: - call_kwargs = { - "task": "web_extract", - "messages": [ + aux_client, effective_model, extra_body = _resolve_web_extract_auxiliary(model) + if aux_client is None or not effective_model: + logger.warning("No auxiliary model available for web content processing") + return None + from agent.auxiliary_client import auxiliary_max_tokens_param + response = await aux_client.chat.completions.create( + model=effective_model, + messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt} ], - "temperature": 0.1, - "max_tokens": max_tokens, - } - if model: - call_kwargs["model"] = model - response = await 
async_call_llm(**call_kwargs) + temperature=0.1, + **auxiliary_max_tokens_param(max_tokens), + **({} if not extra_body else {"extra_body": extra_body}), + ) return response.choices[0].message.content.strip() - except RuntimeError: - logger.warning("No auxiliary model available for web content processing") - return None except Exception as api_error: last_error = api_error if attempt < max_retries - 1: @@ -436,7 +614,7 @@ Create a markdown summary that captures all key information in a well-organized, async def _process_large_content_chunked( content: str, context_str: str, - model: str, + model: Optional[str], chunk_size: int, max_output_size: int ) -> Optional[str]: @@ -523,18 +701,25 @@ Synthesize these into ONE cohesive, comprehensive summary that: Create a single, unified markdown summary.""" try: - call_kwargs = { - "task": "web_extract", - "messages": [ + aux_client, effective_model, extra_body = _resolve_web_extract_auxiliary(model) + if aux_client is None or not effective_model: + logger.warning("No auxiliary model for synthesis, concatenating summaries") + fallback = "\n\n".join(summaries) + if len(fallback) > max_output_size: + fallback = fallback[:max_output_size] + "\n\n[... truncated ...]" + return fallback + + from agent.auxiliary_client import auxiliary_max_tokens_param + response = await aux_client.chat.completions.create( + model=effective_model, + messages=[ {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. 
Be thorough but concise."}, {"role": "user", "content": synthesis_prompt} ], - "temperature": 0.1, - "max_tokens": 20000, - } - if model: - call_kwargs["model"] = model - response = await async_call_llm(**call_kwargs) + temperature=0.1, + **auxiliary_max_tokens_param(20000), + **({} if not extra_body else {"extra_body": extra_body}), + ) final_summary = response.choices[0].message.content.strip() # Enforce hard cap @@ -750,35 +935,7 @@ def web_search_tool(query: str, limit: int = 5) -> str: limit=limit ) - # The response is a SearchData object with web, news, and images attributes - # When not scraping, the results are directly in these attributes - web_results = [] - - # Check if response has web attribute (SearchData object) - if hasattr(response, 'web'): - # Response is a SearchData object with web attribute - if response.web: - # Convert each SearchResultWeb object to dict - for result in response.web: - if hasattr(result, 'model_dump'): - # Pydantic model - use model_dump - web_results.append(result.model_dump()) - elif hasattr(result, '__dict__'): - # Regular object - use __dict__ - web_results.append(result.__dict__) - elif isinstance(result, dict): - # Already a dict - web_results.append(result) - elif hasattr(response, 'model_dump'): - # Response has model_dump method - use it to get dict - response_dict = response.model_dump() - if 'web' in response_dict and response_dict['web']: - web_results = response_dict['web'] - elif isinstance(response, dict): - # Response is already a dictionary - if 'web' in response and response['web']: - web_results = response['web'] - + web_results = _extract_web_search_results(response) results_count = len(web_results) logger.info("Found %d search results", results_count) @@ -807,11 +964,11 @@ def web_search_tool(query: str, limit: int = 5) -> str: except Exception as e: error_msg = f"Error searching web: {str(e)}" logger.debug("%s", error_msg) - + debug_call_data["error"] = error_msg _debug.log_call("web_search_tool", 
debug_call_data) _debug.save() - + return json.dumps({"error": error_msg}, ensure_ascii=False) @@ -819,7 +976,7 @@ async def web_extract_tool( urls: List[str], format: str = None, use_llm_processing: bool = True, - model: str = DEFAULT_SUMMARIZER_MODEL, + model: Optional[str] = None, min_length: int = DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION ) -> str: """ @@ -832,7 +989,7 @@ async def web_extract_tool( urls (List[str]): List of URLs to extract content from format (str): Desired output format ("markdown" or "html", optional) use_llm_processing (bool): Whether to process content with LLM for summarization (default: True) - model (str): The model to use for LLM processing (default: google/gemini-3-flash-preview) + model (Optional[str]): The model to use for LLM processing (defaults to current auxiliary backend model) min_length (int): Minimum content length to trigger LLM processing (default: 5000) Returns: @@ -929,39 +1086,11 @@ async def web_extract_tool( formats=formats ) - # Process the result - properly handle object serialization - metadata = {} + scrape_payload = _extract_scrape_payload(scrape_result) + metadata = scrape_payload.get("metadata", {}) title = "" - content_markdown = None - content_html = None - - # Extract data from the scrape result - if hasattr(scrape_result, 'model_dump'): - # Pydantic model - use model_dump to get dict - result_dict = scrape_result.model_dump() - content_markdown = result_dict.get('markdown') - content_html = result_dict.get('html') - metadata = result_dict.get('metadata', {}) - elif hasattr(scrape_result, '__dict__'): - # Regular object with attributes - content_markdown = getattr(scrape_result, 'markdown', None) - content_html = getattr(scrape_result, 'html', None) - - # Handle metadata - convert to dict if it's an object - metadata_obj = getattr(scrape_result, 'metadata', {}) - if hasattr(metadata_obj, 'model_dump'): - metadata = metadata_obj.model_dump() - elif hasattr(metadata_obj, '__dict__'): - metadata = 
metadata_obj.__dict__ - elif isinstance(metadata_obj, dict): - metadata = metadata_obj - else: - metadata = {} - elif isinstance(scrape_result, dict): - # Already a dictionary - content_markdown = scrape_result.get('markdown') - content_html = scrape_result.get('html') - metadata = scrape_result.get('metadata', {}) + content_markdown = scrape_payload.get("markdown") + content_html = scrape_payload.get("html") # Ensure metadata is a dict (not an object) if not isinstance(metadata, dict): @@ -1019,9 +1148,11 @@ async def web_extract_tool( debug_call_data["pages_extracted"] = pages_extracted debug_call_data["original_response_size"] = len(json.dumps(response)) + effective_model = model or _get_default_summarizer_model() + auxiliary_available = check_auxiliary_model() # Process each result with LLM if enabled - if use_llm_processing: + if use_llm_processing and auxiliary_available: logger.info("Processing extracted content with LLM (parallel)...") debug_call_data["processing_applied"].append("llm_processing") @@ -1039,7 +1170,7 @@ async def web_extract_tool( # Process content with LLM processed = await process_content_with_llm( - raw_content, url, title, model, min_length + raw_content, url, title, effective_model, min_length ) if processed: @@ -1055,7 +1186,7 @@ async def web_extract_tool( "original_size": original_size, "processed_size": processed_size, "compression_ratio": compression_ratio, - "model_used": model + "model_used": effective_model } return result, metrics, "processed" else: @@ -1087,6 +1218,9 @@ async def web_extract_tool( else: logger.warning("%s (no content to process)", url) else: + if use_llm_processing and not auxiliary_available: + logger.warning("LLM processing requested but no auxiliary model available, returning raw content") + debug_call_data["processing_applied"].append("llm_processing_unavailable") # Print summary of extracted pages for debugging (original behavior) for result in response.get('results', []): url = result.get('url', 'Unknown 
URL') @@ -1141,7 +1275,7 @@ async def web_crawl_tool( instructions: str = None, depth: str = "basic", use_llm_processing: bool = True, - model: str = DEFAULT_SUMMARIZER_MODEL, + model: Optional[str] = None, min_length: int = DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION ) -> str: """ @@ -1155,7 +1289,7 @@ async def web_crawl_tool( instructions (str): Instructions for what to crawl/extract using LLM intelligence (optional) depth (str): Depth of extraction ("basic" or "advanced", default: "basic") use_llm_processing (bool): Whether to process content with LLM for summarization (default: True) - model (str): The model to use for LLM processing (default: google/gemini-3-flash-preview) + model (Optional[str]): The model to use for LLM processing (defaults to current auxiliary backend model) min_length (int): Minimum content length to trigger LLM processing (default: 5000) Returns: @@ -1185,6 +1319,8 @@ async def web_crawl_tool( } try: + effective_model = model or _get_default_summarizer_model() + auxiliary_available = check_auxiliary_model() backend = _get_backend() # Tavily supports crawl via its /crawl endpoint @@ -1229,7 +1365,7 @@ async def web_crawl_tool( debug_call_data["original_response_size"] = len(json.dumps(response)) # Process each result with LLM if enabled - if use_llm_processing: + if use_llm_processing and auxiliary_available: logger.info("Processing crawled content with LLM (parallel)...") debug_call_data["processing_applied"].append("llm_processing") @@ -1240,12 +1376,12 @@ async def web_crawl_tool( if not content: return result, None, "no_content" original_size = len(content) - processed = await process_content_with_llm(content, page_url, title, model, min_length) + processed = await process_content_with_llm(content, page_url, title, effective_model, min_length) if processed: result['raw_content'] = content result['content'] = processed metrics = {"url": page_url, "original_size": original_size, "processed_size": len(processed), - "compression_ratio": 
len(processed) / original_size if original_size else 1.0, "model_used": model} + "compression_ratio": len(processed) / original_size if original_size else 1.0, "model_used": effective_model} return result, metrics, "processed" metrics = {"url": page_url, "original_size": original_size, "processed_size": original_size, "compression_ratio": 1.0, "model_used": None, "reason": "content_too_short"} @@ -1258,6 +1394,10 @@ async def web_crawl_tool( debug_call_data["compression_metrics"].append(metrics) debug_call_data["pages_processed_with_llm"] += 1 + if use_llm_processing and not auxiliary_available: + logger.warning("LLM processing requested but no auxiliary model available, returning raw content") + debug_call_data["processing_applied"].append("llm_processing_unavailable") + trimmed_results = [{"url": r.get("url", ""), "title": r.get("title", ""), "content": r.get("content", ""), "error": r.get("error"), **({ "blocked_by_policy": r["blocked_by_policy"]} if "blocked_by_policy" in r else {})} for r in response.get("results", [])] result_json = json.dumps({"results": trimmed_results}, indent=2, ensure_ascii=False) @@ -1267,10 +1407,12 @@ async def web_crawl_tool( _debug.save() return cleaned_result - # web_crawl requires Firecrawl — Parallel has no crawl API - if not (os.getenv("FIRECRAWL_API_KEY") or os.getenv("FIRECRAWL_API_URL")): + # web_crawl requires Firecrawl or the Firecrawl tool-gateway — Parallel has no crawl API + if not check_firecrawl_api_key(): return json.dumps({ - "error": "web_crawl requires Firecrawl. Set FIRECRAWL_API_KEY, " + "error": "web_crawl requires Firecrawl. 
Set FIRECRAWL_API_KEY, FIRECRAWL_API_URL, " + "or, if you are a Nous Subscriber, login to Nous and use FIRECRAWL_GATEWAY_URL, " + "or TOOL_GATEWAY_DOMAIN, " "or use web_search + web_extract instead.", "success": False, }, ensure_ascii=False) @@ -1431,7 +1573,7 @@ async def web_crawl_tool( debug_call_data["original_response_size"] = len(json.dumps(response)) # Process each result with LLM if enabled - if use_llm_processing: + if use_llm_processing and auxiliary_available: logger.info("Processing crawled content with LLM (parallel)...") debug_call_data["processing_applied"].append("llm_processing") @@ -1449,7 +1591,7 @@ async def web_crawl_tool( # Process content with LLM processed = await process_content_with_llm( - content, page_url, title, model, min_length + content, page_url, title, effective_model, min_length ) if processed: @@ -1465,7 +1607,7 @@ async def web_crawl_tool( "original_size": original_size, "processed_size": processed_size, "compression_ratio": compression_ratio, - "model_used": model + "model_used": effective_model } return result, metrics, "processed" else: @@ -1497,6 +1639,9 @@ async def web_crawl_tool( else: logger.warning("%s (no content to process)", page_url) else: + if use_llm_processing and not auxiliary_available: + logger.warning("LLM processing requested but no auxiliary model available, returning raw content") + debug_call_data["processing_applied"].append("llm_processing_unavailable") # Print summary of crawled pages for debugging (original behavior) for result in response.get('results', []): page_url = result.get('url', 'Unknown URL') @@ -1540,38 +1685,34 @@ async def web_crawl_tool( return json.dumps({"error": error_msg}, ensure_ascii=False) -# Convenience function to check if API key is available +# Convenience function to check Firecrawl credentials def check_firecrawl_api_key() -> bool: """ - Check if the Firecrawl API key is available in environment variables. + Check whether the Firecrawl backend is available. 
+ + Availability is true when either: + 1) direct Firecrawl config (`FIRECRAWL_API_KEY` or `FIRECRAWL_API_URL`), or + 2) Firecrawl gateway origin + Nous Subscriber access token + (fallback when direct Firecrawl is not configured). Returns: - bool: True if API key is set, False otherwise + bool: True if direct Firecrawl or the tool-gateway can be used. """ - return bool(os.getenv("FIRECRAWL_API_KEY")) + return _has_direct_firecrawl_config() or _is_tool_gateway_ready() def check_web_api_key() -> bool: - """Check if any web backend API key is available (Parallel, Firecrawl, or Tavily).""" - return bool( - os.getenv("PARALLEL_API_KEY") - or os.getenv("FIRECRAWL_API_KEY") - or os.getenv("FIRECRAWL_API_URL") - or os.getenv("TAVILY_API_KEY") - ) + """Check whether the configured web backend is available.""" + configured = _load_web_config().get("backend", "").lower().strip() + if configured in ("parallel", "firecrawl", "tavily"): + return _is_backend_available(configured) + return any(_is_backend_available(backend) for backend in ("parallel", "firecrawl", "tavily")) def check_auxiliary_model() -> bool: """Check if an auxiliary text model is available for LLM content processing.""" - try: - from agent.auxiliary_client import resolve_provider_client - for p in ("openrouter", "nous", "custom", "codex"): - client, _ = resolve_provider_client(p) - if client is not None: - return True - return False - except Exception: - return False + client, _, _ = _resolve_web_extract_auxiliary() + return client is not None def get_debug_session_info() -> Dict[str, Any]: @@ -1588,7 +1729,11 @@ if __name__ == "__main__": # Check if API keys are available web_available = check_web_api_key() + tool_gateway_available = _is_tool_gateway_ready() + firecrawl_key_available = bool(os.getenv("FIRECRAWL_API_KEY", "").strip()) + firecrawl_url_available = bool(os.getenv("FIRECRAWL_API_URL", "").strip()) nous_available = check_auxiliary_model() + default_summarizer_model = _get_default_summarizer_model() 
if web_available: backend = _get_backend() @@ -1598,17 +1743,28 @@ if __name__ == "__main__": elif backend == "tavily": print(" Using Tavily API (https://tavily.com)") else: - print(" Using Firecrawl API (https://firecrawl.dev)") + if firecrawl_url_available: + print(f" Using self-hosted Firecrawl: {os.getenv('FIRECRAWL_API_URL').strip().rstrip('/')}") + elif firecrawl_key_available: + print(" Using direct Firecrawl cloud API") + elif tool_gateway_available: + print(f" Using Firecrawl tool-gateway: {_get_firecrawl_gateway_url()}") + else: + print(" Firecrawl backend selected but not configured") else: print("❌ No web search backend configured") - print("Set PARALLEL_API_KEY, TAVILY_API_KEY, or FIRECRAWL_API_KEY") + print( + "Set PARALLEL_API_KEY, TAVILY_API_KEY, FIRECRAWL_API_KEY, FIRECRAWL_API_URL, " + "or, if you are a Nous Subscriber, login to Nous and use " + "FIRECRAWL_GATEWAY_URL or TOOL_GATEWAY_DOMAIN" + ) if not nous_available: print("❌ No auxiliary model available for LLM content processing") print("Set OPENROUTER_API_KEY, configure Nous Portal, or set OPENAI_BASE_URL + OPENAI_API_KEY") print("⚠️ Without an auxiliary model, LLM content processing will be disabled") else: - print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}") + print(f"✅ Auxiliary model available: {default_summarizer_model}") if not web_available: exit(1) @@ -1616,7 +1772,7 @@ if __name__ == "__main__": print("🛠️ Web tools ready for use!") if nous_available: - print(f"🧠 LLM content processing available with {DEFAULT_SUMMARIZER_MODEL}") + print(f"🧠 LLM content processing available with {default_summarizer_model}") print(f" Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars") # Show debug mode status @@ -1711,7 +1867,16 @@ registry.register( schema=WEB_SEARCH_SCHEMA, handler=lambda args, **kw: web_search_tool(args.get("query", ""), limit=5), check_fn=check_web_api_key, - requires_env=["PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "TAVILY_API_KEY"], + 
requires_env=[ + "PARALLEL_API_KEY", + "TAVILY_API_KEY", + "FIRECRAWL_GATEWAY_URL", + "TOOL_GATEWAY_DOMAIN", + "TOOL_GATEWAY_SCHEME", + "TOOL_GATEWAY_USER_TOKEN", + "FIRECRAWL_API_KEY", + "FIRECRAWL_API_URL", + ], emoji="🔍", ) registry.register( @@ -1721,7 +1886,16 @@ registry.register( handler=lambda args, **kw: web_extract_tool( args.get("urls", [])[:5] if isinstance(args.get("urls"), list) else [], "markdown"), check_fn=check_web_api_key, - requires_env=["PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "TAVILY_API_KEY"], + requires_env=[ + "PARALLEL_API_KEY", + "TAVILY_API_KEY", + "FIRECRAWL_GATEWAY_URL", + "TOOL_GATEWAY_DOMAIN", + "TOOL_GATEWAY_SCHEME", + "TOOL_GATEWAY_USER_TOKEN", + "FIRECRAWL_API_KEY", + "FIRECRAWL_API_URL", + ], is_async=True, emoji="📄", ) diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 39fb0b83a..d7d689580 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -78,6 +78,9 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `FIRECRAWL_API_KEY` | Web scraping ([firecrawl.dev](https://firecrawl.dev/)) | | `FIRECRAWL_API_URL` | Custom Firecrawl API endpoint for self-hosted instances (optional) | | `TAVILY_API_KEY` | Tavily API key for AI-native web search, extract, and crawl ([app.tavily.com](https://app.tavily.com/home)) | +| `TOOL_GATEWAY_DOMAIN` | Shared tool-gateway domain suffix for Nous Subscribers only, used to derive vendor hosts, for example `nousresearch.com` -> `firecrawl-gateway.nousresearch.com` | +| `TOOL_GATEWAY_SCHEME` | Shared tool-gateway URL scheme for Nous Subscribers only, used to derive vendor hosts, `https` by default and `http` for local gateway testing | +| `TOOL_GATEWAY_USER_TOKEN` | Explicit Nous Subscriber access token for tool-gateway calls (optional; otherwise Hermes reads `~/.hermes/auth.json`) | | `BROWSERBASE_API_KEY` | Browser automation 
([browserbase.com](https://browserbase.com/)) | | `BROWSERBASE_PROJECT_ID` | Browserbase project ID | | `BROWSER_USE_API_KEY` | Browser Use cloud browser API key ([browser-use.com](https://browser-use.com/)) | @@ -114,6 +117,8 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `TERMINAL_CWD` | Working directory for all terminal sessions | | `SUDO_PASSWORD` | Enable sudo without interactive prompt | +For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETIME_SECONDS` controls when Hermes cleans up an idle terminal session, and later resumes may recreate the sandbox rather than keep the same live processes running. + ## SSH Backend | Variable | Description | diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 7e5dc5373..d8226062f 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -695,6 +695,8 @@ terminal: persistent_shell: true # Enabled by default for SSH backend ``` +For cloud sandboxes such as Modal and Daytona, `container_persistent: true` means Hermes will try to preserve filesystem state across sandbox recreation. It does not promise that the same live sandbox, PID space, or background processes will still be running later. + ### Common Terminal Backend Issues If terminal commands fail immediately or the terminal tool is reported as disabled, check the following: @@ -723,8 +725,9 @@ If terminal commands fail immediately or the terminal tool is reported as disabl - If either value is missing, Hermes will log a clear error and refuse to use the SSH backend. - **Modal backend** - - You need either a `MODAL_TOKEN_ID` environment variable or a `~/.modal.toml` config file. - - If neither is present, the backend check fails and Hermes will report that the Modal backend is not available. 
+ - Hermes can use either direct Modal credentials (`MODAL_TOKEN_ID` plus `MODAL_TOKEN_SECRET`, or `~/.modal.toml`) or a configured managed tool gateway with a Nous user token. + - Modal persistence is resumable filesystem state, not durable process continuity. If you need something to stay continuously up, use a deployment-oriented tool instead of the terminal sandbox. + - If neither direct credentials nor a managed gateway is present, Hermes will report that the Modal backend is not available. When in doubt, set `terminal.backend` back to `local` and verify that commands run there first. diff --git a/website/docs/user-guide/features/tools.md b/website/docs/user-guide/features/tools.md index 981d2caf2..bbea0a262 100644 --- a/website/docs/user-guide/features/tools.md +++ b/website/docs/user-guide/features/tools.md @@ -109,6 +109,13 @@ modal setup hermes config set terminal.backend modal ``` +Hermes can use Modal in two modes: + +- **Direct Modal**: Hermes talks to your Modal account directly. +- **Managed Modal**: Hermes talks to a gateway that owns the vendor credentials. + +In both cases, Modal is best treated as a task sandbox, not a deployment target. Persistent mode preserves filesystem state so later turns can resume your work, but Hermes may still clean up or recreate the live sandbox. Long-running servers and background processes are not guaranteed to survive idle cleanup, session teardown, or Hermes exit. + ### Container Resources Configure CPU, memory, disk, and persistence for all container backends: -- 2.43.0 From 1cbb1b99cc89a6dfd5a93a2a9362839afdbde56d Mon Sep 17 00:00:00 2001 From: Robin Fernandes Date: Mon, 30 Mar 2026 13:28:10 +0900 Subject: [PATCH 002/385] Gate tool-gateway behind an env var, so it's not in users' faces until we're ready. Even if users enable it, it'll be blocked server-side for now, until we unlock for non-admin users on tool-gateway. 
--- .env.example | 11 --- agent/prompt_builder.py | 4 + agent/smart_model_routing.py | 10 +-- gateway/config.py | 10 +-- hermes_cli/config.py | 12 ++- hermes_cli/nous_subscription.py | 21 +++-- hermes_cli/plugins.py | 4 +- hermes_cli/setup.py | 13 +++- hermes_cli/status.py | 40 +++++----- hermes_cli/tools_config.py | 10 ++- run_agent.py | 6 +- tests/agent/test_prompt_builder.py | 9 +++ tests/hermes_cli/test_setup.py | 3 + .../hermes_cli/test_status_model_provider.py | 22 ++++++ tests/hermes_cli/test_tools_config.py | 16 ++++ tests/test_cli_provider_resolution.py | 2 + tests/test_utils_truthy_values.py | 29 +++++++ .../test_managed_browserbase_and_modal.py | 5 ++ tests/tools/test_managed_media_gateways.py | 5 ++ tests/tools/test_managed_tool_gateway.py | 37 ++++++++- tests/tools/test_terminal_requirements.py | 22 +++++- .../tools/test_terminal_tool_requirements.py | 1 + tests/tools/test_web_tools_config.py | 29 ++++++- tools/browser_providers/browserbase.py | 12 ++- tools/image_generation_tool.py | 8 +- tools/managed_tool_gateway.py | 4 + tools/terminal_tool.py | 76 +++++++++++++++---- tools/tool_backend_helpers.py | 18 ++++- tools/transcription_tools.py | 16 ++-- tools/tts_tool.py | 9 ++- tools/web_tools.py | 76 +++++++++++-------- utils.py | 19 +++++ .../docs/reference/environment-variables.md | 3 - website/docs/user-guide/configuration.md | 4 +- website/docs/user-guide/features/tools.md | 7 -- 35 files changed, 426 insertions(+), 147 deletions(-) create mode 100644 tests/test_utils_truthy_values.py diff --git a/.env.example b/.env.example index 5567ca7ef..d273a6966 100644 --- a/.env.example +++ b/.env.example @@ -69,17 +69,6 @@ OPENCODE_GO_API_KEY= # Get at: https://parallel.ai PARALLEL_API_KEY= -# Tool-gateway config (Nous Subscribers only; preferred when available) -# Uses your Nous Subscriber OAuth access token from the Hermes auth store by default. -# Defaults to the Nous production gateway. Override for local dev. 
-# -# Derive vendor gateway URLs from a shared domain suffix: -# TOOL_GATEWAY_DOMAIN=nousresearch.com -# TOOL_GATEWAY_SCHEME=https -# -# Override the subscriber token (defaults to ~/.hermes/auth.json): -# TOOL_GATEWAY_USER_TOKEN= - # Firecrawl API Key - Web search, extract, and crawl # Get at: https://firecrawl.dev/ FIRECRAWL_API_KEY= diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 7a8d6d707..878c8658c 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -426,10 +426,14 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) - """Build a compact Nous subscription capability block for the system prompt.""" try: from hermes_cli.nous_subscription import get_nous_subscription_features + from tools.tool_backend_helpers import managed_nous_tools_enabled except Exception as exc: logger.debug("Failed to import Nous subscription helper: %s", exc) return "" + if not managed_nous_tools_enabled(): + return "" + valid_names = set(valid_tool_names or set()) relevant_tool_names = { "web_search", diff --git a/agent/smart_model_routing.py b/agent/smart_model_routing.py index d57cd1b83..dd445a03f 100644 --- a/agent/smart_model_routing.py +++ b/agent/smart_model_routing.py @@ -6,6 +6,8 @@ import os import re from typing import Any, Dict, Optional +from utils import is_truthy_value + _COMPLEX_KEYWORDS = { "debug", "debugging", @@ -47,13 +49,7 @@ _URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE) def _coerce_bool(value: Any, default: bool = False) -> bool: - if value is None: - return default - if isinstance(value, bool): - return value - if isinstance(value, str): - return value.strip().lower() in {"1", "true", "yes", "on"} - return bool(value) + return is_truthy_value(value, default=default) def _coerce_int(value: Any, default: int) -> int: diff --git a/gateway/config.py b/gateway/config.py index 935a50d74..1f84c7689 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -17,19 +17,14 @@ from typing import Dict, 
List, Optional, Any from enum import Enum from hermes_cli.config import get_hermes_home +from utils import is_truthy_value logger = logging.getLogger(__name__) def _coerce_bool(value: Any, default: bool = True) -> bool: """Coerce bool-ish config values, preserving a caller-provided default.""" - if value is None: - return default - if isinstance(value, bool): - return value - if isinstance(value, str): - return value.strip().lower() in ("true", "1", "yes", "on") - return bool(value) + return is_truthy_value(value, default=default) def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str: @@ -818,4 +813,3 @@ def _apply_env_overrides(config: GatewayConfig) -> None: except ValueError: pass - diff --git a/hermes_cli/config.py b/hermes_cli/config.py index b5ed25d6d..211e264e4 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -22,6 +22,8 @@ import tempfile from pathlib import Path from typing import Dict, Any, Optional, List, Tuple +from tools.tool_backend_helpers import managed_nous_tools_enabled as _managed_nous_tools_enabled + _IS_WINDOWS = platform.system() == "Windows" _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") # Env var names written to .env that aren't in OPTIONAL_ENV_VARS @@ -39,7 +41,6 @@ _EXTRA_ENV_KEYS = frozenset({ "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE", "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_HOME_ROOM", }) - import yaml from hermes_cli.colors import Colors, color @@ -959,6 +960,15 @@ OPTIONAL_ENV_VARS = { }, } +if not _managed_nous_tools_enabled(): + for _hidden_var in ( + "FIRECRAWL_GATEWAY_URL", + "TOOL_GATEWAY_DOMAIN", + "TOOL_GATEWAY_SCHEME", + "TOOL_GATEWAY_USER_TOKEN", + ): + OPTIONAL_ENV_VARS.pop(_hidden_var, None) + def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]: """ diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index f5f8e8615..063732235 100644 --- a/hermes_cli/nous_subscription.py +++ 
b/hermes_cli/nous_subscription.py @@ -11,6 +11,7 @@ from hermes_cli.config import get_env_value, load_config from tools.managed_tool_gateway import is_managed_tool_gateway_ready from tools.tool_backend_helpers import ( has_direct_modal_credentials, + managed_nous_tools_enabled, normalize_browser_cloud_provider, normalize_modal_mode, resolve_openai_audio_api_key, @@ -156,6 +157,7 @@ def get_nous_subscription_features( except Exception: nous_status = {} + managed_tools_flag = managed_nous_tools_enabled() nous_auth_present = bool(nous_status.get("logged_in")) subscribed = provider_is_nous or nous_auth_present @@ -193,11 +195,11 @@ def get_nous_subscription_features( direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY")) direct_modal = has_direct_modal_credentials() - managed_web_available = nous_auth_present and is_managed_tool_gateway_ready("firecrawl") - managed_image_available = nous_auth_present and is_managed_tool_gateway_ready("fal-queue") - managed_tts_available = nous_auth_present and is_managed_tool_gateway_ready("openai-audio") - managed_browser_available = nous_auth_present and is_managed_tool_gateway_ready("browserbase") - managed_modal_available = nous_auth_present and is_managed_tool_gateway_ready("modal") + managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl") + managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue") + managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio") + managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browserbase") + managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal") web_managed = web_backend == "firecrawl" and managed_web_available and not direct_firecrawl web_active = bool( @@ -355,6 +357,9 @@ def get_nous_subscription_features( def 
get_nous_subscription_explainer_lines() -> list[str]: + if not managed_nous_tools_enabled(): + return [] + return [ "Nous subscription enables managed web tools, image generation, OpenAI TTS, and browser automation by default.", "Those managed tools bill to your Nous subscription. Modal execution is optional and can bill to your subscription too.", @@ -364,6 +369,9 @@ def get_nous_subscription_explainer_lines() -> list[str]: def apply_nous_provider_defaults(config: Dict[str, object]) -> set[str]: """Apply provider-level Nous defaults shared by `hermes setup` and `hermes model`.""" + if not managed_nous_tools_enabled(): + return set() + features = get_nous_subscription_features(config) if not features.provider_is_nous: return set() @@ -386,6 +394,9 @@ def apply_nous_managed_defaults( *, enabled_toolsets: Optional[Iterable[str]] = None, ) -> set[str]: + if not managed_nous_tools_enabled(): + return set() + features = get_nous_subscription_features(config) if not features.provider_is_nous: return set() diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index 5e27535a0..c5195ffa7 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -38,6 +38,8 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Set +from utils import env_var_enabled + try: import yaml except ImportError: # pragma: no cover – yaml is optional at import time @@ -65,7 +67,7 @@ _NS_PARENT = "hermes_plugins" def _env_enabled(name: str) -> bool: """Return True when an env var is set to a truthy opt-in value.""" - return os.getenv(name, "").strip().lower() in {"1", "true", "yes", "on"} + return env_var_enabled(name) # --------------------------------------------------------------------------- diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 59c8d92c1..1abf37610 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -23,6 +23,7 @@ from hermes_cli.nous_subscription import ( 
get_nous_subscription_explainer_lines, get_nous_subscription_features, ) +from tools.tool_backend_helpers import managed_nous_tools_enabled logger = logging.getLogger(__name__) @@ -59,9 +60,13 @@ def _set_default_model(config: Dict[str, Any], model_name: str) -> None: def _print_nous_subscription_guidance() -> None: + lines = get_nous_subscription_explainer_lines() + if not lines: + return + print() print_header("Nous Subscription Tools") - for line in get_nous_subscription_explainer_lines(): + for line in lines: print_info(line) @@ -663,7 +668,7 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Modal Execution (direct Modal)", True, None)) else: tool_status.append(("Modal Execution", False, "run 'hermes setup terminal'")) - elif subscription_features.nous_auth_present: + elif managed_nous_tools_enabled() and subscription_features.nous_auth_present: tool_status.append(("Modal Execution (optional via Nous subscription)", True, None)) # Tinker + WandB (RL training) @@ -1912,7 +1917,7 @@ def _setup_tts_provider(config: dict): choices = [] providers = [] - if subscription_features.nous_auth_present: + if managed_nous_tools_enabled() and subscription_features.nous_auth_present: choices.append("Nous Subscription (managed OpenAI TTS, billed to your subscription)") providers.append("nous-openai") choices.extend( @@ -2137,6 +2142,8 @@ def setup_terminal_backend(config: dict): from tools.tool_backend_helpers import normalize_modal_mode managed_modal_available = bool( + managed_nous_tools_enabled() + and get_nous_subscription_features(config).nous_auth_present and is_managed_tool_gateway_ready("modal") ) diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 649d41231..4b68c084b 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -18,6 +18,7 @@ from hermes_cli.models import provider_label from hermes_cli.nous_subscription import get_nous_subscription_features from hermes_cli.runtime_provider import resolve_requested_provider from 
hermes_constants import OPENROUTER_MODELS_URL +from tools.tool_backend_helpers import managed_nous_tools_enabled def check_mark(ok: bool) -> str: if ok: @@ -190,26 +191,27 @@ def show_status(args): # ========================================================================= # Nous Subscription Features # ========================================================================= - features = get_nous_subscription_features(config) - print() - print(color("◆ Nous Subscription Features", Colors.CYAN, Colors.BOLD)) - if not features.nous_auth_present: - print(" Nous Portal ✗ not logged in") - else: - print(" Nous Portal ✓ managed tools available") - for feature in features.items(): - if feature.managed_by_nous: - state = "active via Nous subscription" - elif feature.active: - current = feature.current_provider or "configured provider" - state = f"active via {current}" - elif feature.included_by_default and features.nous_auth_present: - state = "included by subscription, not currently selected" - elif feature.key == "modal" and features.nous_auth_present: - state = "available via subscription (optional)" + if managed_nous_tools_enabled(): + features = get_nous_subscription_features(config) + print() + print(color("◆ Nous Subscription Features", Colors.CYAN, Colors.BOLD)) + if not features.nous_auth_present: + print(" Nous Portal ✗ not logged in") else: - state = "not configured" - print(f" {feature.label:<15} {check_mark(feature.available or feature.active or feature.managed_by_nous)} {state}") + print(" Nous Portal ✓ managed tools available") + for feature in features.items(): + if feature.managed_by_nous: + state = "active via Nous subscription" + elif feature.active: + current = feature.current_provider or "configured provider" + state = f"active via {current}" + elif feature.included_by_default and features.nous_auth_present: + state = "included by subscription, not currently selected" + elif feature.key == "modal" and features.nous_auth_present: + state = "available 
via subscription (optional)" + else: + state = "not configured" + print(f" {feature.label:<15} {check_mark(feature.available or feature.active or feature.managed_by_nous)} {state}") # ========================================================================= # API-Key Providers diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 2226d5173..4046f40ac 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -22,6 +22,7 @@ from hermes_cli.nous_subscription import ( apply_nous_managed_defaults, get_nous_subscription_features, ) +from tools.tool_backend_helpers import managed_nous_tools_enabled PROJECT_ROOT = Path(__file__).parent.parent.resolve() @@ -737,6 +738,8 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]: features = get_nous_subscription_features(config) visible = [] for provider in cat.get("providers", []): + if provider.get("managed_nous_feature") and not managed_nous_tools_enabled(): + continue if provider.get("requires_nous_auth") and not features.nous_auth_present: continue visible.append(provider) @@ -1234,9 +1237,10 @@ def tools_command(args=None, first_install: bool = False, config: dict = None): config, enabled_toolsets=new_enabled, ) - for ts_key in sorted(auto_configured): - label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key) - print(color(f" ✓ {label}: using your Nous subscription defaults", Colors.GREEN)) + if managed_nous_tools_enabled(): + for ts_key in sorted(auto_configured): + label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key) + print(color(f" ✓ {label}: using your Nous subscription defaults", Colors.GREEN)) # Walk through ALL selected tools that have provider options or # need API keys. 
This ensures browser (Local vs Browserbase), diff --git a/run_agent.py b/run_agent.py index 186e20711..cd3884c52 100644 --- a/run_agent.py +++ b/run_agent.py @@ -96,7 +96,7 @@ from agent.trajectory import ( convert_scratchpad_to_think, has_incomplete_scratchpad, save_trajectory as _save_trajectory_to_file, ) -from utils import atomic_json_write +from utils import atomic_json_write, env_var_enabled HONCHO_TOOL_NAMES = { "honcho_context", @@ -2005,7 +2005,7 @@ class AIAgent: self._vprint(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}") - if os.getenv("HERMES_DUMP_REQUEST_STDOUT", "").strip().lower() in {"1", "true", "yes", "on"}: + if env_var_enabled("HERMES_DUMP_REQUEST_STDOUT"): print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str)) return dump_file @@ -6052,7 +6052,7 @@ class AIAgent: if self.api_mode == "codex_responses": api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) - if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}: + if env_var_enabled("HERMES_DUMP_REQUESTS"): self._dump_api_request_debug(api_kwargs, reason="preflight") # Always prefer the streaming path — even without stream diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index f1859b036..deeac8990 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -401,6 +401,7 @@ class TestBuildSkillsSystemPrompt: class TestBuildNousSubscriptionPrompt: def test_includes_active_subscription_features(self, monkeypatch): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") monkeypatch.setattr( "hermes_cli.nous_subscription.get_nous_subscription_features", lambda config=None: NousSubscriptionFeatures( @@ -424,6 +425,7 @@ class TestBuildNousSubscriptionPrompt: assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys" in prompt def test_non_subscriber_prompt_includes_relevant_upgrade_guidance(self, monkeypatch): 
+ monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") monkeypatch.setattr( "hermes_cli.nous_subscription.get_nous_subscription_features", lambda config=None: NousSubscriptionFeatures( @@ -445,6 +447,13 @@ class TestBuildNousSubscriptionPrompt: assert "suggest Nous subscription as one option" in prompt assert "Do not mention subscription unless" in prompt + def test_feature_flag_off_returns_empty_prompt(self, monkeypatch): + monkeypatch.delenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", raising=False) + + prompt = build_nous_subscription_prompt({"web_search"}) + + assert prompt == "" + # ========================================================================= # Context files prompt builder diff --git a/tests/hermes_cli/test_setup.py b/tests/hermes_cli/test_setup.py index 66af7faf0..1a4839de4 100644 --- a/tests/hermes_cli/test_setup.py +++ b/tests/hermes_cli/test_setup.py @@ -183,6 +183,7 @@ def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, mon def test_nous_setup_sets_managed_openai_tts_when_unconfigured(tmp_path, monkeypatch, capsys): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") monkeypatch.setenv("HERMES_HOME", str(tmp_path)) _clear_provider_env(monkeypatch) @@ -270,6 +271,7 @@ def test_nous_setup_preserves_existing_tts_provider(tmp_path, monkeypatch): def test_modal_setup_can_use_nous_subscription_without_modal_creds(tmp_path, monkeypatch, capsys): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") monkeypatch.setenv("HERMES_HOME", str(tmp_path)) config = load_config() @@ -311,6 +313,7 @@ def test_modal_setup_can_use_nous_subscription_without_modal_creds(tmp_path, mon def test_modal_setup_persists_direct_mode_when_user_chooses_their_own_account(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") monkeypatch.setenv("HERMES_HOME", str(tmp_path)) monkeypatch.delenv("MODAL_TOKEN_ID", raising=False) monkeypatch.delenv("MODAL_TOKEN_SECRET", raising=False) diff --git 
a/tests/hermes_cli/test_status_model_provider.py b/tests/hermes_cli/test_status_model_provider.py index 2056aac4f..1e6531d37 100644 --- a/tests/hermes_cli/test_status_model_provider.py +++ b/tests/hermes_cli/test_status_model_provider.py @@ -64,6 +64,7 @@ def test_show_status_displays_legacy_string_model_and_custom_endpoint(monkeypatc def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") from hermes_cli import status as status_mod _patch_common_status_deps(monkeypatch, status_mod, tmp_path) @@ -100,3 +101,24 @@ def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path assert "Nous Subscription Features" in out assert "Browser automation" in out assert "active via Nous subscription" in out + + +def test_show_status_hides_nous_subscription_section_when_feature_flag_is_off(monkeypatch, capsys, tmp_path): + monkeypatch.delenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", raising=False) + from hermes_cli import status as status_mod + + _patch_common_status_deps(monkeypatch, status_mod, tmp_path) + monkeypatch.setattr( + status_mod, + "load_config", + lambda: {"model": {"default": "claude-opus-4-6", "provider": "nous"}}, + raising=False, + ) + monkeypatch.setattr(status_mod, "resolve_requested_provider", lambda requested=None: "nous", raising=False) + monkeypatch.setattr(status_mod, "resolve_provider", lambda requested=None, **kwargs: "nous", raising=False) + monkeypatch.setattr(status_mod, "provider_label", lambda provider: "Nous Portal", raising=False) + + status_mod.show_status(SimpleNamespace(all=False, deep=False)) + + out = capsys.readouterr().out + assert "Nous Subscription Features" not in out diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py index ebcef8327..dccbce9d3 100644 --- a/tests/hermes_cli/test_tools_config.py +++ b/tests/hermes_cli/test_tools_config.py @@ -248,6 +248,7 @@ def 
test_save_platform_tools_still_preserves_mcp_with_platform_default_present() def test_visible_providers_include_nous_subscription_when_logged_in(monkeypatch): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") config = {"model": {"provider": "nous"}} monkeypatch.setattr( @@ -260,6 +261,20 @@ def test_visible_providers_include_nous_subscription_when_logged_in(monkeypatch) assert providers[0]["name"].startswith("Nous Subscription") +def test_visible_providers_hide_nous_subscription_when_feature_flag_is_off(monkeypatch): + monkeypatch.delenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", raising=False) + config = {"model": {"provider": "nous"}} + + monkeypatch.setattr( + "hermes_cli.nous_subscription.get_nous_auth_status", + lambda: {"logged_in": True}, + ) + + providers = _visible_providers(TOOL_CATEGORIES["browser"], config) + + assert all(not provider["name"].startswith("Nous Subscription") for provider in providers) + + def test_local_browser_provider_is_saved_explicitly(monkeypatch): config = {} local_provider = next( @@ -275,6 +290,7 @@ def test_local_browser_provider_is_saved_explicitly(monkeypatch): def test_first_install_nous_auto_configures_managed_defaults(monkeypatch): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") config = { "model": {"provider": "nous"}, "platform_toolsets": {"cli": []}, diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 65bcdf5c7..cef89cf16 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -277,6 +277,7 @@ def test_codex_provider_replaces_incompatible_default_model(monkeypatch): def test_model_flow_nous_prints_subscription_guidance_without_mutating_explicit_tts(monkeypatch, capsys): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") config = { "model": {"provider": "nous", "default": "claude-opus-4-6"}, "tts": {"provider": "elevenlabs"}, @@ -315,6 +316,7 @@ def 
test_model_flow_nous_prints_subscription_guidance_without_mutating_explicit_ def test_model_flow_nous_applies_managed_tts_default_when_unconfigured(monkeypatch, capsys): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") config = { "model": {"provider": "nous", "default": "claude-opus-4-6"}, "tts": {"provider": "edge"}, diff --git a/tests/test_utils_truthy_values.py b/tests/test_utils_truthy_values.py new file mode 100644 index 000000000..f6d2856f4 --- /dev/null +++ b/tests/test_utils_truthy_values.py @@ -0,0 +1,29 @@ +"""Tests for shared truthy-value helpers.""" + +from utils import env_var_enabled, is_truthy_value + + +def test_is_truthy_value_accepts_common_truthy_strings(): + assert is_truthy_value("true") is True + assert is_truthy_value(" YES ") is True + assert is_truthy_value("on") is True + assert is_truthy_value("1") is True + + +def test_is_truthy_value_respects_default_for_none(): + assert is_truthy_value(None, default=True) is True + assert is_truthy_value(None, default=False) is False + + +def test_is_truthy_value_rejects_falsey_strings(): + assert is_truthy_value("false") is False + assert is_truthy_value("0") is False + assert is_truthy_value("off") is False + + +def test_env_var_enabled_uses_shared_truthy_rules(monkeypatch): + monkeypatch.setenv("HERMES_TEST_BOOL", "YeS") + assert env_var_enabled("HERMES_TEST_BOOL") is True + + monkeypatch.setenv("HERMES_TEST_BOOL", "no") + assert env_var_enabled("HERMES_TEST_BOOL") is False diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browserbase_and_modal.py index 3d97a4373..085f19cfd 100644 --- a/tests/tools/test_managed_browserbase_and_modal.py +++ b/tests/tools/test_managed_browserbase_and_modal.py @@ -45,6 +45,11 @@ def _restore_tool_and_agent_modules(): sys.modules.update(original_modules) +@pytest.fixture(autouse=True) +def _enable_managed_nous_tools(monkeypatch): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") + + def 
_install_fake_tools_package(): _reset_modules(("tools", "agent")) diff --git a/tests/tools/test_managed_media_gateways.py b/tests/tools/test_managed_media_gateways.py index 48cd5f41f..9a2d8391c 100644 --- a/tests/tools/test_managed_media_gateways.py +++ b/tests/tools/test_managed_media_gateways.py @@ -44,6 +44,11 @@ def _restore_tool_and_agent_modules(): sys.modules.update(original_modules) +@pytest.fixture(autouse=True) +def _enable_managed_nous_tools(monkeypatch): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") + + def _install_fake_tools_package(): tools_package = types.ModuleType("tools") tools_package.__path__ = [str(TOOLS_DIR)] # type: ignore[attr-defined] diff --git a/tests/tools/test_managed_tool_gateway.py b/tests/tools/test_managed_tool_gateway.py index 591708345..39b9125e1 100644 --- a/tests/tools/test_managed_tool_gateway.py +++ b/tests/tools/test_managed_tool_gateway.py @@ -16,7 +16,14 @@ resolve_managed_tool_gateway = managed_tool_gateway.resolve_managed_tool_gateway def test_resolve_managed_tool_gateway_derives_vendor_origin_from_shared_domain(): - with patch.dict(os.environ, {"TOOL_GATEWAY_DOMAIN": "nousresearch.com"}, clear=False): + with patch.dict( + os.environ, + { + "HERMES_ENABLE_NOUS_MANAGED_TOOLS": "1", + "TOOL_GATEWAY_DOMAIN": "nousresearch.com", + }, + clear=False, + ): result = resolve_managed_tool_gateway( "firecrawl", token_reader=lambda: "nous-token", @@ -29,7 +36,14 @@ def test_resolve_managed_tool_gateway_derives_vendor_origin_from_shared_domain() def test_resolve_managed_tool_gateway_uses_vendor_specific_override(): - with patch.dict(os.environ, {"BROWSERBASE_GATEWAY_URL": "http://browserbase-gateway.localhost:3009/"}, clear=False): + with patch.dict( + os.environ, + { + "HERMES_ENABLE_NOUS_MANAGED_TOOLS": "1", + "BROWSERBASE_GATEWAY_URL": "http://browserbase-gateway.localhost:3009/", + }, + clear=False, + ): result = resolve_managed_tool_gateway( "browserbase", token_reader=lambda: "nous-token", @@ -40,7 +54,14 @@ def 
test_resolve_managed_tool_gateway_uses_vendor_specific_override(): def test_resolve_managed_tool_gateway_is_inactive_without_nous_token(): - with patch.dict(os.environ, {"TOOL_GATEWAY_DOMAIN": "nousresearch.com"}, clear=False): + with patch.dict( + os.environ, + { + "HERMES_ENABLE_NOUS_MANAGED_TOOLS": "1", + "TOOL_GATEWAY_DOMAIN": "nousresearch.com", + }, + clear=False, + ): result = resolve_managed_tool_gateway( "firecrawl", token_reader=lambda: None, @@ -49,6 +70,16 @@ def test_resolve_managed_tool_gateway_is_inactive_without_nous_token(): assert result is None +def test_resolve_managed_tool_gateway_is_disabled_without_feature_flag(): + with patch.dict(os.environ, {"TOOL_GATEWAY_DOMAIN": "nousresearch.com"}, clear=False): + result = resolve_managed_tool_gateway( + "firecrawl", + token_reader=lambda: "nous-token", + ) + + assert result is None + + def test_read_nous_access_token_refreshes_expiring_cached_token(tmp_path, monkeypatch): monkeypatch.delenv("TOOL_GATEWAY_USER_TOKEN", raising=False) monkeypatch.setenv("HERMES_HOME", str(tmp_path)) diff --git a/tests/tools/test_terminal_requirements.py b/tests/tools/test_terminal_requirements.py index c93d68e17..c55fc8310 100644 --- a/tests/tools/test_terminal_requirements.py +++ b/tests/tools/test_terminal_requirements.py @@ -7,6 +7,7 @@ terminal_tool_module = importlib.import_module("tools.terminal_tool") def _clear_terminal_env(monkeypatch): """Remove terminal env vars that could affect requirements checks.""" keys = [ + "HERMES_ENABLE_NOUS_MANAGED_TOOLS", "TERMINAL_ENV", "TERMINAL_MODAL_MODE", "TERMINAL_SSH_HOST", @@ -73,13 +74,14 @@ def test_modal_backend_without_token_or_config_logs_specific_error(monkeypatch, assert ok is False assert any( - "Modal backend selected but no direct Modal credentials/config or managed tool gateway was found" in record.getMessage() + "Modal backend selected but no direct Modal credentials/config was found" in record.getMessage() for record in caplog.records ) def 
test_modal_backend_with_managed_gateway_does_not_require_direct_creds_or_minisweagent(monkeypatch, tmp_path): _clear_terminal_env(monkeypatch) + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") monkeypatch.setenv("TERMINAL_ENV", "modal") monkeypatch.setenv("HOME", str(tmp_path)) monkeypatch.setenv("USERPROFILE", str(tmp_path)) @@ -115,3 +117,21 @@ def test_modal_backend_direct_mode_does_not_fall_back_to_managed(monkeypatch, ca "TERMINAL_MODAL_MODE=direct" in record.getMessage() for record in caplog.records ) + + +def test_modal_backend_managed_mode_without_feature_flag_logs_clear_error(monkeypatch, caplog, tmp_path): + _clear_terminal_env(monkeypatch) + monkeypatch.setenv("TERMINAL_ENV", "modal") + monkeypatch.setenv("TERMINAL_MODAL_MODE", "managed") + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("USERPROFILE", str(tmp_path)) + monkeypatch.setattr(terminal_tool_module, "is_managed_tool_gateway_ready", lambda _vendor: False) + + with caplog.at_level(logging.ERROR): + ok = terminal_tool_module.check_terminal_requirements() + + assert ok is False + assert any( + "HERMES_ENABLE_NOUS_MANAGED_TOOLS is not enabled" in record.getMessage() + for record in caplog.records + ) diff --git a/tests/tools/test_terminal_tool_requirements.py b/tests/tools/test_terminal_tool_requirements.py index 216284932..d0ce42735 100644 --- a/tests/tools/test_terminal_tool_requirements.py +++ b/tests/tools/test_terminal_tool_requirements.py @@ -28,6 +28,7 @@ class TestTerminalRequirements: assert {"read_file", "write_file", "patch", "search_files"}.issubset(names) def test_terminal_and_execute_code_tools_resolve_for_managed_modal(self, monkeypatch, tmp_path): + monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") monkeypatch.setenv("HOME", str(tmp_path)) monkeypatch.setenv("USERPROFILE", str(tmp_path)) monkeypatch.delenv("MODAL_TOKEN_ID", raising=False) diff --git a/tests/tools/test_web_tools_config.py b/tests/tools/test_web_tools_config.py index 
1354c2431..93ab6846f 100644 --- a/tests/tools/test_web_tools_config.py +++ b/tests/tools/test_web_tools_config.py @@ -11,6 +11,8 @@ Coverage: import importlib import json import os +import sys +import types import pytest from unittest.mock import patch, MagicMock, AsyncMock @@ -24,6 +26,7 @@ class TestFirecrawlClientConfig: tools.web_tools._firecrawl_client = None tools.web_tools._firecrawl_client_config = None for key in ( + "HERMES_ENABLE_NOUS_MANAGED_TOOLS", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "FIRECRAWL_GATEWAY_URL", @@ -32,6 +35,7 @@ class TestFirecrawlClientConfig: "TOOL_GATEWAY_USER_TOKEN", ): os.environ.pop(key, None) + os.environ["HERMES_ENABLE_NOUS_MANAGED_TOOLS"] = "1" def teardown_method(self): """Reset client after each test.""" @@ -39,6 +43,7 @@ class TestFirecrawlClientConfig: tools.web_tools._firecrawl_client = None tools.web_tools._firecrawl_client_config = None for key in ( + "HERMES_ENABLE_NOUS_MANAGED_TOOLS", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "FIRECRAWL_GATEWAY_URL", @@ -293,6 +298,7 @@ class TestBackendSelection: """ _ENV_KEYS = ( + "HERMES_ENABLE_NOUS_MANAGED_TOOLS", "PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", @@ -304,8 +310,10 @@ class TestBackendSelection: ) def setup_method(self): + os.environ["HERMES_ENABLE_NOUS_MANAGED_TOOLS"] = "1" for key in self._ENV_KEYS: - os.environ.pop(key, None) + if key != "HERMES_ENABLE_NOUS_MANAGED_TOOLS": + os.environ.pop(key, None) def teardown_method(self): for key in self._ENV_KEYS: @@ -417,11 +425,25 @@ class TestParallelClientConfig: import tools.web_tools tools.web_tools._parallel_client = None os.environ.pop("PARALLEL_API_KEY", None) + fake_parallel = types.ModuleType("parallel") + + class Parallel: + def __init__(self, api_key): + self.api_key = api_key + + class AsyncParallel: + def __init__(self, api_key): + self.api_key = api_key + + fake_parallel.Parallel = Parallel + fake_parallel.AsyncParallel = AsyncParallel + sys.modules["parallel"] = fake_parallel def 
teardown_method(self): import tools.web_tools tools.web_tools._parallel_client = None os.environ.pop("PARALLEL_API_KEY", None) + sys.modules.pop("parallel", None) def test_creates_client_with_key(self): """PARALLEL_API_KEY set → creates Parallel client.""" @@ -479,6 +501,7 @@ class TestCheckWebApiKey: """Test suite for check_web_api_key() unified availability check.""" _ENV_KEYS = ( + "HERMES_ENABLE_NOUS_MANAGED_TOOLS", "PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", @@ -490,8 +513,10 @@ class TestCheckWebApiKey: ) def setup_method(self): + os.environ["HERMES_ENABLE_NOUS_MANAGED_TOOLS"] = "1" for key in self._ENV_KEYS: - os.environ.pop(key, None) + if key != "HERMES_ENABLE_NOUS_MANAGED_TOOLS": + os.environ.pop(key, None) def teardown_method(self): for key in self._ENV_KEYS: diff --git a/tools/browser_providers/browserbase.py b/tools/browser_providers/browserbase.py index 342b430b1..5c580c3f3 100644 --- a/tools/browser_providers/browserbase.py +++ b/tools/browser_providers/browserbase.py @@ -10,6 +10,7 @@ import requests from tools.browser_providers.base import CloudBrowserProvider from tools.managed_tool_gateway import resolve_managed_tool_gateway +from tools.tool_backend_helpers import managed_nous_tools_enabled logger = logging.getLogger(__name__) _pending_create_keys: Dict[str, str] = {} @@ -93,10 +94,15 @@ class BrowserbaseProvider(CloudBrowserProvider): def _get_config(self) -> Dict[str, Any]: config = self._get_config_or_none() if config is None: - raise ValueError( - "Browserbase requires either direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID credentials " - "or a managed Browserbase gateway configuration." + message = ( + "Browserbase requires direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID credentials." ) + if managed_nous_tools_enabled(): + message = ( + "Browserbase requires either direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID " + "credentials or a managed Browserbase gateway configuration." 
+ ) + raise ValueError(message) return config def create_session(self, task_id: str) -> Dict[str, object]: diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py index 84edb93fe..77e090529 100644 --- a/tools/image_generation_tool.py +++ b/tools/image_generation_tool.py @@ -39,6 +39,7 @@ from urllib.parse import urlencode import fal_client from tools.debug_helpers import DebugSession from tools.managed_tool_gateway import resolve_managed_tool_gateway +from tools.tool_backend_helpers import managed_nous_tools_enabled logger = logging.getLogger(__name__) @@ -416,9 +417,10 @@ def image_generate_tool( # Check API key availability if not (os.getenv("FAL_KEY") or _resolve_managed_fal_gateway()): - raise ValueError( - "FAL_KEY environment variable not set and managed FAL gateway is unavailable" - ) + message = "FAL_KEY environment variable not set" + if managed_nous_tools_enabled(): + message += " and managed FAL gateway is unavailable" + raise ValueError(message) # Validate other parameters validated_params = _validate_parameters( diff --git a/tools/managed_tool_gateway.py b/tools/managed_tool_gateway.py index 96dd27b30..4d9da52bf 100644 --- a/tools/managed_tool_gateway.py +++ b/tools/managed_tool_gateway.py @@ -9,6 +9,7 @@ from dataclasses import dataclass from typing import Callable, Optional from hermes_cli.config import get_hermes_home +from tools.tool_backend_helpers import managed_nous_tools_enabled _DEFAULT_TOOL_GATEWAY_DOMAIN = "nousresearch.com" _DEFAULT_TOOL_GATEWAY_SCHEME = "https" @@ -131,6 +132,9 @@ def resolve_managed_tool_gateway( token_reader: Optional[Callable[[], Optional[str]]] = None, ) -> Optional[ManagedToolGatewayConfig]: """Resolve shared managed-tool gateway config for a vendor.""" + if not managed_nous_tools_enabled(): + return None + resolved_gateway_builder = gateway_builder or build_vendor_gateway_url resolved_token_reader = token_reader or read_nous_access_token diff --git a/tools/terminal_tool.py 
b/tools/terminal_tool.py index 13b724bf5..d9d2fa4f7 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -65,7 +65,12 @@ def ensure_minisweagent_on_path(_repo_root: Path | None = None) -> None: # Singularity helpers (scratch dir, SIF cache) now live in tools/environments/singularity.py from tools.environments.singularity import _get_scratch_dir -from tools.tool_backend_helpers import has_direct_modal_credentials, normalize_modal_mode +from tools.tool_backend_helpers import ( + coerce_modal_mode, + has_direct_modal_credentials, + managed_nous_tools_enabled, + normalize_modal_mode, +) # Disk usage warning threshold (in GB) @@ -506,7 +511,7 @@ def _get_env_config() -> Dict[str, Any]: return { "env_type": env_type, - "modal_mode": normalize_modal_mode(os.getenv("TERMINAL_MODAL_MODE", "auto")), + "modal_mode": coerce_modal_mode(os.getenv("TERMINAL_MODAL_MODE", "auto")), "docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", default_image), "docker_forward_env": _parse_env_var("TERMINAL_DOCKER_FORWARD_ENV", "[]", json.loads, "valid JSON"), "singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", f"docker://{default_image}"), @@ -541,9 +546,13 @@ def _get_env_config() -> Dict[str, Any]: def _get_modal_backend_state(modal_mode: object | None) -> Dict[str, Any]: """Resolve direct vs managed Modal backend selection.""" + requested_mode = coerce_modal_mode(modal_mode) normalized_mode = normalize_modal_mode(modal_mode) has_direct = has_direct_modal_credentials() managed_ready = is_managed_tool_gateway_ready("modal") + managed_mode_blocked = ( + requested_mode == "managed" and not managed_nous_tools_enabled() + ) if normalized_mode == "managed": selected_backend = "managed" if managed_ready else None @@ -553,9 +562,11 @@ def _get_modal_backend_state(modal_mode: object | None) -> Dict[str, Any]: selected_backend = "direct" if has_direct else "managed" if managed_ready else None return { + "requested_mode": requested_mode, "mode": normalized_mode, "has_direct": 
has_direct, "managed_ready": managed_ready, + "managed_mode_blocked": managed_mode_blocked, "selected_backend": selected_backend, } @@ -636,6 +647,13 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, ) if modal_state["selected_backend"] != "direct": + if modal_state["managed_mode_blocked"]: + raise ValueError( + "Modal backend is configured for managed mode, but " + "HERMES_ENABLE_NOUS_MANAGED_TOOLS is not enabled and no direct " + "Modal credentials/config were found. Enable the feature flag or " + "choose TERMINAL_MODAL_MODE=direct/auto." + ) if modal_state["mode"] == "managed": raise ValueError( "Modal backend is configured for managed mode, but the managed tool gateway is unavailable." @@ -644,9 +662,12 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, raise ValueError( "Modal backend is configured for direct mode, but no direct Modal credentials/config were found." ) - raise ValueError( - "Modal backend selected but no direct Modal credentials/config or managed tool gateway was found." - ) + message = "Modal backend selected but no direct Modal credentials/config was found." + if managed_nous_tools_enabled(): + message = ( + "Modal backend selected but no direct Modal credentials/config or managed tool gateway was found." + ) + raise ValueError(message) return _ModalEnvironment( image=image, cwd=cwd, timeout=timeout, @@ -1283,25 +1304,48 @@ def check_terminal_requirements() -> bool: return True if modal_state["selected_backend"] != "direct": + if modal_state["managed_mode_blocked"]: + logger.error( + "Modal backend selected with TERMINAL_MODAL_MODE=managed, but " + "HERMES_ENABLE_NOUS_MANAGED_TOOLS is not enabled and no direct " + "Modal credentials/config were found. Enable the feature flag " + "or choose TERMINAL_MODAL_MODE=direct/auto." 
+ ) + return False if modal_state["mode"] == "managed": logger.error( "Modal backend selected with TERMINAL_MODAL_MODE=managed, but the managed " "tool gateway is unavailable. Configure the managed gateway or choose " "TERMINAL_MODAL_MODE=direct/auto." ) + return False elif modal_state["mode"] == "direct": - logger.error( - "Modal backend selected with TERMINAL_MODAL_MODE=direct, but no direct " - "Modal credentials/config were found. Configure Modal or choose " - "TERMINAL_MODAL_MODE=managed/auto." - ) + if managed_nous_tools_enabled(): + logger.error( + "Modal backend selected with TERMINAL_MODAL_MODE=direct, but no direct " + "Modal credentials/config were found. Configure Modal or choose " + "TERMINAL_MODAL_MODE=managed/auto." + ) + else: + logger.error( + "Modal backend selected with TERMINAL_MODAL_MODE=direct, but no direct " + "Modal credentials/config were found. Configure Modal or choose " + "TERMINAL_MODAL_MODE=auto." + ) + return False else: - logger.error( - "Modal backend selected but no direct Modal credentials/config or managed " - "tool gateway was found. Configure Modal, set up the managed gateway, " - "or choose a different TERMINAL_ENV." - ) - return False + if managed_nous_tools_enabled(): + logger.error( + "Modal backend selected but no direct Modal credentials/config or managed " + "tool gateway was found. Configure Modal, set up the managed gateway, " + "or choose a different TERMINAL_ENV." + ) + else: + logger.error( + "Modal backend selected but no direct Modal credentials/config was found. " + "Configure Modal or choose a different TERMINAL_ENV." 
+ ) + return False if importlib.util.find_spec("swerex") is None: logger.error("swe-rex is required for direct modal terminal backend: pip install 'swe-rex[modal]'") diff --git a/tools/tool_backend_helpers.py b/tools/tool_backend_helpers.py index bcf93e849..4b8d9d157 100644 --- a/tools/tool_backend_helpers.py +++ b/tools/tool_backend_helpers.py @@ -5,26 +5,40 @@ from __future__ import annotations import os from pathlib import Path +from utils import env_var_enabled _DEFAULT_BROWSER_PROVIDER = "local" _DEFAULT_MODAL_MODE = "auto" _VALID_MODAL_MODES = {"auto", "direct", "managed"} +def managed_nous_tools_enabled() -> bool: + """Return True when the hidden Nous-managed tools feature flag is enabled.""" + return env_var_enabled("HERMES_ENABLE_NOUS_MANAGED_TOOLS") + + def normalize_browser_cloud_provider(value: object | None) -> str: """Return a normalized browser provider key.""" provider = str(value or _DEFAULT_BROWSER_PROVIDER).strip().lower() return provider or _DEFAULT_BROWSER_PROVIDER -def normalize_modal_mode(value: object | None) -> str: - """Return a normalized modal execution mode.""" +def coerce_modal_mode(value: object | None) -> str: + """Return the requested modal mode when valid, else the default.""" mode = str(value or _DEFAULT_MODAL_MODE).strip().lower() if mode in _VALID_MODAL_MODES: return mode return _DEFAULT_MODAL_MODE +def normalize_modal_mode(value: object | None) -> str: + """Return a normalized modal execution mode.""" + mode = coerce_modal_mode(value) + if mode == "managed" and not managed_nous_tools_enabled(): + return "direct" + return mode + + def has_direct_modal_credentials() -> bool: """Return True when direct Modal credentials/config are available.""" return bool( diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index ae05358b8..4a1f7ed51 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -33,8 +33,9 @@ from pathlib import Path from typing import Optional, Dict, Any from urllib.parse 
import urljoin +from utils import is_truthy_value from tools.managed_tool_gateway import resolve_managed_tool_gateway -from tools.tool_backend_helpers import resolve_openai_audio_api_key +from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key from hermes_constants import get_hermes_home @@ -122,11 +123,7 @@ def is_stt_enabled(stt_config: Optional[dict] = None) -> bool: if stt_config is None: stt_config = _load_stt_config() enabled = stt_config.get("enabled", True) - if isinstance(enabled, str): - return enabled.strip().lower() in ("true", "1", "yes", "on") - if enabled is None: - return True - return bool(enabled) + return is_truthy_value(enabled, default=True) def _has_openai_audio_backend() -> bool: @@ -586,9 +583,10 @@ def _resolve_openai_audio_client_config() -> tuple[str, str]: managed_gateway = resolve_managed_tool_gateway("openai-audio") if managed_gateway is None: - raise ValueError( - "Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set, and the managed OpenAI audio gateway is unavailable" - ) + message = "Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set" + if managed_nous_tools_enabled(): + message += ", and the managed OpenAI audio gateway is unavailable" + raise ValueError(message) return managed_gateway.nous_user_token, urljoin( f"{managed_gateway.gateway_origin.rstrip('/')}/", "v1" diff --git a/tools/tts_tool.py b/tools/tts_tool.py index c71cdb1e8..9210c3318 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -40,7 +40,7 @@ from urllib.parse import urljoin logger = logging.getLogger(__name__) from tools.managed_tool_gateway import resolve_managed_tool_gateway -from tools.tool_backend_helpers import resolve_openai_audio_api_key +from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key # --------------------------------------------------------------------------- # Lazy imports -- providers are imported only when actually used to avoid @@ -565,9 +565,10 @@ 
def _resolve_openai_audio_client_config() -> tuple[str, str]: managed_gateway = resolve_managed_tool_gateway("openai-audio") if managed_gateway is None: - raise ValueError( - "Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set, and the managed OpenAI audio gateway is unavailable" - ) + message = "Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set" + if managed_nous_tools_enabled(): + message += ", and the managed OpenAI audio gateway is unavailable" + raise ValueError(message) return managed_gateway.nous_user_token, urljoin( f"{managed_gateway.gateway_origin.rstrip('/')}/", "v1" diff --git a/tools/web_tools.py b/tools/web_tools.py index 1ebf36d77..7e9e84483 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -54,6 +54,7 @@ from tools.managed_tool_gateway import ( read_nous_access_token as _read_nous_access_token, resolve_managed_tool_gateway, ) +from tools.tool_backend_helpers import managed_nous_tools_enabled from tools.url_safety import is_safe_url from tools.website_policy import check_website_access @@ -152,12 +153,46 @@ def _has_direct_firecrawl_config() -> bool: def _raise_web_backend_configuration_error() -> None: """Raise a clear error for unsupported web backend configuration.""" - raise ValueError( + message = ( "Web tools are not configured. " - "Set FIRECRAWL_API_KEY for cloud Firecrawl, set FIRECRAWL_API_URL for a self-hosted Firecrawl instance, " - "or, if you are a Nous Subscriber, login to Nous (`hermes model`) and provide " - "FIRECRAWL_GATEWAY_URL or TOOL_GATEWAY_DOMAIN." + "Set FIRECRAWL_API_KEY for cloud Firecrawl or set FIRECRAWL_API_URL for a self-hosted Firecrawl instance." ) + if managed_nous_tools_enabled(): + message += ( + " If you have the hidden Nous-managed tools flag enabled, you can also login to Nous " + "(`hermes model`) and provide FIRECRAWL_GATEWAY_URL or TOOL_GATEWAY_DOMAIN." 
+ ) + raise ValueError(message) + + +def _firecrawl_backend_help_suffix() -> str: + """Return optional managed-gateway guidance for Firecrawl help text.""" + if not managed_nous_tools_enabled(): + return "" + return ( + ", or, if you have the hidden Nous-managed tools flag enabled, login to Nous and use " + "FIRECRAWL_GATEWAY_URL or TOOL_GATEWAY_DOMAIN" + ) + + +def _web_requires_env() -> list[str]: + """Return tool metadata env vars for the currently enabled web backends.""" + requires = [ + "PARALLEL_API_KEY", + "TAVILY_API_KEY", + "FIRECRAWL_API_KEY", + "FIRECRAWL_API_URL", + ] + if managed_nous_tools_enabled(): + requires.extend( + [ + "FIRECRAWL_GATEWAY_URL", + "TOOL_GATEWAY_DOMAIN", + "TOOL_GATEWAY_SCHEME", + "TOOL_GATEWAY_USER_TOKEN", + ] + ) + return requires def _get_firecrawl_client(): @@ -1410,10 +1445,8 @@ async def web_crawl_tool( # web_crawl requires Firecrawl or the Firecrawl tool-gateway — Parallel has no crawl API if not check_firecrawl_api_key(): return json.dumps({ - "error": "web_crawl requires Firecrawl. Set FIRECRAWL_API_KEY, FIRECRAWL_API_URL, " - "or, if you are a Nous Subscriber, login to Nous and use FIRECRAWL_GATEWAY_URL, " - "or TOOL_GATEWAY_DOMAIN, " - "or use web_search + web_extract instead.", + "error": "web_crawl requires Firecrawl. 
Set FIRECRAWL_API_KEY, FIRECRAWL_API_URL" + f"{_firecrawl_backend_help_suffix()}, or use web_search + web_extract instead.", "success": False, }, ensure_ascii=False) @@ -1754,9 +1787,8 @@ if __name__ == "__main__": else: print("❌ No web search backend configured") print( - "Set PARALLEL_API_KEY, TAVILY_API_KEY, FIRECRAWL_API_KEY, FIRECRAWL_API_URL, " - "or, if you are a Nous Subscriber, login to Nous and use " - "FIRECRAWL_GATEWAY_URL or TOOL_GATEWAY_DOMAIN" + "Set PARALLEL_API_KEY, TAVILY_API_KEY, FIRECRAWL_API_KEY, FIRECRAWL_API_URL" + f"{_firecrawl_backend_help_suffix()}" ) if not nous_available: @@ -1867,16 +1899,7 @@ registry.register( schema=WEB_SEARCH_SCHEMA, handler=lambda args, **kw: web_search_tool(args.get("query", ""), limit=5), check_fn=check_web_api_key, - requires_env=[ - "PARALLEL_API_KEY", - "TAVILY_API_KEY", - "FIRECRAWL_GATEWAY_URL", - "TOOL_GATEWAY_DOMAIN", - "TOOL_GATEWAY_SCHEME", - "TOOL_GATEWAY_USER_TOKEN", - "FIRECRAWL_API_KEY", - "FIRECRAWL_API_URL", - ], + requires_env=_web_requires_env(), emoji="🔍", ) registry.register( @@ -1886,16 +1909,7 @@ registry.register( handler=lambda args, **kw: web_extract_tool( args.get("urls", [])[:5] if isinstance(args.get("urls"), list) else [], "markdown"), check_fn=check_web_api_key, - requires_env=[ - "PARALLEL_API_KEY", - "TAVILY_API_KEY", - "FIRECRAWL_GATEWAY_URL", - "TOOL_GATEWAY_DOMAIN", - "TOOL_GATEWAY_SCHEME", - "TOOL_GATEWAY_USER_TOKEN", - "FIRECRAWL_API_KEY", - "FIRECRAWL_API_URL", - ], + requires_env=_web_requires_env(), is_async=True, emoji="📄", ) diff --git a/utils.py b/utils.py index 66d552909..9a2105d54 100644 --- a/utils.py +++ b/utils.py @@ -9,6 +9,25 @@ from typing import Any, Union import yaml +TRUTHY_STRINGS = frozenset({"1", "true", "yes", "on"}) + + +def is_truthy_value(value: Any, default: bool = False) -> bool: + """Coerce bool-ish values using the project's shared truthy string set.""" + if value is None: + return default + if isinstance(value, bool): + return value + if 
isinstance(value, str): + return value.strip().lower() in TRUTHY_STRINGS + return bool(value) + + +def env_var_enabled(name: str, default: str = "") -> bool: + """Return True when an environment variable is set to a truthy value.""" + return is_truthy_value(os.getenv(name, default), default=False) + + def atomic_json_write( path: Union[str, Path], data: Any, diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index d7d689580..d228c3927 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -78,9 +78,6 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `FIRECRAWL_API_KEY` | Web scraping ([firecrawl.dev](https://firecrawl.dev/)) | | `FIRECRAWL_API_URL` | Custom Firecrawl API endpoint for self-hosted instances (optional) | | `TAVILY_API_KEY` | Tavily API key for AI-native web search, extract, and crawl ([app.tavily.com](https://app.tavily.com/home)) | -| `TOOL_GATEWAY_DOMAIN` | Shared tool-gateway domain suffix for Nous Subscribers only, used to derive vendor hosts, for example `nousresearch.com` -> `firecrawl-gateway.nousresearch.com` | -| `TOOL_GATEWAY_SCHEME` | Shared tool-gateway URL scheme for Nous Subscribers only, used to derive vendor hosts, `https` by default and `http` for local gateway testing | -| `TOOL_GATEWAY_USER_TOKEN` | Explicit Nous Subscriber access token for tool-gateway calls (optional; otherwise Hermes reads `~/.hermes/auth.json`) | | `BROWSERBASE_API_KEY` | Browser automation ([browserbase.com](https://browserbase.com/)) | | `BROWSERBASE_PROJECT_ID` | Browserbase project ID | | `BROWSER_USE_API_KEY` | Browser Use cloud browser API key ([browser-use.com](https://browser-use.com/)) | diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 1d3085798..4aa5afb0b 100644 --- a/website/docs/user-guide/configuration.md +++ 
b/website/docs/user-guide/configuration.md @@ -725,9 +725,9 @@ If terminal commands fail immediately or the terminal tool is reported as disabl - If either value is missing, Hermes will log a clear error and refuse to use the SSH backend. - **Modal backend** - - Hermes can use either direct Modal credentials (`MODAL_TOKEN_ID` plus `MODAL_TOKEN_SECRET`, or `~/.modal.toml`) or a configured managed tool gateway with a Nous user token. + - You need either a `MODAL_TOKEN_ID` environment variable or a `~/.modal.toml` config file. - Modal persistence is resumable filesystem state, not durable process continuity. If you need something to stay continuously up, use a deployment-oriented tool instead of the terminal sandbox. - - If neither direct credentials nor a managed gateway is present, Hermes will report that the Modal backend is not available. + - If neither is present, the backend check fails and Hermes will report that the Modal backend is not available. When in doubt, set `terminal.backend` back to `local` and verify that commands run there first. diff --git a/website/docs/user-guide/features/tools.md b/website/docs/user-guide/features/tools.md index bbea0a262..981d2caf2 100644 --- a/website/docs/user-guide/features/tools.md +++ b/website/docs/user-guide/features/tools.md @@ -109,13 +109,6 @@ modal setup hermes config set terminal.backend modal ``` -Hermes can use Modal in two modes: - -- **Direct Modal**: Hermes talks to your Modal account directly. -- **Managed Modal**: Hermes talks to a gateway that owns the vendor credentials. - -In both cases, Modal is best treated as a task sandbox, not a deployment target. Persistent mode preserves filesystem state so later turns can resume your work, but Hermes may still clean up or recreate the live sandbox. Long-running servers and background processes are not guaranteed to survive idle cleanup, session teardown, or Hermes exit. 
- ### Container Resources Configure CPU, memory, disk, and persistence for all container backends: -- 2.43.0 From e08778fa1ee377f7128641f8cc03b0de046bd8da Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 08:29:38 -0700 Subject: [PATCH 003/385] chore: release v0.6.0 (2026.3.30) (#3985) --- RELEASE_v0.6.0.md | 249 +++++++++++++++++++++++++++++++++++++++++ hermes_cli/__init__.py | 4 +- pyproject.toml | 2 +- 3 files changed, 252 insertions(+), 3 deletions(-) create mode 100644 RELEASE_v0.6.0.md diff --git a/RELEASE_v0.6.0.md b/RELEASE_v0.6.0.md new file mode 100644 index 000000000..5bef7c6c5 --- /dev/null +++ b/RELEASE_v0.6.0.md @@ -0,0 +1,249 @@ +# Hermes Agent v0.6.0 (v2026.3.30) + +**Release Date:** March 30, 2026 + +> The multi-instance release — Profiles for running isolated agent instances, MCP server mode, Docker container, fallback provider chains, two new messaging platforms (Feishu/Lark and WeCom), Telegram webhook mode, Slack multi-workspace OAuth, 95 PRs and 16 resolved issues in 2 days. + +--- + +## ✨ Highlights + +- **Profiles — Multi-Instance Hermes** — Run multiple isolated Hermes instances from the same installation. Each profile gets its own config, memory, sessions, skills, and gateway service. Create with `hermes profile create`, switch with `hermes -p `, export/import for sharing. Full token-lock isolation prevents two profiles from using the same bot credential. ([#3681](https://github.com/NousResearch/hermes-agent/pull/3681)) + +- **MCP Server Mode** — Expose Hermes conversations and sessions to any MCP-compatible client (Claude Desktop, Cursor, VS Code, etc.) via `hermes mcp serve`. Browse conversations, read messages, search across sessions, and manage attachments — all through the Model Context Protocol. Supports both stdio and Streamable HTTP transports. 
([#3795](https://github.com/NousResearch/hermes-agent/pull/3795)) + +- **Docker Container** — Official Dockerfile for running Hermes Agent in a container. Supports both CLI and gateway modes with volume-mounted config. ([#3668](https://github.com/NousResearch/hermes-agent/pull/3668), closes [#850](https://github.com/NousResearch/hermes-agent/issues/850)) + +- **Ordered Fallback Provider Chain** — Configure multiple inference providers with automatic failover. When your primary provider returns errors or is unreachable, Hermes automatically tries the next provider in the chain. Configure via `fallback_providers` in config.yaml. ([#3813](https://github.com/NousResearch/hermes-agent/pull/3813), closes [#1734](https://github.com/NousResearch/hermes-agent/issues/1734)) + +- **Feishu/Lark Platform Support** — Full gateway adapter for Feishu (飞书) and Lark with event subscriptions, message cards, group chat, image/file attachments, and interactive card callbacks. ([#3799](https://github.com/NousResearch/hermes-agent/pull/3799), [#3817](https://github.com/NousResearch/hermes-agent/pull/3817), closes [#1788](https://github.com/NousResearch/hermes-agent/issues/1788)) + +- **WeCom (Enterprise WeChat) Platform Support** — New gateway adapter for WeCom (企业微信) with text/image/voice messages, group chats, and callback verification. ([#3847](https://github.com/NousResearch/hermes-agent/pull/3847)) + +- **Slack Multi-Workspace OAuth** — Connect a single Hermes gateway to multiple Slack workspaces via OAuth token file. Each workspace gets its own bot token, resolved dynamically per incoming event. ([#3903](https://github.com/NousResearch/hermes-agent/pull/3903)) + +- **Telegram Webhook Mode & Group Controls** — Run the Telegram adapter in webhook mode as an alternative to polling — faster response times and better for production deployments behind a reverse proxy. New group mention gating controls when the bot responds: always, only when @mentioned, or via regex triggers. 
([#3880](https://github.com/NousResearch/hermes-agent/pull/3880), [#3870](https://github.com/NousResearch/hermes-agent/pull/3870)) + +- **Exa Search Backend** — Add Exa as an alternative web search and content extraction backend alongside Firecrawl and DuckDuckGo. Set `EXA_API_KEY` and configure as preferred backend. ([#3648](https://github.com/NousResearch/hermes-agent/pull/3648)) + +- **Skills & Credentials on Remote Backends** — Mount skill directories and credential files into Modal and Docker containers, so remote terminal sessions have access to the same skills and secrets as local execution. ([#3890](https://github.com/NousResearch/hermes-agent/pull/3890), [#3671](https://github.com/NousResearch/hermes-agent/pull/3671), closes [#3665](https://github.com/NousResearch/hermes-agent/issues/3665), [#3433](https://github.com/NousResearch/hermes-agent/issues/3433)) + +--- + +## 🏗️ Core Agent & Architecture + +### Provider & Model Support +- **Ordered fallback provider chain** — automatic failover across multiple configured providers ([#3813](https://github.com/NousResearch/hermes-agent/pull/3813)) +- **Fix api_mode on provider switch** — switching providers via `hermes model` now correctly clears stale `api_mode` instead of hardcoding `chat_completions`, fixing 404s for providers with Anthropic-compatible endpoints ([#3726](https://github.com/NousResearch/hermes-agent/pull/3726), [#3857](https://github.com/NousResearch/hermes-agent/pull/3857), closes [#3685](https://github.com/NousResearch/hermes-agent/issues/3685)) +- **Stop silent OpenRouter fallback** — when no provider is configured, Hermes now raises a clear error instead of silently routing to OpenRouter ([#3807](https://github.com/NousResearch/hermes-agent/pull/3807), [#3862](https://github.com/NousResearch/hermes-agent/pull/3862)) +- **Gemini 3.1 preview models** — added to OpenRouter and Nous Portal catalogs ([#3803](https://github.com/NousResearch/hermes-agent/pull/3803), closes 
[#3753](https://github.com/NousResearch/hermes-agent/issues/3753)) +- **Gemini direct API context length** — full context length resolution for direct Google AI endpoints ([#3876](https://github.com/NousResearch/hermes-agent/pull/3876)) +- **gpt-5.4-mini** added to Codex fallback catalog ([#3855](https://github.com/NousResearch/hermes-agent/pull/3855)) +- **Curated model lists preferred** over live API probe when the probe returns fewer models ([#3856](https://github.com/NousResearch/hermes-agent/pull/3856), [#3867](https://github.com/NousResearch/hermes-agent/pull/3867)) +- **User-friendly 429 rate limit messages** with Retry-After countdown ([#3809](https://github.com/NousResearch/hermes-agent/pull/3809)) +- **Auxiliary client placeholder key** for local servers without auth requirements ([#3842](https://github.com/NousResearch/hermes-agent/pull/3842)) +- **INFO-level logging** for auxiliary provider resolution ([#3866](https://github.com/NousResearch/hermes-agent/pull/3866)) + +### Agent Loop & Conversation +- **Subagent status reporting** — reports `completed` status when summary exists instead of generic failure ([#3829](https://github.com/NousResearch/hermes-agent/pull/3829)) +- **Session log file updated during compression** — prevents stale file references after context compression ([#3835](https://github.com/NousResearch/hermes-agent/pull/3835)) +- **Omit empty tools param** — sends no `tools` parameter when empty instead of `None`, fixing compatibility with strict providers ([#3820](https://github.com/NousResearch/hermes-agent/pull/3820)) + +### Profiles & Multi-Instance +- **Profiles system** — `hermes profile create/list/switch/delete/export/import/rename`. Each profile gets isolated HERMES_HOME, gateway service, CLI wrapper. Token locks prevent credential collisions. Tab completion for profile names. 
([#3681](https://github.com/NousResearch/hermes-agent/pull/3681)) +- **Profile-aware display paths** — all user-facing `~/.hermes` paths replaced with `display_hermes_home()` to show the correct profile directory ([#3623](https://github.com/NousResearch/hermes-agent/pull/3623)) +- **Lazy display_hermes_home imports** — prevents `ImportError` during `hermes update` when modules cache stale bytecode ([#3776](https://github.com/NousResearch/hermes-agent/pull/3776)) +- **HERMES_HOME for protected paths** — `.env` write-deny path now respects HERMES_HOME instead of hardcoded `~/.hermes` ([#3840](https://github.com/NousResearch/hermes-agent/pull/3840)) + +--- + +## 📱 Messaging Platforms (Gateway) + +### New Platforms +- **Feishu/Lark** — Full adapter with event subscriptions, message cards, group chat, image/file attachments, interactive card callbacks ([#3799](https://github.com/NousResearch/hermes-agent/pull/3799), [#3817](https://github.com/NousResearch/hermes-agent/pull/3817)) +- **WeCom (Enterprise WeChat)** — Text/image/voice messages, group chats, callback verification ([#3847](https://github.com/NousResearch/hermes-agent/pull/3847)) + +### Telegram +- **Webhook mode** — run as webhook endpoint instead of polling for production deployments ([#3880](https://github.com/NousResearch/hermes-agent/pull/3880)) +- **Group mention gating & regex triggers** — configurable bot response behavior in groups: always, @mention-only, or regex-matched ([#3870](https://github.com/NousResearch/hermes-agent/pull/3870)) +- **Gracefully handle deleted reply targets** — no more crashes when the message being replied to was deleted ([#3858](https://github.com/NousResearch/hermes-agent/pull/3858), closes [#3229](https://github.com/NousResearch/hermes-agent/issues/3229)) + +### Discord +- **Message processing reactions** — adds a reaction emoji while processing and removes it when done, giving visual feedback in channels ([#3871](https://github.com/NousResearch/hermes-agent/pull/3871)) +- 
**DISCORD_IGNORE_NO_MENTION** — skip messages that @mention other users/bots but not Hermes ([#3640](https://github.com/NousResearch/hermes-agent/pull/3640)) +- **Clean up deferred "thinking..."** — properly removes the "thinking..." indicator after slash commands complete ([#3674](https://github.com/NousResearch/hermes-agent/pull/3674), closes [#3595](https://github.com/NousResearch/hermes-agent/issues/3595)) + +### Slack +- **Multi-workspace OAuth** — connect to multiple Slack workspaces from a single gateway via OAuth token file ([#3903](https://github.com/NousResearch/hermes-agent/pull/3903)) + +### WhatsApp +- **Persistent aiohttp session** — reuse HTTP sessions across requests instead of creating new ones per message ([#3818](https://github.com/NousResearch/hermes-agent/pull/3818)) +- **LID↔phone alias resolution** — correctly match Linked ID and phone number formats in allowlists ([#3830](https://github.com/NousResearch/hermes-agent/pull/3830)) +- **Skip reply prefix in bot mode** — cleaner message formatting when running as a WhatsApp bot ([#3931](https://github.com/NousResearch/hermes-agent/pull/3931)) + +### Matrix +- **Native voice messages via MSC3245** — send voice messages as proper Matrix voice events instead of file attachments ([#3877](https://github.com/NousResearch/hermes-agent/pull/3877)) + +### Mattermost +- **Configurable mention behavior** — respond to messages without requiring @mention ([#3664](https://github.com/NousResearch/hermes-agent/pull/3664)) + +### Signal +- **URL-encode phone numbers** and correct attachment RPC parameter — fixes delivery failures with certain phone number formats ([#3670](https://github.com/NousResearch/hermes-agent/pull/3670)) — @kshitijk4poor + +### Email +- **Close SMTP/IMAP connections on failure** — prevents connection leaks during error scenarios ([#3804](https://github.com/NousResearch/hermes-agent/pull/3804)) + +### Gateway Core +- **Atomic config writes** — use atomic file writes for config.yaml to 
prevent data loss during crashes ([#3800](https://github.com/NousResearch/hermes-agent/pull/3800)) +- **Home channel env overrides** — apply environment variable overrides for home channels consistently ([#3796](https://github.com/NousResearch/hermes-agent/pull/3796), [#3808](https://github.com/NousResearch/hermes-agent/pull/3808)) +- **Replace print() with logger** — BasePlatformAdapter now uses proper logging instead of print statements ([#3669](https://github.com/NousResearch/hermes-agent/pull/3669)) +- **Cron delivery labels** — resolve human-friendly delivery labels via channel directory ([#3860](https://github.com/NousResearch/hermes-agent/pull/3860), closes [#1945](https://github.com/NousResearch/hermes-agent/issues/1945)) +- **Cron [SILENT] tightening** — prevent agents from prefixing reports with [SILENT] to suppress delivery ([#3901](https://github.com/NousResearch/hermes-agent/pull/3901)) +- **Background task media delivery** and vision download timeout fixes ([#3919](https://github.com/NousResearch/hermes-agent/pull/3919)) +- **Boot-md hook** — example built-in hook to run a BOOT.md file on gateway startup ([#3733](https://github.com/NousResearch/hermes-agent/pull/3733)) + +--- + +## 🖥️ CLI & User Experience + +### Interactive CLI +- **Configurable tool preview length** — show full file paths by default instead of truncating at 40 chars ([#3841](https://github.com/NousResearch/hermes-agent/pull/3841)) +- **Tool token context display** — `hermes tools` checklist now shows estimated token cost per toolset ([#3805](https://github.com/NousResearch/hermes-agent/pull/3805)) +- **/bg spinner TUI fix** — route background task spinner through the TUI widget to prevent status bar collision ([#3643](https://github.com/NousResearch/hermes-agent/pull/3643)) +- **Prevent status bar wrapping** into duplicate rows ([#3883](https://github.com/NousResearch/hermes-agent/pull/3883)) — @kshitijk4poor +- **Handle closed stdout ValueError** in safe print paths — fixes crashes 
when stdout is closed during gateway thread shutdown ([#3843](https://github.com/NousResearch/hermes-agent/pull/3843), closes [#3534](https://github.com/NousResearch/hermes-agent/issues/3534)) +- **Remove input() from /tools disable** — eliminates freeze in terminal when disabling tools ([#3918](https://github.com/NousResearch/hermes-agent/pull/3918)) +- **TTY guard for interactive CLI commands** — prevent CPU spin when launched without a terminal ([#3933](https://github.com/NousResearch/hermes-agent/pull/3933)) +- **Argparse entrypoint** — use argparse in the top-level launcher for cleaner error handling ([#3874](https://github.com/NousResearch/hermes-agent/pull/3874)) +- **Lazy-initialized tools show yellow** in banner instead of red, reducing false alarm about "missing" tools ([#3822](https://github.com/NousResearch/hermes-agent/pull/3822)) +- **Honcho tools shown in banner** when configured ([#3810](https://github.com/NousResearch/hermes-agent/pull/3810)) + +### Setup & Configuration +- **Auto-install matrix-nio** during `hermes setup` when Matrix is selected ([#3802](https://github.com/NousResearch/hermes-agent/pull/3802), [#3873](https://github.com/NousResearch/hermes-agent/pull/3873)) +- **Session export stdout support** — export sessions to stdout with `-` for piping ([#3641](https://github.com/NousResearch/hermes-agent/pull/3641), closes [#3609](https://github.com/NousResearch/hermes-agent/issues/3609)) +- **Configurable approval timeouts** — set how long dangerous command approval prompts wait before auto-denying ([#3886](https://github.com/NousResearch/hermes-agent/pull/3886), closes [#3765](https://github.com/NousResearch/hermes-agent/issues/3765)) +- **Clear __pycache__ during update** — prevents stale bytecode ImportError after `hermes update` ([#3819](https://github.com/NousResearch/hermes-agent/pull/3819)) + +--- + +## 🔧 Tool System + +### MCP +- **MCP Server Mode** — `hermes mcp serve` exposes conversations, sessions, and attachments to MCP clients 
via stdio or Streamable HTTP ([#3795](https://github.com/NousResearch/hermes-agent/pull/3795)) +- **Dynamic tool discovery** — respond to `notifications/tools/list_changed` events to pick up new tools from MCP servers without reconnecting ([#3812](https://github.com/NousResearch/hermes-agent/pull/3812)) +- **Non-deprecated HTTP transport** — switched from `sse_client` to `streamable_http_client` ([#3646](https://github.com/NousResearch/hermes-agent/pull/3646)) + +### Web Tools +- **Exa search backend** — alternative to Firecrawl and DuckDuckGo for web search and extraction ([#3648](https://github.com/NousResearch/hermes-agent/pull/3648)) + +### Browser +- **Guard against None LLM responses** in browser snapshot and vision tools ([#3642](https://github.com/NousResearch/hermes-agent/pull/3642)) + +### Terminal & Remote Backends +- **Mount skill directories** into Modal and Docker containers ([#3890](https://github.com/NousResearch/hermes-agent/pull/3890)) +- **Mount credential files** into remote backends with mtime+size caching ([#3671](https://github.com/NousResearch/hermes-agent/pull/3671)) +- **Preserve partial output** when commands time out instead of losing everything ([#3868](https://github.com/NousResearch/hermes-agent/pull/3868)) +- **Stop marking persisted env vars as missing** on remote backends ([#3650](https://github.com/NousResearch/hermes-agent/pull/3650)) + +### Audio +- **.aac format support** in transcription tool ([#3865](https://github.com/NousResearch/hermes-agent/pull/3865), closes [#1963](https://github.com/NousResearch/hermes-agent/issues/1963)) +- **Audio download retry** — retry logic for `cache_audio_from_url` matching the existing image download pattern ([#3401](https://github.com/NousResearch/hermes-agent/pull/3401)) — @binhnt92 + +### Vision +- **Reject non-image files** and enforce website-only policy for vision analysis ([#3845](https://github.com/NousResearch/hermes-agent/pull/3845)) + +### Tool Schema +- **Ensure name field** always 
present in tool definitions, fixing `KeyError: 'name'` crashes ([#3811](https://github.com/NousResearch/hermes-agent/pull/3811), closes [#3729](https://github.com/NousResearch/hermes-agent/issues/3729)) + +### ACP (Editor Integration) +- **Complete session management surface** for VS Code/Zed/JetBrains clients — proper task lifecycle, cancel support, session persistence ([#3675](https://github.com/NousResearch/hermes-agent/pull/3675)) + +--- + +## 🧩 Skills & Plugins + +### Skills System +- **External skill directories** — configure additional skill directories via `skills.external_dirs` in config.yaml ([#3678](https://github.com/NousResearch/hermes-agent/pull/3678)) +- **Category path traversal blocked** — prevents `../` attacks in skill category names ([#3844](https://github.com/NousResearch/hermes-agent/pull/3844)) +- **parallel-cli moved to optional-skills** — reduces default skill footprint ([#3673](https://github.com/NousResearch/hermes-agent/pull/3673)) — @kshitijk4poor + +### New Skills +- **memento-flashcards** — spaced repetition flashcard system ([#3827](https://github.com/NousResearch/hermes-agent/pull/3827)) +- **songwriting-and-ai-music** — songwriting craft and AI music generation prompts ([#3834](https://github.com/NousResearch/hermes-agent/pull/3834)) +- **SiYuan Note** — integration with SiYuan note-taking app ([#3742](https://github.com/NousResearch/hermes-agent/pull/3742)) +- **Scrapling** — web scraping skill using Scrapling library ([#3742](https://github.com/NousResearch/hermes-agent/pull/3742)) +- **one-three-one-rule** — communication framework skill ([#3797](https://github.com/NousResearch/hermes-agent/pull/3797)) + +### Plugin System +- **Plugin enable/disable commands** — `hermes plugins enable/disable ` for managing plugin state without removing them ([#3747](https://github.com/NousResearch/hermes-agent/pull/3747)) +- **Plugin message injection** — plugins can now inject messages into the conversation stream on behalf of the user via 
`ctx.inject_message()` ([#3778](https://github.com/NousResearch/hermes-agent/pull/3778)) — @winglian +- **Honcho self-hosted support** — allow local Honcho instances without requiring an API key ([#3644](https://github.com/NousResearch/hermes-agent/pull/3644)) + +--- + +## 🔒 Security & Reliability + +### Security Hardening +- **Hardened dangerous command detection** — expanded pattern matching for risky shell commands and added file tool path guards for sensitive locations (`/etc/`, `/boot/`, docker.sock) ([#3872](https://github.com/NousResearch/hermes-agent/pull/3872)) +- **Sensitive path write checks** in approval system — catch writes to system config files through file tools, not just terminal ([#3859](https://github.com/NousResearch/hermes-agent/pull/3859)) +- **Secret redaction expansion** — now covers ElevenLabs, Tavily, and Exa API keys ([#3920](https://github.com/NousResearch/hermes-agent/pull/3920)) +- **Vision file rejection** — reject non-image files passed to vision analysis to prevent information disclosure ([#3845](https://github.com/NousResearch/hermes-agent/pull/3845)) +- **Category path traversal blocking** — prevent directory traversal in skill category names ([#3844](https://github.com/NousResearch/hermes-agent/pull/3844)) + +### Reliability +- **Atomic config.yaml writes** — prevent data loss during gateway crashes ([#3800](https://github.com/NousResearch/hermes-agent/pull/3800)) +- **Clear __pycache__ on update** — prevent stale bytecode from causing ImportError after updates ([#3819](https://github.com/NousResearch/hermes-agent/pull/3819)) +- **Lazy imports for update safety** — prevent ImportError chains during `hermes update` when modules reference new functions ([#3776](https://github.com/NousResearch/hermes-agent/pull/3776)) +- **Restore terminalbench2 from patch corruption** — recovered file damaged by patch tool's secret redaction ([#3801](https://github.com/NousResearch/hermes-agent/pull/3801)) +- **Terminal timeout preserves partial 
output** — no more lost command output on timeout ([#3868](https://github.com/NousResearch/hermes-agent/pull/3868)) + +--- + +## 🐛 Notable Bug Fixes + +- **OpenClaw migration model config overwrite** — migration no longer overwrites model config dict with a string ([#3924](https://github.com/NousResearch/hermes-agent/pull/3924)) — @0xbyt4 +- **OpenClaw migration expanded** — covers full data footprint including sessions, cron, memory ([#3869](https://github.com/NousResearch/hermes-agent/pull/3869)) +- **Telegram deleted reply targets** — gracefully handle replies to deleted messages instead of crashing ([#3858](https://github.com/NousResearch/hermes-agent/pull/3858)) +- **Discord "thinking..." persistence** — properly cleans up deferred response indicators ([#3674](https://github.com/NousResearch/hermes-agent/pull/3674)) +- **WhatsApp LID↔phone aliases** — fixes allowlist matching failures with Linked ID format ([#3830](https://github.com/NousResearch/hermes-agent/pull/3830)) +- **Signal URL-encoded phone numbers** — fixes delivery failures with certain formats ([#3670](https://github.com/NousResearch/hermes-agent/pull/3670)) +- **Email connection leaks** — properly close SMTP/IMAP connections on error ([#3804](https://github.com/NousResearch/hermes-agent/pull/3804)) +- **_safe_print ValueError** — no more gateway thread crashes on closed stdout ([#3843](https://github.com/NousResearch/hermes-agent/pull/3843)) +- **Tool schema KeyError 'name'** — ensure name field always present in tool definitions ([#3811](https://github.com/NousResearch/hermes-agent/pull/3811)) +- **api_mode stale on provider switch** — correctly clear when switching providers via `hermes model` ([#3857](https://github.com/NousResearch/hermes-agent/pull/3857)) + +--- + +## 🧪 Testing + +- Resolved 10+ CI failures across hooks, tiktoken, plugins, and skill tests ([#3848](https://github.com/NousResearch/hermes-agent/pull/3848), [#3721](https://github.com/NousResearch/hermes-agent/pull/3721), 
[#3936](https://github.com/NousResearch/hermes-agent/pull/3936)) + +--- + +## 📚 Documentation + +- **Comprehensive OpenClaw migration guide** — step-by-step guide for migrating from OpenClaw/Claw3D to Hermes Agent ([#3864](https://github.com/NousResearch/hermes-agent/pull/3864), [#3900](https://github.com/NousResearch/hermes-agent/pull/3900)) +- **Credential file passthrough docs** — document how to forward credential files and env vars to remote backends ([#3677](https://github.com/NousResearch/hermes-agent/pull/3677)) +- **DuckDuckGo requirements clarified** — note runtime dependency on duckduckgo-search package ([#3680](https://github.com/NousResearch/hermes-agent/pull/3680)) +- **Skills catalog updated** — added red-teaming category and optional skills listing ([#3745](https://github.com/NousResearch/hermes-agent/pull/3745)) +- **Feishu docs MDX fix** — escape angle-bracket URLs that break Docusaurus build ([#3902](https://github.com/NousResearch/hermes-agent/pull/3902)) + +--- + +## 👥 Contributors + +### Core +- **@teknium1** — 90 PRs across all subsystems + +### Community Contributors +- **@kshitijk4poor** — 3 PRs: Signal phone number fix ([#3670](https://github.com/NousResearch/hermes-agent/pull/3670)), parallel-cli to optional-skills ([#3673](https://github.com/NousResearch/hermes-agent/pull/3673)), status bar wrapping fix ([#3883](https://github.com/NousResearch/hermes-agent/pull/3883)) +- **@winglian** — 1 PR: Plugin message injection interface ([#3778](https://github.com/NousResearch/hermes-agent/pull/3778)) +- **@binhnt92** — 1 PR: Audio download retry logic ([#3401](https://github.com/NousResearch/hermes-agent/pull/3401)) +- **@0xbyt4** — 1 PR: OpenClaw migration model config fix ([#3924](https://github.com/NousResearch/hermes-agent/pull/3924)) + +### Issues Resolved from Community +@Material-Scientist ([#850](https://github.com/NousResearch/hermes-agent/issues/850)), @hanxu98121 ([#1734](https://github.com/NousResearch/hermes-agent/issues/1734)), 
@penwyp ([#1788](https://github.com/NousResearch/hermes-agent/issues/1788)), @dan-and ([#1945](https://github.com/NousResearch/hermes-agent/issues/1945)), @AdrianScott ([#1963](https://github.com/NousResearch/hermes-agent/issues/1963)), @clawdbot47 ([#3229](https://github.com/NousResearch/hermes-agent/issues/3229)), @alanfwilliams ([#3404](https://github.com/NousResearch/hermes-agent/issues/3404)), @kentimsit ([#3433](https://github.com/NousResearch/hermes-agent/issues/3433)), @hayka-pacha ([#3534](https://github.com/NousResearch/hermes-agent/issues/3534)), @primmer ([#3595](https://github.com/NousResearch/hermes-agent/issues/3595)), @dagelf ([#3609](https://github.com/NousResearch/hermes-agent/issues/3609)), @HenkDz ([#3685](https://github.com/NousResearch/hermes-agent/issues/3685)), @tmdgusya ([#3729](https://github.com/NousResearch/hermes-agent/issues/3729)), @TypQxQ ([#3753](https://github.com/NousResearch/hermes-agent/issues/3753)), @acsezen ([#3765](https://github.com/NousResearch/hermes-agent/issues/3765)) + +--- + +**Full Changelog**: [v2026.3.28...v2026.3.30](https://github.com/NousResearch/hermes-agent/compare/v2026.3.28...v2026.3.30) diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py index 797c7e8d6..5f4b1b9cf 100644 --- a/hermes_cli/__init__.py +++ b/hermes_cli/__init__.py @@ -11,5 +11,5 @@ Provides subcommands for: - hermes cron - Manage cron jobs """ -__version__ = "0.5.0" -__release_date__ = "2026.3.28" +__version__ = "0.6.0" +__release_date__ = "2026.3.30" diff --git a/pyproject.toml b/pyproject.toml index 38974e328..c3154d1ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "hermes-agent" -version = "0.5.0" +version = "0.6.0" description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere" readme = "README.md" requires-python = ">=3.11" -- 2.43.0 From 37825189dddcff5686ff5f3dab4025c7313e72a0 Mon Sep 17 00:00:00 
2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 08:37:19 -0700 Subject: [PATCH 004/385] fix(skills): validate hub bundle paths before install (#3986) Co-authored-by: Gutslabs --- hermes_cli/skills_hub.py | 19 ++++++- tests/tools/test_skills_hub.py | 79 +++++++++++++++++++++++++++ tools/skills_hub.py | 98 ++++++++++++++++++++++++++++------ 3 files changed, 178 insertions(+), 18 deletions(-) diff --git a/hermes_cli/skills_hub.py b/hermes_cli/skills_hub.py index 359e8b912..370b69ab0 100644 --- a/hermes_cli/skills_hub.py +++ b/hermes_cli/skills_hub.py @@ -354,7 +354,14 @@ def do_install(identifier: str, category: str = "", force: bool = False, extra_metadata.update(getattr(bundle, "metadata", {}) or {}) # Quarantine the bundle - q_path = quarantine_bundle(bundle) + try: + q_path = quarantine_bundle(bundle) + except ValueError as exc: + c.print(f"[bold red]Installation blocked:[/] {exc}\n") + from tools.skills_hub import append_audit_log + append_audit_log("BLOCKED", bundle.name, bundle.source, + bundle.trust_level, "invalid_path", str(exc)) + return c.print(f"[dim]Quarantined to {q_path.relative_to(q_path.parent.parent.parent)}[/]") # Scan @@ -414,7 +421,15 @@ def do_install(identifier: str, category: str = "", force: bool = False, return # Install - install_dir = install_from_quarantine(q_path, bundle.name, category, bundle, result) + try: + install_dir = install_from_quarantine(q_path, bundle.name, category, bundle, result) + except ValueError as exc: + c.print(f"[bold red]Installation blocked:[/] {exc}\n") + shutil.rmtree(q_path, ignore_errors=True) + from tools.skills_hub import append_audit_log + append_audit_log("BLOCKED", bundle.name, bundle.source, + bundle.trust_level, "invalid_path", str(exc)) + return from tools.skills_hub import SKILLS_DIR c.print(f"[bold green]Installed:[/] {install_dir.relative_to(SKILLS_DIR)}") c.print(f"[dim]Files: {', '.join(bundle.files.keys())}[/]\n") diff --git a/tests/tools/test_skills_hub.py 
b/tests/tools/test_skills_hub.py index a55a91e00..58e035469 100644 --- a/tests/tools/test_skills_hub.py +++ b/tests/tools/test_skills_hub.py @@ -5,6 +5,7 @@ from pathlib import Path from unittest.mock import patch, MagicMock import httpx +import pytest from tools.skills_hub import ( GitHubAuth, @@ -648,6 +649,29 @@ class TestWellKnownSkillSource: assert bundle.files["SKILL.md"] == "# Code Review\n" assert bundle.files["references/checklist.md"] == "- [ ] security\n" + @patch("tools.skills_hub._write_index_cache") + @patch("tools.skills_hub._read_index_cache", return_value=None) + @patch("tools.skills_hub.httpx.get") + def test_fetch_rejects_unsafe_file_paths_from_well_known_endpoint(self, mock_get, _mock_read_cache, _mock_write_cache): + def fake_get(url, *args, **kwargs): + if url.endswith("/index.json"): + return MagicMock(status_code=200, json=lambda: { + "skills": [{ + "name": "code-review", + "description": "Review code", + "files": ["SKILL.md", "../../../escape.txt"], + }] + }) + if url.endswith("/code-review/SKILL.md"): + return MagicMock(status_code=200, text="# Code Review\n") + raise AssertionError(url) + + mock_get.side_effect = fake_get + + bundle = self._source().fetch("well-known:https://example.com/.well-known/skills/code-review") + + assert bundle is None + class TestCheckForSkillUpdates: def test_bundle_content_hash_matches_installed_content_hash(self, tmp_path): @@ -1143,6 +1167,61 @@ class TestQuarantineBundleBinaryAssets: assert (q_path / "SKILL.md").read_text(encoding="utf-8").startswith("---") assert (q_path / "assets" / "neutts-cli" / "samples" / "jo.wav").read_bytes() == b"RIFF\x00\x01fakewav" + def test_quarantine_bundle_rejects_traversal_file_paths(self, tmp_path): + import tools.skills_hub as hub + + hub_dir = tmp_path / "skills" / ".hub" + with patch.object(hub, "SKILLS_DIR", tmp_path / "skills"), \ + patch.object(hub, "HUB_DIR", hub_dir), \ + patch.object(hub, "LOCK_FILE", hub_dir / "lock.json"), \ + patch.object(hub, "QUARANTINE_DIR", 
hub_dir / "quarantine"), \ + patch.object(hub, "AUDIT_LOG", hub_dir / "audit.log"), \ + patch.object(hub, "TAPS_FILE", hub_dir / "taps.json"), \ + patch.object(hub, "INDEX_CACHE_DIR", hub_dir / "index-cache"): + bundle = SkillBundle( + name="demo", + files={ + "SKILL.md": "---\nname: demo\n---\n", + "../../../escape.txt": "owned", + }, + source="well-known", + identifier="well-known:https://example.com/.well-known/skills/demo", + trust_level="community", + ) + + with pytest.raises(ValueError, match="Unsafe bundle file path"): + quarantine_bundle(bundle) + + assert not (tmp_path / "skills" / "escape.txt").exists() + + def test_quarantine_bundle_rejects_absolute_file_paths(self, tmp_path): + import tools.skills_hub as hub + + hub_dir = tmp_path / "skills" / ".hub" + absolute_target = tmp_path / "outside.txt" + with patch.object(hub, "SKILLS_DIR", tmp_path / "skills"), \ + patch.object(hub, "HUB_DIR", hub_dir), \ + patch.object(hub, "LOCK_FILE", hub_dir / "lock.json"), \ + patch.object(hub, "QUARANTINE_DIR", hub_dir / "quarantine"), \ + patch.object(hub, "AUDIT_LOG", hub_dir / "audit.log"), \ + patch.object(hub, "TAPS_FILE", hub_dir / "taps.json"), \ + patch.object(hub, "INDEX_CACHE_DIR", hub_dir / "index-cache"): + bundle = SkillBundle( + name="demo", + files={ + "SKILL.md": "---\nname: demo\n---\n", + str(absolute_target): "owned", + }, + source="well-known", + identifier="well-known:https://example.com/.well-known/skills/demo", + trust_level="community", + ) + + with pytest.raises(ValueError, match="Unsafe bundle file path"): + quarantine_bundle(bundle) + + assert not absolute_target.exists() + # --------------------------------------------------------------------------- # GitHubSource._download_directory — tree API + fallback (#2940) diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 86f8e47d1..a824c3e3b 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -24,7 +24,7 @@ import time from abc import ABC, abstractmethod from dataclasses import 
dataclass, field from datetime import datetime, timezone -from pathlib import Path +from pathlib import Path, PurePosixPath from hermes_constants import get_hermes_home from typing import Any, Dict, List, Optional, Tuple, Union from urllib.parse import urlparse, urlunparse @@ -85,6 +85,43 @@ class SkillBundle: metadata: Dict[str, Any] = field(default_factory=dict) +def _normalize_bundle_path(path_value: str, *, field_name: str, allow_nested: bool) -> str: + """Normalize and validate bundle-controlled paths before touching disk.""" + if not isinstance(path_value, str): + raise ValueError(f"Unsafe {field_name}: expected a string") + + raw = path_value.strip() + if not raw: + raise ValueError(f"Unsafe {field_name}: empty path") + + normalized = raw.replace("\\", "/") + path = PurePosixPath(normalized) + parts = [part for part in path.parts if part not in ("", ".")] + + if normalized.startswith("/") or path.is_absolute(): + raise ValueError(f"Unsafe {field_name}: {path_value}") + if not parts or any(part == ".." 
for part in parts): + raise ValueError(f"Unsafe {field_name}: {path_value}") + if re.fullmatch(r"[A-Za-z]:", parts[0]): + raise ValueError(f"Unsafe {field_name}: {path_value}") + if not allow_nested and len(parts) != 1: + raise ValueError(f"Unsafe {field_name}: {path_value}") + + return "/".join(parts) + + +def _validate_skill_name(name: str) -> str: + return _normalize_bundle_path(name, field_name="skill name", allow_nested=False) + + +def _validate_category_name(category: str) -> str: + return _normalize_bundle_path(category, field_name="category", allow_nested=False) + + +def _validate_bundle_rel_path(rel_path: str) -> str: + return _normalize_bundle_path(rel_path, field_name="bundle file path", allow_nested=True) + + # --------------------------------------------------------------------------- # GitHub Authentication # --------------------------------------------------------------------------- @@ -701,6 +738,12 @@ class WellKnownSkillSource(SkillSource): if not parsed: return None + try: + skill_name = _validate_skill_name(parsed["skill_name"]) + except ValueError: + logger.warning("Well-known skill identifier contained unsafe skill name: %s", identifier) + return None + entry = self._index_entry(parsed["index_url"], parsed["skill_name"]) if not entry: return None @@ -713,19 +756,28 @@ class WellKnownSkillSource(SkillSource): for rel_path in files: if not isinstance(rel_path, str) or not rel_path: continue - text = self._fetch_text(f"{parsed['skill_url']}/{rel_path}") + try: + safe_rel_path = _validate_bundle_rel_path(rel_path) + except ValueError: + logger.warning( + "Well-known skill %s advertised unsafe file path: %r", + identifier, + rel_path, + ) + return None + text = self._fetch_text(f"{parsed['skill_url']}/{safe_rel_path}") if text is None: return None - downloaded[rel_path] = text + downloaded[safe_rel_path] = text if "SKILL.md" not in downloaded: return None return SkillBundle( - name=parsed["skill_name"], + name=skill_name, files=downloaded, 
source="well-known", - identifier=self._wrap_identifier(parsed["base_url"], parsed["skill_name"]), + identifier=self._wrap_identifier(parsed["base_url"], skill_name), trust_level="community", metadata={ "index_url": parsed["index_url"], @@ -1752,9 +1804,10 @@ class ClawHubSource(SkillSource): for info in zf.infolist(): if info.is_dir(): continue - # Sanitize path — strip leading slashes and .. - name = info.filename.lstrip("/") - if ".." in name or name.startswith("/"): + try: + name = _validate_bundle_rel_path(info.filename) + except ValueError: + logger.debug("Skipping unsafe ZIP member path: %s", info.filename) continue # Only extract text-sized files (skip large binaries) if info.file_size > 500_000: @@ -2423,13 +2476,19 @@ def ensure_hub_dirs() -> None: def quarantine_bundle(bundle: SkillBundle) -> Path: """Write a skill bundle to the quarantine directory for scanning.""" ensure_hub_dirs() - dest = QUARANTINE_DIR / bundle.name + skill_name = _validate_skill_name(bundle.name) + validated_files: List[Tuple[str, Union[str, bytes]]] = [] + for rel_path, file_content in bundle.files.items(): + safe_rel_path = _validate_bundle_rel_path(rel_path) + validated_files.append((safe_rel_path, file_content)) + + dest = QUARANTINE_DIR / skill_name if dest.exists(): shutil.rmtree(dest) dest.mkdir(parents=True) - for rel_path, file_content in bundle.files.items(): - file_dest = dest / rel_path + for rel_path, file_content in validated_files: + file_dest = dest.joinpath(*rel_path.split("/")) file_dest.parent.mkdir(parents=True, exist_ok=True) if isinstance(file_content, bytes): file_dest.write_bytes(file_content) @@ -2447,10 +2506,17 @@ def install_from_quarantine( scan_result: ScanResult, ) -> Path: """Move a scanned skill from quarantine into the skills directory.""" - if category: - install_dir = SKILLS_DIR / category / skill_name + safe_skill_name = _validate_skill_name(skill_name) + safe_category = _validate_category_name(category) if category else "" + quarantine_resolved 
= quarantine_path.resolve() + quarantine_root = QUARANTINE_DIR.resolve() + if not quarantine_resolved.is_relative_to(quarantine_root): + raise ValueError(f"Unsafe quarantine path: {quarantine_path}") + + if safe_category: + install_dir = SKILLS_DIR / safe_category / safe_skill_name else: - install_dir = SKILLS_DIR / skill_name + install_dir = SKILLS_DIR / safe_skill_name if install_dir.exists(): shutil.rmtree(install_dir) @@ -2461,7 +2527,7 @@ def install_from_quarantine( # Record in lock file lock = HubLockFile() lock.record_install( - name=skill_name, + name=safe_skill_name, source=bundle.source, identifier=bundle.identifier, trust_level=bundle.trust_level, @@ -2473,7 +2539,7 @@ def install_from_quarantine( ) append_audit_log( - "INSTALL", skill_name, bundle.source, + "INSTALL", safe_skill_name, bundle.source, bundle.trust_level, scan_result.verdict, content_hash(install_dir), ) -- 2.43.0 From 97d6813f513b28ce6cd7d6919c729702dfb3d5f3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 09:43:56 -0700 Subject: [PATCH 005/385] fix(cache): use deterministic call_id fallbacks instead of random UUIDs (#3991) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the API doesn't provide a call_id for tool calls, the fallback generated a random uuid4 hex. This made every API call's input unique when replayed, preventing OpenAI's prompt cache from matching the prefix across turns. Replaced all four uuid4 fallback sites with a deterministic hash of (function_name, arguments, position_index). The same tool call now always produces the same fallback call_id, preserving cache-friendly input stability. 
Affected code paths: - _chat_messages_to_responses_input() — Codex input reconstruction - _normalize_codex_response() — function_call and custom_tool_call - _build_assistant_message() — assistant message construction --- run_agent.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/run_agent.py b/run_agent.py index 30453c01c..13eba7fe7 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2907,6 +2907,19 @@ class AIAgent: }) return converted or None + @staticmethod + def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str: + """Generate a deterministic call_id from tool call content. + + Used as a fallback when the API doesn't provide a call_id. + Deterministic IDs prevent cache invalidation — random UUIDs would + make every API call's prefix unique, breaking OpenAI's prompt cache. + """ + import hashlib + seed = f"{fn_name}:{arguments}:{index}" + digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12] + return f"call_{digest}" + @staticmethod def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]: """Split a stored tool id into (call_id, response_item_id).""" @@ -3013,7 +3026,8 @@ class AIAgent: ): call_id = f"call_{embedded_response_item_id[len('fc_'):]}" else: - call_id = f"call_{uuid.uuid4().hex[:12]}" + _raw_args = str(fn.get("arguments", "{}")) + call_id = self._deterministic_call_id(fn_name, _raw_args, len(items)) call_id = call_id.strip() arguments = fn.get("arguments", "{}") @@ -3377,7 +3391,7 @@ class AIAgent: embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id if not isinstance(call_id, str) or not call_id.strip(): - call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = self._deterministic_call_id(fn_name, arguments, len(tool_calls)) call_id = call_id.strip() response_item_id = raw_item_id if isinstance(raw_item_id, str) else None 
response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) @@ -3398,7 +3412,7 @@ class AIAgent: embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id if not isinstance(call_id, str) or not call_id.strip(): - call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = self._deterministic_call_id(fn_name, arguments, len(tool_calls)) call_id = call_id.strip() response_item_id = raw_item_id if isinstance(raw_item_id, str) else None response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) @@ -4933,7 +4947,10 @@ class AIAgent: if isinstance(raw_id, str) and raw_id.strip(): call_id = raw_id.strip() else: - call_id = f"call_{uuid.uuid4().hex[:12]}" + _fn = getattr(tool_call, "function", None) + _fn_name = getattr(_fn, "name", "") if _fn else "" + _fn_args = getattr(_fn, "arguments", "{}") if _fn else "{}" + call_id = self._deterministic_call_id(_fn_name, _fn_args, len(tool_calls)) call_id = call_id.strip() response_item_id = getattr(tool_call, "response_item_id", None) -- 2.43.0 From 5ceed021dcd2bb8ecac43cdf8db0c3849dd43aa2 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 10:57:30 -0700 Subject: [PATCH 006/385] feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap (#3934) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap Map active skills to Telegram's slash command menu so users can discover and invoke skills directly. Three changes: 1. Telegram menu now includes active skill commands alongside built-in commands, capped at 100 entries (Telegram Bot API limit). Overflow commands remain callable but hidden from the picker. Logged at startup when cap is hit. 2. 
New /commands [page] gateway command for paginated browsing of all commands + skills. /help now shows first 10 skill commands and points to /commands for the full list. 3. When a user types a slash command that matches a disabled or uninstalled skill, they get actionable guidance: - Disabled: 'Enable it with: hermes skills config' - Optional (not installed): 'Install with: hermes skills install official/<category>/<skill>' Built on ideas from PR #3921 by @kshitijk4poor. * chore: move 21 niche skills to optional-skills Move specialized/niche skills from built-in (skills/) to optional (optional-skills/) to reduce the default skill count. Users can install them with: hermes skills install official/<category>/<skill> Moved skills (21): - mlops: accelerate, chroma, faiss, flash-attention, hermes-atropos-environments, huggingface-tokenizers, instructor, lambda-labs, llava, nemo-curator, pinecone, pytorch-lightning, qdrant, saelens, simpo, slime, tensorrt-llm, torchtitan - research: domain-intel, duckduckgo-search - devops: inference-sh cli Built-in skills: 96 → 75 Optional skills: 22 → 43 * fix: only include repo built-in skills in Telegram menu, not user-installed User-installed skills (from hub or manually added) stay accessible via /skills and by typing the command directly, but don't get registered in the Telegram slash command picker. Only skills whose SKILL.md is under the repo's skills/ directory are included in the menu. This keeps the Telegram menu focused on the curated built-in set while user-installed skills remain discoverable through /skills and /commands. 
--- gateway/platforms/telegram.py | 10 +- gateway/run.py | 114 +++++++++++++++++- hermes_cli/commands.py | 43 +++++++ .../devops}/cli/SKILL.md | 0 .../devops}/cli/references/app-discovery.md | 0 .../devops}/cli/references/authentication.md | 0 .../devops}/cli/references/cli-reference.md | 0 .../devops}/cli/references/running-apps.md | 0 .../mlops}/accelerate/SKILL.md | 0 .../accelerate/references/custom-plugins.md | 0 .../references/megatron-integration.md | 0 .../accelerate/references/performance.md | 0 .../mlops}/chroma/SKILL.md | 0 .../mlops}/chroma/references/integration.md | 0 .../mlops}/faiss/SKILL.md | 0 .../mlops}/faiss/references/index_types.md | 0 .../mlops}/flash-attention/SKILL.md | 0 .../flash-attention/references/benchmarks.md | 0 .../references/transformers-integration.md | 0 .../hermes-atropos-environments/SKILL.md | 0 .../references/agentresult-fields.md | 0 .../references/atropos-base-env.md | 0 .../references/usage-patterns.md | 0 .../mlops}/huggingface-tokenizers/SKILL.md | 0 .../references/algorithms.md | 0 .../references/integration.md | 0 .../references/pipeline.md | 0 .../references/training.md | 0 .../mlops}/instructor/SKILL.md | 0 .../mlops}/instructor/references/examples.md | 0 .../mlops}/instructor/references/providers.md | 0 .../instructor/references/validation.md | 0 .../mlops}/lambda-labs/SKILL.md | 0 .../lambda-labs/references/advanced-usage.md | 0 .../lambda-labs/references/troubleshooting.md | 0 .../mlops}/llava/SKILL.md | 0 .../mlops}/llava/references/training.md | 0 .../mlops}/nemo-curator/SKILL.md | 0 .../nemo-curator/references/deduplication.md | 0 .../nemo-curator/references/filtering.md | 0 .../mlops}/pinecone/SKILL.md | 0 .../mlops}/pinecone/references/deployment.md | 0 .../mlops}/pytorch-lightning/SKILL.md | 0 .../pytorch-lightning/references/callbacks.md | 0 .../references/distributed.md | 0 .../references/hyperparameter-tuning.md | 0 .../mlops}/qdrant/SKILL.md | 0 .../qdrant/references/advanced-usage.md | 0 
.../qdrant/references/troubleshooting.md | 0 .../mlops}/saelens/SKILL.md | 0 .../mlops}/saelens/references/README.md | 0 .../mlops}/saelens/references/api.md | 0 .../mlops}/saelens/references/tutorials.md | 0 .../mlops}/simpo/SKILL.md | 0 .../mlops}/simpo/references/datasets.md | 0 .../simpo/references/hyperparameters.md | 0 .../mlops}/simpo/references/loss-functions.md | 0 .../mlops}/slime/SKILL.md | 0 .../mlops}/slime/references/api-reference.md | 0 .../slime/references/troubleshooting.md | 0 .../mlops}/tensorrt-llm/SKILL.md | 0 .../tensorrt-llm/references/multi-gpu.md | 0 .../tensorrt-llm/references/optimization.md | 0 .../mlops}/tensorrt-llm/references/serving.md | 0 .../mlops}/torchtitan/SKILL.md | 0 .../torchtitan/references/checkpoint.md | 0 .../torchtitan/references/custom-models.md | 0 .../mlops}/torchtitan/references/float8.md | 0 .../mlops}/torchtitan/references/fsdp.md | 0 .../research/domain-intel/SKILL.md | 0 .../domain-intel/scripts/domain_intel.py | 0 .../research/duckduckgo-search/SKILL.md | 0 .../duckduckgo-search/scripts/duckduckgo.sh | 0 73 files changed, 163 insertions(+), 4 deletions(-) rename {skills/inference-sh => optional-skills/devops}/cli/SKILL.md (100%) rename {skills/inference-sh => optional-skills/devops}/cli/references/app-discovery.md (100%) rename {skills/inference-sh => optional-skills/devops}/cli/references/authentication.md (100%) rename {skills/inference-sh => optional-skills/devops}/cli/references/cli-reference.md (100%) rename {skills/inference-sh => optional-skills/devops}/cli/references/running-apps.md (100%) rename {skills/mlops/training => optional-skills/mlops}/accelerate/SKILL.md (100%) rename {skills/mlops/training => optional-skills/mlops}/accelerate/references/custom-plugins.md (100%) rename {skills/mlops/training => optional-skills/mlops}/accelerate/references/megatron-integration.md (100%) rename {skills/mlops/training => optional-skills/mlops}/accelerate/references/performance.md (100%) rename 
{skills/mlops/vector-databases => optional-skills/mlops}/chroma/SKILL.md (100%) rename {skills/mlops/vector-databases => optional-skills/mlops}/chroma/references/integration.md (100%) rename {skills/mlops/vector-databases => optional-skills/mlops}/faiss/SKILL.md (100%) rename {skills/mlops/vector-databases => optional-skills/mlops}/faiss/references/index_types.md (100%) rename {skills/mlops/training => optional-skills/mlops}/flash-attention/SKILL.md (100%) rename {skills/mlops/training => optional-skills/mlops}/flash-attention/references/benchmarks.md (100%) rename {skills/mlops/training => optional-skills/mlops}/flash-attention/references/transformers-integration.md (100%) rename {skills/mlops/training => optional-skills/mlops}/hermes-atropos-environments/SKILL.md (100%) rename {skills/mlops/training => optional-skills/mlops}/hermes-atropos-environments/references/agentresult-fields.md (100%) rename {skills/mlops/training => optional-skills/mlops}/hermes-atropos-environments/references/atropos-base-env.md (100%) rename {skills/mlops/training => optional-skills/mlops}/hermes-atropos-environments/references/usage-patterns.md (100%) rename {skills/mlops/evaluation => optional-skills/mlops}/huggingface-tokenizers/SKILL.md (100%) rename {skills/mlops/evaluation => optional-skills/mlops}/huggingface-tokenizers/references/algorithms.md (100%) rename {skills/mlops/evaluation => optional-skills/mlops}/huggingface-tokenizers/references/integration.md (100%) rename {skills/mlops/evaluation => optional-skills/mlops}/huggingface-tokenizers/references/pipeline.md (100%) rename {skills/mlops/evaluation => optional-skills/mlops}/huggingface-tokenizers/references/training.md (100%) rename {skills/mlops/inference => optional-skills/mlops}/instructor/SKILL.md (100%) rename {skills/mlops/inference => optional-skills/mlops}/instructor/references/examples.md (100%) rename {skills/mlops/inference => optional-skills/mlops}/instructor/references/providers.md (100%) rename 
{skills/mlops/inference => optional-skills/mlops}/instructor/references/validation.md (100%) rename {skills/mlops/cloud => optional-skills/mlops}/lambda-labs/SKILL.md (100%) rename {skills/mlops/cloud => optional-skills/mlops}/lambda-labs/references/advanced-usage.md (100%) rename {skills/mlops/cloud => optional-skills/mlops}/lambda-labs/references/troubleshooting.md (100%) rename {skills/mlops/models => optional-skills/mlops}/llava/SKILL.md (100%) rename {skills/mlops/models => optional-skills/mlops}/llava/references/training.md (100%) rename {skills/mlops/evaluation => optional-skills/mlops}/nemo-curator/SKILL.md (100%) rename {skills/mlops/evaluation => optional-skills/mlops}/nemo-curator/references/deduplication.md (100%) rename {skills/mlops/evaluation => optional-skills/mlops}/nemo-curator/references/filtering.md (100%) rename {skills/mlops/vector-databases => optional-skills/mlops}/pinecone/SKILL.md (100%) rename {skills/mlops/vector-databases => optional-skills/mlops}/pinecone/references/deployment.md (100%) rename {skills/mlops/training => optional-skills/mlops}/pytorch-lightning/SKILL.md (100%) rename {skills/mlops/training => optional-skills/mlops}/pytorch-lightning/references/callbacks.md (100%) rename {skills/mlops/training => optional-skills/mlops}/pytorch-lightning/references/distributed.md (100%) rename {skills/mlops/training => optional-skills/mlops}/pytorch-lightning/references/hyperparameter-tuning.md (100%) rename {skills/mlops/vector-databases => optional-skills/mlops}/qdrant/SKILL.md (100%) rename {skills/mlops/vector-databases => optional-skills/mlops}/qdrant/references/advanced-usage.md (100%) rename {skills/mlops/vector-databases => optional-skills/mlops}/qdrant/references/troubleshooting.md (100%) rename {skills/mlops/evaluation => optional-skills/mlops}/saelens/SKILL.md (100%) rename {skills/mlops/evaluation => optional-skills/mlops}/saelens/references/README.md (100%) rename {skills/mlops/evaluation => 
optional-skills/mlops}/saelens/references/api.md (100%) rename {skills/mlops/evaluation => optional-skills/mlops}/saelens/references/tutorials.md (100%) rename {skills/mlops/training => optional-skills/mlops}/simpo/SKILL.md (100%) rename {skills/mlops/training => optional-skills/mlops}/simpo/references/datasets.md (100%) rename {skills/mlops/training => optional-skills/mlops}/simpo/references/hyperparameters.md (100%) rename {skills/mlops/training => optional-skills/mlops}/simpo/references/loss-functions.md (100%) rename {skills/mlops/training => optional-skills/mlops}/slime/SKILL.md (100%) rename {skills/mlops/training => optional-skills/mlops}/slime/references/api-reference.md (100%) rename {skills/mlops/training => optional-skills/mlops}/slime/references/troubleshooting.md (100%) rename {skills/mlops/inference => optional-skills/mlops}/tensorrt-llm/SKILL.md (100%) rename {skills/mlops/inference => optional-skills/mlops}/tensorrt-llm/references/multi-gpu.md (100%) rename {skills/mlops/inference => optional-skills/mlops}/tensorrt-llm/references/optimization.md (100%) rename {skills/mlops/inference => optional-skills/mlops}/tensorrt-llm/references/serving.md (100%) rename {skills/mlops/training => optional-skills/mlops}/torchtitan/SKILL.md (100%) rename {skills/mlops/training => optional-skills/mlops}/torchtitan/references/checkpoint.md (100%) rename {skills/mlops/training => optional-skills/mlops}/torchtitan/references/custom-models.md (100%) rename {skills/mlops/training => optional-skills/mlops}/torchtitan/references/float8.md (100%) rename {skills/mlops/training => optional-skills/mlops}/torchtitan/references/fsdp.md (100%) rename {skills => optional-skills}/research/domain-intel/SKILL.md (100%) rename {skills => optional-skills}/research/domain-intel/scripts/domain_intel.py (100%) rename {skills => optional-skills}/research/duckduckgo-search/SKILL.md (100%) rename {skills => optional-skills}/research/duckduckgo-search/scripts/duckduckgo.sh (100%) diff --git 
a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index e17d104a6..91223d7b7 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -622,10 +622,16 @@ class TelegramAdapter(BasePlatformAdapter): # gateway command there automatically adds it to the Telegram menu. try: from telegram import BotCommand - from hermes_cli.commands import telegram_bot_commands + from hermes_cli.commands import telegram_menu_commands + menu_commands, hidden_count = telegram_menu_commands(max_commands=100) await self._bot.set_my_commands([ - BotCommand(name, desc) for name, desc in telegram_bot_commands() + BotCommand(name, desc) for name, desc in menu_commands ]) + if hidden_count: + logger.info( + "[%s] Telegram menu: %d commands registered, %d hidden (over 100 limit). Use /commands for full list.", + self.name, len(menu_commands), hidden_count, + ) except Exception as e: logger.warning( "[%s] Could not register Telegram command menu: %s", diff --git a/gateway/run.py b/gateway/run.py index 3b5193042..2bd623b62 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -301,6 +301,50 @@ def _resolve_runtime_agent_kwargs() -> dict: } +def _check_unavailable_skill(command_name: str) -> str | None: + """Check if a command matches a known-but-inactive skill. + + Returns a helpful message if the skill exists but is disabled or only + available as an optional install. Returns None if no match found. 
+ """ + # Normalize: command uses hyphens, skill names may use hyphens or underscores + normalized = command_name.lower().replace("_", "-") + try: + from tools.skills_tool import SKILLS_DIR, _get_disabled_skill_names + disabled = _get_disabled_skill_names() + + # Check disabled built-in skills + for skill_md in SKILLS_DIR.rglob("SKILL.md"): + if any(part in ('.git', '.github', '.hub') for part in skill_md.parts): + continue + name = skill_md.parent.name.lower().replace("_", "-") + if name == normalized and name in disabled: + return ( + f"The **{command_name}** skill is installed but disabled.\n" + f"Enable it with: `hermes skills config`" + ) + + # Check optional skills (shipped with repo but not installed) + from hermes_constants import get_hermes_home + repo_root = Path(__file__).resolve().parent.parent + optional_dir = repo_root / "optional-skills" + if optional_dir.exists(): + for skill_md in optional_dir.rglob("SKILL.md"): + name = skill_md.parent.name.lower().replace("_", "-") + if name == normalized: + # Build install path: official// + rel = skill_md.parent.relative_to(optional_dir) + parts = list(rel.parts) + install_path = f"official/{'/'.join(parts)}" + return ( + f"The **{command_name}** skill is available but not installed.\n" + f"Install it with: `hermes skills install {install_path}`" + ) + except Exception: + pass + return None + + def _platform_config_key(platform: "Platform") -> str: """Map a Platform enum to its config.yaml key (LOCAL→"cli", rest→enum value).""" return "cli" if platform == Platform.LOCAL else platform.value @@ -1817,6 +1861,9 @@ class GatewayRunner: if canonical == "help": return await self._handle_help_command(event) + + if canonical == "commands": + return await self._handle_commands_command(event) if canonical == "status": return await self._handle_status_command(event) @@ -1974,6 +2021,12 @@ class GatewayRunner: if msg: event.text = msg # Fall through to normal message processing with skill content + else: + # Not an active 
skill — check if it's a known-but-disabled or + # uninstalled skill and give actionable guidance. + _unavail_msg = _check_unavailable_skill(command) + if _unavail_msg: + return _unavail_msg except Exception as e: logger.debug("Skill command check failed (non-fatal): %s", e) @@ -3065,12 +3118,69 @@ class GatewayRunner: from agent.skill_commands import get_skill_commands skill_cmds = get_skill_commands() if skill_cmds: - lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):") - for cmd in sorted(skill_cmds): + lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} active):") + # Show first 10, then point to /commands for the rest + sorted_cmds = sorted(skill_cmds) + for cmd in sorted_cmds[:10]: lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}") + if len(sorted_cmds) > 10: + lines.append(f"\n... and {len(sorted_cmds) - 10} more. Use `/commands` for the full paginated list.") except Exception: pass return "\n".join(lines) + + async def _handle_commands_command(self, event: MessageEvent) -> str: + """Handle /commands [page] - paginated list of all commands and skills.""" + from hermes_cli.commands import gateway_help_lines + + raw_args = event.get_command_args().strip() + if raw_args: + try: + requested_page = int(raw_args) + except ValueError: + return "Usage: `/commands [page]`" + else: + requested_page = 1 + + # Build combined entry list: built-in commands + skill commands + entries = list(gateway_help_lines()) + try: + from agent.skill_commands import get_skill_commands + skill_cmds = get_skill_commands() + if skill_cmds: + entries.append("") + entries.append("⚡ **Skill Commands**:") + for cmd in sorted(skill_cmds): + desc = skill_cmds[cmd].get("description", "").strip() or "Skill command" + entries.append(f"`{cmd}` — {desc}") + except Exception: + pass + + if not entries: + return "No commands available." 
+ + from gateway.config import Platform + page_size = 15 if event.source.platform == Platform.TELEGRAM else 20 + total_pages = max(1, (len(entries) + page_size - 1) // page_size) + page = max(1, min(requested_page, total_pages)) + start = (page - 1) * page_size + page_entries = entries[start:start + page_size] + + lines = [ + f"📚 **Commands** ({len(entries)} total, page {page}/{total_pages})", + "", + *page_entries, + ] + if total_pages > 1: + nav_parts = [] + if page > 1: + nav_parts.append(f"`/commands {page - 1}` ← prev") + if page < total_pages: + nav_parts.append(f"next → `/commands {page + 1}`") + lines.extend(["", " | ".join(nav_parts)]) + if page != requested_page: + lines.append(f"_(Requested page {requested_page} was out of range, showing page {page}.)_") + return "\n".join(lines) async def _handle_provider_command(self, event: MessageEvent) -> str: """Handle /provider command - show available providers.""" diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index d442f7f94..b115dd6ca 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -118,6 +118,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ "Tools & Skills", cli_only=True), # Info + CommandDef("commands", "Browse all commands and skills (paginated)", "Info", + gateway_only=True, args_hint="[page]"), CommandDef("help", "Show available commands", "Info"), CommandDef("usage", "Show token usage for the current session", "Info"), CommandDef("insights", "Show usage insights and analytics", "Info", @@ -361,6 +363,47 @@ def telegram_bot_commands() -> list[tuple[str, str]]: return result +def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str]], int]: + """Return Telegram menu commands (built-in + active skills), capped to the Bot API limit. + + Built-in commands come first, then active skill commands. Commands beyond + ``max_commands`` remain callable in the gateway; they are just omitted from + Telegram's native slash-command picker. 
+ + Returns: + (menu_commands, hidden_count) where hidden_count is the number of + commands omitted due to the cap. + """ + all_commands = list(telegram_bot_commands()) + + # Append active BUILT-IN skill commands only (not user-installed hub skills). + # User-installed skills stay accessible via /skills and by typing the command + # directly, but don't clutter the Telegram menu. + try: + from agent.skill_commands import get_skill_commands + from pathlib import Path + # The repo's built-in skills live under /skills/ + _repo_skills_dir = str(Path(__file__).resolve().parent.parent / "skills") + skill_cmds = get_skill_commands() + for cmd_key in sorted(skill_cmds): + info = skill_cmds[cmd_key] + # Only include skills whose SKILL.md is in the repo's skills/ dir + skill_path = info.get("skill_md_path", "") + if not skill_path.startswith(_repo_skills_dir): + continue + name = cmd_key.lstrip("/").replace("-", "_") + desc = info.get("description", "") + # Telegram descriptions max 256 chars + if len(desc) > 256: + desc = desc[:253] + "..." + all_commands.append((name, desc)) + except Exception: + pass + + hidden_count = max(0, len(all_commands) - max_commands) + return all_commands[:max_commands], hidden_count + + def slack_subcommand_map() -> dict[str, str]: """Return subcommand -> /command mapping for Slack /hermes handler. 
diff --git a/skills/inference-sh/cli/SKILL.md b/optional-skills/devops/cli/SKILL.md similarity index 100% rename from skills/inference-sh/cli/SKILL.md rename to optional-skills/devops/cli/SKILL.md diff --git a/skills/inference-sh/cli/references/app-discovery.md b/optional-skills/devops/cli/references/app-discovery.md similarity index 100% rename from skills/inference-sh/cli/references/app-discovery.md rename to optional-skills/devops/cli/references/app-discovery.md diff --git a/skills/inference-sh/cli/references/authentication.md b/optional-skills/devops/cli/references/authentication.md similarity index 100% rename from skills/inference-sh/cli/references/authentication.md rename to optional-skills/devops/cli/references/authentication.md diff --git a/skills/inference-sh/cli/references/cli-reference.md b/optional-skills/devops/cli/references/cli-reference.md similarity index 100% rename from skills/inference-sh/cli/references/cli-reference.md rename to optional-skills/devops/cli/references/cli-reference.md diff --git a/skills/inference-sh/cli/references/running-apps.md b/optional-skills/devops/cli/references/running-apps.md similarity index 100% rename from skills/inference-sh/cli/references/running-apps.md rename to optional-skills/devops/cli/references/running-apps.md diff --git a/skills/mlops/training/accelerate/SKILL.md b/optional-skills/mlops/accelerate/SKILL.md similarity index 100% rename from skills/mlops/training/accelerate/SKILL.md rename to optional-skills/mlops/accelerate/SKILL.md diff --git a/skills/mlops/training/accelerate/references/custom-plugins.md b/optional-skills/mlops/accelerate/references/custom-plugins.md similarity index 100% rename from skills/mlops/training/accelerate/references/custom-plugins.md rename to optional-skills/mlops/accelerate/references/custom-plugins.md diff --git a/skills/mlops/training/accelerate/references/megatron-integration.md b/optional-skills/mlops/accelerate/references/megatron-integration.md similarity index 100% 
rename from skills/mlops/training/accelerate/references/megatron-integration.md rename to optional-skills/mlops/accelerate/references/megatron-integration.md diff --git a/skills/mlops/training/accelerate/references/performance.md b/optional-skills/mlops/accelerate/references/performance.md similarity index 100% rename from skills/mlops/training/accelerate/references/performance.md rename to optional-skills/mlops/accelerate/references/performance.md diff --git a/skills/mlops/vector-databases/chroma/SKILL.md b/optional-skills/mlops/chroma/SKILL.md similarity index 100% rename from skills/mlops/vector-databases/chroma/SKILL.md rename to optional-skills/mlops/chroma/SKILL.md diff --git a/skills/mlops/vector-databases/chroma/references/integration.md b/optional-skills/mlops/chroma/references/integration.md similarity index 100% rename from skills/mlops/vector-databases/chroma/references/integration.md rename to optional-skills/mlops/chroma/references/integration.md diff --git a/skills/mlops/vector-databases/faiss/SKILL.md b/optional-skills/mlops/faiss/SKILL.md similarity index 100% rename from skills/mlops/vector-databases/faiss/SKILL.md rename to optional-skills/mlops/faiss/SKILL.md diff --git a/skills/mlops/vector-databases/faiss/references/index_types.md b/optional-skills/mlops/faiss/references/index_types.md similarity index 100% rename from skills/mlops/vector-databases/faiss/references/index_types.md rename to optional-skills/mlops/faiss/references/index_types.md diff --git a/skills/mlops/training/flash-attention/SKILL.md b/optional-skills/mlops/flash-attention/SKILL.md similarity index 100% rename from skills/mlops/training/flash-attention/SKILL.md rename to optional-skills/mlops/flash-attention/SKILL.md diff --git a/skills/mlops/training/flash-attention/references/benchmarks.md b/optional-skills/mlops/flash-attention/references/benchmarks.md similarity index 100% rename from skills/mlops/training/flash-attention/references/benchmarks.md rename to 
optional-skills/mlops/flash-attention/references/benchmarks.md diff --git a/skills/mlops/training/flash-attention/references/transformers-integration.md b/optional-skills/mlops/flash-attention/references/transformers-integration.md similarity index 100% rename from skills/mlops/training/flash-attention/references/transformers-integration.md rename to optional-skills/mlops/flash-attention/references/transformers-integration.md diff --git a/skills/mlops/training/hermes-atropos-environments/SKILL.md b/optional-skills/mlops/hermes-atropos-environments/SKILL.md similarity index 100% rename from skills/mlops/training/hermes-atropos-environments/SKILL.md rename to optional-skills/mlops/hermes-atropos-environments/SKILL.md diff --git a/skills/mlops/training/hermes-atropos-environments/references/agentresult-fields.md b/optional-skills/mlops/hermes-atropos-environments/references/agentresult-fields.md similarity index 100% rename from skills/mlops/training/hermes-atropos-environments/references/agentresult-fields.md rename to optional-skills/mlops/hermes-atropos-environments/references/agentresult-fields.md diff --git a/skills/mlops/training/hermes-atropos-environments/references/atropos-base-env.md b/optional-skills/mlops/hermes-atropos-environments/references/atropos-base-env.md similarity index 100% rename from skills/mlops/training/hermes-atropos-environments/references/atropos-base-env.md rename to optional-skills/mlops/hermes-atropos-environments/references/atropos-base-env.md diff --git a/skills/mlops/training/hermes-atropos-environments/references/usage-patterns.md b/optional-skills/mlops/hermes-atropos-environments/references/usage-patterns.md similarity index 100% rename from skills/mlops/training/hermes-atropos-environments/references/usage-patterns.md rename to optional-skills/mlops/hermes-atropos-environments/references/usage-patterns.md diff --git a/skills/mlops/evaluation/huggingface-tokenizers/SKILL.md b/optional-skills/mlops/huggingface-tokenizers/SKILL.md 
similarity index 100% rename from skills/mlops/evaluation/huggingface-tokenizers/SKILL.md rename to optional-skills/mlops/huggingface-tokenizers/SKILL.md diff --git a/skills/mlops/evaluation/huggingface-tokenizers/references/algorithms.md b/optional-skills/mlops/huggingface-tokenizers/references/algorithms.md similarity index 100% rename from skills/mlops/evaluation/huggingface-tokenizers/references/algorithms.md rename to optional-skills/mlops/huggingface-tokenizers/references/algorithms.md diff --git a/skills/mlops/evaluation/huggingface-tokenizers/references/integration.md b/optional-skills/mlops/huggingface-tokenizers/references/integration.md similarity index 100% rename from skills/mlops/evaluation/huggingface-tokenizers/references/integration.md rename to optional-skills/mlops/huggingface-tokenizers/references/integration.md diff --git a/skills/mlops/evaluation/huggingface-tokenizers/references/pipeline.md b/optional-skills/mlops/huggingface-tokenizers/references/pipeline.md similarity index 100% rename from skills/mlops/evaluation/huggingface-tokenizers/references/pipeline.md rename to optional-skills/mlops/huggingface-tokenizers/references/pipeline.md diff --git a/skills/mlops/evaluation/huggingface-tokenizers/references/training.md b/optional-skills/mlops/huggingface-tokenizers/references/training.md similarity index 100% rename from skills/mlops/evaluation/huggingface-tokenizers/references/training.md rename to optional-skills/mlops/huggingface-tokenizers/references/training.md diff --git a/skills/mlops/inference/instructor/SKILL.md b/optional-skills/mlops/instructor/SKILL.md similarity index 100% rename from skills/mlops/inference/instructor/SKILL.md rename to optional-skills/mlops/instructor/SKILL.md diff --git a/skills/mlops/inference/instructor/references/examples.md b/optional-skills/mlops/instructor/references/examples.md similarity index 100% rename from skills/mlops/inference/instructor/references/examples.md rename to 
optional-skills/mlops/instructor/references/examples.md diff --git a/skills/mlops/inference/instructor/references/providers.md b/optional-skills/mlops/instructor/references/providers.md similarity index 100% rename from skills/mlops/inference/instructor/references/providers.md rename to optional-skills/mlops/instructor/references/providers.md diff --git a/skills/mlops/inference/instructor/references/validation.md b/optional-skills/mlops/instructor/references/validation.md similarity index 100% rename from skills/mlops/inference/instructor/references/validation.md rename to optional-skills/mlops/instructor/references/validation.md diff --git a/skills/mlops/cloud/lambda-labs/SKILL.md b/optional-skills/mlops/lambda-labs/SKILL.md similarity index 100% rename from skills/mlops/cloud/lambda-labs/SKILL.md rename to optional-skills/mlops/lambda-labs/SKILL.md diff --git a/skills/mlops/cloud/lambda-labs/references/advanced-usage.md b/optional-skills/mlops/lambda-labs/references/advanced-usage.md similarity index 100% rename from skills/mlops/cloud/lambda-labs/references/advanced-usage.md rename to optional-skills/mlops/lambda-labs/references/advanced-usage.md diff --git a/skills/mlops/cloud/lambda-labs/references/troubleshooting.md b/optional-skills/mlops/lambda-labs/references/troubleshooting.md similarity index 100% rename from skills/mlops/cloud/lambda-labs/references/troubleshooting.md rename to optional-skills/mlops/lambda-labs/references/troubleshooting.md diff --git a/skills/mlops/models/llava/SKILL.md b/optional-skills/mlops/llava/SKILL.md similarity index 100% rename from skills/mlops/models/llava/SKILL.md rename to optional-skills/mlops/llava/SKILL.md diff --git a/skills/mlops/models/llava/references/training.md b/optional-skills/mlops/llava/references/training.md similarity index 100% rename from skills/mlops/models/llava/references/training.md rename to optional-skills/mlops/llava/references/training.md diff --git a/skills/mlops/evaluation/nemo-curator/SKILL.md 
b/optional-skills/mlops/nemo-curator/SKILL.md similarity index 100% rename from skills/mlops/evaluation/nemo-curator/SKILL.md rename to optional-skills/mlops/nemo-curator/SKILL.md diff --git a/skills/mlops/evaluation/nemo-curator/references/deduplication.md b/optional-skills/mlops/nemo-curator/references/deduplication.md similarity index 100% rename from skills/mlops/evaluation/nemo-curator/references/deduplication.md rename to optional-skills/mlops/nemo-curator/references/deduplication.md diff --git a/skills/mlops/evaluation/nemo-curator/references/filtering.md b/optional-skills/mlops/nemo-curator/references/filtering.md similarity index 100% rename from skills/mlops/evaluation/nemo-curator/references/filtering.md rename to optional-skills/mlops/nemo-curator/references/filtering.md diff --git a/skills/mlops/vector-databases/pinecone/SKILL.md b/optional-skills/mlops/pinecone/SKILL.md similarity index 100% rename from skills/mlops/vector-databases/pinecone/SKILL.md rename to optional-skills/mlops/pinecone/SKILL.md diff --git a/skills/mlops/vector-databases/pinecone/references/deployment.md b/optional-skills/mlops/pinecone/references/deployment.md similarity index 100% rename from skills/mlops/vector-databases/pinecone/references/deployment.md rename to optional-skills/mlops/pinecone/references/deployment.md diff --git a/skills/mlops/training/pytorch-lightning/SKILL.md b/optional-skills/mlops/pytorch-lightning/SKILL.md similarity index 100% rename from skills/mlops/training/pytorch-lightning/SKILL.md rename to optional-skills/mlops/pytorch-lightning/SKILL.md diff --git a/skills/mlops/training/pytorch-lightning/references/callbacks.md b/optional-skills/mlops/pytorch-lightning/references/callbacks.md similarity index 100% rename from skills/mlops/training/pytorch-lightning/references/callbacks.md rename to optional-skills/mlops/pytorch-lightning/references/callbacks.md diff --git a/skills/mlops/training/pytorch-lightning/references/distributed.md 
b/optional-skills/mlops/pytorch-lightning/references/distributed.md similarity index 100% rename from skills/mlops/training/pytorch-lightning/references/distributed.md rename to optional-skills/mlops/pytorch-lightning/references/distributed.md diff --git a/skills/mlops/training/pytorch-lightning/references/hyperparameter-tuning.md b/optional-skills/mlops/pytorch-lightning/references/hyperparameter-tuning.md similarity index 100% rename from skills/mlops/training/pytorch-lightning/references/hyperparameter-tuning.md rename to optional-skills/mlops/pytorch-lightning/references/hyperparameter-tuning.md diff --git a/skills/mlops/vector-databases/qdrant/SKILL.md b/optional-skills/mlops/qdrant/SKILL.md similarity index 100% rename from skills/mlops/vector-databases/qdrant/SKILL.md rename to optional-skills/mlops/qdrant/SKILL.md diff --git a/skills/mlops/vector-databases/qdrant/references/advanced-usage.md b/optional-skills/mlops/qdrant/references/advanced-usage.md similarity index 100% rename from skills/mlops/vector-databases/qdrant/references/advanced-usage.md rename to optional-skills/mlops/qdrant/references/advanced-usage.md diff --git a/skills/mlops/vector-databases/qdrant/references/troubleshooting.md b/optional-skills/mlops/qdrant/references/troubleshooting.md similarity index 100% rename from skills/mlops/vector-databases/qdrant/references/troubleshooting.md rename to optional-skills/mlops/qdrant/references/troubleshooting.md diff --git a/skills/mlops/evaluation/saelens/SKILL.md b/optional-skills/mlops/saelens/SKILL.md similarity index 100% rename from skills/mlops/evaluation/saelens/SKILL.md rename to optional-skills/mlops/saelens/SKILL.md diff --git a/skills/mlops/evaluation/saelens/references/README.md b/optional-skills/mlops/saelens/references/README.md similarity index 100% rename from skills/mlops/evaluation/saelens/references/README.md rename to optional-skills/mlops/saelens/references/README.md diff --git 
a/skills/mlops/evaluation/saelens/references/api.md b/optional-skills/mlops/saelens/references/api.md similarity index 100% rename from skills/mlops/evaluation/saelens/references/api.md rename to optional-skills/mlops/saelens/references/api.md diff --git a/skills/mlops/evaluation/saelens/references/tutorials.md b/optional-skills/mlops/saelens/references/tutorials.md similarity index 100% rename from skills/mlops/evaluation/saelens/references/tutorials.md rename to optional-skills/mlops/saelens/references/tutorials.md diff --git a/skills/mlops/training/simpo/SKILL.md b/optional-skills/mlops/simpo/SKILL.md similarity index 100% rename from skills/mlops/training/simpo/SKILL.md rename to optional-skills/mlops/simpo/SKILL.md diff --git a/skills/mlops/training/simpo/references/datasets.md b/optional-skills/mlops/simpo/references/datasets.md similarity index 100% rename from skills/mlops/training/simpo/references/datasets.md rename to optional-skills/mlops/simpo/references/datasets.md diff --git a/skills/mlops/training/simpo/references/hyperparameters.md b/optional-skills/mlops/simpo/references/hyperparameters.md similarity index 100% rename from skills/mlops/training/simpo/references/hyperparameters.md rename to optional-skills/mlops/simpo/references/hyperparameters.md diff --git a/skills/mlops/training/simpo/references/loss-functions.md b/optional-skills/mlops/simpo/references/loss-functions.md similarity index 100% rename from skills/mlops/training/simpo/references/loss-functions.md rename to optional-skills/mlops/simpo/references/loss-functions.md diff --git a/skills/mlops/training/slime/SKILL.md b/optional-skills/mlops/slime/SKILL.md similarity index 100% rename from skills/mlops/training/slime/SKILL.md rename to optional-skills/mlops/slime/SKILL.md diff --git a/skills/mlops/training/slime/references/api-reference.md b/optional-skills/mlops/slime/references/api-reference.md similarity index 100% rename from skills/mlops/training/slime/references/api-reference.md 
rename to optional-skills/mlops/slime/references/api-reference.md diff --git a/skills/mlops/training/slime/references/troubleshooting.md b/optional-skills/mlops/slime/references/troubleshooting.md similarity index 100% rename from skills/mlops/training/slime/references/troubleshooting.md rename to optional-skills/mlops/slime/references/troubleshooting.md diff --git a/skills/mlops/inference/tensorrt-llm/SKILL.md b/optional-skills/mlops/tensorrt-llm/SKILL.md similarity index 100% rename from skills/mlops/inference/tensorrt-llm/SKILL.md rename to optional-skills/mlops/tensorrt-llm/SKILL.md diff --git a/skills/mlops/inference/tensorrt-llm/references/multi-gpu.md b/optional-skills/mlops/tensorrt-llm/references/multi-gpu.md similarity index 100% rename from skills/mlops/inference/tensorrt-llm/references/multi-gpu.md rename to optional-skills/mlops/tensorrt-llm/references/multi-gpu.md diff --git a/skills/mlops/inference/tensorrt-llm/references/optimization.md b/optional-skills/mlops/tensorrt-llm/references/optimization.md similarity index 100% rename from skills/mlops/inference/tensorrt-llm/references/optimization.md rename to optional-skills/mlops/tensorrt-llm/references/optimization.md diff --git a/skills/mlops/inference/tensorrt-llm/references/serving.md b/optional-skills/mlops/tensorrt-llm/references/serving.md similarity index 100% rename from skills/mlops/inference/tensorrt-llm/references/serving.md rename to optional-skills/mlops/tensorrt-llm/references/serving.md diff --git a/skills/mlops/training/torchtitan/SKILL.md b/optional-skills/mlops/torchtitan/SKILL.md similarity index 100% rename from skills/mlops/training/torchtitan/SKILL.md rename to optional-skills/mlops/torchtitan/SKILL.md diff --git a/skills/mlops/training/torchtitan/references/checkpoint.md b/optional-skills/mlops/torchtitan/references/checkpoint.md similarity index 100% rename from skills/mlops/training/torchtitan/references/checkpoint.md rename to 
optional-skills/mlops/torchtitan/references/checkpoint.md diff --git a/skills/mlops/training/torchtitan/references/custom-models.md b/optional-skills/mlops/torchtitan/references/custom-models.md similarity index 100% rename from skills/mlops/training/torchtitan/references/custom-models.md rename to optional-skills/mlops/torchtitan/references/custom-models.md diff --git a/skills/mlops/training/torchtitan/references/float8.md b/optional-skills/mlops/torchtitan/references/float8.md similarity index 100% rename from skills/mlops/training/torchtitan/references/float8.md rename to optional-skills/mlops/torchtitan/references/float8.md diff --git a/skills/mlops/training/torchtitan/references/fsdp.md b/optional-skills/mlops/torchtitan/references/fsdp.md similarity index 100% rename from skills/mlops/training/torchtitan/references/fsdp.md rename to optional-skills/mlops/torchtitan/references/fsdp.md diff --git a/skills/research/domain-intel/SKILL.md b/optional-skills/research/domain-intel/SKILL.md similarity index 100% rename from skills/research/domain-intel/SKILL.md rename to optional-skills/research/domain-intel/SKILL.md diff --git a/skills/research/domain-intel/scripts/domain_intel.py b/optional-skills/research/domain-intel/scripts/domain_intel.py similarity index 100% rename from skills/research/domain-intel/scripts/domain_intel.py rename to optional-skills/research/domain-intel/scripts/domain_intel.py diff --git a/skills/research/duckduckgo-search/SKILL.md b/optional-skills/research/duckduckgo-search/SKILL.md similarity index 100% rename from skills/research/duckduckgo-search/SKILL.md rename to optional-skills/research/duckduckgo-search/SKILL.md diff --git a/skills/research/duckduckgo-search/scripts/duckduckgo.sh b/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh similarity index 100% rename from skills/research/duckduckgo-search/scripts/duckduckgo.sh rename to optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh -- 2.43.0 From 
9fd78c7a8ebb5b4f74df2d881d0cc8b4a4b7ceff Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:01:13 -0700 Subject: [PATCH 007/385] fix: use SKILLS_DIR not repo path for Telegram menu skill filter (#4005) Skills are synced to ~/.hermes/skills/ (SKILLS_DIR), not the repo's skills/ directory. The previous filter compared against the repo path so no skills matched. Now checks SKILLS_DIR and excludes .hub/ subdirectory (user-installed hub skills). --- hermes_cli/commands.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index b115dd6ca..26247c066 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -381,16 +381,20 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str # directly, but don't clutter the Telegram menu. try: from agent.skill_commands import get_skill_commands - from pathlib import Path - # The repo's built-in skills live under /skills/ - _repo_skills_dir = str(Path(__file__).resolve().parent.parent / "skills") + from tools.skills_tool import SKILLS_DIR + # Built-in skills are synced to SKILLS_DIR (~/.hermes/skills/). + # Hub-installed skills go into SKILLS_DIR/.hub/. Exclude .hub/ skills + # from the menu — they're user-installed, not repo built-in. 
+ _skills_dir = str(SKILLS_DIR.resolve()) + _hub_dir = str((SKILLS_DIR / ".hub").resolve()) skill_cmds = get_skill_commands() for cmd_key in sorted(skill_cmds): info = skill_cmds[cmd_key] - # Only include skills whose SKILL.md is in the repo's skills/ dir skill_path = info.get("skill_md_path", "") - if not skill_path.startswith(_repo_skills_dir): + if not skill_path.startswith(_skills_dir): continue + if skill_path.startswith(_hub_dir): + continue # hub-installed, not built-in name = cmd_key.lstrip("/").replace("-", "_") desc = info.get("description", "") # Telegram descriptions max 256 chars -- 2.43.0 From da3e22bcfa2c583204cbe0742a6b691d9b681da5 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:05:20 -0700 Subject: [PATCH 008/385] =?UTF-8?q?fix:=20cap=20Telegram=20menu=20at=2050?= =?UTF-8?q?=20commands=20=E2=80=94=20API=20rejects=20above=20~60=20(#4006)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: use SKILLS_DIR not repo path for Telegram menu skill filter Skills are synced to ~/.hermes/skills/ (SKILLS_DIR), not the repo's skills/ directory. The previous filter compared against the repo path so no skills matched. Now checks SKILLS_DIR and excludes .hub/ subdirectory (user-installed hub skills). * fix: cap Telegram menu at 50 commands — API rejects above ~60 Telegram's setMyCommands returns BOT_COMMANDS_TOO_MUCH when registering close to 100 commands despite docs claiming 100 is the limit. Metadata overhead causes rejection above ~60. Cap at 50 for reliability — remaining commands accessible via /commands. 
--- gateway/platforms/telegram.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 91223d7b7..ac3efd92f 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -623,7 +623,9 @@ class TelegramAdapter(BasePlatformAdapter): try: from telegram import BotCommand from hermes_cli.commands import telegram_menu_commands - menu_commands, hidden_count = telegram_menu_commands(max_commands=100) + # Telegram docs say 100, but setMyCommands returns + # BOT_COMMANDS_TOO_MUCH above ~60 due to metadata overhead. + menu_commands, hidden_count = telegram_menu_commands(max_commands=50) await self._bot.set_my_commands([ BotCommand(name, desc) for name, desc in menu_commands ]) -- 2.43.0 From 0976bf6cd0653a6097dd01cd2a15e160af9dda55 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:17:09 -0700 Subject: [PATCH 009/385] feat: add /yolo slash command to toggle dangerous command approvals (#3990) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a /yolo command that toggles HERMES_YOLO_MODE at runtime, skipping all dangerous command approval prompts for the current session. Works in both CLI and gateway (Telegram, Discord, etc.). - /yolo -> ON: all commands auto-approved, no confirmation prompts - /yolo -> OFF: normal approval flow restored The --yolo CLI flag already existed for launch-time opt-in. This adds the ability to toggle mid-session without restarting. Session-scoped — resets when the process ends. Uses the existing HERMES_YOLO_MODE env var that check_all_command_guards() already respects. 
--- cli.py | 13 +++++++++++++ gateway/run.py | 13 +++++++++++++ hermes_cli/commands.py | 2 ++ 3 files changed, 28 insertions(+) diff --git a/cli.py b/cli.py index 706221506..223c40563 100644 --- a/cli.py +++ b/cli.py @@ -3836,6 +3836,8 @@ class HermesCLI: self.console.print(f" Status bar {state}") elif canonical == "verbose": self._toggle_verbose() + elif canonical == "yolo": + self._toggle_yolo() elif canonical == "reasoning": self._handle_reasoning_command(cmd_original) elif canonical == "compress": @@ -4434,6 +4436,17 @@ class HermesCLI: } _cprint(labels.get(self.tool_progress_mode, "")) + def _toggle_yolo(self): + """Toggle YOLO mode — skip all dangerous command approval prompts.""" + import os + current = bool(os.environ.get("HERMES_YOLO_MODE")) + if current: + os.environ.pop("HERMES_YOLO_MODE", None) + self.console.print(" ⚠ YOLO mode [bold red]OFF[/] — dangerous commands will require approval.") + else: + os.environ["HERMES_YOLO_MODE"] = "1" + self.console.print(" ⚡ YOLO mode [bold green]ON[/] — all commands auto-approved. Use with caution.") + def _handle_reasoning_command(self, cmd: str): """Handle /reasoning — manage effort level and display toggle. 
diff --git a/gateway/run.py b/gateway/run.py index 2bd623b62..de077ede8 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1877,6 +1877,9 @@ class GatewayRunner: if canonical == "verbose": return await self._handle_verbose_command(event) + if canonical == "yolo": + return await self._handle_yolo_command(event) + if canonical == "provider": return await self._handle_provider_command(event) @@ -4109,6 +4112,16 @@ class GatewayRunner: else: return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)" + async def _handle_yolo_command(self, event: MessageEvent) -> str: + """Handle /yolo — toggle dangerous command approval bypass.""" + current = bool(os.environ.get("HERMES_YOLO_MODE")) + if current: + os.environ.pop("HERMES_YOLO_MODE", None) + return "⚠️ YOLO mode **OFF** — dangerous commands will require approval." + else: + os.environ["HERMES_YOLO_MODE"] = "1" + return "⚡ YOLO mode **ON** — all commands auto-approved. Use with caution." + async def _handle_verbose_command(self, event: MessageEvent) -> str: """Handle /verbose command — cycle tool progress display mode. 
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 26247c066..f043ec73f 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -90,6 +90,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose", "Configuration", cli_only=True, gateway_config_gate="display.tool_progress_command"), + CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)", + "Configuration"), CommandDef("reasoning", "Manage reasoning effort and display", "Configuration", args_hint="[level|show|hide]", subcommands=("none", "low", "minimal", "medium", "high", "xhigh", "show", "hide", "on", "off")), -- 2.43.0 From f3069c649ca7c16692a54fb1434a8c29b894f4a7 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:17:15 -0700 Subject: [PATCH 010/385] fix(cli): add missing subprocess.run() timeouts in doctor and status (#4009) Add timeout parameters to 4 subprocess.run() calls that could hang indefinitely if the child process blocks (e.g., unresponsive docker daemon, systemctl waiting for D-Bus): - doctor.py: docker info (timeout=10), ssh check (timeout=15) - status.py: systemctl is-active (timeout=5), launchctl list (timeout=5) Each call site now catches subprocess.TimeoutExpired and treats it as a failure, consistent with how non-zero return codes are already handled. Add AST-based regression test that verifies every subprocess.run() call in CLI modules specifies a timeout keyword argument. 
Co-authored-by: dieutx --- hermes_cli/doctor.py | 23 ++++++---- hermes_cli/status.py | 32 ++++++++------ tests/hermes_cli/test_subprocess_timeouts.py | 44 ++++++++++++++++++++ 3 files changed, 79 insertions(+), 20 deletions(-) create mode 100644 tests/hermes_cli/test_subprocess_timeouts.py diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index a0a841905..b9fd8d327 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -406,8 +406,11 @@ def run_doctor(args): if terminal_env == "docker": if shutil.which("docker"): # Check if docker daemon is running - result = subprocess.run(["docker", "info"], capture_output=True) - if result.returncode == 0: + try: + result = subprocess.run(["docker", "info"], capture_output=True, timeout=10) + except subprocess.TimeoutExpired: + result = None + if result is not None and result.returncode == 0: check_ok("docker", "(daemon running)") else: check_fail("docker daemon not running") @@ -426,12 +429,16 @@ def run_doctor(args): ssh_host = os.getenv("TERMINAL_SSH_HOST") if ssh_host: # Try to connect - result = subprocess.run( - ["ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes", ssh_host, "echo ok"], - capture_output=True, - text=True - ) - if result.returncode == 0: + try: + result = subprocess.run( + ["ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes", ssh_host, "echo ok"], + capture_output=True, + text=True, + timeout=15 + ) + except subprocess.TimeoutExpired: + result = None + if result is not None and result.returncode == 0: check_ok(f"SSH connection to {ssh_host}") else: check_fail(f"SSH connection to {ssh_host}") diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 3a03aabb1..aeb159a55 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -285,23 +285,31 @@ def show_status(args): _gw_svc = get_service_name() except Exception: _gw_svc = "hermes-gateway" - result = subprocess.run( - ["systemctl", "--user", "is-active", _gw_svc], - capture_output=True, - text=True - ) - is_active = 
result.stdout.strip() == "active" + try: + result = subprocess.run( + ["systemctl", "--user", "is-active", _gw_svc], + capture_output=True, + text=True, + timeout=5 + ) + is_active = result.stdout.strip() == "active" + except subprocess.TimeoutExpired: + is_active = False print(f" Status: {check_mark(is_active)} {'running' if is_active else 'stopped'}") print(" Manager: systemd (user)") elif sys.platform == 'darwin': from hermes_cli.gateway import get_launchd_label - result = subprocess.run( - ["launchctl", "list", get_launchd_label()], - capture_output=True, - text=True - ) - is_loaded = result.returncode == 0 + try: + result = subprocess.run( + ["launchctl", "list", get_launchd_label()], + capture_output=True, + text=True, + timeout=5 + ) + is_loaded = result.returncode == 0 + except subprocess.TimeoutExpired: + is_loaded = False print(f" Status: {check_mark(is_loaded)} {'loaded' if is_loaded else 'not loaded'}") print(" Manager: launchd") else: diff --git a/tests/hermes_cli/test_subprocess_timeouts.py b/tests/hermes_cli/test_subprocess_timeouts.py new file mode 100644 index 000000000..47146aac4 --- /dev/null +++ b/tests/hermes_cli/test_subprocess_timeouts.py @@ -0,0 +1,44 @@ +"""Tests for subprocess.run() timeout coverage in CLI utilities.""" +import ast +from pathlib import Path + +import pytest + + +# Parameterise over every CLI module that calls subprocess.run +_CLI_MODULES = [ + "hermes_cli/doctor.py", + "hermes_cli/status.py", + "hermes_cli/clipboard.py", + "hermes_cli/banner.py", +] + + +def _subprocess_run_calls(filepath: str) -> list[dict]: + """Parse a Python file and return info about subprocess.run() calls.""" + source = Path(filepath).read_text() + tree = ast.parse(source, filename=filepath) + calls = [] + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + func = node.func + if (isinstance(func, ast.Attribute) and func.attr == "run" + and isinstance(func.value, ast.Name) + and func.value.id == "subprocess"): + has_timeout = 
any(kw.arg == "timeout" for kw in node.keywords) + calls.append({"line": node.lineno, "has_timeout": has_timeout}) + return calls + + +@pytest.mark.parametrize("filepath", _CLI_MODULES) +def test_all_subprocess_run_calls_have_timeout(filepath): + """Every subprocess.run() call in CLI modules must specify a timeout.""" + if not Path(filepath).exists(): + pytest.skip(f"{filepath} not found") + calls = _subprocess_run_calls(filepath) + missing = [c for c in calls if not c["has_timeout"]] + assert not missing, ( + f"{filepath} has subprocess.run() without timeout at " + f"line(s): {[c['line'] for c in missing]}" + ) -- 2.43.0 From 60ecde8ac7d4b6b82bb80b411629947d0993d88b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:21:13 -0700 Subject: [PATCH 011/385] fix: fit all 100 commands in Telegram menu with 40-char descriptions (#4010) * fix: truncate skill descriptions to 100 chars in Telegram menu * fix: 40-char desc cap + 100 command limit for Telegram menu setMyCommands has an undocumented total payload size limit. 50 commands with 256-char descriptions failed, 50 with 100-char worked, and 100 with 40-char descriptions also works (~5300 total chars). Truncate skill descriptions to 40 chars in the menu picker and set cap back to 100. Full descriptions available via /commands. --- gateway/platforms/telegram.py | 7 ++++--- hermes_cli/commands.py | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index ac3efd92f..db1b19431 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -623,9 +623,10 @@ class TelegramAdapter(BasePlatformAdapter): try: from telegram import BotCommand from hermes_cli.commands import telegram_menu_commands - # Telegram docs say 100, but setMyCommands returns - # BOT_COMMANDS_TOO_MUCH above ~60 due to metadata overhead. 
- menu_commands, hidden_count = telegram_menu_commands(max_commands=50) + # Telegram allows up to 100 commands but has an undocumented + # payload size limit. Skill descriptions are truncated to 40 + # chars in telegram_menu_commands() to fit 100 commands safely. + menu_commands, hidden_count = telegram_menu_commands(max_commands=100) await self._bot.set_my_commands([ BotCommand(name, desc) for name, desc in menu_commands ]) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index f043ec73f..a14432624 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -399,9 +399,10 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str continue # hub-installed, not built-in name = cmd_key.lstrip("/").replace("-", "_") desc = info.get("description", "") - # Telegram descriptions max 256 chars - if len(desc) > 256: - desc = desc[:253] + "..." + # Keep descriptions short — setMyCommands has an undocumented + # total payload limit. 40 chars fits 100 commands safely. + if len(desc) > 40: + desc = desc[:37] + "..." all_commands.append((name, desc)) except Exception: pass -- 2.43.0 From ea342f238209d99285a0780da5167e902d02e2e4 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:24:10 -0700 Subject: [PATCH 012/385] Fix banner alignment in installer script (#4011) Co-authored-by: Ahmed Khaled --- scripts/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install.sh b/scripts/install.sh index 6fbb22b45..d46771e6a 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -94,7 +94,7 @@ print_banner() { echo "" echo -e "${MAGENTA}${BOLD}" echo "┌─────────────────────────────────────────────────────────┐" - echo "│ ⚕ Hermes Agent Installer │" + echo "│ ⚕ Hermes Agent Installer │" echo "├─────────────────────────────────────────────────────────┤" echo "│ An open source AI agent by Nous Research. 
│" echo "└─────────────────────────────────────────────────────────┘" -- 2.43.0 From 86250a3e45ffe9c1a6f3e60b6d8a0cd49c366e53 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 12:59:58 -0700 Subject: [PATCH 013/385] docs: expand terminal backends section + fix docs build (#4016) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(telegram): add webhook mode as alternative to polling When TELEGRAM_WEBHOOK_URL is set, the adapter starts an HTTP webhook server (via python-telegram-bot's start_webhook()) instead of long polling. This enables cloud platforms like Fly.io and Railway to auto-wake suspended machines on inbound HTTP traffic. Polling remains the default — no behavior change unless the env var is set. Env vars: TELEGRAM_WEBHOOK_URL Public HTTPS URL for Telegram to push to TELEGRAM_WEBHOOK_PORT Local listen port (default 8443) TELEGRAM_WEBHOOK_SECRET Secret token for update verification Cherry-picked and adapted from PR #2022 by SHL0MS. Preserved all current main enhancements (network error recovery, polling conflict detection, DM topics setup). Co-authored-by: SHL0MS * fix: send_document call in background task delivery + vision download timeout Two fixes salvaged from PR #2269 by amethystani: 1. gateway/run.py: adapter.send_file() → adapter.send_document() send_file() doesn't exist on BasePlatformAdapter. Background task media files were silently never delivered (AttributeError swallowed by except Exception: pass). 2. tools/vision_tools.py: configurable image download timeout via HERMES_VISION_DOWNLOAD_TIMEOUT env var (default 30s), plus guard against raise None when max_retries=0. The third fix in #2269 (opencode-go auth config) was already resolved on main. 
Co-authored-by: amethystani * docs: expand terminal backends section + fix feishu MDX build error --------- Co-authored-by: SHL0MS Co-authored-by: amethystani --- website/docs/user-guide/configuration.md | 206 +++++++++++++++++------ 1 file changed, 156 insertions(+), 50 deletions(-) diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 48d76dd80..c3aa96f53 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -699,65 +699,171 @@ Use this when you want lower latency or cost without fully changing your default ## Terminal Backend Configuration -Configure which environment the agent uses for terminal commands: +Hermes supports six terminal backends. Each determines where the agent's shell commands actually execute — your local machine, a Docker container, a remote server via SSH, a Modal cloud sandbox, a Daytona workspace, or a Singularity/Apptainer container. ```yaml terminal: - backend: local # or: docker, ssh, singularity, modal, daytona - cwd: "." # Working directory ("." = current dir) - timeout: 180 # Command timeout in seconds - - # Docker-specific settings - docker_image: "nikolaik/python-nodejs:python3.11-nodejs20" - docker_mount_cwd_to_workspace: false # SECURITY: off by default. Opt in to mount the launch cwd into /workspace. 
- docker_forward_env: # Optional explicit allowlist for env passthrough - - "GITHUB_TOKEN" - docker_volumes: # Additional explicit host mounts - - "/home/user/projects:/workspace/projects" - - "/home/user/data:/data:ro" # :ro for read-only - - # Container resource limits (docker, singularity, modal, daytona) - container_cpu: 1 # CPU cores - container_memory: 5120 # MB (default 5GB) - container_disk: 51200 # MB (default 50GB) - container_persistent: true # Persist filesystem across sessions - - # Persistent shell — keep a long-lived bash process across commands - persistent_shell: true # Enabled by default for SSH backend + backend: local # local | docker | ssh | modal | daytona | singularity + cwd: "." # Working directory ("." = current dir for local, "/root" for containers) + timeout: 180 # Per-command timeout in seconds ``` +### Backend Overview + +| Backend | Where commands run | Isolation | Best for | +|---------|-------------------|-----------|----------| +| **local** | Your machine directly | None | Development, personal use | +| **docker** | Docker container | Full (namespaces, cap-drop) | Safe sandboxing, CI/CD | +| **ssh** | Remote server via SSH | Network boundary | Remote dev, powerful hardware | +| **modal** | Modal cloud sandbox | Full (cloud VM) | Ephemeral cloud compute, evals | +| **daytona** | Daytona workspace | Full (cloud container) | Managed cloud dev environments | +| **singularity** | Singularity/Apptainer container | Namespaces (--containall) | HPC clusters, shared machines | + +### Local Backend + +The default. Commands run directly on your machine with no isolation. No special setup required. + +```yaml +terminal: + backend: local +``` + +:::warning +The agent has the same filesystem access as your user account. Use `hermes tools` to disable tools you don't want, or switch to Docker for sandboxing. 
+::: + +### Docker Backend + +Runs commands inside a Docker container with security hardening (all capabilities dropped, no privilege escalation, PID limits). + +```yaml +terminal: + backend: docker + docker_image: "nikolaik/python-nodejs:python3.11-nodejs20" + docker_mount_cwd_to_workspace: false # Mount launch dir into /workspace + docker_forward_env: # Env vars to forward into container + - "GITHUB_TOKEN" + docker_volumes: # Host directory mounts + - "/home/user/projects:/workspace/projects" + - "/home/user/data:/data:ro" # :ro for read-only + + # Resource limits + container_cpu: 1 # CPU cores (0 = unlimited) + container_memory: 5120 # MB (0 = unlimited) + container_disk: 51200 # MB (requires overlay2 on XFS+pquota) + container_persistent: true # Persist /workspace and /root across sessions +``` + +**Requirements:** Docker Desktop or Docker Engine installed and running. Hermes probes `$PATH` plus common macOS install locations (`/usr/local/bin/docker`, `/opt/homebrew/bin/docker`, Docker Desktop app bundle). + +**Container lifecycle:** Each session starts a long-lived container (`docker run -d ... sleep 2h`). Commands run via `docker exec` with a login shell. On cleanup, the container is stopped and removed. + +**Security hardening:** +- `--cap-drop ALL` with only `DAC_OVERRIDE`, `CHOWN`, `FOWNER` added back +- `--security-opt no-new-privileges` +- `--pids-limit 256` +- Size-limited tmpfs for `/tmp` (512MB), `/var/tmp` (256MB), `/run` (64MB) + +**Credential forwarding:** Env vars listed in `docker_forward_env` are resolved from your shell environment first, then `~/.hermes/.env`. Skills can also declare `required_environment_variables` which are merged automatically. + +### SSH Backend + +Runs commands on a remote server over SSH. Uses ControlMaster for connection reuse (5-minute idle keepalive). Persistent shell is enabled by default — state (cwd, env vars) survives across commands. 
+ +```yaml +terminal: + backend: ssh + persistent_shell: true # Keep a long-lived bash session (default: true) +``` + +**Required environment variables:** + +```bash +TERMINAL_SSH_HOST=my-server.example.com +TERMINAL_SSH_USER=ubuntu +``` + +**Optional:** + +| Variable | Default | Description | +|----------|---------|-------------| +| `TERMINAL_SSH_PORT` | `22` | SSH port | +| `TERMINAL_SSH_KEY` | (system default) | Path to SSH private key | +| `TERMINAL_SSH_PERSISTENT` | `true` | Enable persistent shell | + +**How it works:** Connects at init time with `BatchMode=yes` and `StrictHostKeyChecking=accept-new`. Persistent shell keeps a single `bash -l` process alive on the remote host, communicating via temporary files. Commands that need `stdin_data` or `sudo` automatically fall back to one-shot mode. + +### Modal Backend + +Runs commands in a [Modal](https://modal.com) cloud sandbox. Each task gets an isolated VM with configurable CPU, memory, and disk. Filesystem can be snapshot/restored across sessions. + +```yaml +terminal: + backend: modal + container_cpu: 1 # CPU cores + container_memory: 5120 # MB (5GB) + container_disk: 51200 # MB (50GB) + container_persistent: true # Snapshot/restore filesystem +``` + +**Required:** Either `MODAL_TOKEN_ID` + `MODAL_TOKEN_SECRET` environment variables, or a `~/.modal.toml` config file. + +**Persistence:** When enabled, the sandbox filesystem is snapshotted on cleanup and restored on next session. Snapshots are tracked in `~/.hermes/modal_snapshots.json`. + +**Credential files:** Automatically mounted from `~/.hermes/` (OAuth tokens, etc.) and synced before each command. + +### Daytona Backend + +Runs commands in a [Daytona](https://daytona.io) managed workspace. Supports stop/resume for persistence. 
+ +```yaml +terminal: + backend: daytona + container_cpu: 1 # CPU cores + container_memory: 5120 # MB → converted to GiB + container_disk: 10240 # MB → converted to GiB (max 10 GiB) + container_persistent: true # Stop/resume instead of delete +``` + +**Required:** `DAYTONA_API_KEY` environment variable. + +**Persistence:** When enabled, sandboxes are stopped (not deleted) on cleanup and resumed on next session. Sandbox names follow the pattern `hermes-{task_id}`. + +**Disk limit:** Daytona enforces a 10 GiB maximum. Requests above this are capped with a warning. + +### Singularity/Apptainer Backend + +Runs commands in a [Singularity/Apptainer](https://apptainer.org) container. Designed for HPC clusters and shared machines where Docker isn't available. + +```yaml +terminal: + backend: singularity + singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20" + container_cpu: 1 # CPU cores + container_memory: 5120 # MB + container_persistent: true # Writable overlay persists across sessions +``` + +**Requirements:** `apptainer` or `singularity` binary in `$PATH`. + +**Image handling:** Docker URLs (`docker://...`) are automatically converted to SIF files and cached. Existing `.sif` files are used directly. + +**Scratch directory:** Resolved in order: `TERMINAL_SCRATCH_DIR` → `TERMINAL_SANDBOX_DIR/singularity` → `/scratch/$USER/hermes-agent` (HPC convention) → `~/.hermes/sandboxes/singularity`. + +**Isolation:** Uses `--containall --no-home` for full namespace isolation without mounting the host home directory. + ### Common Terminal Backend Issues -If terminal commands fail immediately or the terminal tool is reported as disabled, check the following: +If terminal commands fail immediately or the terminal tool is reported as disabled: -- **Local backend** - - No special requirements. This is the safest default when you are just getting started. +- **Local** — No special requirements. The safest default when getting started. 
+- **Docker** — Run `docker version` to verify Docker is working. If it fails, fix Docker or `hermes config set terminal.backend local`. +- **SSH** — Both `TERMINAL_SSH_HOST` and `TERMINAL_SSH_USER` must be set. Hermes logs a clear error if either is missing. +- **Modal** — Needs `MODAL_TOKEN_ID` env var or `~/.modal.toml`. Run `hermes doctor` to check. +- **Daytona** — Needs `DAYTONA_API_KEY`. The Daytona SDK handles server URL configuration. +- **Singularity** — Needs `apptainer` or `singularity` in `$PATH`. Common on HPC clusters. -- **Docker backend** - - Ensure Docker Desktop (or the Docker daemon) is installed and running. - - Hermes needs to be able to find the `docker` CLI. It checks your `$PATH` first and also probes common Docker Desktop install locations on macOS. Run: - ```bash - docker version - ``` - If this fails, fix your Docker installation or switch back to the local backend: - ```bash - hermes config set terminal.backend local - ``` - -- **SSH backend** - - Both `TERMINAL_SSH_HOST` and `TERMINAL_SSH_USER` must be set, for example: - ```bash - export TERMINAL_ENV=ssh - export TERMINAL_SSH_HOST=my-server.example.com - export TERMINAL_SSH_USER=ubuntu - ``` - - If either value is missing, Hermes will log a clear error and refuse to use the SSH backend. - -- **Modal backend** - - You need either a `MODAL_TOKEN_ID` environment variable or a `~/.modal.toml` config file. - - If neither is present, the backend check fails and Hermes will report that the Modal backend is not available. - -When in doubt, set `terminal.backend` back to `local` and verify that commands run there first. +When in doubt, set `terminal.backend` back to `local` and verify commands run there first. 
### Docker Volume Mounts -- 2.43.0 From 158f49f19a6bb8dfd818f477ade43e3800a3178e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:04:06 -0700 Subject: [PATCH 014/385] =?UTF-8?q?fix:=20enforce=20priority=20order=20in?= =?UTF-8?q?=20Telegram=20menu=20=E2=80=94=20core=20>=20plugins=20>=20skill?= =?UTF-8?q?s=20(#4023)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The menu now has explicit priority tiers: 1. Core CommandDef commands (always included, never bumped) 2. Plugin slash commands (take precedence over skills) 3. Built-in skill commands (fill remaining slots alphabetically) Only skills get trimmed when the 100-command cap is hit. Adding new core commands or plugin commands automatically pushes skills out, not the other way around. --- hermes_cli/commands.py | 45 +++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index a14432624..3b1eb37ff 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -366,27 +366,41 @@ def telegram_bot_commands() -> list[tuple[str, str]]: def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str]], int]: - """Return Telegram menu commands (built-in + active skills), capped to the Bot API limit. + """Return Telegram menu commands capped to the Bot API limit. - Built-in commands come first, then active skill commands. Commands beyond - ``max_commands`` remain callable in the gateway; they are just omitted from - Telegram's native slash-command picker. + Priority order (higher priority = never bumped by overflow): + 1. Core CommandDef commands (always included) + 2. Plugin slash commands (take precedence over skills) + 3. Built-in skill commands (fill remaining slots, alphabetical) + + Skills are the only tier that gets trimmed when the cap is hit. 
+ User-installed hub skills are excluded — accessible via /skills. Returns: (menu_commands, hidden_count) where hidden_count is the number of - commands omitted due to the cap. + skill commands omitted due to the cap. """ all_commands = list(telegram_bot_commands()) - # Append active BUILT-IN skill commands only (not user-installed hub skills). - # User-installed skills stay accessible via /skills and by typing the command - # directly, but don't clutter the Telegram menu. + # Plugin slash commands get priority over skills + try: + from hermes_cli.plugins import get_plugin_manager + pm = get_plugin_manager() + plugin_cmds = getattr(pm, "_plugin_commands", {}) + for cmd_name in sorted(plugin_cmds): + tg_name = cmd_name.replace("-", "_") + desc = "Plugin command" + if len(desc) > 40: + desc = desc[:37] + "..." + all_commands.append((tg_name, desc)) + except Exception: + pass + + # Remaining slots go to built-in skill commands (not hub-installed). + skill_entries: list[tuple[str, str]] = [] try: from agent.skill_commands import get_skill_commands from tools.skills_tool import SKILLS_DIR - # Built-in skills are synced to SKILLS_DIR (~/.hermes/skills/). - # Hub-installed skills go into SKILLS_DIR/.hub/. Exclude .hub/ skills - # from the menu — they're user-installed, not repo built-in. _skills_dir = str(SKILLS_DIR.resolve()) _hub_dir = str((SKILLS_DIR / ".hub").resolve()) skill_cmds = get_skill_commands() @@ -396,18 +410,21 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str if not skill_path.startswith(_skills_dir): continue if skill_path.startswith(_hub_dir): - continue # hub-installed, not built-in + continue name = cmd_key.lstrip("/").replace("-", "_") desc = info.get("description", "") # Keep descriptions short — setMyCommands has an undocumented # total payload limit. 40 chars fits 100 commands safely. if len(desc) > 40: desc = desc[:37] + "..." 
- all_commands.append((name, desc)) + skill_entries.append((name, desc)) except Exception: pass - hidden_count = max(0, len(all_commands) - max_commands) + # Skills fill remaining slots — they're the only tier that gets trimmed + remaining_slots = max(0, max_commands - len(all_commands)) + hidden_count = max(0, len(skill_entries) - remaining_slots) + all_commands.extend(skill_entries[:remaining_slots]) return all_commands[:max_commands], hidden_count -- 2.43.0 From ed9af6e5892f6e33d75c4de5efa7cc8110c281f9 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:16:16 -0700 Subject: [PATCH 015/385] fix: create AsyncOpenAI lazily in trajectory_compressor to avoid closed event loop (#4013) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AsyncOpenAI client was created once at __init__ and stored as an instance attribute. process_directory() calls asyncio.run() which creates and closes a fresh event loop. On a second call, the client's httpx transport is still bound to the closed loop, raising RuntimeError: "Event loop is closed" — the same pattern fixed by PR #3398 for the main agent loop. Create the client lazily in _get_async_client() so each asyncio.run() gets a client bound to the current loop. Co-authored-by: binhnt92 --- tests/test_trajectory_compressor_async.py | 115 ++++++++++++++++++++++ trajectory_compressor.py | 27 ++++- 2 files changed, 138 insertions(+), 4 deletions(-) create mode 100644 tests/test_trajectory_compressor_async.py diff --git a/tests/test_trajectory_compressor_async.py b/tests/test_trajectory_compressor_async.py new file mode 100644 index 000000000..2b276d03d --- /dev/null +++ b/tests/test_trajectory_compressor_async.py @@ -0,0 +1,115 @@ +"""Tests for trajectory_compressor AsyncOpenAI event loop binding. + +The AsyncOpenAI client was created once at __init__ time and stored as an +instance attribute. 
When process_directory() calls asyncio.run() — which +creates and closes a fresh event loop — the client's internal httpx +transport remains bound to the now-closed loop. A second call to +process_directory() would fail with "Event loop is closed". + +The fix creates the AsyncOpenAI client lazily via _get_async_client() so +each asyncio.run() gets a client bound to the current loop. +""" + +import types +from unittest.mock import MagicMock, patch + +import pytest + + +class TestAsyncClientLazyCreation: + """trajectory_compressor.py — _get_async_client()""" + + def test_async_client_none_after_init(self): + """async_client should be None after __init__ (not eagerly created).""" + from trajectory_compressor import TrajectoryCompressor + + comp = TrajectoryCompressor.__new__(TrajectoryCompressor) + comp.config = MagicMock() + comp.config.base_url = "https://api.example.com/v1" + comp.config.api_key_env = "TEST_API_KEY" + comp._use_call_llm = False + comp.async_client = None + comp._async_client_api_key = "test-key" + + assert comp.async_client is None + + def test_get_async_client_creates_new_client(self): + """_get_async_client() should create a fresh AsyncOpenAI instance.""" + from trajectory_compressor import TrajectoryCompressor + + comp = TrajectoryCompressor.__new__(TrajectoryCompressor) + comp.config = MagicMock() + comp.config.base_url = "https://api.example.com/v1" + comp._async_client_api_key = "test-key" + comp.async_client = None + + mock_async_openai = MagicMock() + with patch("openai.AsyncOpenAI", mock_async_openai): + client = comp._get_async_client() + + mock_async_openai.assert_called_once_with( + api_key="test-key", + base_url="https://api.example.com/v1", + ) + assert comp.async_client is not None + + def test_get_async_client_creates_fresh_each_call(self): + """Each call to _get_async_client() creates a NEW client instance, + so it binds to the current event loop.""" + from trajectory_compressor import TrajectoryCompressor + + comp = 
TrajectoryCompressor.__new__(TrajectoryCompressor) + comp.config = MagicMock() + comp.config.base_url = "https://api.example.com/v1" + comp._async_client_api_key = "test-key" + comp.async_client = None + + call_count = 0 + instances = [] + + def mock_constructor(**kwargs): + nonlocal call_count + call_count += 1 + instance = MagicMock() + instances.append(instance) + return instance + + with patch("openai.AsyncOpenAI", side_effect=mock_constructor): + client1 = comp._get_async_client() + client2 = comp._get_async_client() + + # Should have created two separate instances + assert call_count == 2 + assert instances[0] is not instances[1] + + +class TestSourceLineVerification: + """Verify the actual source has the lazy pattern applied.""" + + @staticmethod + def _read_file() -> str: + import os + base = os.path.dirname(os.path.dirname(__file__)) + with open(os.path.join(base, "trajectory_compressor.py")) as f: + return f.read() + + def test_no_eager_async_openai_in_init(self): + """__init__ should NOT create AsyncOpenAI eagerly.""" + src = self._read_file() + # The old pattern: self.async_client = AsyncOpenAI(...) 
in _init_summarizer + # should not exist — only self.async_client = None + lines = src.split("\n") + for i, line in enumerate(lines, 1): + if "self.async_client = AsyncOpenAI(" in line and "_get_async_client" not in lines[max(0,i-3):i+1]: + # Allow it inside _get_async_client method + # Check if we're inside _get_async_client by looking at context + context = "\n".join(lines[max(0,i-10):i+1]) + if "_get_async_client" not in context: + pytest.fail( + f"Line {i}: AsyncOpenAI created eagerly outside _get_async_client()" + ) + + def test_get_async_client_method_exists(self): + """_get_async_client method should exist.""" + src = self._read_file() + assert "def _get_async_client(self)" in src diff --git a/trajectory_compressor.py b/trajectory_compressor.py index fd69cd18a..2dfdda7af 100644 --- a/trajectory_compressor.py +++ b/trajectory_compressor.py @@ -375,15 +375,34 @@ class TrajectoryCompressor: raise RuntimeError( f"Missing API key. Set {self.config.api_key_env} " f"environment variable.") - from openai import OpenAI, AsyncOpenAI + from openai import OpenAI self.client = OpenAI( api_key=api_key, base_url=self.config.base_url) - self.async_client = AsyncOpenAI( - api_key=api_key, base_url=self.config.base_url) + # AsyncOpenAI is created lazily in _get_async_client() so it + # binds to the current event loop — avoids "Event loop is closed" + # when process_directory() is called multiple times (each call + # creates a new loop via asyncio.run()). + self.async_client = None + self._async_client_api_key = api_key print(f"✅ Initialized summarizer client: {self.config.summarization_model}") print(f" Max concurrent requests: {self.config.max_concurrent_requests}") + def _get_async_client(self): + """Return an AsyncOpenAI client bound to the current event loop. + + Created lazily so that each ``asyncio.run()`` call in + ``process_directory()`` gets a client tied to its own loop, + avoiding "Event loop is closed" errors on repeated calls. 
+ """ + from openai import AsyncOpenAI + # Always create a fresh client so it binds to the running loop. + self.async_client = AsyncOpenAI( + api_key=self._async_client_api_key, + base_url=self.config.base_url, + ) + return self.async_client + def _detect_provider(self) -> str: """Detect the provider name from the configured base_url.""" url = (self.config.base_url or "").lower() @@ -615,7 +634,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" max_tokens=self.config.summary_target_tokens * 2, ) else: - response = await self.async_client.chat.completions.create( + response = await self._get_async_client().chat.completions.create( model=self.config.summarization_model, messages=[{"role": "user", "content": prompt}], temperature=self.config.temperature, -- 2.43.0 From 7dac75f2ae0773b18e8088b678355c59dd164aa0 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:18:21 -0700 Subject: [PATCH 016/385] fix: prevent context pressure warning spam after compression (#4012) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add /yolo slash command to toggle dangerous command approvals Adds a /yolo command that toggles HERMES_YOLO_MODE at runtime, skipping all dangerous command approval prompts for the current session. Works in both CLI and gateway (Telegram, Discord, etc.). - /yolo -> ON: all commands auto-approved, no confirmation prompts - /yolo -> OFF: normal approval flow restored The --yolo CLI flag already existed for launch-time opt-in. This adds the ability to toggle mid-session without restarting. Session-scoped — resets when the process ends. Uses the existing HERMES_YOLO_MODE env var that check_all_command_guards() already respects. * fix: prevent context pressure warning spam (agent loop + gateway rate-limit) Two complementary fixes for repeated context pressure warnings spamming gateway users (Telegram, Discord, etc.): 1. 
Agent-level loop fix (run_agent.py): After compression, only reset _context_pressure_warned if the post-compression estimate is actually below the 85% warning level. Previously the flag was unconditionally reset, causing the warning to re-fire every loop iteration when compression couldn't reduce below 85% of the threshold (e.g. very low threshold like 15%, or system prompt alone exceeds the warning level). 2. Gateway-level rate-limit (gateway/run.py, salvaged from PR #3786): Per-chat_id cooldown of 1 hour on compression warning messages. Both warning paths ('still large after compression' and 'compression failed') are gated. Defense-in-depth — even if the agent-level fix has edge cases, users won't see more than one warning per hour. Co-authored-by: dlkakbs --------- Co-authored-by: dlkakbs --- gateway/run.py | 19 ++++++++++-- run_agent.py | 17 +++++++---- tests/gateway/test_session_hygiene.py | 43 +++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 7 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index de077ede8..c85ed27b8 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -476,6 +476,13 @@ class GatewayRunner: self._honcho_managers: Dict[str, Any] = {} self._honcho_configs: Dict[str, Any] = {} + # Rate-limit compression warning messages sent to users. + # Keyed by chat_id — value is the timestamp of the last warning sent. + # Prevents the warning from firing on every message when a session + # remains above the threshold after compression. + self._compression_warn_sent: Dict[str, float] = {} + self._compression_warn_cooldown: int = 3600 # seconds (1 hour) + # Ensure tirith security scanner is available (downloads if needed) try: from tools.tirith_security import ensure_installed @@ -2400,13 +2407,18 @@ class GatewayRunner: pass # Still too large after compression — warn user + # Rate-limited to once per cooldown period per + # chat to avoid spamming on every message. 
if _new_tokens >= _warn_token_threshold: logger.warning( "Session hygiene: still ~%s tokens after " "compression — suggesting /reset", f"{_new_tokens:,}", ) - if _hyg_adapter: + _now = time.time() + _last_warn = self._compression_warn_sent.get(source.chat_id, 0) + if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown: + self._compression_warn_sent[source.chat_id] = _now try: await _hyg_adapter.send( source.chat_id, @@ -2428,7 +2440,10 @@ class GatewayRunner: if _approx_tokens >= _warn_token_threshold: _hyg_adapter = self.adapters.get(source.platform) _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None - if _hyg_adapter: + _now = time.time() + _last_warn = self._compression_warn_sent.get(source.chat_id, 0) + if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown: + self._compression_warn_sent[source.chat_id] = _now try: await _hyg_adapter.send( source.chat_id, diff --git a/run_agent.py b/run_agent.py index 13eba7fe7..794c9f67a 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5221,11 +5221,8 @@ class AIAgent: except Exception as e: logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e) - # Reset context pressure warning and token estimate — usage drops - # after compaction. Without this, the stale last_prompt_tokens from - # the previous API call causes the pressure calculation to stay at - # >1000% and spam warnings / re-trigger compression in a loop. - self._context_pressure_warned = False + # Update token estimate after compaction so pressure calculations + # use the post-compression count, not the stale pre-compression one. 
_compressed_est = ( estimate_tokens_rough(new_system_prompt) + estimate_messages_tokens_rough(compressed) @@ -5233,6 +5230,16 @@ class AIAgent: self.context_compressor.last_prompt_tokens = _compressed_est self.context_compressor.last_completion_tokens = 0 + # Only reset the pressure warning if compression actually brought + # us below the warning level (85% of threshold). When compression + # can't reduce enough (e.g. threshold is very low, or system prompt + # alone exceeds the warning level), keep the flag set to prevent + # spamming the user with repeated warnings every loop iteration. + if self.context_compressor.threshold_tokens > 0: + _post_progress = _compressed_est / self.context_compressor.threshold_tokens + if _post_progress < 0.85: + self._context_pressure_warned = False + return compressed, new_system_prompt def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py index b8ff8f8a8..843c0d416 100644 --- a/tests/gateway/test_session_hygiene.py +++ b/tests/gateway/test_session_hygiene.py @@ -212,6 +212,49 @@ class TestSessionHygieneWarnThreshold: assert post_compress_tokens < warn_threshold +class TestCompressionWarnRateLimit: + """Compression warning messages must be rate-limited per chat_id.""" + + def _make_runner(self): + from unittest.mock import MagicMock, patch + with patch("gateway.run.load_gateway_config"), \ + patch("gateway.run.SessionStore"), \ + patch("gateway.run.DeliveryRouter"): + from gateway.run import GatewayRunner + runner = GatewayRunner.__new__(GatewayRunner) + runner._compression_warn_sent = {} + runner._compression_warn_cooldown = 3600 + return runner + + def test_first_warn_is_sent(self): + runner = self._make_runner() + now = 1_000_000.0 + last = runner._compression_warn_sent.get("chat:1", 0) + assert now - last >= runner._compression_warn_cooldown + + def 
test_second_warn_suppressed_within_cooldown(self): + runner = self._make_runner() + now = 1_000_000.0 + runner._compression_warn_sent["chat:1"] = now - 60 # 1 minute ago + last = runner._compression_warn_sent.get("chat:1", 0) + assert now - last < runner._compression_warn_cooldown + + def test_warn_allowed_after_cooldown(self): + runner = self._make_runner() + now = 1_000_000.0 + runner._compression_warn_sent["chat:1"] = now - 3601 # just past cooldown + last = runner._compression_warn_sent.get("chat:1", 0) + assert now - last >= runner._compression_warn_cooldown + + def test_rate_limit_is_per_chat(self): + """Rate-limiting one chat must not suppress warnings for another.""" + runner = self._make_runner() + now = 1_000_000.0 + runner._compression_warn_sent["chat:1"] = now - 60 # suppressed + last_other = runner._compression_warn_sent.get("chat:2", 0) + assert now - last_other >= runner._compression_warn_cooldown + + class TestEstimatedTokenThreshold: """Verify that hygiene thresholds are always below the model's context limit — for both actual and estimated token counts. -- 2.43.0 From 950f69475fd59d539ab0b8fc953c29ff170ebb88 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:18:42 -0700 Subject: [PATCH 017/385] feat(browser): add Camofox local anti-detection browser backend (#4008) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Camofox-browser is a self-hosted Node.js server wrapping Camoufox (Firefox fork with C++ fingerprint spoofing). When CAMOFOX_URL is set, all 11 browser tools route through the Camofox REST API instead of the agent-browser CLI. 
Maps 1:1 to the existing browser tool interface: - Navigate, snapshot, click, type, scroll, back, press, close - Get images, vision (screenshot + LLM analysis) - Console (returns empty with note — camofox limitation) Setup: npm start in camofox-browser dir, or docker run -p 9377:9377 Then: CAMOFOX_URL=http://localhost:9377 in ~/.hermes/.env Advantages over Browserbase (cloud): - Free (no per-session API costs) - Local (zero network latency for browser ops) - Anti-detection at C++ level (bypasses Cloudflare/Google bot detection) - Works offline, Docker-ready Files: - tools/browser_camofox.py: Full REST backend (~400 lines) - tools/browser_tool.py: Routing at each tool function - hermes_cli/config.py: CAMOFOX_URL env var entry - tests/tools/test_browser_camofox.py: 20 tests --- hermes_cli/config.py | 8 + hermes_cli/setup.py | 6 +- hermes_cli/tools_config.py | 32 ++ package.json | 3 +- tests/tools/test_browser_camofox.py | 290 ++++++++++++++++ tools/browser_camofox.py | 496 ++++++++++++++++++++++++++++ tools/browser_tool.py | 57 ++++ 7 files changed, 889 insertions(+), 3 deletions(-) create mode 100644 tests/tools/test_browser_camofox.py create mode 100644 tools/browser_camofox.py diff --git a/hermes_cli/config.py b/hermes_cli/config.py index e2503ebec..56d102692 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -706,6 +706,14 @@ OPTIONAL_ENV_VARS = { "password": True, "category": "tool", }, + "CAMOFOX_URL": { + "description": "Camofox browser server URL for local anti-detection browsing (e.g. 
http://localhost:9377)", + "prompt": "Camofox server URL", + "url": "https://github.com/jo-inc/camofox-browser", + "tools": ["browser_navigate", "browser_click"], + "password": False, + "category": "tool", + }, "FAL_KEY": { "description": "FAL API key for image generation", "prompt": "FAL API key", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 35695144d..304f34f56 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -601,13 +601,15 @@ def _print_setup_summary(config: dict, hermes_home): Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser" ).exists() ) - if get_env_value("BROWSERBASE_API_KEY"): + if get_env_value("CAMOFOX_URL"): + tool_status.append(("Browser Automation (Camofox)", True, None)) + elif get_env_value("BROWSERBASE_API_KEY"): tool_status.append(("Browser Automation (Browserbase)", True, None)) elif _ab_found: tool_status.append(("Browser Automation (local)", True, None)) else: tool_status.append( - ("Browser Automation", False, "npm install -g agent-browser") + ("Browser Automation", False, "npm install -g agent-browser or set CAMOFOX_URL") ) # FAL (image generation) diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 91496d45d..63e26d362 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -273,6 +273,16 @@ TOOL_CATEGORIES = { "browser_provider": "browser-use", "post_setup": "browserbase", }, + { + "name": "Camofox", + "tag": "Local anti-detection browser (Firefox/Camoufox)", + "env_vars": [ + {"key": "CAMOFOX_URL", "prompt": "Camofox server URL", "default": "http://localhost:9377", + "url": "https://github.com/jo-inc/camofox-browser"}, + ], + "browser_provider": "camofox", + "post_setup": "camofox", + }, ], }, "homeassistant": { @@ -337,6 +347,28 @@ def _run_post_setup(post_setup_key: str): elif not node_modules.exists(): _print_warning(" Node.js not found - browser tools require: npm install (in hermes-agent directory)") + elif post_setup_key == "camofox": + 
camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camoufox-browser" + if not camofox_dir.exists() and shutil.which("npm"): + _print_info(" Installing Camofox browser server...") + import subprocess + result = subprocess.run( + ["npm", "install", "--silent"], + capture_output=True, text=True, cwd=str(PROJECT_ROOT) + ) + if result.returncode == 0: + _print_success(" Camofox installed") + else: + _print_warning(" npm install failed - run manually: npm install") + if camofox_dir.exists(): + _print_info(" Start the Camofox server:") + _print_info(" npx @askjo/camoufox-browser") + _print_info(" First run downloads the Camoufox engine (~300MB)") + _print_info(" Or use Docker: docker run -p 9377:9377 jo-inc/camofox-browser") + elif not shutil.which("npm"): + _print_warning(" Node.js not found. Install Camofox via Docker:") + _print_info(" docker run -p 9377:9377 jo-inc/camofox-browser") + elif post_setup_key == "rl_training": try: __import__("tinker_atropos") diff --git a/package.json b/package.json index 5e593367b..309217c82 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,8 @@ }, "homepage": "https://github.com/NousResearch/Hermes-Agent#readme", "dependencies": { - "agent-browser": "^0.13.0" + "agent-browser": "^0.13.0", + "@askjo/camoufox-browser": "^1.0.0" }, "engines": { "node": ">=18.0.0" diff --git a/tests/tools/test_browser_camofox.py b/tests/tools/test_browser_camofox.py new file mode 100644 index 000000000..a59862b9b --- /dev/null +++ b/tests/tools/test_browser_camofox.py @@ -0,0 +1,290 @@ +"""Tests for the Camofox browser backend.""" + +import json +import os +from unittest.mock import MagicMock, patch + +import pytest + +from tools.browser_camofox import ( + camofox_back, + camofox_click, + camofox_close, + camofox_console, + camofox_get_images, + camofox_navigate, + camofox_press, + camofox_scroll, + camofox_snapshot, + camofox_type, + camofox_vision, + check_camofox_available, + cleanup_all_camofox_sessions, + is_camofox_mode, +) + + +# 
--------------------------------------------------------------------------- +# Configuration detection +# --------------------------------------------------------------------------- + + +class TestCamofoxMode: + def test_disabled_by_default(self, monkeypatch): + monkeypatch.delenv("CAMOFOX_URL", raising=False) + assert is_camofox_mode() is False + + def test_enabled_when_url_set(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + assert is_camofox_mode() is True + + def test_health_check_unreachable(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:19999") + assert check_camofox_available() is False + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _mock_response(status=200, json_data=None): + resp = MagicMock() + resp.status_code = status + resp.json.return_value = json_data or {} + resp.content = b"\x89PNG\r\n\x1a\nfake" + resp.raise_for_status = MagicMock() + return resp + + +# --------------------------------------------------------------------------- +# Navigate +# --------------------------------------------------------------------------- + + +class TestCamofoxNavigate: + @patch("tools.browser_camofox.requests.post") + def test_creates_tab_on_first_navigate(self, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab1", "url": "https://example.com"}) + + result = json.loads(camofox_navigate("https://example.com", task_id="t1")) + assert result["success"] is True + assert result["url"] == "https://example.com" + + @patch("tools.browser_camofox.requests.post") + def test_navigates_existing_tab(self, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + # First call creates tab + mock_post.return_value = _mock_response(json_data={"tabId": "tab2", 
"url": "https://a.com"}) + camofox_navigate("https://a.com", task_id="t2") + + # Second call navigates + mock_post.return_value = _mock_response(json_data={"ok": True, "url": "https://b.com"}) + result = json.loads(camofox_navigate("https://b.com", task_id="t2")) + assert result["success"] is True + assert result["url"] == "https://b.com" + + def test_connection_error_returns_helpful_message(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:19999") + result = json.loads(camofox_navigate("https://example.com", task_id="t_err")) + assert result["success"] is False + assert "Cannot connect" in result["error"] + + +# --------------------------------------------------------------------------- +# Snapshot +# --------------------------------------------------------------------------- + + +class TestCamofoxSnapshot: + def test_no_session_returns_error(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + result = json.loads(camofox_snapshot(task_id="no_such_task")) + assert result["success"] is False + assert "browser_navigate" in result["error"] + + @patch("tools.browser_camofox.requests.post") + @patch("tools.browser_camofox.requests.get") + def test_returns_snapshot(self, mock_get, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + # Create session + mock_post.return_value = _mock_response(json_data={"tabId": "tab3", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t3") + + # Return snapshot + mock_get.return_value = _mock_response(json_data={ + "snapshot": "- heading \"Test\" [e1]\n- button \"Submit\" [e2]", + "refsCount": 2, + }) + result = json.loads(camofox_snapshot(task_id="t3")) + assert result["success"] is True + assert "[e1]" in result["snapshot"] + assert result["element_count"] == 2 + + +# --------------------------------------------------------------------------- +# Click / Type / Scroll / Back / Press +# 
--------------------------------------------------------------------------- + + +class TestCamofoxInteractions: + @patch("tools.browser_camofox.requests.post") + def test_click(self, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab4", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t4") + + mock_post.return_value = _mock_response(json_data={"ok": True, "url": "https://x.com"}) + result = json.loads(camofox_click("@e5", task_id="t4")) + assert result["success"] is True + assert result["clicked"] == "e5" + + @patch("tools.browser_camofox.requests.post") + def test_type(self, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab5", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t5") + + mock_post.return_value = _mock_response(json_data={"ok": True}) + result = json.loads(camofox_type("@e3", "hello world", task_id="t5")) + assert result["success"] is True + assert result["typed"] == "hello world" + + @patch("tools.browser_camofox.requests.post") + def test_scroll(self, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab6", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t6") + + mock_post.return_value = _mock_response(json_data={"ok": True}) + result = json.loads(camofox_scroll("down", task_id="t6")) + assert result["success"] is True + assert result["scrolled"] == "down" + + @patch("tools.browser_camofox.requests.post") + def test_back(self, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab7", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t7") + + mock_post.return_value = 
_mock_response(json_data={"ok": True, "url": "https://prev.com"}) + result = json.loads(camofox_back(task_id="t7")) + assert result["success"] is True + + @patch("tools.browser_camofox.requests.post") + def test_press(self, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab8", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t8") + + mock_post.return_value = _mock_response(json_data={"ok": True}) + result = json.loads(camofox_press("Enter", task_id="t8")) + assert result["success"] is True + assert result["pressed"] == "Enter" + + +# --------------------------------------------------------------------------- +# Close +# --------------------------------------------------------------------------- + + +class TestCamofoxClose: + @patch("tools.browser_camofox.requests.delete") + @patch("tools.browser_camofox.requests.post") + def test_close_session(self, mock_post, mock_delete, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab9", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t9") + + mock_delete.return_value = _mock_response(json_data={"ok": True}) + result = json.loads(camofox_close(task_id="t9")) + assert result["success"] is True + assert result["closed"] is True + + def test_close_nonexistent_session(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + result = json.loads(camofox_close(task_id="nonexistent")) + assert result["success"] is True + + +# --------------------------------------------------------------------------- +# Console (limited support) +# --------------------------------------------------------------------------- + + +class TestCamofoxConsole: + def test_console_returns_empty_with_note(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + result = 
json.loads(camofox_console(task_id="t_console")) + assert result["success"] is True + assert result["total_messages"] == 0 + assert "not available" in result["note"] + + +# --------------------------------------------------------------------------- +# Images +# --------------------------------------------------------------------------- + + +class TestCamofoxGetImages: + @patch("tools.browser_camofox.requests.post") + @patch("tools.browser_camofox.requests.get") + def test_get_images(self, mock_get, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab10", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t10") + + mock_get.return_value = _mock_response(json_data={ + "images": [{"src": "https://x.com/img.png", "alt": "Logo"}], + }) + result = json.loads(camofox_get_images(task_id="t10")) + assert result["success"] is True + assert result["count"] == 1 + assert result["images"][0]["src"] == "https://x.com/img.png" + + +# --------------------------------------------------------------------------- +# Routing integration — verify browser_tool routes to camofox +# --------------------------------------------------------------------------- + + +class TestBrowserToolRouting: + """Verify that browser_tool.py delegates to camofox when CAMOFOX_URL is set.""" + + @patch("tools.browser_camofox.requests.post") + def test_browser_navigate_routes_to_camofox(self, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab_rt", "url": "https://example.com"}) + + from tools.browser_tool import browser_navigate + # Bypass SSRF check for test URL + with patch("tools.browser_tool._is_safe_url", return_value=True): + result = json.loads(browser_navigate("https://example.com", task_id="t_route")) + assert result["success"] is True + + def 
test_check_requirements_passes_with_camofox(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + from tools.browser_tool import check_browser_requirements + assert check_browser_requirements() is True + + +# --------------------------------------------------------------------------- +# Cleanup helper +# --------------------------------------------------------------------------- + + +class TestCamofoxCleanup: + @patch("tools.browser_camofox.requests.post") + @patch("tools.browser_camofox.requests.delete") + def test_cleanup_all(self, mock_delete, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab_c", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t_cleanup") + + mock_delete.return_value = _mock_response(json_data={"ok": True}) + cleanup_all_camofox_sessions() + + # Session should be gone + result = json.loads(camofox_snapshot(task_id="t_cleanup")) + assert result["success"] is False diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py new file mode 100644 index 000000000..b1925d2c6 --- /dev/null +++ b/tools/browser_camofox.py @@ -0,0 +1,496 @@ +"""Camofox browser backend — local anti-detection browser via REST API. + +Camofox-browser is a self-hosted Node.js server wrapping Camoufox (Firefox +fork with C++ fingerprint spoofing). It exposes a REST API that maps 1:1 +to our browser tool interface: accessibility snapshots with element refs, +click/type/scroll by ref, screenshots, etc. + +When ``CAMOFOX_URL`` is set (e.g. ``http://localhost:9377``), the browser +tools route through this module instead of the ``agent-browser`` CLI. 
+ +Setup:: + + # Option 1: npm + git clone https://github.com/jo-inc/camofox-browser && cd camofox-browser + npm install && npm start # downloads Camoufox (~300MB) on first run + + # Option 2: Docker + docker run -p 9377:9377 jo-inc/camofox-browser + +Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``. +""" + +from __future__ import annotations + +import base64 +import json +import logging +import os +import threading +import time +import uuid +from pathlib import Path +from typing import Any, Dict, Optional + +import requests + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +_DEFAULT_TIMEOUT = 30 # seconds per HTTP request +_SNAPSHOT_MAX_CHARS = 80_000 # camofox paginates at this limit + + +def get_camofox_url() -> str: + """Return the configured Camofox server URL, or empty string.""" + return os.getenv("CAMOFOX_URL", "").rstrip("/") + + +def is_camofox_mode() -> bool: + """True when Camofox backend is configured.""" + return bool(get_camofox_url()) + + +def check_camofox_available() -> bool: + """Verify the Camofox server is reachable.""" + url = get_camofox_url() + if not url: + return False + try: + resp = requests.get(f"{url}/health", timeout=5) + return resp.status_code == 200 + except Exception: + return False + + +# --------------------------------------------------------------------------- +# Session management +# --------------------------------------------------------------------------- +# Maps task_id -> {"user_id": str, "tab_id": str|None} +_sessions: Dict[str, Dict[str, Any]] = {} +_sessions_lock = threading.Lock() + + +def _get_session(task_id: Optional[str]) -> Dict[str, Any]: + """Get or create a camofox session for the given task.""" + task_id = task_id or "default" + with _sessions_lock: + if task_id in _sessions: + return _sessions[task_id] + session = { + 
"user_id": f"hermes_{uuid.uuid4().hex[:10]}", + "tab_id": None, + "session_key": f"task_{task_id[:16]}", + } + _sessions[task_id] = session + return session + + +def _ensure_tab(task_id: Optional[str], url: str = "about:blank") -> Dict[str, Any]: + """Ensure a tab exists for the session, creating one if needed.""" + session = _get_session(task_id) + if session["tab_id"]: + return session + base = get_camofox_url() + resp = requests.post( + f"{base}/tabs", + json={ + "userId": session["user_id"], + "sessionKey": session["session_key"], + "url": url, + }, + timeout=_DEFAULT_TIMEOUT, + ) + resp.raise_for_status() + data = resp.json() + session["tab_id"] = data.get("tabId") + return session + + +def _drop_session(task_id: Optional[str]) -> Optional[Dict[str, Any]]: + """Remove and return session info.""" + task_id = task_id or "default" + with _sessions_lock: + return _sessions.pop(task_id, None) + + +# --------------------------------------------------------------------------- +# HTTP helpers +# --------------------------------------------------------------------------- + +def _post(path: str, body: dict, timeout: int = _DEFAULT_TIMEOUT) -> dict: + """POST JSON to camofox and return parsed response.""" + url = f"{get_camofox_url()}{path}" + resp = requests.post(url, json=body, timeout=timeout) + resp.raise_for_status() + return resp.json() + + +def _get(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict: + """GET from camofox and return parsed response.""" + url = f"{get_camofox_url()}{path}" + resp = requests.get(url, params=params, timeout=timeout) + resp.raise_for_status() + return resp.json() + + +def _get_raw(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> requests.Response: + """GET from camofox and return raw response (for binary data).""" + url = f"{get_camofox_url()}{path}" + resp = requests.get(url, params=params, timeout=timeout) + resp.raise_for_status() + return resp + + +def _delete(path: str, body: dict = None, 
timeout: int = _DEFAULT_TIMEOUT) -> dict: + """DELETE to camofox and return parsed response.""" + url = f"{get_camofox_url()}{path}" + resp = requests.delete(url, json=body, timeout=timeout) + resp.raise_for_status() + return resp.json() + + +# --------------------------------------------------------------------------- +# Tool implementations +# --------------------------------------------------------------------------- + +def camofox_navigate(url: str, task_id: Optional[str] = None) -> str: + """Navigate to a URL via Camofox.""" + try: + session = _get_session(task_id) + if not session["tab_id"]: + # Create tab with the target URL directly + session = _ensure_tab(task_id, url) + data = {"ok": True, "url": url} + else: + # Navigate existing tab + data = _post( + f"/tabs/{session['tab_id']}/navigate", + {"userId": session["user_id"], "url": url}, + timeout=60, + ) + return json.dumps({ + "success": True, + "url": data.get("url", url), + "title": data.get("title", ""), + }) + except requests.HTTPError as e: + return json.dumps({"success": False, "error": f"Navigation failed: {e}"}) + except requests.ConnectionError: + return json.dumps({ + "success": False, + "error": f"Cannot connect to Camofox at {get_camofox_url()}. " + "Is the server running? Start with: npm start (in camofox-browser dir) " + "or: docker run -p 9377:9377 jo-inc/camofox-browser", + }) + except Exception as e: + return json.dumps({"success": False, "error": str(e)}) + + +def camofox_snapshot(full: bool = False, task_id: Optional[str] = None, + user_task: Optional[str] = None) -> str: + """Get accessibility tree snapshot from Camofox.""" + try: + session = _get_session(task_id) + if not session["tab_id"]: + return json.dumps({"success": False, "error": "No browser session. 
Call browser_navigate first."}) + + data = _get( + f"/tabs/{session['tab_id']}/snapshot", + params={"userId": session["user_id"]}, + ) + + snapshot = data.get("snapshot", "") + refs_count = data.get("refsCount", 0) + + # Apply same summarization logic as the main browser tool + from tools.browser_tool import ( + SNAPSHOT_SUMMARIZE_THRESHOLD, + _extract_relevant_content, + _truncate_snapshot, + ) + + if len(snapshot) > SNAPSHOT_SUMMARIZE_THRESHOLD: + if user_task: + snapshot = _extract_relevant_content(snapshot, user_task) + else: + snapshot = _truncate_snapshot(snapshot) + + return json.dumps({ + "success": True, + "snapshot": snapshot, + "element_count": refs_count, + }) + except Exception as e: + return json.dumps({"success": False, "error": str(e)}) + + +def camofox_click(ref: str, task_id: Optional[str] = None) -> str: + """Click an element by ref via Camofox.""" + try: + session = _get_session(task_id) + if not session["tab_id"]: + return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) + + # Strip @ prefix if present (our tool convention) + clean_ref = ref.lstrip("@") + + data = _post( + f"/tabs/{session['tab_id']}/click", + {"userId": session["user_id"], "ref": clean_ref}, + ) + return json.dumps({ + "success": True, + "clicked": clean_ref, + "url": data.get("url", ""), + }) + except Exception as e: + return json.dumps({"success": False, "error": str(e)}) + + +def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str: + """Type text into an element by ref via Camofox.""" + try: + session = _get_session(task_id) + if not session["tab_id"]: + return json.dumps({"success": False, "error": "No browser session. 
Call browser_navigate first."}) + + clean_ref = ref.lstrip("@") + + _post( + f"/tabs/{session['tab_id']}/type", + {"userId": session["user_id"], "ref": clean_ref, "text": text}, + ) + return json.dumps({ + "success": True, + "typed": text, + "element": clean_ref, + }) + except Exception as e: + return json.dumps({"success": False, "error": str(e)}) + + +def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str: + """Scroll the page via Camofox.""" + try: + session = _get_session(task_id) + if not session["tab_id"]: + return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) + + _post( + f"/tabs/{session['tab_id']}/scroll", + {"userId": session["user_id"], "direction": direction}, + ) + return json.dumps({"success": True, "scrolled": direction}) + except Exception as e: + return json.dumps({"success": False, "error": str(e)}) + + +def camofox_back(task_id: Optional[str] = None) -> str: + """Navigate back via Camofox.""" + try: + session = _get_session(task_id) + if not session["tab_id"]: + return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) + + data = _post( + f"/tabs/{session['tab_id']}/back", + {"userId": session["user_id"]}, + ) + return json.dumps({"success": True, "url": data.get("url", "")}) + except Exception as e: + return json.dumps({"success": False, "error": str(e)}) + + +def camofox_press(key: str, task_id: Optional[str] = None) -> str: + """Press a keyboard key via Camofox.""" + try: + session = _get_session(task_id) + if not session["tab_id"]: + return json.dumps({"success": False, "error": "No browser session. 
Call browser_navigate first."}) + + _post( + f"/tabs/{session['tab_id']}/press", + {"userId": session["user_id"], "key": key}, + ) + return json.dumps({"success": True, "pressed": key}) + except Exception as e: + return json.dumps({"success": False, "error": str(e)}) + + +def camofox_close(task_id: Optional[str] = None) -> str: + """Close the browser session via Camofox.""" + try: + session = _drop_session(task_id) + if not session: + return json.dumps({"success": True, "closed": True}) + + _delete( + f"/sessions/{session['user_id']}", + ) + return json.dumps({"success": True, "closed": True}) + except Exception as e: + return json.dumps({"success": True, "closed": True, "warning": str(e)}) + + +def camofox_get_images(task_id: Optional[str] = None) -> str: + """Get images on the current page via Camofox. + + Extracts image information from the accessibility tree snapshot, + since Camofox does not expose a dedicated /images endpoint. + """ + try: + session = _get_session(task_id) + if not session["tab_id"]: + return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) + + import re + + data = _get( + f"/tabs/{session['tab_id']}/snapshot", + params={"userId": session["user_id"]}, + ) + snapshot = data.get("snapshot", "") + + # Parse img elements from the accessibility tree. 
+ # Format: img "alt text" or img "alt text" [eN] + # URLs appear on /url: lines following img entries + images = [] + lines = snapshot.split("\n") + for i, line in enumerate(lines): + stripped = line.strip() + if stripped.startswith("- img ") or stripped.startswith("img "): + alt_match = re.search(r'img\s+"([^"]*)"', stripped) + alt = alt_match.group(1) if alt_match else "" + # Look for URL on the next line + src = "" + if i + 1 < len(lines): + url_match = re.search(r'/url:\s*(\S+)', lines[i + 1].strip()) + if url_match: + src = url_match.group(1) + if alt or src: + images.append({"src": src, "alt": alt}) + + return json.dumps({ + "success": True, + "images": images, + "count": len(images), + }) + except Exception as e: + return json.dumps({"success": False, "error": str(e)}) + + +def camofox_vision(question: str, annotate: bool = False, + task_id: Optional[str] = None) -> str: + """Take a screenshot and analyze it with vision AI via Camofox.""" + try: + session = _get_session(task_id) + if not session["tab_id"]: + return json.dumps({"success": False, "error": "No browser session. 
Call browser_navigate first."}) + + # Get screenshot as binary PNG + resp = _get_raw( + f"/tabs/{session['tab_id']}/screenshot", + params={"userId": session["user_id"]}, + ) + + # Save screenshot to cache + from hermes_constants import get_hermes_home + screenshots_dir = get_hermes_home() / "browser_screenshots" + screenshots_dir.mkdir(parents=True, exist_ok=True) + screenshot_path = str(screenshots_dir / f"browser_screenshot_{uuid.uuid4().hex[:8]}.png") + + with open(screenshot_path, "wb") as f: + f.write(resp.content) + + # Encode for vision LLM + img_b64 = base64.b64encode(resp.content).decode("utf-8") + + # Also get annotated snapshot if requested + annotation_context = "" + if annotate: + try: + snap_data = _get( + f"/tabs/{session['tab_id']}/snapshot", + params={"userId": session["user_id"]}, + ) + annotation_context = f"\n\nAccessibility tree (element refs for interaction):\n{snap_data.get('snapshot', '')[:3000]}" + except Exception: + pass + + # Send to vision LLM + from agent.auxiliary_client import call_llm + + vision_prompt = ( + f"Analyze this browser screenshot and answer: {question}" + f"{annotation_context}" + ) + + try: + from hermes_cli.config import load_config + _cfg = load_config() + _vision_timeout = int(_cfg.get("auxiliary", {}).get("vision", {}).get("timeout", 120)) + except Exception: + _vision_timeout = 120 + + analysis = call_llm( + messages=[{ + "role": "user", + "content": [ + {"type": "text", "text": vision_prompt}, + { + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{img_b64}", + }, + }, + ], + }], + task="vision", + timeout=_vision_timeout, + ) + + return json.dumps({ + "success": True, + "analysis": analysis, + "screenshot_path": screenshot_path, + }) + except Exception as e: + return json.dumps({"success": False, "error": str(e)}) + + +def camofox_console(clear: bool = False, task_id: Optional[str] = None) -> str: + """Get console output — limited support in Camofox. 
+ + Camofox does not expose browser console logs via its REST API. + Returns an empty result with a note. + """ + return json.dumps({ + "success": True, + "console_messages": [], + "js_errors": [], + "total_messages": 0, + "total_errors": 0, + "note": "Console log capture is not available with the Camofox backend. " + "Use browser_snapshot or browser_vision to inspect page state.", + }) + + +# --------------------------------------------------------------------------- +# Cleanup +# --------------------------------------------------------------------------- + +def cleanup_all_camofox_sessions() -> None: + """Close all active camofox sessions.""" + with _sessions_lock: + sessions = list(_sessions.items()) + for task_id, session in sessions: + try: + _delete(f"/sessions/{session['user_id']}") + except Exception: + pass + with _sessions_lock: + _sessions.clear() diff --git a/tools/browser_tool.py b/tools/browser_tool.py index ffb772c1d..33a1c8ef6 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -79,6 +79,14 @@ from tools.browser_providers.base import CloudBrowserProvider from tools.browser_providers.browserbase import BrowserbaseProvider from tools.browser_providers.browser_use import BrowserUseProvider +# Camofox local anti-detection browser backend (optional). +# When CAMOFOX_URL is set, all browser operations route through the +# camofox REST API instead of the agent-browser CLI. +try: + from tools.browser_camofox import is_camofox_mode as _is_camofox_mode +except ImportError: + _is_camofox_mode = lambda: False # noqa: E731 + logger = logging.getLogger(__name__) # Standard PATH entries for environments with minimal PATH (e.g. systemd services). 
@@ -1046,6 +1054,11 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: "blocked_by_policy": {"host": blocked["host"], "rule": blocked["rule"], "source": blocked["source"]}, }) + # Camofox backend — delegate after safety checks pass + if _is_camofox_mode(): + from tools.browser_camofox import camofox_navigate + return camofox_navigate(url, task_id) + effective_task_id = task_id or "default" # Get session info to check if this is a new session @@ -1135,6 +1148,10 @@ def browser_snapshot( Returns: JSON string with page snapshot """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_snapshot + return camofox_snapshot(full, task_id, user_task) + effective_task_id = task_id or "default" # Build command args based on full flag @@ -1180,6 +1197,10 @@ def browser_click(ref: str, task_id: Optional[str] = None) -> str: Returns: JSON string with click result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_click + return camofox_click(ref, task_id) + effective_task_id = task_id or "default" # Ensure ref starts with @ @@ -1212,6 +1233,10 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str: Returns: JSON string with type result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_type + return camofox_type(ref, text, task_id) + effective_task_id = task_id or "default" # Ensure ref starts with @ @@ -1245,6 +1270,10 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str: Returns: JSON string with scroll result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_scroll + return camofox_scroll(direction, task_id) + effective_task_id = task_id or "default" # Validate direction @@ -1278,6 +1307,10 @@ def browser_back(task_id: Optional[str] = None) -> str: Returns: JSON string with navigation result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_back + return camofox_back(task_id) + effective_task_id = task_id 
or "default" result = _run_browser_command(effective_task_id, "back", []) @@ -1305,6 +1338,10 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str: Returns: JSON string with key press result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_press + return camofox_press(key, task_id) + effective_task_id = task_id or "default" result = _run_browser_command(effective_task_id, "press", [key]) @@ -1330,6 +1367,10 @@ def browser_close(task_id: Optional[str] = None) -> str: Returns: JSON string with close result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_close + return camofox_close(task_id) + effective_task_id = task_id or "default" with _cleanup_lock: had_session = effective_task_id in _active_sessions @@ -1358,6 +1399,10 @@ def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str: Returns: JSON string with console messages and JS errors """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_console + return camofox_console(clear, task_id) + effective_task_id = task_id or "default" console_args = ["--clear"] if clear else [] @@ -1452,6 +1497,10 @@ def browser_get_images(task_id: Optional[str] = None) -> str: Returns: JSON string with list of images (src and alt) """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_get_images + return camofox_get_images(task_id) + effective_task_id = task_id or "default" # Use eval to run JavaScript that extracts images @@ -1516,6 +1565,10 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] Returns: JSON string with vision analysis results and screenshot_path """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_vision + return camofox_vision(question, annotate, task_id) + import base64 import uuid as uuid_mod from pathlib import Path @@ -1804,6 +1857,10 @@ def check_browser_requirements() -> bool: Returns: True if all requirements are met, False otherwise """ 
+ # Camofox backend — only needs the server URL, no agent-browser CLI + if _is_camofox_mode(): + return True + # The agent-browser CLI is always required try: _find_agent_browser() -- 2.43.0 From 7b4fe0528f95ea7c64f2c7ff064f0f8d0ddaa5b3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:19:44 -0700 Subject: [PATCH 018/385] fix(auth): use bearer auth for MiniMax Anthropic endpoints (#4028) MiniMax's /anthropic endpoints implement Anthropic's Messages API but require Authorization: Bearer instead of x-api-key. Without this fix, MiniMax users get 401 errors in gateway sessions. Adds _requires_bearer_auth() to detect MiniMax endpoints and route through auth_token in the Anthropic SDK. Check runs before OAuth token detection so MiniMax keys aren't misclassified as setup tokens. Co-authored-by: kshitijk4poor --- agent/anthropic_adapter.py | 27 ++++++++++++++++++++++++++- tests/test_anthropic_adapter.py | 13 +++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index a2a052d0a..a81736496 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -162,6 +162,21 @@ def _is_oauth_token(key: str) -> bool: return True +def _requires_bearer_auth(base_url: str | None) -> bool: + """Return True for Anthropic-compatible providers that require Bearer auth. + + Some third-party /anthropic endpoints implement Anthropic's Messages API but + require Authorization: Bearer instead of Anthropic's native x-api-key header. + MiniMax's global and China Anthropic-compatible endpoints follow this pattern. 
+ """ + if not base_url: + return False + normalized = base_url.rstrip("/").lower() + return normalized.startswith("https://api.minimax.io/anthropic") or normalized.startswith( + "https://api.minimaxi.com/anthropic" + ) + + def build_anthropic_client(api_key: str, base_url: str = None): """Create an Anthropic client, auto-detecting setup-tokens vs API keys. @@ -180,7 +195,17 @@ def build_anthropic_client(api_key: str, base_url: str = None): if base_url: kwargs["base_url"] = base_url - if _is_oauth_token(api_key): + if _requires_bearer_auth(base_url): + # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in + # Authorization: Bearer even for regular API keys. Route those endpoints + # through auth_token so the SDK sends Bearer auth instead of x-api-key. + # Check this before OAuth token shape detection because MiniMax secrets do + # not use Anthropic's sk-ant-api prefix and would otherwise be misread as + # Anthropic OAuth/setup tokens. + kwargs["auth_token"] = api_key + if _COMMON_BETAS: + kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)} + elif _is_oauth_token(api_key): # OAuth access token / setup-token → Bearer auth + Claude Code identity. # Anthropic routes OAuth requests based on user-agent and headers; # without Claude Code's fingerprint, requests get intermittent 500s. 
diff --git a/tests/test_anthropic_adapter.py b/tests/test_anthropic_adapter.py index 7e2e1c767..4b4669eab 100644 --- a/tests/test_anthropic_adapter.py +++ b/tests/test_anthropic_adapter.py @@ -81,6 +81,19 @@ class TestBuildAnthropicClient: kwargs = mock_sdk.Anthropic.call_args[1] assert kwargs["base_url"] == "https://custom.api.com" + def test_minimax_anthropic_endpoint_uses_bearer_auth_for_regular_api_keys(self): + with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk: + build_anthropic_client( + "minimax-secret-123", + base_url="https://api.minimax.io/anthropic", + ) + kwargs = mock_sdk.Anthropic.call_args[1] + assert kwargs["auth_token"] == "minimax-secret-123" + assert "api_key" not in kwargs + assert kwargs["default_headers"] == { + "anthropic-beta": "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14" + } + class TestReadClaudeCodeCredentials: def test_reads_valid_credentials(self, tmp_path, monkeypatch): -- 2.43.0 From 8210e7aba6a7ce37ed5c2a70c93f4c09e62487fb Mon Sep 17 00:00:00 2001 From: Bryan Cross Date: Mon, 30 Mar 2026 15:19:52 -0500 Subject: [PATCH 019/385] Optimize Dockerfile: combine RUN commands, clear caches, add .dockerignore - Combine apt-get update and install into single RUN with cache clearing - Remove APT lists after installation - Add --no-cache-dir to pip install - Add --prefer-offline --no-audit to npm install - Create .dockerignore to exclude unnecessary files from build context - Update docker-publish.yml workflow to tag images with release names - Ensure buildx caching is used (type=gha) --- .dockerignore | 74 +++++++++++++++++++++++++--- .github/workflows/docker-publish.yml | 20 +++++++- Dockerfile | 19 ++++--- 3 files changed, 98 insertions(+), 15 deletions(-) diff --git a/.dockerignore b/.dockerignore index a690443f7..356ab9dec 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,11 +3,73 @@ .gitignore .gitmodules -# Dependencies -node_modules - -# CI/CD +# GitHub .github -# Environment files -.env \ No 
newline at end of file +# Python +__pycache__ +*.py[cod] +*$py.class +*.so +.Python +.pytest_cache +.mypy_cache +.ruff_cache +*.egg-info +.eggs + +# Virtual environments +.venv +venv/ +ENV/ +env/ + +# IDE +.vscode +.idea +*.swp +*.swo +*~ + +# Environment files (secrets) +.env +.env.* +!.env.example + +# Logs and data +logs/ +data/ +tmp/ +temp_vision_images/ +testlogs +wandb/ + +# Test files +tests/ +*.test.py +*.spec.py + +# Documentation +*.md +!README.md + +# CI/CD +*.yml +!package.json + +# Development files +examples/ +result +.direnv/ + +# Release scripts +.release_notes.md +mini-swe-agent/ + +# Nix +.direnv/ +result + +# Skills hub +skills/.hub/ +ignored/ diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 11b98c3a9..1f83913b2 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -5,6 +5,8 @@ on: branches: [main] pull_request: branches: [main] + release: + types: [published] concurrency: group: docker-${{ github.ref }} @@ -41,13 +43,13 @@ jobs: nousresearch/hermes-agent:test --help - name: Log in to Docker Hub - if: github.event_name == 'push' && github.ref == 'refs/heads/main' + if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release' uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Push image + - name: Push image (main branch) if: github.event_name == 'push' && github.ref == 'refs/heads/main' uses: docker/build-push-action@v6 with: @@ -59,3 +61,17 @@ jobs: nousresearch/hermes-agent:${{ github.sha }} cache-from: type=gha cache-to: type=gha,mode=max + + - name: Push image (release) + if: github.event_name == 'release' + uses: docker/build-push-action@v6 + with: + context: . 
+ file: Dockerfile + push: true + tags: | + nousresearch/hermes-agent:latest + nousresearch/hermes-agent:${{ github.event.release.tag_name }} + nousresearch/hermes-agent:${{ github.sha }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/Dockerfile b/Dockerfile index 61b725d39..0ffe0fc2f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,20 +1,25 @@ FROM debian:13.4 -RUN apt-get update -RUN apt-get install -y nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev +# Install system dependencies in one layer, clear APT cache +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev && \ + rm -rf /var/lib/apt/lists/* COPY . /opt/hermes WORKDIR /opt/hermes -RUN pip install -e ".[all]" --break-system-packages -RUN npm install -RUN npx playwright install --with-deps chromium +# Install Python and Node dependencies in one layer, no cache +RUN pip install --no-cache-dir -e ".[all]" --break-system-packages && \ + npm install --prefer-offline --no-audit && \ + npx playwright install --with-deps chromium + WORKDIR /opt/hermes/scripts/whatsapp-bridge -RUN npm install +RUN npm install --prefer-offline --no-audit WORKDIR /opt/hermes RUN chmod +x /opt/hermes/docker/entrypoint.sh ENV HERMES_HOME=/opt/data VOLUME [ "/opt/data" ] -ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ] \ No newline at end of file +ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ] -- 2.43.0 From f93637b3a16bc5a638eabd007ad7f27eaebf71fe Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:20:06 -0700 Subject: [PATCH 020/385] feat: add /profile slash command to show active profile (#4027) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds /profile to COMMAND_REGISTRY (Info category) with handlers in both CLI and gateway. Shows the active profile name and home directory. 
Works on all platforms — CLI, Telegram, Discord, Slack, etc. Detects profile by checking if HERMES_HOME is under ~/.hermes/profiles/. Shows 'default' when running without a profile. --- cli.py | 24 ++++++++++++++++++++++++ gateway/run.py | 33 +++++++++++++++++++++++++++++++++ hermes_cli/commands.py | 1 + 3 files changed, 58 insertions(+) diff --git a/cli.py b/cli.py index 223c40563..e01a0e797 100644 --- a/cli.py +++ b/cli.py @@ -2837,6 +2837,28 @@ class HermesCLI: print(" Example: python cli.py --toolsets web,terminal") print() + def _handle_profile_command(self): + """Display active profile name and home directory.""" + from hermes_constants import get_hermes_home, display_hermes_home + + home = get_hermes_home() + display = display_hermes_home() + + profiles_parent = Path.home() / ".hermes" / "profiles" + try: + rel = home.relative_to(profiles_parent) + profile_name = str(rel).split("/")[0] + except ValueError: + profile_name = None + + print() + if profile_name: + print(f" Profile: {profile_name}") + else: + print(" Profile: default") + print(f" Home: {display}") + print() + def show_config(self): """Display current configuration with kawaii ASCII art.""" # Get terminal config from environment (which was set from cli-config.yaml) @@ -3679,6 +3701,8 @@ class HermesCLI: return False elif canonical == "help": self.show_help() + elif canonical == "profile": + self._handle_profile_command() elif canonical == "tools": self._handle_tools_command(cmd_original) elif canonical == "toolsets": diff --git a/gateway/run.py b/gateway/run.py index c85ed27b8..7638d8a51 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1872,6 +1872,9 @@ class GatewayRunner: if canonical == "commands": return await self._handle_commands_command(event) + if canonical == "profile": + return await self._handle_profile_command(event) + if canonical == "status": return await self._handle_status_command(event) @@ -3070,6 +3073,36 @@ class GatewayRunner: return f"{header}\n\n{session_info}" return 
header + async def _handle_profile_command(self, event: MessageEvent) -> str: + """Handle /profile — show active profile name and home directory.""" + from hermes_constants import get_hermes_home, display_hermes_home + from pathlib import Path + + home = get_hermes_home() + display = display_hermes_home() + + # Detect profile name from HERMES_HOME path + # Profile paths look like: ~/.hermes/profiles/ + profiles_parent = Path.home() / ".hermes" / "profiles" + try: + rel = home.relative_to(profiles_parent) + profile_name = str(rel).split("/")[0] + except ValueError: + profile_name = None + + if profile_name: + lines = [ + f"👤 **Profile:** `{profile_name}`", + f"📂 **Home:** `{display}`", + ] + else: + lines = [ + "👤 **Profile:** default", + f"📂 **Home:** `{display}`", + ] + + return "\n".join(lines) + async def _handle_status_command(self, event: MessageEvent) -> str: """Handle /status command.""" source = event.source diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 3b1eb37ff..d9de67175 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -71,6 +71,7 @@ COMMAND_REGISTRY: list[CommandDef] = [ aliases=("q",), args_hint=""), CommandDef("status", "Show session info", "Session", gateway_only=True), + CommandDef("profile", "Show active profile name and home directory", "Info"), CommandDef("sethome", "Set this chat as the home channel", "Session", gateway_only=True, aliases=("set-home",)), CommandDef("resume", "Resume a previously-named session", "Session", -- 2.43.0 From bd376fe97604f3fafd16052815d539d0f898ef0f Mon Sep 17 00:00:00 2001 From: Teknium Date: Mon, 30 Mar 2026 13:20:55 -0700 Subject: [PATCH 021/385] fix(docs): improve mobile sidebar navigation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sidebar had all categories expanded by default (collapsed: false), which on mobile created a 60+ item flat list when opening the sidebar. Reported by danny on Discord. 
Changes: - Set all top-level categories to collapsed: true (tap to expand) - Enable autoCollapseCategories: true (accordion — opening one section closes others, prevents the overwhelming flat list) - Enable hideable sidebar (swipe-to-dismiss on mobile) - Add mobile CSS: larger touch targets (0.75rem padding), bolder category headers, visible subcategory indentation with left border, wider sidebar (85vw / 360px max), darker backdrop overlay --- website/docusaurus.config.ts | 6 ++++++ website/sidebars.ts | 6 +++--- website/src/css/custom.css | 40 ++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/website/docusaurus.config.ts b/website/docusaurus.config.ts index 6d8b52bfe..bbd7d4ea9 100644 --- a/website/docusaurus.config.ts +++ b/website/docusaurus.config.ts @@ -65,6 +65,12 @@ const config: Config = { defaultMode: 'dark', respectPrefersColorScheme: true, }, + docs: { + sidebar: { + hideable: true, + autoCollapseCategories: true, + }, + }, navbar: { title: 'Hermes Agent', logo: { diff --git a/website/sidebars.ts b/website/sidebars.ts index 082b9ce8f..4c7bfc2e2 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -5,7 +5,7 @@ const sidebars: SidebarsConfig = { { type: 'category', label: 'Getting Started', - collapsed: false, + collapsed: true, items: [ 'getting-started/quickstart', 'getting-started/installation', @@ -17,7 +17,7 @@ const sidebars: SidebarsConfig = { { type: 'category', label: 'Guides & Tutorials', - collapsed: false, + collapsed: true, items: [ 'guides/tips', 'guides/daily-briefing-bot', @@ -32,7 +32,7 @@ const sidebars: SidebarsConfig = { { type: 'category', label: 'User Guide', - collapsed: false, + collapsed: true, items: [ 'user-guide/cli', 'user-guide/configuration', diff --git a/website/src/css/custom.css b/website/src/css/custom.css index 1df449986..7c7000391 100644 --- a/website/src/css/custom.css +++ b/website/src/css/custom.css @@ -199,6 +199,46 @@ pre.prism-code.language-ascii code { 
border: 1px solid rgba(255, 215, 0, 0.08); } +/* ─── Mobile sidebar improvements ─────────────────────────────────────────── */ + +/* Larger touch targets on mobile */ +@media (max-width: 996px) { + .menu__link { + padding: 0.6rem 0.75rem; + font-size: 0.95rem; + } + + .menu__list-item-collapsible > .menu__link { + font-weight: 600; + font-size: 1rem; + padding: 0.75rem 0.75rem; + border-bottom: 1px solid rgba(255, 215, 0, 0.06); + } + + /* Category caret — more visible */ + .menu__caret::before { + background-size: 1.5rem 1.5rem; + } + + /* Indent subcategories clearly */ + .menu__list .menu__list { + padding-left: 0.75rem; + border-left: 1px solid rgba(255, 215, 0, 0.06); + margin-left: 0.5rem; + } + + /* Sidebar overlay — slightly more opaque for readability */ + .navbar-sidebar__backdrop { + background-color: rgba(0, 0, 0, 0.6); + } + + /* Sidebar width on mobile — use more of the screen */ + .navbar-sidebar { + width: 85vw; + max-width: 360px; + } +} + /* Hero banner for docs landing if needed */ .hero--hermes { background: linear-gradient(135deg, #07070d 0%, #0f0f18 100%); -- 2.43.0 From 4b35836ba42a59a669699197573a969431b4df44 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:21:39 -0700 Subject: [PATCH 022/385] fix(auth): use bearer auth for MiniMax Anthropic endpoints (#4028) MiniMax's /anthropic endpoints implement Anthropic's Messages API but require Authorization: Bearer instead of x-api-key. Without this fix, MiniMax users get 401 errors in gateway sessions. Adds _requires_bearer_auth() to detect MiniMax endpoints and route through auth_token in the Anthropic SDK. Check runs before OAuth token detection so MiniMax keys aren't misclassified as setup tokens. 
Co-authored-by: kshitijk4poor -- 2.43.0 From 72104eb06f267286ec207feed65dc00656ce4e9f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:24:48 -0700 Subject: [PATCH 023/385] fix(gateway): honor default for invalid bool-like config values (#4029) Co-authored-by: aydnOktay --- gateway/config.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/gateway/config.py b/gateway/config.py index c8ce89a7d..8c7843780 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -27,9 +27,16 @@ def _coerce_bool(value: Any, default: bool = True) -> bool: return default if isinstance(value, bool): return value + if isinstance(value, int): + return value != 0 if isinstance(value, str): - return value.strip().lower() in ("true", "1", "yes", "on") - return bool(value) + lowered = value.strip().lower() + if lowered in ("true", "1", "yes", "on"): + return True + if lowered in ("false", "0", "no", "off"): + return False + return default + return default def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str: -- 2.43.0 From eba8d52d541282c18f853ba9f56a615276097096 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:25:11 -0700 Subject: [PATCH 024/385] fix: show correct shell config path for macOS/zsh in install script (#4025) - print_success() hardcoded 'source ~/.bashrc' regardless of user's shell - On macOS (default zsh), ~/.bashrc doesn't exist, leaving users unable to find the hermes command after install - Now detects $SHELL and shows the correct file (zshrc/bashrc) - Also captures .[all] install failure output instead of silencing with 2>/dev/null, so users can diagnose why full extras failed --- scripts/install.sh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/install.sh b/scripts/install.sh index d46771e6a..c04dc4a9d 100755 --- a/scripts/install.sh +++ 
b/scripts/install.sh @@ -699,14 +699,19 @@ install_deps() { # Install the main package in editable mode with all extras. # Try [all] first, fall back to base install if extras have issues. - if ! $UV_CMD pip install -e ".[all]" 2>/dev/null; then + ALL_INSTALL_LOG=$(mktemp) + if ! $UV_CMD pip install -e ".[all]" 2>"$ALL_INSTALL_LOG"; then log_warn "Full install (.[all]) failed, trying base install..." + log_info "Reason: $(tail -5 "$ALL_INSTALL_LOG" | head -3)" + rm -f "$ALL_INSTALL_LOG" if ! $UV_CMD pip install -e "."; then log_error "Package installation failed." log_info "Check that build tools are installed: sudo apt install build-essential python3-dev" log_info "Then re-run: cd $INSTALL_DIR && uv pip install -e '.[all]'" exit 1 fi + else + rm -f "$ALL_INSTALL_LOG" fi log_success "Main package installed" @@ -1070,7 +1075,14 @@ print_success() { echo "" echo -e "${YELLOW}⚡ Reload your shell to use 'hermes' command:${NC}" echo "" - echo " source ~/.bashrc # or ~/.zshrc" + LOGIN_SHELL="$(basename "${SHELL:-/bin/bash}")" + if [ "$LOGIN_SHELL" = "zsh" ]; then + echo " source ~/.zshrc" + elif [ "$LOGIN_SHELL" = "bash" ]; then + echo " source ~/.bashrc" + else + echo " source ~/.bashrc # or ~/.zshrc" + fi echo "" # Show Node.js warning if auto-install failed -- 2.43.0 From 48942c89b526274d560d6e9452f2bb675be391c2 Mon Sep 17 00:00:00 2001 From: Bryan Cross Date: Mon, 30 Mar 2026 15:27:11 -0500 Subject: [PATCH 025/385] Further npm optimizations --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0ffe0fc2f..7efb14a6f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,10 +12,10 @@ WORKDIR /opt/hermes # Install Python and Node dependencies in one layer, no cache RUN pip install --no-cache-dir -e ".[all]" --break-system-packages && \ npm install --prefer-offline --no-audit && \ - npx playwright install --with-deps chromium - -WORKDIR /opt/hermes/scripts/whatsapp-bridge -RUN npm install --prefer-offline 
--no-audit + npx playwright install --with-deps chromium && \ + cd /opt/hermes/scripts/whatsapp-bridge && \ + npm install --prefer-offline --no-audit && \ + npm cache clean --force WORKDIR /opt/hermes RUN chmod +x /opt/hermes/docker/entrypoint.sh -- 2.43.0 From 5de312c9e39ad0ee88a2ff41f040b16d84d66c42 Mon Sep 17 00:00:00 2001 From: Bryan Cross Date: Mon, 30 Mar 2026 15:29:06 -0500 Subject: [PATCH 026/385] Simplify dockerignore --- .dockerignore | 72 +++++---------------------------------------------- 1 file changed, 6 insertions(+), 66 deletions(-) diff --git a/.dockerignore b/.dockerignore index 356ab9dec..ecf199fc9 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,73 +3,13 @@ .gitignore .gitmodules -# GitHub -.github - -# Python -__pycache__ -*.py[cod] -*$py.class -*.so -.Python -.pytest_cache -.mypy_cache -.ruff_cache -*.egg-info -.eggs - -# Virtual environments -.venv -venv/ -ENV/ -env/ - -# IDE -.vscode -.idea -*.swp -*.swo -*~ - -# Environment files (secrets) -.env -.env.* -!.env.example - -# Logs and data -logs/ -data/ -tmp/ -temp_vision_images/ -testlogs -wandb/ - -# Test files -tests/ -*.test.py -*.spec.py - -# Documentation -*.md -!README.md +# Dependencies +node_modules # CI/CD -*.yml -!package.json +.github -# Development files -examples/ -result -.direnv/ +# Environment files +.env -# Release scripts -.release_notes.md -mini-swe-agent/ - -# Nix -.direnv/ -result - -# Skills hub -skills/.hub/ -ignored/ +*.md -- 2.43.0 From 0d1003559d85372aed77116a68362e73e93b5b37 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:37:25 -0700 Subject: [PATCH 027/385] refactor: simplify web backend priority detection (#4036) * fix(gateway): honor default for invalid bool-like config values * refactor: simplify web backend priority detection Replace cascading boolean conditions with a priority-ordered loop. 
Same behavior (verified against all 16 env var combinations), half the lines, trivially extensible for new backends. --------- Co-authored-by: aydnOktay --- tools/web_tools.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tools/web_tools.py b/tools/web_tools.py index c8e7fb0f3..c61bc1eb7 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -77,20 +77,18 @@ def _get_backend() -> str: if configured in ("parallel", "firecrawl", "tavily", "exa"): return configured - # Fallback for manual / legacy config — use whichever key is present. - has_firecrawl = _has_env("FIRECRAWL_API_KEY") or _has_env("FIRECRAWL_API_URL") - has_parallel = _has_env("PARALLEL_API_KEY") - has_tavily = _has_env("TAVILY_API_KEY") - has_exa = _has_env("EXA_API_KEY") - if has_exa and not has_firecrawl and not has_parallel and not has_tavily: - return "exa" - if has_tavily and not has_firecrawl and not has_parallel: - return "tavily" - if has_parallel and not has_firecrawl: - return "parallel" + # Fallback for manual / legacy config — pick highest-priority backend + # that has a key configured. Order: firecrawl > parallel > tavily > exa. 
+ for backend, keys in [ + ("firecrawl", ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL")), + ("parallel", ("PARALLEL_API_KEY",)), + ("tavily", ("TAVILY_API_KEY",)), + ("exa", ("EXA_API_KEY",)), + ]: + if any(_has_env(k) for k in keys): + return backend - # Default to firecrawl (backward compat, or when both are set) - return "firecrawl" + return "firecrawl" # default (backward compat) # ─── Firecrawl Client ──────────────────────────────────────────────────────── -- 2.43.0 From 3a1e489dd6d0bf99f54ef513204065318fd8c985 Mon Sep 17 00:00:00 2001 From: Bryan Cross Date: Mon, 30 Mar 2026 15:57:22 -0500 Subject: [PATCH 028/385] Add build-essential to Dockerfile dependencies --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 7efb14a6f..3b2862a81 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM debian:13.4 # Install system dependencies in one layer, clear APT cache RUN apt-get update && \ apt-get install -y --no-install-recommends \ - nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev && \ + build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev && \ rm -rf /var/lib/apt/lists/* COPY . /opt/hermes -- 2.43.0 From de368cac54eba1be7e58ff260f332d500ccbda76 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 14:11:39 -0700 Subject: [PATCH 029/385] fix(tools): show browser and TTS in reconfigure menu (#4041) * fix(gateway): honor default for invalid bool-like config values * refactor: simplify web backend priority detection Replace cascading boolean conditions with a priority-ordered loop. Same behavior (verified against all 16 env var combinations), half the lines, trivially extensible for new backends. * fix(tools): show browser and TTS in reconfigure menu _toolset_has_keys() returned False for toolsets with no-key providers (Local Browser, Edge TTS) because it only checked providers with env_vars. 
Users couldn't find these tools in the reconfigure list and had no obvious way to switch browser/TTS backends. Now treats providers with empty env_vars as always-configured, so toolsets with free/local options always appear in the reconfigure menu. --------- Co-authored-by: aydnOktay --- hermes_cli/tools_config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 63e26d362..337b67fe8 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -597,7 +597,9 @@ def _toolset_has_keys(ts_key: str) -> bool: if cat: for provider in cat.get("providers", []): env_vars = provider.get("env_vars", []) - if env_vars and all(get_env_value(e["key"]) for e in env_vars): + if not env_vars: + return True # No-key provider (e.g. Local Browser, Edge TTS) + if all(get_env_value(e["key"]) for e in env_vars): return True return False -- 2.43.0 From 0287597d02c74f26084f36ff610044b7a930dd85 Mon Sep 17 00:00:00 2001 From: Bryan Cross Date: Mon, 30 Mar 2026 17:38:07 -0500 Subject: [PATCH 030/385] Optimize Playwright install --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 3b2862a81..a9624530c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,7 @@ WORKDIR /opt/hermes # Install Python and Node dependencies in one layer, no cache RUN pip install --no-cache-dir -e ".[all]" --break-system-packages && \ npm install --prefer-offline --no-audit && \ - npx playwright install --with-deps chromium && \ + npx playwright install --with-deps chromium --only-shell && \ cd /opt/hermes/scripts/whatsapp-bridge && \ npm install --prefer-offline --no-audit && \ npm cache clean --force -- 2.43.0 From ab62614a89c568dfb10f78368570b36308a0b758 Mon Sep 17 00:00:00 2001 From: SHL0MS Date: Mon, 30 Mar 2026 18:48:22 -0400 Subject: [PATCH 031/385] ascii-video: add text readability techniques and external layout oracle pattern MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - composition.md: add text backdrop (gaussian dark mask behind glyphs) and external layout oracle pattern (browser-based text layout → JSON → Python renderer pipeline for obstacle-aware text reflow) - shaders.md: add reverse vignette shader (center-darkening for text readability) - troubleshooting.md: add diagnostic entries for text-over-busy-background readability and kaleidoscope-destroys-text pitfall --- .../ascii-video/references/composition.md | 146 ++++++++++++++++++ .../ascii-video/references/shaders.md | 33 ++++ .../ascii-video/references/troubleshooting.md | 2 + 3 files changed, 181 insertions(+) diff --git a/skills/creative/ascii-video/references/composition.md b/skills/creative/ascii-video/references/composition.md index 0028b93fa..f7e6eff89 100644 --- a/skills/creative/ascii-video/references/composition.md +++ b/skills/creative/ascii-video/references/composition.md @@ -744,3 +744,149 @@ class PixelBlendStack: result = blend_canvas(result, canvas, mode, opacity) return result ``` + +## Text Backdrop (Readability Mask) + +When placing readable text over busy multi-grid ASCII backgrounds, the text will blend into the background and become illegible. **Always apply a dark backdrop behind text regions.** + +The technique: compute the bounding box of all text glyphs, create a gaussian-blurred dark mask covering that area with padding, and multiply the background by `(1 - mask * darkness)` before rendering text on top. + +```python +from scipy.ndimage import gaussian_filter + +def apply_text_backdrop(canvas, glyphs, padding=80, darkness=0.75): + """Darken the background behind text for readability. + + Call AFTER rendering background, BEFORE rendering text. 
+ + Args: + canvas: (VH, VW, 3) uint8 background + glyphs: list of {"x": float, "y": float, ...} glyph positions + padding: pixel padding around text bounding box + darkness: 0.0 = no darkening, 1.0 = fully black + Returns: + darkened canvas (uint8) + """ + if not glyphs: + return canvas + xs = [g['x'] for g in glyphs] + ys = [g['y'] for g in glyphs] + x0 = max(0, int(min(xs)) - padding) + y0 = max(0, int(min(ys)) - padding) + x1 = min(VW, int(max(xs)) + padding + 50) # extra for char width + y1 = min(VH, int(max(ys)) + padding + 60) # extra for char height + + # Soft dark mask with gaussian blur for feathered edges + mask = np.zeros((VH, VW), dtype=np.float32) + mask[y0:y1, x0:x1] = 1.0 + mask = gaussian_filter(mask, sigma=padding * 0.6) + + factor = 1.0 - mask * darkness + return (canvas.astype(np.float32) * factor[:, :, np.newaxis]).astype(np.uint8) +``` + +### Usage in render pipeline + +Insert between background rendering and text rendering: + +```python +# 1. Render background (multi-grid ASCII effects) +bg = render_background(cfg, t) + +# 2. Darken behind text region +bg = apply_text_backdrop(bg, frame_glyphs, padding=80, darkness=0.75) + +# 3. Render text on top (now readable against dark backdrop) +bg = text_renderer.render(bg, frame_glyphs, color=(255, 255, 255)) +``` + +Combine with **reverse vignette** (see shaders.md) for scenes where text is always centered — the reverse vignette provides a persistent center-dark zone, while the backdrop handles per-frame glyph positions. + +## External Layout Oracle Pattern + +For text-heavy videos where text needs to dynamically reflow around obstacles (shapes, icons, other text), use an external layout engine to pre-compute glyph positions and feed them into the Python renderer via JSON. 
+ +### Architecture + +``` +Layout Engine (browser/Node.js) → layouts.json → Python ASCII Renderer + ↑ ↑ + Computes per-frame Reads glyph positions, + glyph (x,y) positions renders as ASCII chars + with obstacle-aware reflow with full effect pipeline +``` + +### JSON interchange format + +```json +{ + "meta": { + "canvas_width": 1080, "canvas_height": 1080, + "fps": 24, "total_frames": 1248, + "fonts": { + "body": {"charW": 12.04, "charH": 24, "fontSize": 20}, + "hero": {"charW": 24.08, "charH": 48, "fontSize": 40} + } + }, + "scenes": [ + { + "id": "scene_name", + "start_frame": 0, "end_frame": 96, + "frames": { + "0": { + "glyphs": [ + {"char": "H", "x": 287.1, "y": 400.0, "alpha": 1.0}, + {"char": "e", "x": 311.2, "y": 400.0, "alpha": 1.0} + ], + "obstacles": [ + {"type": "circle", "cx": 540, "cy": 540, "r": 80}, + {"type": "rect", "x": 300, "y": 500, "w": 120, "h": 80} + ] + } + } + } + ] +} +``` + +### When to use + +- Text that dynamically reflows around moving objects +- Per-glyph animation (reveal, scatter, physics) +- Variable typography that needs precise measurement +- Any case where Python's Pillow text layout is insufficient + +### When NOT to use + +- Static centered text (just use PIL `draw.text()` directly) +- Text that only fades in/out without spatial animation +- Simple typewriter effects (handle in Python with a character counter) + +### Running the oracle + +Use Playwright to run the layout engine in a headless browser: + +```javascript +// extract.mjs +import { chromium } from 'playwright'; +const browser = await chromium.launch({ headless: true }); +const page = await browser.newPage(); +await page.goto(`file://${oraclePath}`); +await page.waitForFunction(() => window.__ORACLE_DONE__ === true, null, { timeout: 60000 }); +const result = await page.evaluate(() => window.__ORACLE_RESULT__); +writeFileSync('layouts.json', JSON.stringify(result)); +await browser.close(); +``` + +### Consuming in Python + +```python +# In the renderer, map pixel 
positions to the canvas: +for glyph in frame_data['glyphs']: + char, px, py = glyph['char'], glyph['x'], glyph['y'] + alpha = glyph.get('alpha', 1.0) + # Render using PIL draw.text() at exact pixel position + draw.text((px, py), char, fill=(int(255*alpha),)*3, font=font) +``` + +Obstacles from the JSON can also be rendered as glowing ASCII shapes (circles, rectangles) to visualize the reflow zones. diff --git a/skills/creative/ascii-video/references/shaders.md b/skills/creative/ascii-video/references/shaders.md index fce436a4d..a4cf7a2e5 100644 --- a/skills/creative/ascii-video/references/shaders.md +++ b/skills/creative/ascii-video/references/shaders.md @@ -834,6 +834,39 @@ def sh_vignette(c, s=0.22): return np.clip(c * _vig_cache[k][:,:,None], 0, 255).astype(np.uint8) ``` +#### Reverse Vignette + +Inverted vignette: darkens the **center** and leaves edges bright. Useful when text is centered over busy backgrounds — creates a natural dark zone for readability without a hard-edged box. + +Combine with `apply_text_backdrop()` (see composition.md) for per-frame glyph-aware darkening. + +```python +_rvignette_cache = {} + +def sh_reverse_vignette(c, strength=0.5): + """Center darkening, edge brightening. 
Cached.""" + k = ('rv', c.shape[0], c.shape[1], round(strength, 2)) + if k not in _rvignette_cache: + h, w = c.shape[:2] + Y = np.linspace(-1, 1, h)[:, None] + X = np.linspace(-1, 1, w)[None, :] + d = np.sqrt(X**2 + Y**2) + # Invert: bright at edges, dark at center + mask = np.clip(1.0 - (1.0 - d * 0.7) * strength, 0.2, 1.0) + _rvignette_cache[k] = mask[:, :, np.newaxis].astype(np.float32) + return np.clip(c.astype(np.float32) * _rvignette_cache[k], 0, 255).astype(np.uint8) +``` + +| Param | Default | Effect | +|-------|---------|--------| +| `strength` | 0.5 | 0 = no effect, 1.0 = center nearly black | + +Add to ShaderChain dispatch: +```python +elif name == "reverse_vignette": + return sh_reverse_vignette(canvas, kwargs.get("strength", 0.5)) +``` + #### Contrast ```python def sh_contrast(c, factor=1.3): diff --git a/skills/creative/ascii-video/references/troubleshooting.md b/skills/creative/ascii-video/references/troubleshooting.md index 8c4bb0229..6b38382cd 100644 --- a/skills/creative/ascii-video/references/troubleshooting.md +++ b/skills/creative/ascii-video/references/troubleshooting.md @@ -14,6 +14,8 @@ | Random dark holes in output | Font missing Unicode glyphs | Validate palettes at init | | Audio-visual desync | Frame timing accumulation | Use integer frame counter, compute t fresh each frame | | Single-color flat output | Hue field shape mismatch | Ensure h,s,v arrays all (rows,cols) before hsv2rgb | +| Text unreadable over busy bg | No contrast between text and background | Use `apply_text_backdrop()` (composition.md) + `reverse_vignette` shader (shaders.md) | +| Text garbled/mirrored | Kaleidoscope or mirror shader applied to text scene | **Never apply kaleidoscope, mirror_h/v/quad/diag to scenes with readable text** — radial folding destroys legibility. Apply these only to background layers or text-free scenes | Common bugs, gotchas, and platform-specific issues encountered during ASCII video development. 
-- 2.43.0 From 3d47af01c3b7e348fe5fb7340412fd081b7eab19 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 16:41:19 -0700 Subject: [PATCH 032/385] fix(honcho): write config to instance-local path for profile isolation (#4037) Multiple agents/profiles running 'hermes honcho setup' all wrote to the shared global ~/.honcho/config.json, overwriting each other's configuration. Root cause: _write_config() defaulted to resolve_config_path() which returns the global path when no instance-local file exists yet (i.e. on first setup). Fix: _write_config() now defaults to _local_config_path() which always returns $HERMES_HOME/honcho.json. Each profile gets its own config file. Reading still falls back to global for cross-app interop and seeding. Also updates cmd_setup and cmd_status messaging to show the actual write path. Includes 10 new tests verifying profile isolation, global fallback reads, and multi-profile independence. --- honcho_integration/cli.py | 30 ++- .../test_config_isolation.py | 190 ++++++++++++++++++ 2 files changed, 212 insertions(+), 8 deletions(-) create mode 100644 tests/honcho_integration/test_config_isolation.py diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index ae09c3713..f6cbcedf6 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -10,16 +10,27 @@ import os import sys from pathlib import Path +from hermes_constants import get_hermes_home from honcho_integration.client import resolve_config_path, GLOBAL_CONFIG_PATH HOST = "hermes" def _config_path() -> Path: - """Return the active Honcho config path (instance-local or global).""" + """Return the active Honcho config path for reading (instance-local or global).""" return resolve_config_path() +def _local_config_path() -> Path: + """Return the instance-local Honcho config path for writing. + + Always returns $HERMES_HOME/honcho.json so each profile/instance gets + its own config file. 
The global ~/.honcho/config.json is only used as + a read fallback (via resolve_config_path) for cross-app interop. + """ + return get_hermes_home() / "honcho.json" + + def _read_config() -> dict: path = _config_path() if path.exists(): @@ -31,7 +42,7 @@ def _read_config() -> dict: def _write_config(cfg: dict, path: Path | None = None) -> None: - path = path or _config_path() + path = path or _local_config_path() path.parent.mkdir(parents=True, exist_ok=True) path.write_text( json.dumps(cfg, indent=2, ensure_ascii=False) + "\n", @@ -95,13 +106,13 @@ def cmd_setup(args) -> None: """Interactive Honcho setup wizard.""" cfg = _read_config() - active_path = _config_path() + write_path = _local_config_path() + read_path = _config_path() print("\nHoncho memory setup\n" + "─" * 40) print(" Honcho gives Hermes persistent cross-session memory.") - if active_path != GLOBAL_CONFIG_PATH: - print(f" Instance config: {active_path}") - else: - print(" Config is shared with other hosts at ~/.honcho/config.json") + print(f" Config: {write_path}") + if read_path != write_path and read_path.exists(): + print(f" (seeding from existing config at {read_path})") print() if not _ensure_sdk_installed(): @@ -189,7 +200,7 @@ def cmd_setup(args) -> None: hermes_host.setdefault("saveMessages", True) _write_config(cfg) - print(f"\n Config written to {active_path}") + print(f"\n Config written to {write_path}") # Test connection print(" Testing connection... 
", end="", flush=True) @@ -237,6 +248,7 @@ def cmd_status(args) -> None: cfg = _read_config() active_path = _config_path() + write_path = _local_config_path() if not cfg: print(f" No Honcho config found at {active_path}") @@ -259,6 +271,8 @@ def cmd_status(args) -> None: print(f" Workspace: {hcfg.workspace_id}") print(f" Host: {hcfg.host}") print(f" Config path: {active_path}") + if write_path != active_path: + print(f" Write path: {write_path} (instance-local)") print(f" AI peer: {hcfg.ai_peer}") print(f" User peer: {hcfg.peer_name or 'not set'}") print(f" Session key: {hcfg.resolve_session_name()}") diff --git a/tests/honcho_integration/test_config_isolation.py b/tests/honcho_integration/test_config_isolation.py new file mode 100644 index 000000000..4d9898e68 --- /dev/null +++ b/tests/honcho_integration/test_config_isolation.py @@ -0,0 +1,190 @@ +"""Tests for Honcho config profile isolation. + +Verifies that each Hermes profile writes to its own instance-local +honcho.json ($HERMES_HOME/honcho.json) rather than the shared global +~/.honcho/config.json. +""" + +import json +import os +from pathlib import Path +from unittest.mock import patch + +import pytest + +from honcho_integration.cli import ( + _config_path, + _local_config_path, + _read_config, + _write_config, +) + + +@pytest.fixture +def isolated_home(tmp_path, monkeypatch): + """Create an isolated HERMES_HOME + real home for testing.""" + hermes_home = tmp_path / "profile_a" + hermes_home.mkdir() + global_dir = tmp_path / "home" / ".honcho" + global_dir.mkdir(parents=True) + global_config = global_dir / "config.json" + + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path / "home")) + # GLOBAL_CONFIG_PATH is a module-level constant cached at import time, + # so we must patch it in both the defining module and the importing module. 
+ import honcho_integration.client as _client_mod + import honcho_integration.cli as _cli_mod + monkeypatch.setattr(_client_mod, "GLOBAL_CONFIG_PATH", global_config) + monkeypatch.setattr(_cli_mod, "GLOBAL_CONFIG_PATH", global_config) + + return { + "hermes_home": hermes_home, + "global_config": global_config, + "local_config": hermes_home / "honcho.json", + } + + +class TestLocalConfigPath: + """_local_config_path always returns $HERMES_HOME/honcho.json.""" + + def test_returns_hermes_home_path(self, isolated_home): + assert _local_config_path() == isolated_home["local_config"] + + def test_differs_from_global(self, isolated_home): + from honcho_integration.client import GLOBAL_CONFIG_PATH + assert _local_config_path() != GLOBAL_CONFIG_PATH + + +class TestWriteConfigIsolation: + """_write_config defaults to the instance-local path.""" + + def test_write_creates_local_file(self, isolated_home): + cfg = {"apiKey": "test-key", "hosts": {"hermes": {"enabled": True}}} + _write_config(cfg) + + assert isolated_home["local_config"].exists() + written = json.loads(isolated_home["local_config"].read_text()) + assert written["apiKey"] == "test-key" + + def test_write_does_not_touch_global(self, isolated_home): + # Pre-populate global config + isolated_home["global_config"].write_text( + json.dumps({"apiKey": "global-key"}) + ) + + cfg = {"apiKey": "profile-key"} + _write_config(cfg) + + # Global should be untouched + global_data = json.loads(isolated_home["global_config"].read_text()) + assert global_data["apiKey"] == "global-key" + + # Local should have the new value + local_data = json.loads(isolated_home["local_config"].read_text()) + assert local_data["apiKey"] == "profile-key" + + def test_explicit_path_override_still_works(self, isolated_home): + custom = isolated_home["hermes_home"] / "custom.json" + _write_config({"custom": True}, path=custom) + assert custom.exists() + assert not isolated_home["local_config"].exists() + + +class TestReadConfigFallback: + 
"""_read_config falls back to global when no local file exists.""" + + def test_reads_local_when_exists(self, isolated_home): + isolated_home["local_config"].write_text( + json.dumps({"source": "local"}) + ) + cfg = _read_config() + assert cfg["source"] == "local" + + def test_falls_back_to_global(self, isolated_home): + isolated_home["global_config"].write_text( + json.dumps({"source": "global"}) + ) + # No local file exists + assert not isolated_home["local_config"].exists() + cfg = _read_config() + assert cfg["source"] == "global" + + def test_local_takes_priority_over_global(self, isolated_home): + isolated_home["local_config"].write_text( + json.dumps({"source": "local"}) + ) + isolated_home["global_config"].write_text( + json.dumps({"source": "global"}) + ) + cfg = _read_config() + assert cfg["source"] == "local" + + +class TestMultiProfileIsolation: + """Two profiles writing config don't interfere with each other.""" + + def test_two_profiles_get_separate_configs(self, tmp_path, monkeypatch): + home = tmp_path / "home" + home.mkdir() + monkeypatch.setattr(Path, "home", staticmethod(lambda: home)) + + profile_a = tmp_path / "profile_a" + profile_b = tmp_path / "profile_b" + profile_a.mkdir() + profile_b.mkdir() + + # Profile A writes its config + monkeypatch.setenv("HERMES_HOME", str(profile_a)) + _write_config({"apiKey": "key-a", "hosts": {"hermes": {"peerName": "alice"}}}) + + # Profile B writes its config + monkeypatch.setenv("HERMES_HOME", str(profile_b)) + _write_config({"apiKey": "key-b", "hosts": {"hermes": {"peerName": "bob"}}}) + + # Verify isolation + a_data = json.loads((profile_a / "honcho.json").read_text()) + b_data = json.loads((profile_b / "honcho.json").read_text()) + + assert a_data["hosts"]["hermes"]["peerName"] == "alice" + assert b_data["hosts"]["hermes"]["peerName"] == "bob" + + def test_first_setup_seeds_from_global(self, tmp_path, monkeypatch): + """First setup reads global config, writes to local.""" + home = tmp_path / "home" + 
global_dir = home / ".honcho" + global_dir.mkdir(parents=True) + monkeypatch.setattr(Path, "home", staticmethod(lambda: home)) + import honcho_integration.client as _client_mod + import honcho_integration.cli as _cli_mod + global_cfg_path = global_dir / "config.json" + monkeypatch.setattr(_client_mod, "GLOBAL_CONFIG_PATH", global_cfg_path) + monkeypatch.setattr(_cli_mod, "GLOBAL_CONFIG_PATH", global_cfg_path) + + # Existing global config + global_config = global_dir / "config.json" + global_config.write_text(json.dumps({ + "apiKey": "shared-key", + "hosts": {"hermes": {"workspace": "shared-ws"}}, + })) + + profile = tmp_path / "new_profile" + profile.mkdir() + monkeypatch.setenv("HERMES_HOME", str(profile)) + + # Read seeds from global + cfg = _read_config() + assert cfg["apiKey"] == "shared-key" + + # Modify and write goes to local + cfg["hosts"]["hermes"]["peerName"] = "new-user" + _write_config(cfg) + + local_config = profile / "honcho.json" + assert local_config.exists() + local_data = json.loads(local_config.read_text()) + assert local_data["hosts"]["hermes"]["peerName"] == "new-user" + + # Global unchanged + global_data = json.loads(global_config.read_text()) + assert "peerName" not in global_data["hosts"]["hermes"] -- 2.43.0 From f007284d051900a424745dc4d4fb4bdcd78eff04 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 16:48:00 -0700 Subject: [PATCH 033/385] fix: rate-limit pairing rejection messages to prevent spam (#4081) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: rate-limit pairing rejection messages to prevent spam When generate_code() returns None (rate limited or max pending), the "Too many pairing requests" message was sent on every subsequent DM with no cooldown. A user sending 30 messages would get 30 rejection replies — reported as potential hack on WhatsApp. 
Now check _is_rate_limited() before any pairing response, and record rate limit after sending a rejection. Subsequent messages from the same user are silently ignored until the rate limit window expires. * test: add coverage for pairing response rate limiting Follow-up to cherry-picked PR #4042 — adds tests verifying: - Rate-limited users get silently ignored (no response sent) - Rejection messages record rate limit for subsequent suppression --------- Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> --- gateway/run.py | 7 +++ .../gateway/test_unauthorized_dm_behavior.py | 51 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index 7638d8a51..735832744 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1702,6 +1702,11 @@ class GatewayRunner: # In DMs: offer pairing code. In groups: silently ignore. if source.chat_type == "dm" and self._get_unauthorized_dm_behavior(source.platform) == "pair": platform_name = source.platform.value if source.platform else "unknown" + # Rate-limit ALL pairing responses (code or rejection) to + # prevent spamming the user with repeated messages when + # multiple DMs arrive in quick succession. + if self.pairing_store._is_rate_limited(platform_name, source.user_id): + return None code = self.pairing_store.generate_code( platform_name, source.user_id, source.user_name or "" ) @@ -1723,6 +1728,8 @@ class GatewayRunner: "Too many pairing requests right now~ " "Please try again later!" ) + # Record rate limit so subsequent messages are silently ignored + self.pairing_store._record_rate_limit(platform_name, source.user_id) return None # PRIORITY handling when an agent is already running for this session. 
diff --git a/tests/gateway/test_unauthorized_dm_behavior.py b/tests/gateway/test_unauthorized_dm_behavior.py index 02aae301c..25b51dc2f 100644 --- a/tests/gateway/test_unauthorized_dm_behavior.py +++ b/tests/gateway/test_unauthorized_dm_behavior.py @@ -60,6 +60,7 @@ def _make_runner(platform: Platform, config: GatewayConfig): runner.adapters = {platform: adapter} runner.pairing_store = MagicMock() runner.pairing_store.is_approved.return_value = False + runner.pairing_store._is_rate_limited.return_value = False return runner, adapter @@ -142,6 +143,56 @@ async def test_unauthorized_whatsapp_dm_can_be_ignored(monkeypatch): adapter.send.assert_not_awaited() +@pytest.mark.asyncio +async def test_rate_limited_user_gets_no_response(monkeypatch): + """When a user is already rate-limited, pairing messages are silently ignored.""" + _clear_auth_env(monkeypatch) + config = GatewayConfig( + platforms={Platform.WHATSAPP: PlatformConfig(enabled=True)}, + ) + runner, adapter = _make_runner(Platform.WHATSAPP, config) + runner.pairing_store._is_rate_limited.return_value = True + + result = await runner._handle_message( + _make_event( + Platform.WHATSAPP, + "15551234567@s.whatsapp.net", + "15551234567@s.whatsapp.net", + ) + ) + + assert result is None + runner.pairing_store.generate_code.assert_not_called() + adapter.send.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_rejection_message_records_rate_limit(monkeypatch): + """After sending a 'too many requests' rejection, rate limit is recorded + so subsequent messages are silently ignored.""" + _clear_auth_env(monkeypatch) + config = GatewayConfig( + platforms={Platform.WHATSAPP: PlatformConfig(enabled=True)}, + ) + runner, adapter = _make_runner(Platform.WHATSAPP, config) + runner.pairing_store.generate_code.return_value = None # triggers rejection + + result = await runner._handle_message( + _make_event( + Platform.WHATSAPP, + "15551234567@s.whatsapp.net", + "15551234567@s.whatsapp.net", + ) + ) + + assert result is 
None + adapter.send.assert_awaited_once() + assert "Too many" in adapter.send.await_args.args[1] + runner.pairing_store._record_rate_limit.assert_called_once_with( + "whatsapp", "15551234567@s.whatsapp.net" + ) + + @pytest.mark.asyncio async def test_global_ignore_suppresses_pairing_reply(monkeypatch): _clear_auth_env(monkeypatch) -- 2.43.0 From 4a7c17fca59e3193dfb57aa545d1f68d41760670 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:04:31 -0700 Subject: [PATCH 034/385] fix(gateway): read custom_providers context_length in hygiene compression (#4085) Gateway hygiene pre-compression only checked model.context_length from the top-level config, missing per-model context_length defined in custom_providers entries. This caused premature compression for custom provider users (e.g. 128K default instead of 200K configured). The AIAgent's own compressor already reads custom_providers correctly (run_agent.py lines 1171-1189). This adds the same fallback to the gateway hygiene path, running after runtime provider resolution so the base_url is available for matching. --- gateway/run.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index 735832744..c42510709 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -2284,6 +2284,29 @@ class GatewayRunner: _hyg_api_key = _hyg_runtime.get("api_key") except Exception: pass + + # Check custom_providers per-model context_length + # (same fallback as run_agent.py lines 1171-1189). + # Must run after runtime resolution so _hyg_base_url is set. 
+ if _hyg_config_context_length is None and _hyg_base_url: + try: + _hyg_custom_providers = _hyg_data.get("custom_providers") + if isinstance(_hyg_custom_providers, list): + for _cp in _hyg_custom_providers: + if not isinstance(_cp, dict): + continue + _cp_url = (_cp.get("base_url") or "").rstrip("/") + if _cp_url and _cp_url == _hyg_base_url.rstrip("/"): + _cp_models = _cp.get("models", {}) + if isinstance(_cp_models, dict): + _cp_model_cfg = _cp_models.get(_hyg_model, {}) + if isinstance(_cp_model_cfg, dict): + _cp_ctx = _cp_model_cfg.get("context_length") + if _cp_ctx is not None: + _hyg_config_context_length = int(_cp_ctx) + break + except (TypeError, ValueError): + pass except Exception: pass -- 2.43.0 From 13f3e6716575d0bd20162409b9de19c74dc55037 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:05:40 -0700 Subject: [PATCH 035/385] ux: show 'Initializing agent...' on first message (#4086) Display a brief status message before the heavy agent initialization (OpenAI client setup, tool loading, memory init, etc.) so users aren't staring at a blank screen for several seconds. Only prints when self.agent is None (first use or after model switch). 
Closes #4060 Co-authored-by: SHL0MS --- cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cli.py b/cli.py index e01a0e797..1df9ed2ce 100644 --- a/cli.py +++ b/cli.py @@ -5597,6 +5597,8 @@ class HermesCLI: self.agent = None # Initialize agent if needed + if self.agent is None: + _cprint(f"{_DIM}Initializing agent...{_RST}") if not self._init_agent( model_override=turn_route["model"], runtime_override=turn_route["runtime"], -- 2.43.0 From 3c8f91097393dd6d3c201f64fccf91b45ae1b9e3 Mon Sep 17 00:00:00 2001 From: SHL0MS <131039422+SHL0MS@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:07:21 -0700 Subject: [PATCH 036/385] feat: respect NO_COLOR env var and TERM=dumb (#4079) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add should_use_color() function to hermes_cli/colors.py that checks NO_COLOR (https://no-color.org/) and TERM=dumb before emitting ANSI escapes. The existing color() helper now uses this function instead of a bare isatty() check. This is the foundation — cli.py and banner.py still have inline ANSI constants that bypass this module (tracked in #4071). Closes #4066 Co-authored-by: SHL0MS --- hermes_cli/colors.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/hermes_cli/colors.py b/hermes_cli/colors.py index d30f99c62..8c85b4c0b 100644 --- a/hermes_cli/colors.py +++ b/hermes_cli/colors.py @@ -1,8 +1,24 @@ """Shared ANSI color utilities for Hermes CLI modules.""" +import os import sys +def should_use_color() -> bool: + """Return True when colored output is appropriate. + + Respects the NO_COLOR environment variable (https://no-color.org/) + and TERM=dumb, in addition to the existing TTY check. 
+ """ + if os.environ.get("NO_COLOR") is not None: + return False + if os.environ.get("TERM") == "dumb": + return False + if not sys.stdout.isatty(): + return False + return True + + class Colors: RESET = "\033[0m" BOLD = "\033[1m" @@ -16,7 +32,7 @@ class Colors: def color(text: str, *codes) -> str: - """Apply color codes to text (only when output is a TTY).""" - if not sys.stdout.isatty(): + """Apply color codes to text (only when color output is appropriate).""" + if not should_use_color(): return text return "".join(codes) + text + Colors.RESET -- 2.43.0 From 7e0c2c3ce3afa8c80467609edd9084431391a33c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:15:21 -0700 Subject: [PATCH 037/385] =?UTF-8?q?docs:=20comprehensive=20documentation?= =?UTF-8?q?=20audit=20=E2=80=94=20fix=209=20HIGH,=2020+=20MEDIUM=20gaps=20?= =?UTF-8?q?(#4087)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reference docs fixes: - cli-commands.md: remove non-existent --provider alibaba, add hermes profile/completion/plugins/mcp to top-level table, add --profile/-p global flag, add --source chat option - slash-commands.md: add /yolo and /commands, fix /q alias conflict (resolves to /queue not /quit), add missing aliases (/bg, /set-home, /reload_mcp, /gateway) - toolsets-reference.md: fix hermes-api-server (not same as hermes-cli, omits clarify/send_message/text_to_speech) - profile-commands.md: fix show name required not optional, --clone-from not --from, add --remove/--name to alias, fix alias path, fix export/ import arg types, remove non-existent fish completion - tools-reference.md: add EXA_API_KEY to web tools requires_env - mcp-config-reference.md: add auth key for OAuth, tool name sanitization - environment-variables.md: add EXA_API_KEY, update provider values - plugins.md: remove non-existent ctx.register_command(), add ctx.inject_message() Feature docs additions: - security.md: add 
/yolo mode, approval modes (manual/smart/off), configurable timeout, expanded dangerous patterns table - cron.md: add wrap_response config, [SILENT] suppression - mcp.md: add dynamic tool discovery, MCP sampling support - cli.md: add Ctrl+Z suspend, busy_input_mode, tool_preview_length - docker.md: add skills/credential file mounting Messaging platform docs: - telegram.md: add webhook mode, DoH fallback IPs - slack.md: add multi-workspace OAuth support - discord.md: add DISCORD_IGNORE_NO_MENTION - matrix.md: add MSC3245 native voice messages - feishu.md: expand from 129 to 365 lines (encrypt key, verification token, group policy, card actions, media, rate limiting, markdown, troubleshooting) - wecom.md: expand from 86 to 264 lines (per-group allowlists, media, AES decryption, stream replies, reconnection, troubleshooting) Configuration docs: - quickstart.md: add DeepSeek, Copilot, Copilot ACP providers - configuration.md: add DeepSeek provider, Exa web backend, terminal env_passthrough/images, browser.command_timeout, compression params, discord config, security/tirith config, timezone, auxiliary models 21 files changed, ~1000 lines added --- website/docs/getting-started/quickstart.md | 3 + website/docs/reference/cli-commands.md | 58 ++++- .../docs/reference/environment-variables.md | 3 +- .../docs/reference/mcp-config-reference.md | 32 +++ website/docs/reference/profile-commands.md | 55 ++-- website/docs/reference/slash-commands.md | 22 +- website/docs/reference/tools-reference.md | 4 +- website/docs/reference/toolsets-reference.md | 2 +- website/docs/user-guide/cli.md | 40 +++ website/docs/user-guide/configuration.md | 96 ++++++- website/docs/user-guide/docker.md | 6 + website/docs/user-guide/features/cron.md | 34 +++ website/docs/user-guide/features/mcp.md | 43 ++- website/docs/user-guide/features/plugins.md | 55 ++-- website/docs/user-guide/messaging/discord.md | 4 + website/docs/user-guide/messaging/feishu.md | 246 +++++++++++++++++- 
website/docs/user-guide/messaging/matrix.md | 1 + website/docs/user-guide/messaging/slack.md | 54 ++++ website/docs/user-guide/messaging/telegram.md | 67 +++++ website/docs/user-guide/messaging/wecom.md | 186 ++++++++++++- website/docs/user-guide/security.md | 76 +++++- 21 files changed, 1004 insertions(+), 83 deletions(-) diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md index 27cee7084..bc182f655 100644 --- a/website/docs/getting-started/quickstart.md +++ b/website/docs/getting-started/quickstart.md @@ -54,6 +54,9 @@ hermes setup # Or configure everything at once | **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` | | **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` | | **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` | +| **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` | +| **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` | +| **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) | | **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` | | **Custom Endpoint** | VLLM, SGLang, Ollama, or any OpenAI-compatible API | Set base URL + API key | diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index a9f12d76b..cd0cff39c 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -21,6 +21,7 @@ hermes [global-options] [subcommand/options] | Option | Description | |--------|-------------| | `--version`, `-V` | Show version and exit. | +| `--profile `, `-p ` | Select which Hermes profile to use for this invocation. Overrides the sticky default set by `hermes profile use`. 
| | `--resume `, `-r ` | Resume a previous session by ID or title. | | `--continue [name]`, `-c [name]` | Resume the most recent session, or the most recent session matching a title. | | `--worktree`, `-w` | Start in an isolated git worktree for parallel-agent workflows. | @@ -46,10 +47,14 @@ hermes [global-options] [subcommand/options] | `hermes skills` | Browse, install, publish, audit, and configure skills. | | `hermes honcho` | Manage Honcho cross-session memory integration. | | `hermes acp` | Run Hermes as an ACP server for editor integration. | +| `hermes mcp` | Manage MCP server configurations and run Hermes as an MCP server. | +| `hermes plugins` | Manage Hermes Agent plugins (install, enable, disable, remove). | | `hermes tools` | Configure enabled tools per platform. | | `hermes sessions` | Browse, export, prune, rename, and delete sessions. | | `hermes insights` | Show token/cost/activity analytics. | | `hermes claw` | OpenClaw migration helpers. | +| `hermes profile` | Manage profiles — multiple isolated Hermes instances. | +| `hermes completion` | Print shell completion scripts (bash/zsh). | | `hermes version` | Show version information. | | `hermes update` | Pull latest code and reinstall dependencies. | | `hermes uninstall` | Remove Hermes from the system. | @@ -67,7 +72,7 @@ Common options: | `-q`, `--query "..."` | One-shot, non-interactive prompt. | | `-m`, `--model ` | Override the model for this run. | | `-t`, `--toolsets ` | Enable a comma-separated set of toolsets. | -| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `alibaba`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`. | +| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`. 
| | `-s`, `--skills ` | Preload one or more skills for the session (can be repeated or comma-separated). | | `-v`, `--verbose` | Verbose output. | | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. | @@ -76,6 +81,7 @@ Common options: | `--checkpoints` | Enable filesystem checkpoints before destructive file changes. | | `--yolo` | Skip approval prompts. | | `--pass-session-id` | Pass the session ID into the system prompt. | +| `--source ` | Session source tag for filtering (default: `cli`). Use `tool` for third-party integrations that should not appear in user session lists. | Examples: @@ -507,6 +513,56 @@ hermes claw migrate --preset user-data --overwrite hermes claw migrate --source /home/user/old-openclaw ``` +## `hermes profile` + +```bash +hermes profile +``` + +Manage profiles — multiple isolated Hermes instances, each with its own config, sessions, skills, and home directory. + +| Subcommand | Description | +|------------|-------------| +| `list` | List all profiles. | +| `use ` | Set a sticky default profile. | +| `create [--clone] [--no-alias]` | Create a new profile. `--clone` copies config, `.env`, and `SOUL.md` from the active profile. | +| `delete [-y]` | Delete a profile. | +| `show ` | Show profile details (home directory, config, etc.). | +| `alias [--remove] [--name NAME]` | Manage wrapper scripts for quick profile access. | +| `rename ` | Rename a profile. | +| `export [-o FILE]` | Export a profile to a `.tar.gz` archive. | +| `import [--name NAME]` | Import a profile from a `.tar.gz` archive. | + +Examples: + +```bash +hermes profile list +hermes profile create work --clone +hermes profile use work +hermes profile alias work --name h-work +hermes profile export work -o work-backup.tar.gz +hermes profile import work-backup.tar.gz --name restored +hermes -p work chat -q "Hello from work profile" +``` + +## `hermes completion` + +```bash +hermes completion [bash|zsh] +``` + +Print a shell completion script to stdout. 
Source the output in your shell profile for tab-completion of Hermes commands, subcommands, and profile names. + +Examples: + +```bash +# Bash +hermes completion bash >> ~/.bashrc + +# Zsh +hermes completion zsh >> ~/.zshrc +``` + ## Maintenance commands | Command | Description | diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 715c9fbc1..d94121481 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -63,7 +63,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | Variable | Description | |----------|-------------| -| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`, `alibaba` (default: `auto`) | +| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`, `alibaba`, `deepseek`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) | | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) | | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL | | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) | @@ -80,6 +80,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `FIRECRAWL_API_KEY` | Web scraping ([firecrawl.dev](https://firecrawl.dev/)) | | `FIRECRAWL_API_URL` | Custom Firecrawl API endpoint for self-hosted instances (optional) | | `TAVILY_API_KEY` | Tavily API key for AI-native web search, extract, and crawl ([app.tavily.com](https://app.tavily.com/home)) | +| `EXA_API_KEY` | Exa API key for AI-native web search and contents ([exa.ai](https://exa.ai/)) | | 
`BROWSERBASE_API_KEY` | Browser automation ([browserbase.com](https://browserbase.com/)) | | `BROWSERBASE_PROJECT_ID` | Browserbase project ID | | `BROWSER_USE_API_KEY` | Browser Use cloud browser API key ([browser-use.com](https://browser-use.com/)) | diff --git a/website/docs/reference/mcp-config-reference.md b/website/docs/reference/mcp-config-reference.md index 5f78185b9..a87478f91 100644 --- a/website/docs/reference/mcp-config-reference.md +++ b/website/docs/reference/mcp-config-reference.md @@ -48,6 +48,8 @@ mcp_servers: | `timeout` | number | both | Tool call timeout | | `connect_timeout` | number | both | Initial connection timeout | | `tools` | mapping | both | Filtering and utility-tool policy | +| `auth` | string | HTTP | Authentication method. Set to `oauth` to enable OAuth 2.1 with PKCE | +| `sampling` | mapping | both | Server-initiated LLM request policy (see MCP guide) | ## `tools` policy keys @@ -213,3 +215,33 @@ Utility tools follow the same prefixing pattern: - `mcp__read_resource` - `mcp__list_prompts` - `mcp__get_prompt` + +### Name sanitization + +Hyphens (`-`) and dots (`.`) in both server names and tool names are replaced with underscores before registration. This ensures tool names are valid identifiers for LLM function-calling APIs. + +For example, a server named `my-api` exposing a tool called `list-items.v2` becomes: + +```text +mcp_my_api_list_items_v2 +``` + +Keep this in mind when writing `include` / `exclude` filters — use the **original** MCP tool name (with hyphens/dots), not the sanitized version. 
+ +## OAuth 2.1 authentication + +For HTTP servers that require OAuth, set `auth: oauth` on the server entry: + +```yaml +mcp_servers: + protected_api: + url: "https://mcp.example.com/mcp" + auth: oauth +``` + +Behavior: +- Hermes uses the MCP SDK's OAuth 2.1 PKCE flow (metadata discovery, dynamic client registration, token exchange, and refresh) +- On first connect, a browser window opens for authorization +- Tokens are persisted to `~/.hermes/mcp-tokens/.json` and reused across sessions +- Token refresh is automatic; re-authorization only happens when refresh fails +- Only applies to HTTP/StreamableHTTP transport (`url`-based servers) diff --git a/website/docs/reference/profile-commands.md b/website/docs/reference/profile-commands.md index a59e27574..d2d7adb8f 100644 --- a/website/docs/reference/profile-commands.md +++ b/website/docs/reference/profile-commands.md @@ -78,7 +78,7 @@ Creates a new profile. | `` | Name for the new profile. Must be a valid directory name (alphanumeric, hyphens, underscores). | | `--clone` | Copy `config.yaml`, `.env`, and `SOUL.md` from the current profile. | | `--clone-all` | Copy everything (config, memories, skills, sessions, state) from the current profile. | -| `--from ` | Clone from a specific profile instead of the current one. Used with `--clone` or `--clone-all`. | +| `--clone-from ` | Clone from a specific profile instead of the current one. Used with `--clone` or `--clone-all`. 
| **Examples:** @@ -93,7 +93,7 @@ hermes profile create work --clone hermes profile create backup --clone-all # Clone config from a specific profile -hermes profile create work2 --clone --from work +hermes profile create work2 --clone --clone-from work ``` ## `hermes profile delete` @@ -123,14 +123,14 @@ This permanently deletes the profile's entire directory including all config, me ## `hermes profile show` ```bash -hermes profile show [name] +hermes profile show ``` Displays details about a profile including its home directory, configured model, active platforms, and disk usage. | Argument | Description | |----------|-------------| -| `[name]` | Profile to inspect. Defaults to the current active profile if omitted. | +| `` | Profile to inspect. | **Example:** @@ -147,20 +147,28 @@ Disk: 48 MB ## `hermes profile alias` ```bash -hermes profile alias +hermes profile alias [options] ``` -Regenerates the shell alias script at `~/.local/bin/hermes-`. Useful if the alias was accidentally deleted or if you need to update it after moving your Hermes installation. +Regenerates the shell alias script at `~/.local/bin/`. Useful if the alias was accidentally deleted or if you need to update it after moving your Hermes installation. -| Argument | Description | -|----------|-------------| +| Argument / Option | Description | +|-------------------|-------------| | `` | Profile to create/update the alias for. | +| `--remove` | Remove the wrapper script instead of creating it. | +| `--name ` | Custom alias name (default: profile name). 
| **Example:** ```bash hermes profile alias work # Creates/updates ~/.local/bin/work + +hermes profile alias work --name mywork +# Creates ~/.local/bin/mywork + +hermes profile alias work --remove +# Removes the wrapper script ``` ## `hermes profile rename` @@ -187,39 +195,45 @@ hermes profile rename mybot assistant ## `hermes profile export` ```bash -hermes profile export +hermes profile export [options] ``` Exports a profile as a compressed tar.gz archive. -| Argument | Description | -|----------|-------------| +| Argument / Option | Description | +|-------------------|-------------| | `` | Profile to export. | -| `` | Path for the output archive (e.g., `./work-backup.tar.gz`). | +| `-o`, `--output ` | Output file path (default: `.tar.gz`). | **Example:** ```bash -hermes profile export work ./work-2026-03-29.tar.gz +hermes profile export work +# Creates work.tar.gz in the current directory + +hermes profile export work -o ./work-2026-03-29.tar.gz ``` ## `hermes profile import` ```bash -hermes profile import [name] +hermes profile import [options] ``` Imports a profile from a tar.gz archive. -| Argument | Description | -|----------|-------------| -| `` | Path to the tar.gz archive to import. | -| `[name]` | Name for the imported profile. Defaults to the original profile name from the archive. | +| Argument / Option | Description | +|-------------------|-------------| +| `` | Path to the tar.gz archive to import. | +| `--name ` | Name for the imported profile (default: inferred from archive). | **Example:** ```bash -hermes profile import ./work-2026-03-29.tar.gz work-restored +hermes profile import ./work-2026-03-29.tar.gz +# Infers profile name from the archive + +hermes profile import ./work-2026-03-29.tar.gz --name work-restored ``` ## `hermes -p` / `hermes --profile` @@ -254,7 +268,7 @@ Generates shell completion scripts. 
Includes completions for profile names and p | Argument | Description | |----------|-------------| -| `` | Shell to generate completions for: `bash`, `zsh`, or `fish`. | +| `` | Shell to generate completions for: `bash` or `zsh`. | **Examples:** @@ -262,7 +276,6 @@ Generates shell completion scripts. Includes completions for profile names and p # Install completions hermes completion bash >> ~/.bashrc hermes completion zsh >> ~/.zshrc -hermes completion fish > ~/.config/fish/completions/hermes.fish # Reload shell source ~/.bashrc diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index 70b15efa9..94e413445 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -31,10 +31,10 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/compress` | Manually compress conversation context (flush memories + summarize) | | `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) | | `/stop` | Kill all running background processes | -| `/queue ` (alias: `/q`) | Queue a prompt for the next turn (doesn't interrupt the current agent response) | +| `/queue ` (alias: `/q`) | Queue a prompt for the next turn (doesn't interrupt the current agent response). **Note:** `/q` is claimed by both `/queue` and `/quit`; the last registration wins, so `/q` resolves to `/quit` in practice. Use `/queue` explicitly. | | `/resume [name]` | Resume a previously-named session | | `/statusbar` (alias: `/sb`) | Toggle the context/model status bar on or off | -| `/background ` | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). | +| `/background ` (alias: `/bg`) | Run a prompt in a separate background session. 
The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). | | `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. | ### Configuration @@ -50,6 +50,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/reasoning` | Manage reasoning effort and display (usage: /reasoning [level\|show\|hide]) | | `/skin` | Show or change the display skin/theme | | `/voice [on\|off\|tts\|status]` | Toggle CLI voice mode and spoken playback. Recording uses `voice.record_key` (default: `Ctrl+B`). | +| `/yolo` | Toggle YOLO mode — skip all dangerous command approval prompts. | ### Tools & Skills @@ -60,7 +61,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/browser [connect\|disconnect\|status]` | Manage local Chrome CDP connection. `connect` attaches browser tools to a running Chrome instance (default: `ws://localhost:9222`). `disconnect` detaches. `status` shows current connection. Auto-launches Chrome if no debugger is detected. | | `/skills` | Search, install, inspect, or manage skills from online registries | | `/cron` | Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove) | -| `/reload-mcp` | Reload MCP servers from config.yaml | +| `/reload-mcp` (alias: `/reload_mcp`) | Reload MCP servers from config.yaml | | `/plugins` | List installed plugins and their status | ### Info @@ -70,14 +71,15 @@ Type `/` in the CLI to open the autocomplete menu. 
Built-in commands are case-in | `/help` | Show this help message | | `/usage` | Show token usage, cost breakdown, and session duration | | `/insights` | Show usage insights and analytics (last 30 days) | -| `/platforms` | Show gateway/messaging platform status | +| `/platforms` (alias: `/gateway`) | Show gateway/messaging platform status | | `/paste` | Check clipboard for an image and attach it | +| `/profile` | Show active profile name and home directory | ### Exit | Command | Description | |---------|-------------| -| `/quit` | Exit the CLI (also: /exit, /q) | +| `/quit` | Exit the CLI (also: `/exit`). See note on `/q` under `/queue` above. | ### Dynamic CLI slash commands @@ -105,7 +107,7 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/personality [name]` | Set a personality overlay for the session. | | `/retry` | Retry the last message. | | `/undo` | Remove the last exchange. | -| `/sethome` | Mark the current chat as the platform home channel for deliveries. | +| `/sethome` (alias: `/set-home`) | Mark the current chat as the platform home channel for deliveries. | | `/compress` | Manually compress conversation context. | | `/title [name]` | Set or show the session title. | | `/resume [name]` | Resume a previously named session. | @@ -116,7 +118,9 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/rollback [number]` | List or restore filesystem checkpoints. | | `/background ` | Run a prompt in a separate background session. Results are delivered back to the same chat when the task finishes. See [Messaging Background Sessions](/docs/user-guide/messaging/#background-sessions). | | `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. | -| `/reload-mcp` | Reload MCP servers from config. 
| +| `/reload-mcp` (alias: `/reload_mcp`) | Reload MCP servers from config. | +| `/yolo` | Toggle YOLO mode — skip all dangerous command approval prompts. | +| `/commands [page]` | Browse all commands and skills (paginated). | | `/approve [session\|always]` | Approve and execute a pending dangerous command. `session` approves for this session only; `always` adds to permanent allowlist. | | `/deny` | Reject a pending dangerous command. | | `/update` | Update Hermes Agent to the latest version. | @@ -127,6 +131,6 @@ The messaging gateway supports the following built-in commands inside Telegram, - `/skin`, `/tools`, `/toolsets`, `/browser`, `/config`, `/prompt`, `/cron`, `/skills`, `/platforms`, `/paste`, `/statusbar`, and `/plugins` are **CLI-only** commands. - `/verbose` is **CLI-only by default**, but can be enabled for messaging platforms by setting `display.tool_progress_command: true` in `config.yaml`. When enabled, it cycles the `display.tool_progress` mode and saves to config. -- `/status`, `/sethome`, `/update`, `/approve`, and `/deny` are **messaging-only** commands. -- `/background`, `/voice`, `/reload-mcp`, and `/rollback` work in **both** the CLI and the messaging gateway. +- `/status`, `/sethome`, `/update`, `/approve`, `/deny`, and `/commands` are **messaging-only** commands. +- `/background`, `/voice`, `/reload-mcp`, `/rollback`, and `/yolo` work in **both** the CLI and the messaging gateway. - `/voice join`, `/voice channel`, and `/voice leave` are only meaningful on Discord. diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md index 9a30bab33..275dea4fe 100644 --- a/website/docs/reference/tools-reference.md +++ b/website/docs/reference/tools-reference.md @@ -151,8 +151,8 @@ This page documents the built-in Hermes tool registry as it exists in code. Avai | Tool | Description | Requires environment | |------|-------------|----------------------| -| `web_search` | Search the web for information on any topic. 
Returns up to 5 relevant results with titles, URLs, and descriptions. | PARALLEL_API_KEY or FIRECRAWL_API_KEY or TAVILY_API_KEY | -| `web_extract` | Extract content from web page URLs. Returns page content in markdown format. Also works with PDF URLs — pass the PDF link directly and it converts to markdown text. Pages under 5000 chars return full markdown; larger pages are LLM-summarized. | PARALLEL_API_KEY or FIRECRAWL_API_KEY or TAVILY_API_KEY | +| `web_search` | Search the web for information on any topic. Returns up to 5 relevant results with titles, URLs, and descriptions. | EXA_API_KEY or PARALLEL_API_KEY or FIRECRAWL_API_KEY or TAVILY_API_KEY | +| `web_extract` | Extract content from web page URLs. Returns page content in markdown format. Also works with PDF URLs — pass the PDF link directly and it converts to markdown text. Pages under 5000 chars return full markdown; larger pages are LLM-summarized. | EXA_API_KEY or PARALLEL_API_KEY or FIRECRAWL_API_KEY or TAVILY_API_KEY | ## `tts` toolset diff --git a/website/docs/reference/toolsets-reference.md b/website/docs/reference/toolsets-reference.md index 83cf92e4c..7999acc01 100644 --- a/website/docs/reference/toolsets-reference.md +++ b/website/docs/reference/toolsets-reference.md @@ -19,7 +19,7 @@ Toolsets are named bundles of tools that you can enable with `hermes chat --tool | `file` | core | `patch`, `read_file`, `search_files`, `write_file` | | `hermes-acp` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `delegate_task`, `execute_code`, `memory`, `patch`, `process`, `read_file`, `search_files`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` | | `hermes-cli` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_console`, 
`browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `cronjob`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` | -| `hermes-api-server` | platform | _(same as hermes-cli)_ | +| `hermes-api-server` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `cronjob`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` | | `hermes-dingtalk` | platform | _(same as hermes-cli)_ | | `hermes-feishu` | platform | _(same as hermes-cli)_ | | `hermes-wecom` | platform | _(same as hermes-cli)_ | diff --git a/website/docs/user-guide/cli.md b/website/docs/user-guide/cli.md index 1c4857d71..e37b1ddba 100644 --- a/website/docs/user-guide/cli.md +++ b/website/docs/user-guide/cli.md @@ -94,6 +94,7 @@ When resuming a previous session (`hermes -c` or `hermes --resume `), a "Pre | `Ctrl+B` | Start/stop voice recording when voice mode is enabled (`voice.record_key`, default: `ctrl+b`) | | `Ctrl+C` | Interrupt agent (double-press within 2s to force exit) | | `Ctrl+D` | Exit | +| 
`Ctrl+Z` | Suspend Hermes to background (Unix only). Run `fg` in the shell to resume. | | `Tab` | Accept auto-suggestion (ghost text) or autocomplete slash commands | ## Slash Commands @@ -212,6 +213,33 @@ You can interrupt the agent at any point: - In-progress terminal commands are killed immediately (SIGTERM, then SIGKILL after 1s) - Multiple messages typed during interrupt are combined into one prompt +### Busy Input Mode + +The `display.busy_input_mode` config key controls what happens when you press Enter while the agent is working: + +| Mode | Behavior | +|------|----------| +| `"interrupt"` (default) | Your message interrupts the current operation and is processed immediately | +| `"queue"` | Your message is silently queued and sent as the next turn after the agent finishes | + +```yaml +# ~/.hermes/config.yaml +display: + busy_input_mode: "queue" # or "interrupt" (default) +``` + +Queue mode is useful when you want to prepare follow-up messages without accidentally canceling in-flight work. Unknown values fall back to `"interrupt"`. + +### Suspending to Background + +On Unix systems, press **`Ctrl+Z`** to suspend Hermes to the background — just like any terminal process. The shell prints a confirmation: + +``` +Hermes Agent has been suspended. Run `fg` to bring Hermes Agent back. +``` + +Type `fg` in your shell to resume the session exactly where you left off. This is not supported on Windows. + ## Tool Progress Display The CLI shows animated feedback as the agent works: @@ -232,6 +260,18 @@ The CLI shows animated feedback as the agent works: Cycle through display modes with `/verbose`: `off → new → all → verbose`. This command can also be enabled for messaging platforms — see [configuration](/docs/user-guide/configuration#display-settings). +### Tool Preview Length + +The `display.tool_preview_length` config key controls the maximum number of characters shown in tool call preview lines (e.g. file paths, terminal commands). 
The default is `0`, which means no limit — full paths and commands are shown. + +```yaml +# ~/.hermes/config.yaml +display: + tool_preview_length: 80 # Truncate tool previews to 80 chars (0 = no limit) +``` + +This is useful on narrow terminals or when tool arguments contain very long file paths. + ## Session Management ### Resuming Sessions diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index c3aa96f53..b0ea0482d 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -92,6 +92,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | **Kilo Code** | `KILOCODE_API_KEY` in `~/.hermes/.env` (provider: `kilocode`) | | **OpenCode Zen** | `OPENCODE_ZEN_API_KEY` in `~/.hermes/.env` (provider: `opencode-zen`) | | **OpenCode Go** | `OPENCODE_GO_API_KEY` in `~/.hermes/.env` (provider: `opencode-go`) | +| **DeepSeek** | `DEEPSEEK_API_KEY` in `~/.hermes/.env` (provider: `deepseek`) | | **Hugging Face** | `HF_TOKEN` in `~/.hermes/.env` (provider: `huggingface`, aliases: `hf`) | | **Custom Endpoint** | `hermes model` (saved in `config.yaml`) or `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | @@ -706,6 +707,10 @@ terminal: backend: local # local | docker | ssh | modal | daytona | singularity cwd: "." # Working directory ("." = current dir for local, "/root" for containers) timeout: 180 # Per-command timeout in seconds + env_passthrough: [] # Env var names to forward to sandboxed execution (terminal + execute_code) + singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20" # Container image for Singularity backend + modal_image: "nikolaik/python-nodejs:python3.11-nodejs20" # Container image for Modal backend + daytona_image: "nikolaik/python-nodejs:python3.11-nodejs20" # Container image for Daytona backend ``` ### Backend Overview @@ -1012,6 +1017,8 @@ All compression settings live in `config.yaml` (no environment variables). 
compression: enabled: true # Toggle compression on/off threshold: 0.50 # Compress at this % of context limit + target_ratio: 0.20 # Fraction of threshold to preserve as recent tail + protect_last_n: 20 # Min recent messages to keep uncompressed summary_model: "google/gemini-3-flash-preview" # Model for summarization summary_provider: "auto" # Provider: "auto", "openrouter", "nous", "codex", "main", etc. summary_base_url: null # Custom OpenAI-compatible endpoint (overrides provider) @@ -1146,6 +1153,38 @@ auxiliary: # Context compression timeout (separate from compression.* config) compression: timeout: 120 # seconds — compression summarizes long conversations, needs more time + + # Session search — summarizes past session matches + session_search: + provider: "auto" + model: "" + base_url: "" + api_key: "" + timeout: 30 + + # Skills hub — skill matching and search + skills_hub: + provider: "auto" + model: "" + base_url: "" + api_key: "" + timeout: 30 + + # MCP tool dispatch + mcp: + provider: "auto" + model: "" + base_url: "" + api_key: "" + timeout: 30 + + # Memory flush — summarizes conversation for persistent memory + flush_memories: + provider: "auto" + model: "" + base_url: "" + api_key: "" + timeout: 30 ``` :::tip @@ -1340,6 +1379,7 @@ display: streaming: false # Stream tokens to terminal as they arrive (real-time output) background_process_notifications: all # all | result | error | off (gateway only) show_cost: false # Show estimated $ cost in the CLI status bar + tool_preview_length: 0 # Max chars for tool call previews (0 = no limit, show full paths/commands) ``` ### Theme mode @@ -1554,11 +1594,11 @@ code_execution: ## Web Search Backends -The `web_search`, `web_extract`, and `web_crawl` tools support three backend providers. Configure the backend in `config.yaml` or via `hermes tools`: +The `web_search`, `web_extract`, and `web_crawl` tools support four backend providers. 
Configure the backend in `config.yaml` or via `hermes tools`: ```yaml web: - backend: firecrawl # firecrawl | parallel | tavily + backend: firecrawl # firecrawl | parallel | tavily | exa ``` | Backend | Env Var | Search | Extract | Crawl | @@ -1566,8 +1606,9 @@ web: | **Firecrawl** (default) | `FIRECRAWL_API_KEY` | ✔ | ✔ | ✔ | | **Parallel** | `PARALLEL_API_KEY` | ✔ | ✔ | — | | **Tavily** | `TAVILY_API_KEY` | ✔ | ✔ | ✔ | +| **Exa** | `EXA_API_KEY` | ✔ | ✔ | — | -**Backend selection:** If `web.backend` is not set, the backend is auto-detected from available API keys. If only `TAVILY_API_KEY` is set, Tavily is used. If only `PARALLEL_API_KEY` is set, Parallel is used. Otherwise Firecrawl is the default. +**Backend selection:** If `web.backend` is not set, the backend is auto-detected from available API keys. If only `EXA_API_KEY` is set, Exa is used. If only `TAVILY_API_KEY` is set, Tavily is used. If only `PARALLEL_API_KEY` is set, Parallel is used. Otherwise Firecrawl is the default. **Self-hosted Firecrawl:** Set `FIRECRAWL_API_URL` to point at your own instance. When a custom URL is set, the API key becomes optional (set `USE_DB_AUTHENTICATION=false` on the server to disable auth). @@ -1580,11 +1621,60 @@ Configure browser automation behavior: ```yaml browser: inactivity_timeout: 120 # Seconds before auto-closing idle sessions + command_timeout: 30 # Timeout in seconds for browser commands (screenshot, navigate, etc.) record_sessions: false # Auto-record browser sessions as WebM videos to ~/.hermes/browser_recordings/ ``` The browser toolset supports multiple providers. See the [Browser feature page](/docs/user-guide/features/browser) for details on Browserbase, Browser Use, and local Chrome CDP setup. +## Timezone + +Override the server-local timezone with an IANA timezone string. Affects timestamps in logs, cron scheduling, and system prompt time injection. 
+ +```yaml +timezone: "America/New_York" # IANA timezone (default: "" = server-local time) +``` + +Supported values: any IANA timezone identifier (e.g. `America/New_York`, `Europe/London`, `Asia/Kolkata`, `UTC`). Leave empty or omit for server-local time. + +## Discord + +Configure Discord-specific behavior for the messaging gateway: + +```yaml +discord: + require_mention: true # Require @mention to respond in server channels + free_response_channels: "" # Comma-separated channel IDs where bot responds without @mention + auto_thread: true # Auto-create threads on @mention in channels +``` + +- `require_mention` — when `true` (default), the bot only responds in server channels when mentioned with `@BotName`. DMs always work without mention. +- `free_response_channels` — comma-separated list of channel IDs where the bot responds to every message without requiring a mention. +- `auto_thread` — when `true` (default), mentions in channels automatically create a thread for the conversation, keeping channels clean (similar to Slack threading). + +## Security + +Pre-execution security scanning and secret redaction: + +```yaml +security: + redact_secrets: true # Redact API key patterns in tool output and logs + tirith_enabled: true # Enable Tirith security scanning for terminal commands + tirith_path: "tirith" # Path to tirith binary (default: "tirith" in $PATH) + tirith_timeout: 5 # Seconds to wait for tirith scan before timing out + tirith_fail_open: true # Allow command execution if tirith is unavailable + website_blocklist: # See Website Blocklist section below + enabled: false + domains: [] + shared_files: [] +``` + +- `redact_secrets` — automatically detects and redacts patterns that look like API keys, tokens, and passwords in tool output before it enters the conversation context and logs. +- `tirith_enabled` — when `true`, terminal commands are scanned by [Tirith](https://github.com/StackGuardian/tirith) before execution to detect potentially dangerous operations. 
+- `tirith_path` — path to the tirith binary. Set this if tirith is installed in a non-standard location. +- `tirith_timeout` — maximum seconds to wait for a tirith scan. Commands proceed if the scan times out. +- `tirith_fail_open` — when `true` (default), commands are allowed to execute if tirith is unavailable or fails. Set to `false` to block commands when tirith cannot verify them. + ## Website Blocklist Block specific domains from being accessed by the agent's web and browser tools: diff --git a/website/docs/user-guide/docker.md b/website/docs/user-guide/docker.md index 229919774..3fb33a93f 100644 --- a/website/docs/user-guide/docker.md +++ b/website/docs/user-guide/docker.md @@ -54,3 +54,9 @@ docker run -d \ -v ~/.hermes:/opt/data \ nousresearch/hermes-agent ``` + +## Skills and credential files + +When using Docker as the execution environment (not the methods above, but when the agent runs commands inside a Docker sandbox), Hermes automatically bind-mounts the skills directory (`~/.hermes/skills/`) and any credential files declared by skills into the container as read-only volumes. This means skill scripts, templates, and references are available inside the sandbox without manual configuration. + +The same syncing happens for SSH and Modal backends — skills and credential files are uploaded via rsync or the Modal mount API before each command. diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md index 2d0a4c836..f8b1d2c5a 100644 --- a/website/docs/user-guide/features/cron.md +++ b/website/docs/user-guide/features/cron.md @@ -193,6 +193,40 @@ When scheduling jobs, you specify where the output goes: The agent's final response is automatically delivered. You do not need to call `send_message` in the cron prompt. 
+### Response wrapping + +By default, delivered cron output is wrapped with a header and footer so the recipient knows it came from a scheduled task: + +``` +Cronjob Response: Morning feeds +------------- + + + +Note: The agent cannot see this message, and therefore cannot respond to it. +``` + +To deliver the raw agent output without the wrapper, set `cron.wrap_response` to `false`: + +```yaml +# ~/.hermes/config.yaml +cron: + wrap_response: false +``` + +### Silent suppression + +If the agent's final response starts with `[SILENT]`, delivery is suppressed entirely. The output is still saved locally for audit (in `~/.hermes/cron/output/`), but no message is sent to the delivery target. + +This is useful for monitoring jobs that should only report when something is wrong: + +```text +Check if nginx is running. If everything is healthy, respond with only [SILENT]. +Otherwise, report the issue. +``` + +Failed jobs always deliver regardless of the `[SILENT]` marker — only successful runs can be silenced. + ## Schedule formats The agent's final response is automatically delivered — you do **not** need to include `send_message` in the cron prompt for that same destination. If a cron run calls `send_message` to the exact target the scheduler will already deliver to, Hermes skips that duplicate send and tells the model to put the user-facing content in the final response instead. Use `send_message` only for additional or different targets. diff --git a/website/docs/user-guide/features/mcp.md b/website/docs/user-guide/features/mcp.md index 9b8326d46..b48f4f656 100644 --- a/website/docs/user-guide/features/mcp.md +++ b/website/docs/user-guide/features/mcp.md @@ -277,6 +277,14 @@ That keeps the tool list clean. Hermes discovers MCP servers at startup and registers their tools into the normal tool registry. +### Dynamic Tool Discovery + +MCP servers can notify Hermes when their available tools change at runtime by sending a `notifications/tools/list_changed` notification. 
When Hermes receives this notification, it automatically re-fetches the server's tool list and updates the registry — no manual `/reload-mcp` required. + +This is useful for MCP servers whose capabilities change dynamically (e.g. a server that adds tools when a new database schema is loaded, or removes tools when a service goes offline). + +The refresh is lock-protected so rapid-fire notifications from the same server don't cause overlapping refreshes. Prompt and resource change notifications (`prompts/list_changed`, `resources/list_changed`) are received but not yet acted on. + ### Reloading If you change MCP config, use: @@ -285,7 +293,7 @@ If you change MCP config, use: /reload-mcp ``` -This reloads MCP servers from config and refreshes the available tool list. +This reloads MCP servers from config and refreshes the available tool list. For runtime tool changes pushed by the server itself, see [Dynamic Tool Discovery](#dynamic-tool-discovery) above. ### Toolsets @@ -403,6 +411,39 @@ Because Hermes now only registers those wrappers when both are true: This is intentional and keeps the tool list honest. +## MCP Sampling Support + +MCP servers can request LLM inference from Hermes via the `sampling/createMessage` protocol. This allows an MCP server to ask Hermes to generate text on its behalf — useful for servers that need LLM capabilities but don't have their own model access. + +Sampling is **enabled by default** for all MCP servers (when the MCP SDK supports it). 
Configure it per-server under the `sampling` key: + +```yaml +mcp_servers: + my_server: + command: "my-mcp-server" + sampling: + enabled: true # Enable sampling (default: true) + model: "openai/gpt-4o" # Override model for sampling requests (optional) + max_tokens_cap: 4096 # Max tokens per sampling response (default: 4096) + timeout: 30 # Timeout in seconds per request (default: 30) + max_rpm: 10 # Rate limit: max requests per minute (default: 10) + max_tool_rounds: 5 # Max tool-use rounds in sampling loops (default: 5) + allowed_models: [] # Allowlist of model names the server may request (empty = any) + log_level: "info" # Audit log level: debug, info, or warning (default: info) +``` + +The sampling handler includes a sliding-window rate limiter, per-request timeouts, and tool-loop depth limits to prevent runaway usage. Metrics (request count, errors, tokens used) are tracked per server instance. + +To disable sampling for a specific server: + +```yaml +mcp_servers: + untrusted_server: + url: "https://mcp.example.com" + sampling: + enabled: false +``` + ## Running Hermes as an MCP server In addition to connecting **to** MCP servers, Hermes can also **be** an MCP server. This lets other MCP-capable agents (Claude Code, Cursor, Codex, or any MCP client) use Hermes's messaging capabilities — list conversations, read message history, and send messages across all your connected platforms. diff --git a/website/docs/user-guide/features/plugins.md b/website/docs/user-guide/features/plugins.md index 0f2e20f17..28fc8041e 100644 --- a/website/docs/user-guide/features/plugins.md +++ b/website/docs/user-guide/features/plugins.md @@ -4,7 +4,7 @@ sidebar_position: 20 # Plugins -Hermes has a plugin system for adding custom tools, hooks, slash commands, and integrations without modifying core code. +Hermes has a plugin system for adding custom tools, hooks, and integrations without modifying core code. 
**→ [Build a Hermes Plugin](/docs/guides/build-a-hermes-plugin)** — step-by-step guide with a complete working example. @@ -30,7 +30,7 @@ Project-local plugins under `./.hermes/plugins/` are disabled by default. Enable |-----------|-----| | Add tools | `ctx.register_tool(name, schema, handler)` | | Add hooks | `ctx.register_hook("post_tool_call", callback)` | -| Add slash commands | `ctx.register_command("mycommand", handler)` | +| Inject messages | `ctx.inject_message(content, role="user")` — see [Injecting Messages](#injecting-messages) | | Ship data files | `Path(__file__).parent / "data" / "file.yaml"` | | Bundle skills | Copy `skill.md` to `~/.hermes/skills/` at load time | | Gate on env vars | `requires_env: [API_KEY]` in plugin.yaml | @@ -57,34 +57,6 @@ Plugins can register callbacks for these lifecycle events. See the **[Event Hook | `on_session_start` | New session created (first turn only) | | `on_session_end` | End of every `run_conversation` call | -## Slash commands - -Plugins can register slash commands that work in both CLI and messaging platforms: - -```python -def register(ctx): - ctx.register_command( - name="greet", - handler=lambda args: f"Hello, {args or 'world'}!", - description="Greet someone", - args_hint="[name]", - aliases=("hi",), - ) -``` - -The handler receives the argument string (everything after `/greet`) and returns a string to display. Registered commands automatically appear in `/help`, tab autocomplete, Telegram bot menu, and Slack subcommand mapping. - -| Parameter | Description | -|-----------|-------------| -| `name` | Command name without slash | -| `handler` | Callable that takes `args: str` and returns `str | None` | -| `description` | Shown in `/help` | -| `args_hint` | Usage hint, e.g. `"[name]"` | -| `aliases` | Tuple of alternative names | -| `cli_only` | Only available in CLI | -| `gateway_only` | Only available in messaging platforms | -| `gateway_config_gate` | Config dotpath (e.g. `"display.my_option"`). 
When set on a `cli_only` command, the command becomes available in the gateway if the config value is truthy. | - ## Managing plugins ```bash @@ -109,4 +81,27 @@ plugins: In a running session, `/plugins` shows which plugins are currently loaded. +## Injecting Messages + +Plugins can inject messages into the active conversation using `ctx.inject_message()`: + +```python +ctx.inject_message("New data arrived from the webhook", role="user") +``` + +**Signature:** `ctx.inject_message(content: str, role: str = "user") -> bool` + +How it works: + +- If the agent is **idle** (waiting for user input), the message is queued as the next input and starts a new turn. +- If the agent is **mid-turn** (actively running), the message interrupts the current operation — the same as a user typing a new message and pressing Enter. +- For non-`"user"` roles, the content is prefixed with `[role]` (e.g. `[system] ...`). +- Returns `True` if the message was queued successfully, `False` if no CLI reference is available (e.g. in gateway mode). + +This enables plugins like remote control viewers, messaging bridges, or webhook receivers to feed messages into the conversation from external sources. + +:::note +`inject_message` is only available in CLI mode. In gateway mode, there is no CLI reference and the method returns `False`. +::: + See the **[full guide](/docs/guides/build-a-hermes-plugin)** for handler contracts, schema format, hook behavior, error handling, and common mistakes. 
diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md index df97930a6..2f40283ec 100644 --- a/website/docs/user-guide/messaging/discord.md +++ b/website/docs/user-guide/messaging/discord.md @@ -19,6 +19,7 @@ Before setup, here's the part most people want to know: how Hermes behaves once | **Free-response channels** | You can make specific channels mention-free with `DISCORD_FREE_RESPONSE_CHANNELS`, or disable mentions globally with `DISCORD_REQUIRE_MENTION=false`. | | **Threads** | Hermes replies in the same thread. Mention rules still apply unless that thread or its parent channel is configured as free-response. Threads stay isolated from the parent channel for session history. | | **Shared channels with multiple users** | By default, Hermes isolates session history per user inside the channel for safety and clarity. Two people talking in the same channel do not share one transcript unless you explicitly disable that. | +| **Messages mentioning other users** | When `DISCORD_IGNORE_NO_MENTION` is `true` (the default), Hermes stays silent if a message @mentions other users but does **not** mention the bot. This prevents the bot from jumping into conversations directed at other people. Set to `false` if you want the bot to respond to all messages regardless of who is mentioned. This only applies in server channels, not DMs. | :::tip If you want a normal bot-help channel where people can talk to Hermes without tagging it every time, add that channel to `DISCORD_FREE_RESPONSE_CHANNELS`. 
@@ -253,6 +254,9 @@ DISCORD_ALLOWED_USERS=284102345871466496 # Optional: channels where bot responds without @mention (comma-separated channel IDs) # DISCORD_FREE_RESPONSE_CHANNELS=1234567890,9876543210 + +# Optional: ignore messages that @mention other users but NOT the bot (default: true) +# DISCORD_IGNORE_NO_MENTION=true ``` Optional behavior settings in `~/.hermes/config.yaml`: diff --git a/website/docs/user-guide/messaging/feishu.md b/website/docs/user-guide/messaging/feishu.md index 1b7141e78..47901e353 100644 --- a/website/docs/user-guide/messaging/feishu.md +++ b/website/docs/user-guide/messaging/feishu.md @@ -18,7 +18,7 @@ The integration supports both connection modes: | Context | Behavior | |---------|----------| | Direct messages | Hermes responds to every message. | -| Group chats | Hermes responds when the bot is addressed in the chat. | +| Group chats | Hermes responds only when the bot is @mentioned in the chat. | | Shared group chats | By default, session history is isolated per user inside a shared chat. | This shared-chat behavior is controlled by `config.yaml`: @@ -46,12 +46,16 @@ Keep the App Secret private. Anyone with it can impersonate your app. ### Recommended: WebSocket mode -Use WebSocket mode when Hermes runs on your laptop, workstation, or a private server. No public URL is required. +Use WebSocket mode when Hermes runs on your laptop, workstation, or a private server. No public URL is required. The official Lark SDK opens and maintains a persistent outbound WebSocket connection with automatic reconnection. ```bash FEISHU_CONNECTION_MODE=websocket ``` +**Requirements:** The `websockets` Python package must be installed. The SDK handles connection lifecycle, heartbeats, and auto-reconnection internally. + +**How it works:** The adapter runs the Lark SDK's WebSocket client in a background executor thread. Inbound events (messages, reactions, card actions) are dispatched to the main asyncio loop. 
On disconnect, the SDK will attempt to reconnect automatically. + ### Optional: Webhook mode Use webhook mode only when you already run Hermes behind a reachable HTTP endpoint. @@ -60,12 +64,24 @@ Use webhook mode only when you already run Hermes behind a reachable HTTP endpoi FEISHU_CONNECTION_MODE=webhook ``` -In webhook mode, Hermes serves a Feishu endpoint at: +In webhook mode, Hermes starts an HTTP server (via `aiohttp`) and serves a Feishu endpoint at: ```text /feishu/webhook ``` +**Requirements:** The `aiohttp` Python package must be installed. + +You can customize the webhook server bind address and path: + +```bash +FEISHU_WEBHOOK_HOST=127.0.0.1 # default: 127.0.0.1 +FEISHU_WEBHOOK_PORT=8765 # default: 8765 +FEISHU_WEBHOOK_PATH=/feishu/webhook # default: /feishu/webhook +``` + +When Feishu sends a URL verification challenge (`type: url_verification`), the webhook responds automatically so you can complete the subscription setup in the Feishu developer console. + ## Step 3: Configure Hermes ### Option A: Interactive Setup @@ -116,13 +132,233 @@ FEISHU_HOME_CHANNEL=oc_xxx ## Security -For production use, set an allowlist: +### User Allowlist + +For production use, set an allowlist of Feishu Open IDs: ```bash FEISHU_ALLOWED_USERS=ou_xxx,ou_yyy ``` -If you leave the allowlist empty, anyone who can reach the bot may be able to use it. +If you leave the allowlist empty, anyone who can reach the bot may be able to use it. In group chats, the allowlist is checked against the sender's open_id before the message is processed. + +### Webhook Encryption Key + +When running in webhook mode, set an encryption key to enable signature verification of inbound webhook payloads: + +```bash +FEISHU_ENCRYPT_KEY=your-encrypt-key +``` + +This key is found in the **Event Subscriptions** section of your Feishu app configuration. 
When set, the adapter verifies every webhook request using the signature algorithm: + +``` +SHA256(timestamp + nonce + encrypt_key + body) +``` + +The computed hash is compared against the `x-lark-signature` header using timing-safe comparison. Requests with invalid or missing signatures are rejected with HTTP 401. + +:::tip +In WebSocket mode, signature verification is handled by the SDK itself, so `FEISHU_ENCRYPT_KEY` is optional. In webhook mode, it is strongly recommended for production. +::: + +### Verification Token + +An additional layer of authentication that checks the `token` field inside webhook payloads: + +```bash +FEISHU_VERIFICATION_TOKEN=your-verification-token +``` + +This token is also found in the **Event Subscriptions** section of your Feishu app. When set, every inbound webhook payload must contain a matching `token` in its `header` object. Mismatched tokens are rejected with HTTP 401. + +Both `FEISHU_ENCRYPT_KEY` and `FEISHU_VERIFICATION_TOKEN` can be used together for defense in depth. + +## Group Message Policy + +The `FEISHU_GROUP_POLICY` environment variable controls whether and how Hermes responds in group chats: + +```bash +FEISHU_GROUP_POLICY=allowlist # default +``` + +| Value | Behavior | +|-------|----------| +| `open` | Hermes responds to @mentions from any user in any group. | +| `allowlist` | Hermes only responds to @mentions from users listed in `FEISHU_ALLOWED_USERS`. | +| `disabled` | Hermes ignores all group messages entirely. | + +In all modes, the bot must be explicitly @mentioned (or @all) in the group before the message is processed. Direct messages bypass this gate. + +### Bot Identity for @Mention Gating + +For precise @mention detection in groups, the adapter needs to know the bot's identity. 
It can be provided explicitly: + +```bash +FEISHU_BOT_OPEN_ID=ou_xxx +FEISHU_BOT_USER_ID=xxx +FEISHU_BOT_NAME=MyBot +``` + +If none of these are set, the adapter will attempt to auto-discover the bot name via the Application Info API on startup. For this to work, grant the `admin:app.info:readonly` or `application:application:self_manage` permission scope. + +## Interactive Card Actions + +When users click buttons or interact with interactive cards sent by the bot, the adapter routes these as synthetic `/card` command events: + +- Button clicks become: `/card button {"key": "value", ...}` +- The action's `value` payload from the card definition is included as JSON. +- Card actions are deduplicated with a 15-minute window to prevent double processing. + +Card action events are dispatched with `MessageType.COMMAND`, so they flow through the normal command processing pipeline. + +To use this feature, enable the **Interactive Card** event in your Feishu app's event subscriptions (`card.action.trigger`). + +## Media Support + +### Inbound (receiving) + +The adapter receives and caches the following media types from users: + +| Type | Extensions | How it's processed | +|------|-----------|-------------------| +| **Images** | .jpg, .jpeg, .png, .gif, .webp, .bmp | Downloaded via Feishu API and cached locally | +| **Audio** | .ogg, .mp3, .wav, .m4a, .aac, .flac, .opus, .webm | Downloaded and cached; small text files are auto-extracted | +| **Video** | .mp4, .mov, .avi, .mkv, .webm, .m4v, .3gp | Downloaded and cached as documents | +| **Files** | .pdf, .doc, .docx, .xls, .xlsx, .ppt, .pptx, and more | Downloaded and cached as documents | + +Media from rich-text (post) messages, including inline images and file attachments, is also extracted and cached. + +For small text-based documents (.txt, .md), the file content is automatically injected into the message text so the agent can read it directly without needing tools. 
+ +### Outbound (sending) + +| Method | What it sends | +|--------|--------------| +| `send` | Text or rich post messages (auto-detected based on markdown content) | +| `send_image` / `send_image_file` | Uploads image to Feishu, then sends as native image bubble (with optional caption) | +| `send_document` | Uploads file to Feishu API, then sends as file attachment | +| `send_voice` | Uploads audio file as a Feishu file attachment | +| `send_video` | Uploads video and sends as native media message | +| `send_animation` | GIFs are downgraded to file attachments (Feishu has no native GIF bubble) | + +File upload routing is automatic based on extension: + +- `.ogg`, `.opus` → uploaded as `opus` audio +- `.mp4`, `.mov`, `.avi`, `.m4v` → uploaded as `mp4` media +- `.pdf`, `.doc(x)`, `.xls(x)`, `.ppt(x)` → uploaded with their document type +- Everything else → uploaded as a generic stream file + +## Markdown Rendering and Post Fallback + +When outbound text contains markdown formatting (headings, bold, lists, code blocks, links, etc.), the adapter automatically sends it as a Feishu **post** message with an embedded `md` tag rather than as plain text. This enables rich rendering in the Feishu client. + +If the Feishu API rejects the post payload (e.g., due to unsupported markdown constructs), the adapter automatically falls back to sending as plain text with markdown stripped. This two-stage fallback ensures messages are always delivered. + +Plain text messages (no markdown detected) are sent as the simple `text` message type. + +## ACK Emoji Reactions + +When the adapter receives an inbound message, it immediately adds an ✅ (OK) emoji reaction to signal that the message was received and is being processed. This provides visual feedback before the agent completes its response. + +The reaction is persistent — it remains on the message after the response is sent, serving as a receipt marker. + +User reactions on bot messages are also tracked. 
If a user adds or removes an emoji reaction on a message sent by the bot, it is routed as a synthetic text event (`reaction:added:EMOJI_TYPE` or `reaction:removed:EMOJI_TYPE`) so the agent can respond to feedback. + +## Burst Protection and Batching + +The adapter includes debouncing for rapid message bursts to avoid overwhelming the agent: + +### Text Batching + +When a user sends multiple text messages in quick succession, they are merged into a single event before being dispatched: + +| Setting | Env Var | Default | +|---------|---------|---------| +| Quiet period | `HERMES_FEISHU_TEXT_BATCH_DELAY_SECONDS` | 0.6s | +| Max messages per batch | `HERMES_FEISHU_TEXT_BATCH_MAX_MESSAGES` | 8 | +| Max characters per batch | `HERMES_FEISHU_TEXT_BATCH_MAX_CHARS` | 4000 | + +### Media Batching + +Multiple media attachments sent in quick succession (e.g., dragging several images) are merged into a single event: + +| Setting | Env Var | Default | +|---------|---------|---------| +| Quiet period | `HERMES_FEISHU_MEDIA_BATCH_DELAY_SECONDS` | 0.8s | + +### Per-Chat Serialization + +Messages within the same chat are processed serially (one at a time) to maintain conversation coherence. Each chat has its own lock, so messages in different chats are processed concurrently. + +## Rate Limiting (Webhook Mode) + +In webhook mode, the adapter enforces per-IP rate limiting to protect against abuse: + +- **Window:** 60-second sliding window +- **Limit:** 120 requests per window per (app_id, path, IP) triple +- **Tracking cap:** Up to 4096 unique keys tracked (prevents unbounded memory growth) + +Requests that exceed the limit receive HTTP 429 (Too Many Requests). + +### Webhook Anomaly Tracking + +The adapter tracks consecutive error responses per IP address. After 25 consecutive errors from the same IP within a 6-hour window, a warning is logged. This helps detect misconfigured clients or probing attempts. 
+ +Additional webhook protections: +- **Body size limit:** 1 MB maximum +- **Body read timeout:** 30 seconds +- **Content-Type enforcement:** Only `application/json` is accepted + +## Deduplication + +Inbound messages are deduplicated using message IDs with a 24-hour TTL. The dedup state is persisted across restarts to `~/.hermes/feishu_seen_message_ids.json`. + +| Setting | Env Var | Default | +|---------|---------|---------| +| Cache size | `HERMES_FEISHU_DEDUP_CACHE_SIZE` | 2048 entries | + +## All Environment Variables + +| Variable | Required | Default | Description | +|----------|----------|---------|-------------| +| `FEISHU_APP_ID` | ✅ | — | Feishu/Lark App ID | +| `FEISHU_APP_SECRET` | ✅ | — | Feishu/Lark App Secret | +| `FEISHU_DOMAIN` | — | `feishu` | `feishu` (China) or `lark` (international) | +| `FEISHU_CONNECTION_MODE` | — | `websocket` | `websocket` or `webhook` | +| `FEISHU_ALLOWED_USERS` | — | _(empty)_ | Comma-separated open_id list for user allowlist | +| `FEISHU_HOME_CHANNEL` | — | — | Chat ID for cron/notification output | +| `FEISHU_ENCRYPT_KEY` | — | _(empty)_ | Encrypt key for webhook signature verification | +| `FEISHU_VERIFICATION_TOKEN` | — | _(empty)_ | Verification token for webhook payload auth | +| `FEISHU_GROUP_POLICY` | — | `allowlist` | Group message policy: `open`, `allowlist`, `disabled` | +| `FEISHU_BOT_OPEN_ID` | — | _(empty)_ | Bot's open_id (for @mention detection) | +| `FEISHU_BOT_USER_ID` | — | _(empty)_ | Bot's user_id (for @mention detection) | +| `FEISHU_BOT_NAME` | — | _(empty)_ | Bot's display name (for @mention detection) | +| `FEISHU_WEBHOOK_HOST` | — | `127.0.0.1` | Webhook server bind address | +| `FEISHU_WEBHOOK_PORT` | — | `8765` | Webhook server port | +| `FEISHU_WEBHOOK_PATH` | — | `/feishu/webhook` | Webhook endpoint path | +| `HERMES_FEISHU_DEDUP_CACHE_SIZE` | — | `2048` | Max deduplicated message IDs to track | +| `HERMES_FEISHU_TEXT_BATCH_DELAY_SECONDS` | — | `0.6` | Text burst debounce quiet period | +| 
`HERMES_FEISHU_TEXT_BATCH_MAX_MESSAGES` | — | `8` | Max messages merged per text batch | +| `HERMES_FEISHU_TEXT_BATCH_MAX_CHARS` | — | `4000` | Max characters merged per text batch | +| `HERMES_FEISHU_MEDIA_BATCH_DELAY_SECONDS` | — | `0.8` | Media burst debounce quiet period | + +## Troubleshooting + +| Problem | Fix | +|---------|-----| +| `lark-oapi not installed` | Install the SDK: `pip install lark-oapi` | +| `websockets not installed; websocket mode unavailable` | Install websockets: `pip install websockets` | +| `aiohttp not installed; webhook mode unavailable` | Install aiohttp: `pip install aiohttp` | +| `FEISHU_APP_ID or FEISHU_APP_SECRET not set` | Set both env vars or configure via `hermes gateway setup` | +| `Another local Hermes gateway is already using this Feishu app_id` | Only one Hermes instance can use the same app_id at a time. Stop the other gateway first. | +| Bot doesn't respond in groups | Ensure the bot is @mentioned, check `FEISHU_GROUP_POLICY`, and verify the sender is in `FEISHU_ALLOWED_USERS` if policy is `allowlist` | +| `Webhook rejected: invalid verification token` | Ensure `FEISHU_VERIFICATION_TOKEN` matches the token in your Feishu app's Event Subscriptions config | +| `Webhook rejected: invalid signature` | Ensure `FEISHU_ENCRYPT_KEY` matches the encrypt key in your Feishu app config | +| Post messages show as plain text | The Feishu API rejected the post payload; this is normal fallback behavior. Check logs for details. | +| Images/files not received by bot | Grant `im:message` and `im:resource` permission scopes to your Feishu app | +| Bot identity not auto-detected | Grant `admin:app.info:readonly` scope, or set `FEISHU_BOT_OPEN_ID` / `FEISHU_BOT_NAME` manually | +| `Webhook rate limit exceeded` | More than 120 requests/minute from the same IP. This is usually a misconfiguration or loop. 
| ## Toolset diff --git a/website/docs/user-guide/messaging/matrix.md b/website/docs/user-guide/messaging/matrix.md index 020e15bd6..70b8855a2 100644 --- a/website/docs/user-guide/messaging/matrix.md +++ b/website/docs/user-guide/messaging/matrix.md @@ -352,3 +352,4 @@ For more information on securing your Hermes Agent deployment, see the [Security - **Federation**: If you're on a federated homeserver, the bot can communicate with users from other servers — just add their full `@user:server` IDs to `MATRIX_ALLOWED_USERS`. - **Auto-join**: The bot automatically accepts room invites and joins. It starts responding immediately after joining. - **Media support**: Hermes can send and receive images, audio, video, and file attachments. Media is uploaded to your homeserver using the Matrix content repository API. +- **Native voice messages (MSC3245)**: The Matrix adapter automatically tags outgoing voice messages with the `org.matrix.msc3245.voice` flag. This means TTS responses and voice audio are rendered as **native voice bubbles** in Element and other clients that support MSC3245, rather than as generic audio file attachments. Incoming voice messages with the MSC3245 flag are also correctly identified and routed to speech-to-text transcription. No configuration is needed — this works automatically. diff --git a/website/docs/user-guide/messaging/slack.md b/website/docs/user-guide/messaging/slack.md index f011dcd78..21511f77d 100644 --- a/website/docs/user-guide/messaging/slack.md +++ b/website/docs/user-guide/messaging/slack.md @@ -237,6 +237,60 @@ Make sure the bot has been **invited to the channel** (`/invite @Hermes Agent`). --- +## Multi-Workspace Support + +Hermes can connect to **multiple Slack workspaces** simultaneously using a single gateway instance. Each workspace is authenticated independently with its own bot user ID. 
+ +### Configuration + +Provide multiple bot tokens as a **comma-separated list** in `SLACK_BOT_TOKEN`: + +```bash +# Multiple bot tokens — one per workspace +SLACK_BOT_TOKEN=xoxb-workspace1-token,xoxb-workspace2-token,xoxb-workspace3-token + +# A single app-level token is still used for Socket Mode +SLACK_APP_TOKEN=xapp-your-app-token +``` + +Or in `~/.hermes/config.yaml`: + +```yaml +platforms: + slack: + token: "xoxb-workspace1-token,xoxb-workspace2-token" +``` + +### OAuth Token File + +In addition to tokens in the environment or config, Hermes also loads tokens from an **OAuth token file** at: + +``` +~/.hermes/platforms/slack/slack_tokens.json +``` + +This file is a JSON object mapping team IDs to token entries: + +```json +{ + "T01ABC2DEF3": { + "token": "xoxb-workspace-token-here", + "team_name": "My Workspace" + } +} +``` + +Tokens from this file are merged with any tokens specified via `SLACK_BOT_TOKEN`. Duplicate tokens are automatically deduplicated. + +### How it works + +- The **first token** in the list is the primary token, used for the Socket Mode connection (AsyncApp). +- Each token is authenticated via `auth.test` on startup. The gateway maps each `team_id` to its own `WebClient` and `bot_user_id`. +- When a message arrives, Hermes uses the correct workspace-specific client to respond. +- The primary `bot_user_id` (from the first token) is used for backward compatibility with features that expect a single bot identity. + +--- + ## Voice Messages Hermes supports voice on Slack: diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md index be99eaa75..c984ecdbc 100644 --- a/website/docs/user-guide/messaging/telegram.md +++ b/website/docs/user-guide/messaging/telegram.md @@ -258,6 +258,73 @@ Topics created outside of the config (e.g., by manually calling the Telegram API - **Privacy policy:** Telegram now requires bots to have a privacy policy. 
Set one via BotFather with `/setprivacy_policy`, or Telegram may auto-generate a placeholder. This is particularly important if your bot is public-facing. - **Message streaming:** Bot API 9.x added support for streaming long responses, which can improve perceived latency for lengthy agent replies. +## Webhook Mode + +By default, the Telegram adapter connects via **long polling** — the gateway makes outbound connections to Telegram's servers. This works everywhere but keeps a persistent connection open. + +**Webhook mode** is an alternative where Telegram pushes updates to your server over HTTPS. This is ideal for **serverless and cloud deployments** (Fly.io, Railway, etc.) where inbound HTTP can wake a suspended machine. + +### Configuration + +Set the `TELEGRAM_WEBHOOK_URL` environment variable to enable webhook mode: + +```bash +# Required — your public HTTPS endpoint +TELEGRAM_WEBHOOK_URL=https://app.fly.dev/telegram + +# Optional — local listen port (default: 8443) +TELEGRAM_WEBHOOK_PORT=8443 + +# Optional — secret token for update verification (auto-generated if not set) +TELEGRAM_WEBHOOK_SECRET=my-secret-token +``` + +Or in `~/.hermes/config.yaml`: + +```yaml +telegram: + webhook_mode: true +``` + +When `TELEGRAM_WEBHOOK_URL` is set, the gateway starts an HTTP server listening on `0.0.0.0:<TELEGRAM_WEBHOOK_PORT>` and registers the webhook URL with Telegram. The URL path is extracted from the webhook URL (defaults to `/telegram`). + +:::warning +Telegram requires a **valid TLS certificate** on the webhook endpoint. Self-signed certificates will be rejected. Use a reverse proxy (nginx, Caddy) or a platform that provides TLS termination (Fly.io, Railway, Cloudflare Tunnel). +::: + +## DNS-over-HTTPS Fallback IPs + +In some restricted networks, `api.telegram.org` may resolve to an IP that is unreachable. The Telegram adapter includes a **fallback IP** mechanism that transparently retries connections against alternative IPs while preserving the correct TLS hostname and SNI.
+ +### How it works + +1. If `TELEGRAM_FALLBACK_IPS` is set, those IPs are used directly. +2. Otherwise, the adapter automatically queries **Google DNS** and **Cloudflare DNS** via DNS-over-HTTPS (DoH) to discover alternative IPs for `api.telegram.org`. +3. IPs returned by DoH that differ from the system DNS result are used as fallbacks. +4. If DoH is also blocked, a hardcoded seed IP (`149.154.167.220`) is used as a last resort. +5. Once a fallback IP succeeds, it becomes "sticky" — subsequent requests use it directly without retrying the primary path first. + +### Configuration + +```bash +# Explicit fallback IPs (comma-separated) +TELEGRAM_FALLBACK_IPS=149.154.167.220,149.154.167.221 +``` + +Or in `~/.hermes/config.yaml`: + +```yaml +platforms: + telegram: + extra: + fallback_ips: + - "149.154.167.220" +``` + +:::tip +You usually don't need to configure this manually. The auto-discovery via DoH handles most restricted-network scenarios. The `TELEGRAM_FALLBACK_IPS` env var is only needed if DoH is also blocked on your network. 
+::: + ## Troubleshooting | Problem | Solution | diff --git a/website/docs/user-guide/messaging/wecom.md b/website/docs/user-guide/messaging/wecom.md index e5a551b8f..1a078a892 100644 --- a/website/docs/user-guide/messaging/wecom.md +++ b/website/docs/user-guide/messaging/wecom.md @@ -13,6 +13,7 @@ Connect Hermes to [WeCom](https://work.weixin.qq.com/) (企业微信), Tencent's - A WeCom organization account - An AI Bot created in the WeCom Admin Console - The Bot ID and Secret from the bot's credentials page +- Python packages: `aiohttp` and `httpx` ## Setup @@ -56,10 +57,12 @@ hermes gateway start - **WebSocket transport** — persistent connection, no public endpoint needed - **DM and group messaging** — configurable access policies +- **Per-group sender allowlists** — fine-grained control over who can interact in each group - **Media support** — images, files, voice, video upload and download - **AES-encrypted media** — automatic decryption for inbound attachments - **Quote context** — preserves reply threading - **Markdown rendering** — rich text responses +- **Reply-mode streaming** — correlates responses to inbound message context - **Auto-reconnect** — exponential backoff on connection drops ## Configuration Options @@ -75,12 +78,187 @@ Set these in `config.yaml` under `platforms.wecom.extra`: | `group_policy` | `open` | Group access: `open`, `allowlist`, `disabled` | | `allow_from` | `[]` | User IDs allowed for DMs (when dm_policy=allowlist) | | `group_allow_from` | `[]` | Group IDs allowed (when group_policy=allowlist) | +| `groups` | `{}` | Per-group configuration (see below) | + +## Access Policies + +### DM Policy + +Controls who can send direct messages to the bot: + +| Value | Behavior | +|-------|----------| +| `open` | Anyone can DM the bot (default) | +| `allowlist` | Only user IDs in `allow_from` can DM | +| `disabled` | All DMs are ignored | +| `pairing` | Pairing mode (for initial setup) | + +```bash +WECOM_DM_POLICY=allowlist +``` + +### Group Policy + 
+Controls which groups the bot responds in: + +| Value | Behavior | +|-------|----------| +| `open` | Bot responds in all groups (default) | +| `allowlist` | Bot only responds in group IDs listed in `group_allow_from` | +| `disabled` | All group messages are ignored | + +```bash +WECOM_GROUP_POLICY=allowlist +``` + +### Per-Group Sender Allowlists + +For fine-grained control, you can restrict which users are allowed to interact with the bot within specific groups. This is configured in `config.yaml`: + +```yaml +platforms: + wecom: + enabled: true + extra: + bot_id: "your-bot-id" + secret: "your-secret" + group_policy: "allowlist" + group_allow_from: + - "group_id_1" + - "group_id_2" + groups: + group_id_1: + allow_from: + - "user_alice" + - "user_bob" + group_id_2: + allow_from: + - "user_charlie" + "*": + allow_from: + - "user_admin" +``` + +**How it works:** + +1. The `group_policy` and `group_allow_from` controls determine whether a group is allowed at all. +2. If a group passes the top-level check, the `groups.<group_id>.allow_from` list (if present) further restricts which senders within that group can interact with the bot. +3. A wildcard `"*"` group entry serves as a default for groups not explicitly listed. +4. Allowlist entries support the `*` wildcard to allow all users, and entries are case-insensitive. +5. Entries can optionally use the `wecom:user:` or `wecom:group:` prefix format — the prefix is stripped automatically. + +If no `allow_from` is configured for a group, all users in that group are allowed (assuming the group itself passes the top-level policy check). + +## Media Support + +### Inbound (receiving) + +The adapter receives media attachments from users and caches them locally for agent processing: + +| Type | How it's handled | +|------|-----------------| +| **Images** | Downloaded and cached locally. Supports both URL-based and base64-encoded images. | +| **Files** | Downloaded and cached. Filename is preserved from the original message.
| +| **Voice** | Voice message text transcription is extracted if available. | +| **Mixed messages** | WeCom mixed-type messages (text + images) are parsed and all components extracted. | + +**Quoted messages:** Media from quoted (replied-to) messages is also extracted, so the agent has context about what the user is replying to. + +### AES-Encrypted Media Decryption + +WeCom encrypts some inbound media attachments with AES-256-CBC. The adapter handles this automatically: + +- When an inbound media item includes an `aeskey` field, the adapter downloads the encrypted bytes and decrypts them using AES-256-CBC with PKCS#7 padding. +- The AES key is the base64-decoded value of the `aeskey` field (must be exactly 32 bytes). +- The IV is derived from the first 16 bytes of the key. +- This requires the `cryptography` Python package (`pip install cryptography`). + +No configuration is needed — decryption happens transparently when encrypted media is received. + +### Outbound (sending) + +| Method | What it sends | Size limit | +|--------|--------------|------------| +| `send` | Markdown text messages | 4000 chars | +| `send_image` / `send_image_file` | Native image messages | 10 MB | +| `send_document` | File attachments | 20 MB | +| `send_voice` | Voice messages (AMR format only for native voice) | 2 MB | +| `send_video` | Video messages | 10 MB | + +**Chunked upload:** Files are uploaded in 512 KB chunks through a three-step protocol (init → chunks → finish). The adapter handles this automatically. + +**Automatic downgrade:** When media exceeds the native type's size limit but is under the absolute 20 MB file limit, it is automatically sent as a generic file attachment instead: + +- Images > 10 MB → sent as file +- Videos > 10 MB → sent as file +- Voice > 2 MB → sent as file +- Non-AMR audio → sent as file (WeCom only supports AMR for native voice) + +Files exceeding the absolute 20 MB limit are rejected with an informational message sent to the chat. 
+ +## Reply-Mode Stream Responses + +When the bot receives a message via the WeCom callback, the adapter remembers the inbound request ID. If a response is sent while the request context is still active, the adapter uses WeCom's reply-mode (`aibot_respond_msg`) with streaming to correlate the response directly to the inbound message. This provides a more natural conversation experience in the WeCom client. + +If the inbound request context has expired or is unavailable, the adapter falls back to proactive message sending via `aibot_send_msg`. + +Reply-mode also works for media: uploaded media can be sent as a reply to the originating message. + +## Connection and Reconnection + +The adapter maintains a persistent WebSocket connection to WeCom's gateway at `wss://openws.work.weixin.qq.com`. + +### Connection Lifecycle + +1. **Connect:** Opens a WebSocket connection and sends an `aibot_subscribe` authentication frame with the bot_id and secret. +2. **Heartbeat:** Sends application-level ping frames every 30 seconds to keep the connection alive. +3. **Listen:** Continuously reads inbound frames and dispatches message callbacks. + +### Reconnection Behavior + +On connection loss, the adapter uses exponential backoff to reconnect: + +| Attempt | Delay | +|---------|-------| +| 1st retry | 2 seconds | +| 2nd retry | 5 seconds | +| 3rd retry | 10 seconds | +| 4th retry | 30 seconds | +| 5th+ retry | 60 seconds | + +After each successful reconnection, the backoff counter resets to zero. All pending request futures are failed on disconnect so callers don't hang indefinitely. + +### Deduplication + +Inbound messages are deduplicated using message IDs with a 5-minute window and a maximum cache of 1000 entries. This prevents double-processing of messages during reconnection or network hiccups. 
+ +## All Environment Variables + +| Variable | Required | Default | Description | +|----------|----------|---------|-------------| +| `WECOM_BOT_ID` | ✅ | — | WeCom AI Bot ID | +| `WECOM_SECRET` | ✅ | — | WeCom AI Bot Secret | +| `WECOM_ALLOWED_USERS` | — | _(empty)_ | Comma-separated user IDs for the gateway-level allowlist | +| `WECOM_HOME_CHANNEL` | — | — | Chat ID for cron/notification output | +| `WECOM_WEBSOCKET_URL` | — | `wss://openws.work.weixin.qq.com` | WebSocket gateway URL | +| `WECOM_DM_POLICY` | — | `open` | DM access policy | +| `WECOM_GROUP_POLICY` | — | `open` | Group access policy | ## Troubleshooting | Problem | Fix | |---------|-----| -| "WECOM_BOT_ID and WECOM_SECRET are required" | Set both env vars or configure in setup wizard | -| "invalid secret (errcode=40013)" | Verify the secret matches your bot's credentials | -| "Timed out waiting for subscribe acknowledgement" | Check network connectivity to `openws.work.weixin.qq.com` | -| Bot doesn't respond in groups | Check `group_policy` setting and group allowlist | +| `WECOM_BOT_ID and WECOM_SECRET are required` | Set both env vars or configure in setup wizard | +| `WeCom startup failed: aiohttp not installed` | Install aiohttp: `pip install aiohttp` | +| `WeCom startup failed: httpx not installed` | Install httpx: `pip install httpx` | +| `invalid secret (errcode=40013)` | Verify the secret matches your bot's credentials | +| `Timed out waiting for subscribe acknowledgement` | Check network connectivity to `openws.work.weixin.qq.com` | +| Bot doesn't respond in groups | Check `group_policy` setting and ensure the group ID is in `group_allow_from` | +| Bot ignores certain users in a group | Check per-group `allow_from` lists in the `groups` config section | +| Media decryption fails | Install `cryptography`: `pip install cryptography` | +| `cryptography is required for WeCom media decryption` | The inbound media is AES-encrypted. 
Install: `pip install cryptography` | +| Voice messages sent as files | WeCom only supports AMR format for native voice. Other formats are auto-downgraded to file. | +| `File too large` error | WeCom has a 20 MB absolute limit on all file uploads. Compress or split the file. | +| Images sent as files | Images > 10 MB exceed the native image limit and are auto-downgraded to file attachments. | +| `Timeout sending message to WeCom` | The WebSocket may have disconnected. Check logs for reconnection messages. | +| `WeCom websocket closed during authentication` | Network issue or incorrect credentials. Verify bot_id and secret. | diff --git a/website/docs/user-guide/security.md b/website/docs/user-guide/security.md index 4d51161e1..195583639 100644 --- a/website/docs/user-guide/security.md +++ b/website/docs/user-guide/security.md @@ -22,6 +22,61 @@ The security model has five layers: Before executing any command, Hermes checks it against a curated list of dangerous patterns. If a match is found, the user must explicitly approve it. +### Approval Modes + +The approval system supports three modes, configured via `approvals.mode` in `~/.hermes/config.yaml`: + +```yaml +approvals: + mode: manual # manual | smart | off + timeout: 60 # seconds to wait for user response (default: 60) +``` + +| Mode | Behavior | +|------|----------| +| **manual** (default) | Always prompt the user for approval on dangerous commands | +| **smart** | Use an auxiliary LLM to assess risk. Low-risk commands (e.g., `python -c "print('hello')"`) are auto-approved. Genuinely dangerous commands are auto-denied. Uncertain cases escalate to a manual prompt. | +| **off** | Disable all approval checks — equivalent to running with `--yolo`. All commands execute without prompts. | + +:::warning +Setting `approvals.mode: off` disables all safety prompts. Use only in trusted environments (CI/CD, containers, etc.). 
+::: + +### YOLO Mode + +YOLO mode bypasses **all** dangerous command approval prompts for the current session. It can be activated three ways: + +1. **CLI flag**: Start a session with `hermes --yolo` or `hermes chat --yolo` +2. **Slash command**: Type `/yolo` during a session to toggle it on/off +3. **Environment variable**: Set `HERMES_YOLO_MODE=1` + +The `/yolo` command is a **toggle** — each use flips the mode on or off: + +``` +> /yolo + ⚡ YOLO mode ON — all commands auto-approved. Use with caution. + +> /yolo + ⚠ YOLO mode OFF — dangerous commands will require approval. +``` + +YOLO mode is available in both CLI and gateway sessions. Internally, it sets the `HERMES_YOLO_MODE` environment variable which is checked before every command execution. + +:::danger +YOLO mode disables **all** dangerous command safety checks for the session. Use only when you fully trust the commands being generated (e.g., well-tested automation scripts in disposable environments). +::: + +### Approval Timeout + +When a dangerous command prompt appears, the user has a configurable amount of time to respond. If no response is given within the timeout, the command is **denied** by default (fail-closed). + +Configure the timeout in `~/.hermes/config.yaml`: + +```yaml +approvals: + timeout: 60 # seconds (default: 60) +``` + ### What Triggers Approval The following patterns trigger approval prompts (defined in `tools/approval.py`): @@ -30,21 +85,32 @@ The following patterns trigger approval prompts (defined in `tools/approval.py`) |---------|-------------| | `rm -r` / `rm --recursive` | Recursive delete | | `rm ... 
/` | Delete in root path | -| `chmod 777` | World-writable permissions | +| `chmod 777/666` / `o+w` / `a+w` | World/other-writable permissions | +| `chmod --recursive` with unsafe perms | Recursive world/other-writable (long flag) | +| `chown -R root` / `chown --recursive root` | Recursive chown to root | | `mkfs` | Format filesystem | | `dd if=` | Disk copy | +| `> /dev/sd` | Write to block device | | `DROP TABLE/DATABASE` | SQL DROP | | `DELETE FROM` (without WHERE) | SQL DELETE without WHERE | | `TRUNCATE TABLE` | SQL TRUNCATE | | `> /etc/` | Overwrite system config | | `systemctl stop/disable/mask` | Stop/disable system services | | `kill -9 -1` | Kill all processes | -| `curl ... \| sh` | Pipe remote content to shell | -| `bash -c`, `python -e` | Shell/script execution via flags | -| `find -exec rm`, `find -delete` | Find with destructive actions | +| `pkill -9` | Force kill processes | | Fork bomb patterns | Fork bombs | +| `bash -c` / `sh -c` / `zsh -c` / `ksh -c` | Shell command execution via `-c` flag (including combined flags like `-lc`) | +| `python -e` / `perl -e` / `ruby -e` / `node -c` | Script execution via `-e`/`-c` flag | +| `curl ... \| sh` / `wget ... 
\| sh` | Pipe remote content to shell | +| `bash <(curl ...)` / `sh <(wget ...)` | Execute remote script via process substitution | +| `tee` to `/etc/`, `~/.ssh/`, `~/.hermes/.env` | Overwrite sensitive file via tee | +| `>` / `>>` to `/etc/`, `~/.ssh/`, `~/.hermes/.env` | Overwrite sensitive file via redirection | +| `xargs rm` | xargs with rm | +| `find -exec rm` / `find -delete` | Find with destructive actions | +| `cp`/`mv`/`install` to `/etc/` | Copy/move file into system config | +| `sed -i` / `sed --in-place` on `/etc/` | In-place edit of system config | | `pkill`/`killall` hermes/gateway | Self-termination prevention | -| `gateway run` with `&`/`disown`/`nohup` | Prevents starting gateway outside service manager | +| `gateway run` with `&`/`disown`/`nohup`/`setsid` | Prevents starting gateway outside service manager | :::info **Container bypass**: When running in `docker`, `singularity`, `modal`, or `daytona` backends, dangerous command checks are **skipped** because the container itself is the security boundary. Destructive commands inside a container can't harm the host. -- 2.43.0 From 07746dca0c1ac5e1f7afb698cb2e6a7615648c77 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:16:09 -0700 Subject: [PATCH 038/385] =?UTF-8?q?fix(matrix):=20E2EE=20decryption=20?= =?UTF-8?q?=E2=80=94=20request=20keys,=20auto-trust=20devices,=20retry=20b?= =?UTF-8?q?uffered=20events=20(#4083)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the Matrix adapter receives encrypted events it can't decrypt (MegolmEvent), it now: 1. Requests the missing room key from other devices via client.request_room_key(event) instead of silently dropping the message 2. Buffers undecrypted events (bounded to 100, 5 min TTL) and retries decryption after each E2EE maintenance cycle when new keys arrive 3. 
Auto-trusts/verifies all devices after key queries so other clients share session keys with the bot proactively 4. Exports Megolm keys on disconnect and imports them on connect, so session keys survive gateway restarts This addresses the 'could not decrypt event' warnings that caused the bot to miss messages in encrypted rooms. --- gateway/platforms/matrix.py | 168 ++++++++++++++++- tests/gateway/test_matrix.py | 350 +++++++++++++++++++++++++++++++++++ 2 files changed, 514 insertions(+), 4 deletions(-) diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index 309baeee7..c9bcd945a 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -49,6 +49,14 @@ _STORE_DIR = _get_hermes_dir("platforms/matrix/store", "matrix/store") # Grace period: ignore messages older than this many seconds before startup. _STARTUP_GRACE_SECONDS = 5 +# E2EE key export file for persistence across restarts. +_KEY_EXPORT_FILE = _STORE_DIR / "exported_keys.txt" +_KEY_EXPORT_PASSPHRASE = "hermes-matrix-e2ee-keys" + +# Pending undecrypted events: cap and TTL for retry buffer. +_MAX_PENDING_EVENTS = 100 +_PENDING_EVENT_TTL = 300 # seconds — stop retrying after 5 min + def check_matrix_requirements() -> bool: """Return True if the Matrix adapter can be used.""" @@ -111,6 +119,10 @@ class MatrixAdapter(BasePlatformAdapter): self._processed_events: deque = deque(maxlen=1000) self._processed_events_set: set = set() + # Buffer for undecrypted events pending key receipt. + # Each entry: (room, event, timestamp) + self._pending_megolm: list = [] + def _is_duplicate_event(self, event_id) -> bool: """Return True if this event was already processed. Tracks the ID otherwise.""" if not event_id: @@ -232,6 +244,16 @@ class MatrixAdapter(BasePlatformAdapter): logger.info("Matrix: E2EE crypto initialized") except Exception as exc: logger.warning("Matrix: crypto init issue: %s", exc) + + # Import previously exported Megolm keys (survives restarts). 
+ if _KEY_EXPORT_FILE.exists(): + try: + await client.import_keys( + str(_KEY_EXPORT_FILE), _KEY_EXPORT_PASSPHRASE, + ) + logger.info("Matrix: imported Megolm keys from backup") + except Exception as exc: + logger.debug("Matrix: could not import keys: %s", exc) elif self._encryption: logger.warning( "Matrix: E2EE requested but crypto store is not loaded; " @@ -286,6 +308,18 @@ class MatrixAdapter(BasePlatformAdapter): except (asyncio.CancelledError, Exception): pass + # Export Megolm keys before closing so the next restart can decrypt + # events that used sessions from this run. + if self._client and self._encryption and getattr(self._client, "olm", None): + try: + _STORE_DIR.mkdir(parents=True, exist_ok=True) + await self._client.export_keys( + str(_KEY_EXPORT_FILE), _KEY_EXPORT_PASSPHRASE, + ) + logger.info("Matrix: exported Megolm keys for next restart") + except Exception as exc: + logger.debug("Matrix: could not export keys on disconnect: %s", exc) + if self._client: await self._client.close() self._client = None @@ -665,17 +699,22 @@ class MatrixAdapter(BasePlatformAdapter): Hermes uses a custom sync loop instead of matrix-nio's sync_forever(), so we need to explicitly drive the key management work that sync_forever() normally handles for encrypted rooms. + + Also auto-trusts all devices (so senders share session keys with us) + and retries decryption for any buffered MegolmEvents. 
""" client = self._client if not client or not self._encryption or not getattr(client, "olm", None): return + did_query_keys = client.should_query_keys + tasks = [asyncio.create_task(client.send_to_device_messages())] if client.should_upload_keys: tasks.append(asyncio.create_task(client.keys_upload())) - if client.should_query_keys: + if did_query_keys: tasks.append(asyncio.create_task(client.keys_query())) if client.should_claim_keys: @@ -691,6 +730,111 @@ class MatrixAdapter(BasePlatformAdapter): except Exception as exc: logger.warning("Matrix: E2EE maintenance task failed: %s", exc) + # After key queries, auto-trust all devices so senders share keys with + # us. For a bot this is the right default — we want to decrypt + # everything, not enforce manual verification. + if did_query_keys: + self._auto_trust_devices() + + # Retry any buffered undecrypted events now that new keys may have + # arrived (from key requests, key queries, or to-device forwarding). + if self._pending_megolm: + await self._retry_pending_decryptions() + + def _auto_trust_devices(self) -> None: + """Trust/verify all unverified devices we know about. + + When other clients see our device as verified, they proactively share + Megolm session keys with us. Without this, many clients will refuse + to include an unverified device in key distributions. + """ + client = self._client + if not client: + return + + device_store = getattr(client, "device_store", None) + if not device_store: + return + + own_device = getattr(client, "device_id", None) + trusted_count = 0 + + try: + # DeviceStore.__iter__ yields OlmDevice objects directly. 
+ for device in device_store: + if getattr(device, "device_id", None) == own_device: + continue + if not getattr(device, "verified", False): + client.verify_device(device) + trusted_count += 1 + except Exception as exc: + logger.debug("Matrix: auto-trust error: %s", exc) + + if trusted_count: + logger.info("Matrix: auto-trusted %d new device(s)", trusted_count) + + async def _retry_pending_decryptions(self) -> None: + """Retry decrypting buffered MegolmEvents after new keys arrive.""" + import nio + + client = self._client + if not client or not self._pending_megolm: + return + + now = time.time() + still_pending: list = [] + + for room, event, ts in self._pending_megolm: + # Drop events that have aged past the TTL. + if now - ts > _PENDING_EVENT_TTL: + logger.debug( + "Matrix: dropping expired pending event %s (age %.0fs)", + getattr(event, "event_id", "?"), now - ts, + ) + continue + + try: + decrypted = client.decrypt_event(event) + except Exception: + # Still missing the key — keep in buffer. + still_pending.append((room, event, ts)) + continue + + if isinstance(decrypted, nio.MegolmEvent): + # decrypt_event returned the same undecryptable event. + still_pending.append((room, event, ts)) + continue + + logger.info( + "Matrix: decrypted buffered event %s (%s)", + getattr(event, "event_id", "?"), + type(decrypted).__name__, + ) + + # Route to the appropriate handler based on decrypted type. 
+ try: + if isinstance(decrypted, nio.RoomMessageText): + await self._on_room_message(room, decrypted) + elif isinstance( + decrypted, + (nio.RoomMessageImage, nio.RoomMessageAudio, + nio.RoomMessageVideo, nio.RoomMessageFile), + ): + await self._on_room_message_media(room, decrypted) + else: + logger.debug( + "Matrix: decrypted event %s has unhandled type %s", + getattr(event, "event_id", "?"), + type(decrypted).__name__, + ) + except Exception as exc: + logger.warning( + "Matrix: error processing decrypted event %s: %s", + getattr(event, "event_id", "?"), exc, + ) + + self._pending_megolm = still_pending + # ------------------------------------------------------------------ # Event callbacks # ------------------------------------------------------------------ @@ -712,13 +856,29 @@ class MatrixAdapter(BasePlatformAdapter): if event_ts and event_ts < self._startup_ts - _STARTUP_GRACE_SECONDS: return - # Handle decrypted MegolmEvents — extract the inner event. + # Handle undecryptable MegolmEvents: request the missing session key + # and buffer the event for retry once the key arrives. if isinstance(event, nio.MegolmEvent): - # Failed to decrypt. logger.warning( - "Matrix: could not decrypt event %s in %s", + "Matrix: could not decrypt event %s in %s — requesting key", event.event_id, room.room_id, ) + + # Ask other devices in the room to forward the session key. + try: + resp = await self._client.request_room_key(event) + if hasattr(resp, "event_id") or not isinstance(resp, Exception): + logger.debug( + "Matrix: room key request sent for session %s", + getattr(event, "session_id", "?"), + ) + except Exception as exc: + logger.debug("Matrix: room key request failed: %s", exc) + + # Buffer for retry on next maintenance cycle. + self._pending_megolm.append((room, event, time.time())) + if len(self._pending_megolm) > _MAX_PENDING_EVENTS: + self._pending_megolm = self._pending_megolm[-_MAX_PENDING_EVENTS:] return # Skip edits (m.replace relation). 
diff --git a/tests/gateway/test_matrix.py b/tests/gateway/test_matrix.py index 5a9879f60..9912eef00 100644 --- a/tests/gateway/test_matrix.py +++ b/tests/gateway/test_matrix.py @@ -643,3 +643,353 @@ class TestMatrixEncryptedSendFallback: assert fake_client.room_send.await_count == 2 second_call = fake_client.room_send.await_args_list[1] assert second_call.kwargs.get("ignore_unverified_devices") is True + + +# --------------------------------------------------------------------------- +# E2EE: Auto-trust devices +# --------------------------------------------------------------------------- + +class TestMatrixAutoTrustDevices: + def test_auto_trust_verifies_unverified_devices(self): + adapter = _make_adapter() + + # DeviceStore.__iter__ yields OlmDevice objects directly. + device_a = MagicMock() + device_a.device_id = "DEVICE_A" + device_a.verified = False + device_b = MagicMock() + device_b.device_id = "DEVICE_B" + device_b.verified = True # already trusted + device_c = MagicMock() + device_c.device_id = "DEVICE_C" + device_c.verified = False + + fake_client = MagicMock() + fake_client.device_id = "OWN_DEVICE" + fake_client.verify_device = MagicMock() + + # Simulate DeviceStore iteration (yields OlmDevice objects) + fake_client.device_store = MagicMock() + fake_client.device_store.__iter__ = MagicMock( + return_value=iter([device_a, device_b, device_c]) + ) + + adapter._client = fake_client + adapter._auto_trust_devices() + + # Should have verified device_a and device_c (not device_b, already verified) + assert fake_client.verify_device.call_count == 2 + verified_devices = [call.args[0] for call in fake_client.verify_device.call_args_list] + assert device_a in verified_devices + assert device_c in verified_devices + assert device_b not in verified_devices + + def test_auto_trust_skips_own_device(self): + adapter = _make_adapter() + + own_device = MagicMock() + own_device.device_id = "MY_DEVICE" + own_device.verified = False + + fake_client = MagicMock() + 
fake_client.device_id = "MY_DEVICE" + fake_client.verify_device = MagicMock() + + fake_client.device_store = MagicMock() + fake_client.device_store.__iter__ = MagicMock( + return_value=iter([own_device]) + ) + + adapter._client = fake_client + adapter._auto_trust_devices() + + fake_client.verify_device.assert_not_called() + + def test_auto_trust_handles_missing_device_store(self): + adapter = _make_adapter() + fake_client = MagicMock(spec=[]) # empty spec — no attributes + adapter._client = fake_client + # Should not raise + adapter._auto_trust_devices() + + +# --------------------------------------------------------------------------- +# E2EE: MegolmEvent key request + buffering +# --------------------------------------------------------------------------- + +class TestMatrixMegolmEventHandling: + @pytest.mark.asyncio + async def test_megolm_event_requests_room_key_and_buffers(self): + adapter = _make_adapter() + adapter._user_id = "@bot:example.org" + adapter._startup_ts = 0.0 + adapter._dm_rooms = {} + + fake_megolm = MagicMock() + fake_megolm.sender = "@alice:example.org" + fake_megolm.event_id = "$encrypted_event" + fake_megolm.server_timestamp = 9999999999000 # future + fake_megolm.session_id = "SESSION123" + + fake_room = MagicMock() + fake_room.room_id = "!room:example.org" + + fake_client = MagicMock() + fake_client.request_room_key = AsyncMock(return_value=MagicMock()) + adapter._client = fake_client + + # Create a MegolmEvent class for isinstance check + fake_nio = MagicMock() + FakeMegolmEvent = type("MegolmEvent", (), {}) + fake_megolm.__class__ = FakeMegolmEvent + fake_nio.MegolmEvent = FakeMegolmEvent + + with patch.dict("sys.modules", {"nio": fake_nio}): + await adapter._on_room_message(fake_room, fake_megolm) + + # Should have requested the room key + fake_client.request_room_key.assert_awaited_once_with(fake_megolm) + + # Should have buffered the event + assert len(adapter._pending_megolm) == 1 + room, event, ts = adapter._pending_megolm[0] + 
assert room is fake_room + assert event is fake_megolm + + @pytest.mark.asyncio + async def test_megolm_buffer_capped(self): + adapter = _make_adapter() + adapter._user_id = "@bot:example.org" + adapter._startup_ts = 0.0 + adapter._dm_rooms = {} + + fake_client = MagicMock() + fake_client.request_room_key = AsyncMock(return_value=MagicMock()) + adapter._client = fake_client + + FakeMegolmEvent = type("MegolmEvent", (), {}) + fake_nio = MagicMock() + fake_nio.MegolmEvent = FakeMegolmEvent + + # Fill the buffer past max + from gateway.platforms.matrix import _MAX_PENDING_EVENTS + with patch.dict("sys.modules", {"nio": fake_nio}): + for i in range(_MAX_PENDING_EVENTS + 10): + evt = MagicMock() + evt.__class__ = FakeMegolmEvent + evt.sender = "@alice:example.org" + evt.event_id = f"$event_{i}" + evt.server_timestamp = 9999999999000 + evt.session_id = f"SESSION_{i}" + room = MagicMock() + room.room_id = "!room:example.org" + await adapter._on_room_message(room, evt) + + assert len(adapter._pending_megolm) == _MAX_PENDING_EVENTS + + +# --------------------------------------------------------------------------- +# E2EE: Retry pending decryptions +# --------------------------------------------------------------------------- + +class TestMatrixRetryPendingDecryptions: + @pytest.mark.asyncio + async def test_successful_decryption_routes_to_text_handler(self): + import time as _time + + adapter = _make_adapter() + adapter._user_id = "@bot:example.org" + adapter._startup_ts = 0.0 + adapter._dm_rooms = {} + + # Create types + FakeMegolmEvent = type("MegolmEvent", (), {}) + FakeRoomMessageText = type("RoomMessageText", (), {}) + + decrypted_event = MagicMock() + decrypted_event.__class__ = FakeRoomMessageText + + fake_megolm = MagicMock() + fake_megolm.__class__ = FakeMegolmEvent + fake_megolm.event_id = "$encrypted" + + fake_room = MagicMock() + now = _time.time() + + adapter._pending_megolm = [(fake_room, fake_megolm, now)] + + fake_client = MagicMock() + 
fake_client.decrypt_event = MagicMock(return_value=decrypted_event) + adapter._client = fake_client + + fake_nio = MagicMock() + fake_nio.MegolmEvent = FakeMegolmEvent + fake_nio.RoomMessageText = FakeRoomMessageText + fake_nio.RoomMessageImage = type("RoomMessageImage", (), {}) + fake_nio.RoomMessageAudio = type("RoomMessageAudio", (), {}) + fake_nio.RoomMessageVideo = type("RoomMessageVideo", (), {}) + fake_nio.RoomMessageFile = type("RoomMessageFile", (), {}) + + with patch.dict("sys.modules", {"nio": fake_nio}): + with patch.object(adapter, "_on_room_message", AsyncMock()) as mock_handler: + await adapter._retry_pending_decryptions() + mock_handler.assert_awaited_once_with(fake_room, decrypted_event) + + # Buffer should be empty now + assert len(adapter._pending_megolm) == 0 + + @pytest.mark.asyncio + async def test_still_undecryptable_stays_in_buffer(self): + import time as _time + + adapter = _make_adapter() + + FakeMegolmEvent = type("MegolmEvent", (), {}) + + fake_megolm = MagicMock() + fake_megolm.__class__ = FakeMegolmEvent + fake_megolm.event_id = "$still_encrypted" + + now = _time.time() + adapter._pending_megolm = [(MagicMock(), fake_megolm, now)] + + fake_client = MagicMock() + # decrypt_event raises when key is still missing + fake_client.decrypt_event = MagicMock(side_effect=Exception("missing key")) + adapter._client = fake_client + + fake_nio = MagicMock() + fake_nio.MegolmEvent = FakeMegolmEvent + + with patch.dict("sys.modules", {"nio": fake_nio}): + await adapter._retry_pending_decryptions() + + assert len(adapter._pending_megolm) == 1 + + @pytest.mark.asyncio + async def test_expired_events_dropped(self): + import time as _time + + adapter = _make_adapter() + + from gateway.platforms.matrix import _PENDING_EVENT_TTL + + fake_megolm = MagicMock() + fake_megolm.event_id = "$old_event" + fake_megolm.__class__ = type("MegolmEvent", (), {}) + + # Timestamp well past TTL + old_ts = _time.time() - _PENDING_EVENT_TTL - 60 + adapter._pending_megolm = 
[(MagicMock(), fake_megolm, old_ts)] + + fake_client = MagicMock() + adapter._client = fake_client + + fake_nio = MagicMock() + fake_nio.MegolmEvent = type("MegolmEvent", (), {}) + + with patch.dict("sys.modules", {"nio": fake_nio}): + await adapter._retry_pending_decryptions() + + # Should have been dropped + assert len(adapter._pending_megolm) == 0 + # Should NOT have tried to decrypt + fake_client.decrypt_event.assert_not_called() + + @pytest.mark.asyncio + async def test_media_event_routes_to_media_handler(self): + import time as _time + + adapter = _make_adapter() + adapter._user_id = "@bot:example.org" + adapter._startup_ts = 0.0 + + FakeMegolmEvent = type("MegolmEvent", (), {}) + FakeRoomMessageImage = type("RoomMessageImage", (), {}) + + decrypted_image = MagicMock() + decrypted_image.__class__ = FakeRoomMessageImage + + fake_megolm = MagicMock() + fake_megolm.__class__ = FakeMegolmEvent + fake_megolm.event_id = "$encrypted_image" + + fake_room = MagicMock() + now = _time.time() + adapter._pending_megolm = [(fake_room, fake_megolm, now)] + + fake_client = MagicMock() + fake_client.decrypt_event = MagicMock(return_value=decrypted_image) + adapter._client = fake_client + + fake_nio = MagicMock() + fake_nio.MegolmEvent = FakeMegolmEvent + fake_nio.RoomMessageText = type("RoomMessageText", (), {}) + fake_nio.RoomMessageImage = FakeRoomMessageImage + fake_nio.RoomMessageAudio = type("RoomMessageAudio", (), {}) + fake_nio.RoomMessageVideo = type("RoomMessageVideo", (), {}) + fake_nio.RoomMessageFile = type("RoomMessageFile", (), {}) + + with patch.dict("sys.modules", {"nio": fake_nio}): + with patch.object(adapter, "_on_room_message_media", AsyncMock()) as mock_media: + await adapter._retry_pending_decryptions() + mock_media.assert_awaited_once_with(fake_room, decrypted_image) + + assert len(adapter._pending_megolm) == 0 + + +# --------------------------------------------------------------------------- +# E2EE: Key export / import +# 
--------------------------------------------------------------------------- + +class TestMatrixKeyExportImport: + @pytest.mark.asyncio + async def test_disconnect_exports_keys(self): + adapter = _make_adapter() + adapter._encryption = True + adapter._sync_task = None + + fake_client = MagicMock() + fake_client.olm = object() + fake_client.export_keys = AsyncMock() + fake_client.close = AsyncMock() + adapter._client = fake_client + + from gateway.platforms.matrix import _KEY_EXPORT_FILE, _KEY_EXPORT_PASSPHRASE + + await adapter.disconnect() + + fake_client.export_keys.assert_awaited_once_with( + str(_KEY_EXPORT_FILE), _KEY_EXPORT_PASSPHRASE, + ) + + @pytest.mark.asyncio + async def test_disconnect_handles_export_failure(self): + adapter = _make_adapter() + adapter._encryption = True + adapter._sync_task = None + + fake_client = MagicMock() + fake_client.olm = object() + fake_client.export_keys = AsyncMock(side_effect=Exception("export failed")) + fake_client.close = AsyncMock() + adapter._client = fake_client + + # Should not raise + await adapter.disconnect() + assert adapter._client is None # still cleaned up + + @pytest.mark.asyncio + async def test_disconnect_skips_export_when_no_encryption(self): + adapter = _make_adapter() + adapter._encryption = False + adapter._sync_task = None + + fake_client = MagicMock() + fake_client.close = AsyncMock() + adapter._client = fake_client + + await adapter.disconnect() + # Should not have tried to export + assert not hasattr(fake_client, "export_keys") or \ + not fake_client.export_keys.called -- 2.43.0 From 11aa44d34d13af1f15eb0642276cd223879b6c5d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:21:59 -0700 Subject: [PATCH 039/385] docs(telegram): add webhook mode documentation (#4089) Documents the Telegram webhook mode from #3880: - New 'Webhook Mode' section in telegram.md with polling vs webhook comparison, config table, Fly.io deployment example, 
troubleshooting - Add TELEGRAM_WEBHOOK_URL/PORT/SECRET to environment-variables.md - Add Telegram section to .env.example (existing + webhook vars) Co-authored-by: raulbcs --- .env.example | 15 +++++ .../docs/reference/environment-variables.md | 3 + website/docs/user-guide/messaging/telegram.md | 61 +++++++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/.env.example b/.env.example index bcb5708d6..3df76497e 100644 --- a/.env.example +++ b/.env.example @@ -231,6 +231,21 @@ VOICE_TOOLS_OPENAI_KEY= # Slack allowed users (comma-separated Slack user IDs) # SLACK_ALLOWED_USERS= +# ============================================================================= +# TELEGRAM INTEGRATION +# ============================================================================= +# Telegram Bot Token - From @BotFather (https://t.me/BotFather) +# TELEGRAM_BOT_TOKEN= +# TELEGRAM_ALLOWED_USERS= # Comma-separated user IDs +# TELEGRAM_HOME_CHANNEL= # Default chat for cron delivery +# TELEGRAM_HOME_CHANNEL_NAME= # Display name for home channel + +# Webhook mode (optional — for cloud deployments like Fly.io/Railway) +# Default is long polling. Setting TELEGRAM_WEBHOOK_URL switches to webhook mode. 
+# TELEGRAM_WEBHOOK_URL=https://my-app.fly.dev/telegram +# TELEGRAM_WEBHOOK_PORT=8443 +# TELEGRAM_WEBHOOK_SECRET= # Recommended for production + # WhatsApp (built-in Baileys bridge — run `hermes whatsapp` to pair) # WHATSAPP_ENABLED=false # WHATSAPP_ALLOWED_USERS=15551234567 diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index d94121481..fd57ffb02 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -153,6 +153,9 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `TELEGRAM_ALLOWED_USERS` | Comma-separated user IDs allowed to use the bot | | `TELEGRAM_HOME_CHANNEL` | Default Telegram chat/channel for cron delivery | | `TELEGRAM_HOME_CHANNEL_NAME` | Display name for the Telegram home channel | +| `TELEGRAM_WEBHOOK_URL` | Public HTTPS URL for webhook mode (enables webhook instead of polling) | +| `TELEGRAM_WEBHOOK_PORT` | Local listen port for webhook server (default: `8443`) | +| `TELEGRAM_WEBHOOK_SECRET` | Secret token for verifying updates come from Telegram | | `DISCORD_BOT_TOKEN` | Discord bot token | | `DISCORD_ALLOWED_USERS` | Comma-separated Discord user IDs allowed to use the bot | | `DISCORD_HOME_CHANNEL` | Default Discord channel for cron delivery | diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md index c984ecdbc..473619ccf 100644 --- a/website/docs/user-guide/messaging/telegram.md +++ b/website/docs/user-guide/messaging/telegram.md @@ -112,6 +112,66 @@ hermes gateway The bot should come online within seconds. Send it a message on Telegram to verify. +## Webhook Mode + +By default, Hermes connects to Telegram using **long polling** — the gateway makes outbound requests to Telegram's servers to fetch new updates. This works well for local and always-on deployments. 
+ +For **cloud deployments** (Fly.io, Railway, Render, etc.), **webhook mode** is more cost-effective. These platforms can auto-wake suspended machines on inbound HTTP traffic, but not on outbound connections. Since polling is outbound, a polling bot can never sleep. Webhook mode flips the direction — Telegram pushes updates to your bot's HTTPS URL, enabling sleep-when-idle deployments. + +| | Polling (default) | Webhook | +|---|---|---| +| Direction | Gateway → Telegram (outbound) | Telegram → Gateway (inbound) | +| Best for | Local, always-on servers | Cloud platforms with auto-wake | +| Setup | No extra config | Set `TELEGRAM_WEBHOOK_URL` | +| Idle cost | Machine must stay running | Machine can sleep between messages | + +### Configuration + +Add the following to `~/.hermes/.env`: + +```bash +TELEGRAM_WEBHOOK_URL=https://my-app.fly.dev/telegram +# TELEGRAM_WEBHOOK_PORT=8443 # optional, default 8443 +# TELEGRAM_WEBHOOK_SECRET=mysecret # optional, recommended +``` + +| Variable | Required | Description | +|----------|----------|-------------| +| `TELEGRAM_WEBHOOK_URL` | Yes | Public HTTPS URL where Telegram will send updates. The URL path is auto-extracted (e.g., `/telegram` from the example above). | +| `TELEGRAM_WEBHOOK_PORT` | No | Local port the webhook server listens on (default: `8443`). | +| `TELEGRAM_WEBHOOK_SECRET` | No | Secret token for verifying that updates actually come from Telegram. **Strongly recommended** for production deployments. | + +When `TELEGRAM_WEBHOOK_URL` is set, the gateway starts an HTTP webhook server instead of polling. When unset, polling mode is used — no behavior change from previous versions. + +### Cloud deployment example (Fly.io) + +1. Add the env vars to your Fly.io app secrets: + +```bash +fly secrets set TELEGRAM_WEBHOOK_URL=https://my-app.fly.dev/telegram +fly secrets set TELEGRAM_WEBHOOK_SECRET=$(openssl rand -hex 32) +``` + +2. 
Expose the webhook port in your `fly.toml`: + +```toml +[[services]] + internal_port = 8443 + protocol = "tcp" + + [[services.ports]] + handlers = ["tls", "http"] + port = 443 +``` + +3. Deploy: + +```bash +fly deploy +``` + +The gateway log should show: `[telegram] Connected to Telegram (webhook mode)`. + ## Home Channel Use the `/sethome` command in any Telegram chat (DM or group) to designate it as the **home channel**. Scheduled tasks (cron jobs) deliver their results to this channel. @@ -335,6 +395,7 @@ You usually don't need to configure this manually. The auto-discovery via DoH ha | Voice messages not transcribed | Verify STT is available: install `faster-whisper` for local transcription, or set `GROQ_API_KEY` / `VOICE_TOOLS_OPENAI_KEY` in `~/.hermes/.env`. | | Voice replies are files, not bubbles | Install `ffmpeg` (needed for Edge TTS Opus conversion). | | Bot token revoked/invalid | Generate a new token via `/revoke` then `/newbot` or `/token` in BotFather. Update your `.env` file. | +| Webhook not receiving updates | Verify `TELEGRAM_WEBHOOK_URL` is publicly reachable (test with `curl`). Ensure your platform/reverse proxy routes inbound HTTPS traffic from the URL's port to the local listen port configured by `TELEGRAM_WEBHOOK_PORT` (they do not need to be the same number). Ensure SSL/TLS is active — Telegram only sends to HTTPS URLs. Check firewall rules. | ## Exec Approval -- 2.43.0 From 1b7473e702b23baad2a95df3b948f3518036a9f2 Mon Sep 17 00:00:00 2001 From: Robin Fernandes Date: Tue, 31 Mar 2026 09:29:59 +0900 Subject: [PATCH 040/385] Fixes and refactors enabled by recent updates to main. 
--- tests/tools/test_managed_modal_environment.py | 104 +++++++++- tests/tools/test_modal_snapshot_isolation.py | 4 + tools/environments/managed_modal.py | 172 ++++++++--------- tools/environments/modal.py | 98 ++++------ tools/environments/modal_common.py | 178 ++++++++++++++++++ 5 files changed, 406 insertions(+), 150 deletions(-) create mode 100644 tools/environments/modal_common.py diff --git a/tests/tools/test_managed_modal_environment.py b/tests/tools/test_managed_modal_environment.py index b52801809..10c1ab56f 100644 --- a/tests/tools/test_managed_modal_environment.py +++ b/tests/tools/test_managed_modal_environment.py @@ -6,6 +6,8 @@ import types from importlib.util import module_from_spec, spec_from_file_location from pathlib import Path +import pytest + TOOLS_DIR = Path(__file__).resolve().parents[2] / "tools" @@ -25,7 +27,7 @@ def _reset_modules(prefixes: tuple[str, ...]): sys.modules.pop(name, None) -def _install_fake_tools_package(): +def _install_fake_tools_package(*, credential_mounts=None): _reset_modules(("tools", "agent", "hermes_cli")) hermes_cli = types.ModuleType("hermes_cli") @@ -68,6 +70,9 @@ def _install_fake_tools_package(): managed_mode=True, ) ) + sys.modules["tools.credential_files"] = types.SimpleNamespace( + get_credential_file_mounts=lambda: list(credential_mounts or []), + ) return interrupt_event @@ -87,6 +92,7 @@ class _FakeResponse: def test_managed_modal_execute_polls_until_completed(monkeypatch): _install_fake_tools_package() managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py") + modal_common = sys.modules["tools.environments.modal_common"] calls = [] poll_count = {"value": 0} @@ -112,7 +118,7 @@ def test_managed_modal_execute_polls_until_completed(monkeypatch): raise AssertionError(f"Unexpected request: {method} {url}") monkeypatch.setattr(managed_modal.requests, "request", fake_request) - monkeypatch.setattr(managed_modal.time, "sleep", lambda _: None) + 
monkeypatch.setattr(modal_common.time, "sleep", lambda _: None) env = managed_modal.ManagedModalEnvironment(image="python:3.11") result = env.execute("echo hello") @@ -149,6 +155,7 @@ def test_managed_modal_create_sends_a_stable_idempotency_key(monkeypatch): def test_managed_modal_execute_cancels_on_interrupt(monkeypatch): interrupt_event = _install_fake_tools_package() managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py") + modal_common = sys.modules["tools.environments.modal_common"] calls = [] @@ -170,7 +177,7 @@ def test_managed_modal_execute_cancels_on_interrupt(monkeypatch): interrupt_event.set() monkeypatch.setattr(managed_modal.requests, "request", fake_request) - monkeypatch.setattr(managed_modal.time, "sleep", fake_sleep) + monkeypatch.setattr(modal_common.time, "sleep", fake_sleep) env = managed_modal.ManagedModalEnvironment(image="python:3.11") result = env.execute("sleep 30") @@ -190,6 +197,7 @@ def test_managed_modal_execute_cancels_on_interrupt(monkeypatch): def test_managed_modal_execute_returns_descriptive_error_on_missing_exec(monkeypatch): _install_fake_tools_package() managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py") + modal_common = sys.modules["tools.environments.modal_common"] def fake_request(method, url, headers=None, json=None, timeout=None): if method == "POST" and url.endswith("/v1/sandboxes"): @@ -203,7 +211,7 @@ def test_managed_modal_execute_returns_descriptive_error_on_missing_exec(monkeyp raise AssertionError(f"Unexpected request: {method} {url}") monkeypatch.setattr(managed_modal.requests, "request", fake_request) - monkeypatch.setattr(managed_modal.time, "sleep", lambda _: None) + monkeypatch.setattr(modal_common.time, "sleep", lambda _: None) env = managed_modal.ManagedModalEnvironment(image="python:3.11") result = env.execute("echo hello") @@ -211,3 +219,91 @@ def 
test_managed_modal_execute_returns_descriptive_error_on_missing_exec(monkeyp assert result["returncode"] == 1 assert "not found" in result["output"].lower() + + +def test_managed_modal_create_and_cleanup_preserve_gateway_persistence_fields(monkeypatch): + _install_fake_tools_package() + managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py") + + create_payloads = [] + terminate_payloads = [] + + def fake_request(method, url, headers=None, json=None, timeout=None): + if method == "POST" and url.endswith("/v1/sandboxes"): + create_payloads.append(json) + return _FakeResponse(200, {"id": "sandbox-1"}) + if method == "POST" and url.endswith("/terminate"): + terminate_payloads.append(json) + return _FakeResponse(200, {"status": "terminated"}) + raise AssertionError(f"Unexpected request: {method} {url}") + + monkeypatch.setattr(managed_modal.requests, "request", fake_request) + + env = managed_modal.ManagedModalEnvironment( + image="python:3.11", + task_id="task-managed-persist", + persistent_filesystem=False, + ) + env.cleanup() + + assert create_payloads == [{ + "image": "python:3.11", + "cwd": "/root", + "cpu": 1.0, + "memoryMiB": 5120.0, + "timeoutMs": 3_600_000, + "idleTimeoutMs": 300_000, + "persistentFilesystem": False, + "logicalKey": "task-managed-persist", + }] + assert terminate_payloads == [{"snapshotBeforeTerminate": False}] + + +def test_managed_modal_rejects_host_credential_passthrough(): + _install_fake_tools_package( + credential_mounts=[{ + "host_path": "/tmp/token.json", + "container_path": "/root/.hermes/token.json", + }] + ) + managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py") + + with pytest.raises(ValueError, match="credential-file passthrough"): + managed_modal.ManagedModalEnvironment(image="python:3.11") + + +def test_managed_modal_execute_times_out_and_cancels(monkeypatch): + _install_fake_tools_package() + managed_modal = 
_load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py") + modal_common = sys.modules["tools.environments.modal_common"] + + calls = [] + monotonic_values = iter([0.0, 12.5]) + + def fake_request(method, url, headers=None, json=None, timeout=None): + calls.append((method, url, json, timeout)) + if method == "POST" and url.endswith("/v1/sandboxes"): + return _FakeResponse(200, {"id": "sandbox-1"}) + if method == "POST" and url.endswith("/execs"): + return _FakeResponse(202, {"execId": json["execId"], "status": "running"}) + if method == "GET" and "/execs/" in url: + return _FakeResponse(200, {"execId": url.rsplit("/", 1)[-1], "status": "running"}) + if method == "POST" and url.endswith("/cancel"): + return _FakeResponse(202, {"status": "cancelling"}) + if method == "POST" and url.endswith("/terminate"): + return _FakeResponse(200, {"status": "terminated"}) + raise AssertionError(f"Unexpected request: {method} {url}") + + monkeypatch.setattr(managed_modal.requests, "request", fake_request) + monkeypatch.setattr(modal_common.time, "monotonic", lambda: next(monotonic_values)) + monkeypatch.setattr(modal_common.time, "sleep", lambda _: None) + + env = managed_modal.ManagedModalEnvironment(image="python:3.11") + result = env.execute("sleep 30", timeout=2) + env.cleanup() + + assert result == { + "output": "Managed Modal exec timed out after 2s", + "returncode": 124, + } + assert any(call[0] == "POST" and call[1].endswith("/cancel") for call in calls) diff --git a/tests/tools/test_modal_snapshot_isolation.py b/tests/tools/test_modal_snapshot_isolation.py index 1f9d9ff95..a3d0eeacd 100644 --- a/tests/tools/test_modal_snapshot_isolation.py +++ b/tests/tools/test_modal_snapshot_isolation.py @@ -87,6 +87,10 @@ def _install_modal_test_modules( sys.modules["tools.environments.base"] = types.SimpleNamespace(BaseEnvironment=_DummyBaseEnvironment) sys.modules["tools.interrupt"] = types.SimpleNamespace(is_interrupted=lambda: False) + 
sys.modules["tools.credential_files"] = types.SimpleNamespace( + get_credential_file_mounts=lambda: [], + iter_skills_files=lambda: [], + ) from_id_calls: list[str] = [] registry_calls: list[tuple[str, list[str] | None]] = [] diff --git a/tools/environments/managed_modal.py b/tools/environments/managed_modal.py index 241c69094..a8197bccf 100644 --- a/tools/environments/managed_modal.py +++ b/tools/environments/managed_modal.py @@ -6,12 +6,15 @@ import json import logging import os import requests -import time import uuid +from dataclasses import dataclass from typing import Any, Dict, Optional -from tools.environments.base import BaseEnvironment -from tools.interrupt import is_interrupted +from tools.environments.modal_common import ( + BaseModalExecutionEnvironment, + ModalExecStart, + PreparedModalExec, +) from tools.managed_tool_gateway import resolve_managed_tool_gateway logger = logging.getLogger(__name__) @@ -25,12 +28,20 @@ def _request_timeout_env(name: str, default: float) -> float: return default -class ManagedModalEnvironment(BaseEnvironment): +@dataclass(frozen=True) +class _ManagedModalExecHandle: + exec_id: str + + +class ManagedModalEnvironment(BaseModalExecutionEnvironment): """Gateway-owned Modal sandbox with Hermes-compatible execute/cleanup.""" _CONNECT_TIMEOUT_SECONDS = _request_timeout_env("TERMINAL_MANAGED_MODAL_CONNECT_TIMEOUT_SECONDS", 1.0) _POLL_READ_TIMEOUT_SECONDS = _request_timeout_env("TERMINAL_MANAGED_MODAL_POLL_READ_TIMEOUT_SECONDS", 5.0) _CANCEL_READ_TIMEOUT_SECONDS = _request_timeout_env("TERMINAL_MANAGED_MODAL_CANCEL_READ_TIMEOUT_SECONDS", 5.0) + _client_timeout_grace_seconds = 10.0 + _interrupt_output = "[Command interrupted - Modal sandbox exec cancelled]" + _unexpected_error_prefix = "Managed Modal exec failed" def __init__( self, @@ -43,6 +54,8 @@ class ManagedModalEnvironment(BaseEnvironment): ): super().__init__(cwd=cwd, timeout=timeout) + self._guard_unsupported_credential_passthrough() + gateway = 
resolve_managed_tool_gateway("modal") if gateway is None: raise ValueError("Managed Modal requires a configured tool gateway and Nous user token") @@ -56,31 +69,16 @@ class ManagedModalEnvironment(BaseEnvironment): self._create_idempotency_key = str(uuid.uuid4()) self._sandbox_id = self._create_sandbox() - def execute(self, command: str, cwd: str = "", *, - timeout: int | None = None, - stdin_data: str | None = None) -> dict: - exec_command, sudo_stdin = self._prepare_command(command) - - # When a sudo password is present, inject it via a shell-level pipe - # (same approach as the direct ModalEnvironment) since the gateway - # cannot pipe subprocess stdin directly. - if sudo_stdin is not None: - import shlex - exec_command = ( - f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}" - ) - - exec_cwd = cwd or self.cwd - effective_timeout = timeout or self.timeout + def _start_modal_exec(self, prepared: PreparedModalExec) -> ModalExecStart: exec_id = str(uuid.uuid4()) payload: Dict[str, Any] = { "execId": exec_id, - "command": exec_command, - "cwd": exec_cwd, - "timeoutMs": int(effective_timeout * 1000), + "command": prepared.command, + "cwd": prepared.cwd, + "timeoutMs": int(prepared.timeout * 1000), } - if stdin_data is not None: - payload["stdinData"] = stdin_data + if prepared.stdin_data is not None: + payload["stdinData"] = prepared.stdin_data try: response = self._request( @@ -90,81 +88,68 @@ class ManagedModalEnvironment(BaseEnvironment): timeout=10, ) except Exception as exc: - return { - "output": f"Managed Modal exec failed: {exc}", - "returncode": 1, - } + return ModalExecStart( + immediate_result=self._error_result(f"Managed Modal exec failed: {exc}") + ) if response.status_code >= 400: - return { - "output": self._format_error("Managed Modal exec failed", response), - "returncode": 1, - } + return ModalExecStart( + immediate_result=self._error_result( + self._format_error("Managed Modal exec failed", response) + ) + ) body = 
response.json() status = body.get("status") if status in {"completed", "failed", "cancelled", "timeout"}: - return { - "output": body.get("output", ""), - "returncode": body.get("returncode", 1), - } + return ModalExecStart( + immediate_result=self._result( + body.get("output", ""), + body.get("returncode", 1), + ) + ) if body.get("execId") != exec_id: - return { - "output": "Managed Modal exec start did not return the expected exec id", - "returncode": 1, - } - - poll_interval = 0.25 - deadline = time.monotonic() + effective_timeout + 10 - - while time.monotonic() < deadline: - if is_interrupted(): - self._cancel_exec(exec_id) - return { - "output": "[Command interrupted - Modal sandbox exec cancelled]", - "returncode": 130, - } - - try: - status_response = self._request( - "GET", - f"/v1/sandboxes/{self._sandbox_id}/execs/{exec_id}", - timeout=(self._CONNECT_TIMEOUT_SECONDS, self._POLL_READ_TIMEOUT_SECONDS), + return ModalExecStart( + immediate_result=self._error_result( + "Managed Modal exec start did not return the expected exec id" ) - except Exception as exc: - return { - "output": f"Managed Modal exec poll failed: {exc}", - "returncode": 1, - } + ) - if status_response.status_code == 404: - return { - "output": "Managed Modal exec not found", - "returncode": 1, - } + return ModalExecStart(handle=_ManagedModalExecHandle(exec_id=exec_id)) - if status_response.status_code >= 400: - return { - "output": self._format_error("Managed Modal exec poll failed", status_response), - "returncode": 1, - } + def _poll_modal_exec(self, handle: _ManagedModalExecHandle) -> dict | None: + try: + status_response = self._request( + "GET", + f"/v1/sandboxes/{self._sandbox_id}/execs/{handle.exec_id}", + timeout=(self._CONNECT_TIMEOUT_SECONDS, self._POLL_READ_TIMEOUT_SECONDS), + ) + except Exception as exc: + return self._error_result(f"Managed Modal exec poll failed: {exc}") - status_body = status_response.json() - status = status_body.get("status") - if status in {"completed", 
"failed", "cancelled", "timeout"}: - return { - "output": status_body.get("output", ""), - "returncode": status_body.get("returncode", 1), - } + if status_response.status_code == 404: + return self._error_result("Managed Modal exec not found") - time.sleep(poll_interval) + if status_response.status_code >= 400: + return self._error_result( + self._format_error("Managed Modal exec poll failed", status_response) + ) - self._cancel_exec(exec_id) - return { - "output": f"Managed Modal exec timed out after {effective_timeout}s", - "returncode": 124, - } + status_body = status_response.json() + status = status_body.get("status") + if status in {"completed", "failed", "cancelled", "timeout"}: + return self._result( + status_body.get("output", ""), + status_body.get("returncode", 1), + ) + return None + + def _cancel_modal_exec(self, handle: _ManagedModalExecHandle) -> None: + self._cancel_exec(handle.exec_id) + + def _timeout_result_for_modal(self, timeout: int) -> dict: + return self._result(f"Managed Modal exec timed out after {timeout}s", 124) def cleanup(self): if not getattr(self, "_sandbox_id", None): @@ -226,6 +211,21 @@ class ManagedModalEnvironment(BaseEnvironment): raise RuntimeError("Managed Modal create did not return a sandbox id") return sandbox_id + def _guard_unsupported_credential_passthrough(self) -> None: + """Managed Modal does not sync or mount host credential files.""" + try: + from tools.credential_files import get_credential_file_mounts + except Exception: + return + + mounts = get_credential_file_mounts() + if mounts: + raise ValueError( + "Managed Modal does not support host credential-file passthrough. " + "Use TERMINAL_MODAL_MODE=direct when skills or config require " + "credential files inside the sandbox." 
+ ) + def _request(self, method: str, path: str, *, json: Dict[str, Any] | None = None, timeout: int = 30, diff --git a/tools/environments/modal.py b/tools/environments/modal.py index 8954a6f34..805f9ac28 100644 --- a/tools/environments/modal.py +++ b/tools/environments/modal.py @@ -9,13 +9,16 @@ import json import logging import shlex import threading -import uuid +from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, Optional from hermes_constants import get_hermes_home -from tools.environments.base import BaseEnvironment -from tools.interrupt import is_interrupted +from tools.environments.modal_common import ( + BaseModalExecutionEnvironment, + ModalExecStart, + PreparedModalExec, +) logger = logging.getLogger(__name__) @@ -135,9 +138,20 @@ class _AsyncWorker: self._thread.join(timeout=10) -class ModalEnvironment(BaseEnvironment): +@dataclass +class _DirectModalExecHandle: + thread: threading.Thread + result_holder: Dict[str, Any] + + +class ModalEnvironment(BaseModalExecutionEnvironment): """Modal cloud execution via native Modal sandboxes.""" + _stdin_mode = "heredoc" + _poll_interval_seconds = 0.2 + _interrupt_output = "[Command interrupted - Modal sandbox terminated]" + _unexpected_error_prefix = "Modal execution error" + def __init__( self, image: str, @@ -312,36 +326,11 @@ class ModalEnvironment(BaseEnvironment): except Exception as e: logger.debug("Modal: file sync failed: %s", e) - def execute( - self, - command: str, - cwd: str = "", - *, - timeout: int | None = None, - stdin_data: str | None = None, - ) -> dict: + def _before_execute(self) -> None: self._sync_files() - if stdin_data is not None: - marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}" - while marker in stdin_data: - marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}" - command = f"{command} << '{marker}'\n{stdin_data}\n{marker}" - - exec_command, sudo_stdin = self._prepare_command(command) - - # Modal sandboxes execute commands via exec() and cannot pipe - # 
subprocess stdin directly. When a sudo password is present, - # use a shell-level pipe from printf. - if sudo_stdin is not None: - exec_command = ( - f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}" - ) - - effective_cwd = cwd or self.cwd - effective_timeout = timeout or self.timeout - full_command = f"cd {shlex.quote(effective_cwd)} && {exec_command}" - + def _start_modal_exec(self, prepared: PreparedModalExec) -> ModalExecStart: + full_command = f"cd {shlex.quote(prepared.cwd)} && {prepared.command}" result_holder = {"value": None, "error": None} def _run(): @@ -351,7 +340,7 @@ class ModalEnvironment(BaseEnvironment): "bash", "-c", full_command, - timeout=effective_timeout, + timeout=prepared.timeout, ) stdout = await process.stdout.read.aio() stderr = await process.stderr.read.aio() @@ -363,42 +352,31 @@ class ModalEnvironment(BaseEnvironment): output = stdout if stderr: output = f"{stdout}\n{stderr}" if stdout else stderr - return output, exit_code + return self._result(output, exit_code) - output, exit_code = self._worker.run_coroutine( + result_holder["value"] = self._worker.run_coroutine( _do_execute(), - timeout=effective_timeout + 30, + timeout=prepared.timeout + 30, ) - result_holder["value"] = { - "output": output, - "returncode": exit_code, - } except Exception as e: result_holder["error"] = e t = threading.Thread(target=_run, daemon=True) t.start() - while t.is_alive(): - t.join(timeout=0.2) - if is_interrupted(): - try: - self._worker.run_coroutine( - self._sandbox.terminate.aio(), - timeout=15, - ) - except Exception: - pass - return { - "output": "[Command interrupted - Modal sandbox terminated]", - "returncode": 130, - } + return ModalExecStart(handle=_DirectModalExecHandle(thread=t, result_holder=result_holder)) - if result_holder["error"]: - return { - "output": f"Modal execution error: {result_holder['error']}", - "returncode": 1, - } - return result_holder["value"] + def _poll_modal_exec(self, handle: 
_DirectModalExecHandle) -> dict | None: + if handle.thread.is_alive(): + return None + if handle.result_holder["error"]: + return self._error_result(f"Modal execution error: {handle.result_holder['error']}") + return handle.result_holder["value"] + + def _cancel_modal_exec(self, handle: _DirectModalExecHandle) -> None: + self._worker.run_coroutine( + self._sandbox.terminate.aio(), + timeout=15, + ) def cleanup(self): """Snapshot the filesystem (if persistent) then stop the sandbox.""" diff --git a/tools/environments/modal_common.py b/tools/environments/modal_common.py new file mode 100644 index 000000000..0affd0209 --- /dev/null +++ b/tools/environments/modal_common.py @@ -0,0 +1,178 @@ +"""Shared Hermes-side execution flow for Modal transports. + +This module deliberately stops at the Hermes boundary: +- command preparation +- cwd/timeout normalization +- stdin/sudo shell wrapping +- common result shape +- interrupt/cancel polling + +Direct Modal and managed Modal keep separate transport logic, persistence, and +trust-boundary decisions in their own modules. 
+""" + +from __future__ import annotations + +import shlex +import time +import uuid +from abc import abstractmethod +from dataclasses import dataclass +from typing import Any + +from tools.environments.base import BaseEnvironment +from tools.interrupt import is_interrupted + + +@dataclass(frozen=True) +class PreparedModalExec: + """Normalized command data passed to a transport-specific exec runner.""" + + command: str + cwd: str + timeout: int + stdin_data: str | None = None + + +@dataclass(frozen=True) +class ModalExecStart: + """Transport response after starting an exec.""" + + handle: Any | None = None + immediate_result: dict | None = None + + +def wrap_modal_stdin_heredoc(command: str, stdin_data: str) -> str: + """Append stdin as a shell heredoc for transports without stdin piping.""" + marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}" + while marker in stdin_data: + marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}" + return f"{command} << '{marker}'\n{stdin_data}\n{marker}" + + +def wrap_modal_sudo_pipe(command: str, sudo_stdin: str) -> str: + """Feed sudo via a shell pipe for transports without direct stdin piping.""" + return f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {command}" + + +class BaseModalExecutionEnvironment(BaseEnvironment): + """Common execute() flow for direct and managed Modal transports.""" + + _stdin_mode = "payload" + _poll_interval_seconds = 0.25 + _client_timeout_grace_seconds: float | None = None + _interrupt_output = "[Command interrupted]" + _unexpected_error_prefix = "Modal execution error" + + def execute( + self, + command: str, + cwd: str = "", + *, + timeout: int | None = None, + stdin_data: str | None = None, + ) -> dict: + self._before_execute() + prepared = self._prepare_modal_exec( + command, + cwd=cwd, + timeout=timeout, + stdin_data=stdin_data, + ) + + try: + start = self._start_modal_exec(prepared) + except Exception as exc: + return self._error_result(f"{self._unexpected_error_prefix}: {exc}") + + if 
start.immediate_result is not None: + return start.immediate_result + + if start.handle is None: + return self._error_result( + f"{self._unexpected_error_prefix}: transport did not return an exec handle" + ) + + deadline = None + if self._client_timeout_grace_seconds is not None: + deadline = time.monotonic() + prepared.timeout + self._client_timeout_grace_seconds + + while True: + if is_interrupted(): + try: + self._cancel_modal_exec(start.handle) + except Exception: + pass + return self._result(self._interrupt_output, 130) + + try: + result = self._poll_modal_exec(start.handle) + except Exception as exc: + return self._error_result(f"{self._unexpected_error_prefix}: {exc}") + + if result is not None: + return result + + if deadline is not None and time.monotonic() >= deadline: + try: + self._cancel_modal_exec(start.handle) + except Exception: + pass + return self._timeout_result_for_modal(prepared.timeout) + + time.sleep(self._poll_interval_seconds) + + def _before_execute(self) -> None: + """Hook for backends that need pre-exec sync or validation.""" + return None + + def _prepare_modal_exec( + self, + command: str, + *, + cwd: str = "", + timeout: int | None = None, + stdin_data: str | None = None, + ) -> PreparedModalExec: + effective_cwd = cwd or self.cwd + effective_timeout = timeout or self.timeout + + exec_command = command + exec_stdin = stdin_data if self._stdin_mode == "payload" else None + if stdin_data is not None and self._stdin_mode == "heredoc": + exec_command = wrap_modal_stdin_heredoc(exec_command, stdin_data) + + exec_command, sudo_stdin = self._prepare_command(exec_command) + if sudo_stdin is not None: + exec_command = wrap_modal_sudo_pipe(exec_command, sudo_stdin) + + return PreparedModalExec( + command=exec_command, + cwd=effective_cwd, + timeout=effective_timeout, + stdin_data=exec_stdin, + ) + + def _result(self, output: str, returncode: int) -> dict: + return { + "output": output, + "returncode": returncode, + } + + def _error_result(self, 
output: str) -> dict: + return self._result(output, 1) + + def _timeout_result_for_modal(self, timeout: int) -> dict: + return self._result(f"Command timed out after {timeout}s", 124) + + @abstractmethod + def _start_modal_exec(self, prepared: PreparedModalExec) -> ModalExecStart: + """Begin a transport-specific exec.""" + + @abstractmethod + def _poll_modal_exec(self, handle: Any) -> dict | None: + """Return a final result dict when complete, else ``None``.""" + + @abstractmethod + def _cancel_modal_exec(self, handle: Any) -> None: + """Cancel or terminate the active transport exec.""" -- 2.43.0 From e64b047663a0ff95753a1bf930036e6ccca43bd2 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:34:43 -0700 Subject: [PATCH 041/385] chore: prepare Hermes for Homebrew packaging (#4099) Co-authored-by: Yabuku-xD <78594762+Yabuku-xD@users.noreply.github.com> --- MANIFEST.in | 4 + gateway/run.py | 8 +- hermes_cli/banner.py | 3 +- hermes_cli/claw.py | 4 +- hermes_cli/config.py | 82 +++++++++-- hermes_cli/main.py | 11 +- hermes_cli/plugins_cmd.py | 3 +- hermes_cli/setup.py | 5 +- hermes_constants.py | 14 ++ packaging/homebrew/README.md | 14 ++ packaging/homebrew/hermes-agent.rb | 48 +++++++ pyproject.toml | 9 +- scripts/release.py | 158 +++++++++++++++++----- tests/gateway/test_update_command.py | 11 ++ tests/hermes_cli/test_managed_installs.py | 54 ++++++++ tests/test_packaging_metadata.py | 22 +++ tools/skills_hub.py | 6 +- 17 files changed, 400 insertions(+), 56 deletions(-) create mode 100644 MANIFEST.in create mode 100644 packaging/homebrew/README.md create mode 100644 packaging/homebrew/hermes-agent.rb create mode 100644 tests/hermes_cli/test_managed_installs.py create mode 100644 tests/test_packaging_metadata.py diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..876aeeb7d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +graft skills +graft optional-skills +global-exclude __pycache__ 
+global-exclude *.py[cod] diff --git a/gateway/run.py b/gateway/run.py index c42510709..0b5e3a1b4 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -325,9 +325,9 @@ def _check_unavailable_skill(command_name: str) -> str | None: ) # Check optional skills (shipped with repo but not installed) - from hermes_constants import get_hermes_home + from hermes_constants import get_hermes_home, get_optional_skills_dir repo_root = Path(__file__).resolve().parent.parent - optional_dir = repo_root / "optional-skills" + optional_dir = get_optional_skills_dir(repo_root / "optional-skills") if optional_dir.exists(): for skill_md in optional_dir.rglob("SKILL.md"): name = skill_md.parent.name.lower().replace("_", "-") @@ -4695,6 +4695,10 @@ class GatewayRunner: import shutil import subprocess from datetime import datetime + from hermes_cli.config import is_managed, format_managed_message + + if is_managed(): + return f"✗ {format_managed_message('update Hermes Agent')}" project_root = Path(__file__).parent.parent.resolve() git_dir = project_root / '.git' diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index 5ecc94acf..7435750bc 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -432,10 +432,11 @@ def build_welcome_banner(console: Console, model: str, cwd: str, try: behind = get_update_result(timeout=0.5) if behind and behind > 0: + from hermes_cli.config import recommended_update_command commits_word = "commit" if behind == 1 else "commits" right_lines.append( f"[bold yellow]⚠ {behind} {commits_word} behind[/]" - f"[dim yellow] — run [bold]hermes update[/bold] to update[/]" + f"[dim yellow] — run [bold]{recommended_update_command()}[/bold] to update[/]" ) except Exception: pass # Never break the banner over an update check diff --git a/hermes_cli/claw.py b/hermes_cli/claw.py index 014a2abeb..b3b624dc5 100644 --- a/hermes_cli/claw.py +++ b/hermes_cli/claw.py @@ -12,6 +12,7 @@ import sys from pathlib import Path from hermes_cli.config import get_hermes_home, 
get_config_path, load_config, save_config +from hermes_constants import get_optional_skills_dir from hermes_cli.setup import ( Colors, color, @@ -27,8 +28,7 @@ logger = logging.getLogger(__name__) PROJECT_ROOT = Path(__file__).parent.parent.resolve() _OPENCLAW_SCRIPT = ( - PROJECT_ROOT - / "optional-skills" + get_optional_skills_dir(PROJECT_ROOT / "optional-skills") / "migration" / "openclaw-migration" / "scripts" diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 56d102692..f7ae4239d 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -52,26 +52,86 @@ from hermes_cli.default_soul import DEFAULT_SOUL_MD # Managed mode (NixOS declarative config) # ============================================================================= +_MANAGED_TRUE_VALUES = ("true", "1", "yes") +_MANAGED_SYSTEM_NAMES = { + "brew": "Homebrew", + "homebrew": "Homebrew", + "nix": "NixOS", + "nixos": "NixOS", +} + + +def get_managed_system() -> Optional[str]: + """Return the package manager owning this install, if any.""" + raw = os.getenv("HERMES_MANAGED", "").strip() + if raw: + normalized = raw.lower() + if normalized in _MANAGED_TRUE_VALUES: + return "NixOS" + return _MANAGED_SYSTEM_NAMES.get(normalized, raw) + + managed_marker = get_hermes_home() / ".managed" + if managed_marker.exists(): + return "NixOS" + return None + + def is_managed() -> bool: - """Check if hermes is running in Nix-managed mode. + """Check if Hermes is running in package-manager-managed mode. Two signals: the HERMES_MANAGED env var (set by the systemd service), or a .managed marker file in HERMES_HOME (set by the NixOS activation script, so interactive shells also see it). 
""" - if os.getenv("HERMES_MANAGED", "").lower() in ("true", "1", "yes"): - return True - managed_marker = get_hermes_home() / ".managed" - return managed_marker.exists() + return get_managed_system() is not None + + +def get_managed_update_command() -> Optional[str]: + """Return the preferred upgrade command for a managed install.""" + managed_system = get_managed_system() + if managed_system == "Homebrew": + return "brew upgrade hermes-agent" + if managed_system == "NixOS": + return "sudo nixos-rebuild switch" + return None + + +def recommended_update_command() -> str: + """Return the best update command for the current installation.""" + return get_managed_update_command() or "hermes update" + + +def format_managed_message(action: str = "modify this Hermes installation") -> str: + """Build a user-facing error for managed installs.""" + managed_system = get_managed_system() or "a package manager" + raw = os.getenv("HERMES_MANAGED", "").strip().lower() + + if managed_system == "NixOS": + env_hint = "true" if raw in _MANAGED_TRUE_VALUES else raw or "true" + return ( + f"Cannot {action}: this Hermes installation is managed by NixOS " + f"(HERMES_MANAGED={env_hint}).\n" + "Edit services.hermes-agent.settings in your configuration.nix and run:\n" + " sudo nixos-rebuild switch" + ) + + if managed_system == "Homebrew": + env_hint = raw or "homebrew" + return ( + f"Cannot {action}: this Hermes installation is managed by Homebrew " + f"(HERMES_MANAGED={env_hint}).\n" + "Use:\n" + " brew upgrade hermes-agent" + ) + + return ( + f"Cannot {action}: this Hermes installation is managed by {managed_system}.\n" + "Use your package manager to upgrade or reinstall Hermes." 
+ ) def managed_error(action: str = "modify configuration"): """Print user-friendly error for managed mode.""" - print( - f"Cannot {action}: configuration is managed by NixOS (HERMES_MANAGED=true).\n" - "Edit services.hermes-agent.settings in your configuration.nix and run:\n" - " sudo nixos-rebuild switch", - file=sys.stderr, - ) + print(format_managed_message(action), file=sys.stderr) # ============================================================================= diff --git a/hermes_cli/main.py b/hermes_cli/main.py index f6d7d7c71..64fc455cd 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2467,10 +2467,14 @@ def cmd_version(args): # Show update status (synchronous — acceptable since user asked for version info) try: from hermes_cli.banner import check_for_updates + from hermes_cli.config import recommended_update_command behind = check_for_updates() if behind and behind > 0: commits_word = "commit" if behind == 1 else "commits" - print(f"Update available: {behind} {commits_word} behind — run 'hermes update'") + print( + f"Update available: {behind} {commits_word} behind — " + f"run '{recommended_update_command()}'" + ) elif behind == 0: print("Up to date") except Exception: @@ -2821,6 +2825,11 @@ def _invalidate_update_cache(): def cmd_update(args): """Update Hermes Agent to the latest version.""" import shutil + from hermes_cli.config import is_managed, managed_error + + if is_managed(): + managed_error("update Hermes Agent") + return print("⚕ Updating Hermes Agent...") print() diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py index e53f5c94b..c3717bfa3 100644 --- a/hermes_cli/plugins_cmd.py +++ b/hermes_cli/plugins_cmd.py @@ -265,10 +265,11 @@ def cmd_install(identifier: str, force: bool = False) -> None: ) sys.exit(1) if mv_int > _SUPPORTED_MANIFEST_VERSION: + from hermes_cli.config import recommended_update_command console.print( f"[red]Error:[/red] Plugin '{plugin_name}' requires manifest_version " f"{mv}, but this installer 
only supports up to {_SUPPORTED_MANIFEST_VERSION}.\n" - f"Run [bold]hermes update[/bold] to get a newer installer." + f"Run [bold]{recommended_update_command()}[/bold] to get a newer installer." ) sys.exit(1) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 304f34f56..503c2bcde 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -18,6 +18,8 @@ import sys from pathlib import Path from typing import Optional, Dict, Any +from hermes_constants import get_optional_skills_dir + logger = logging.getLogger(__name__) PROJECT_ROOT = Path(__file__).parent.parent.resolve() @@ -3121,8 +3123,7 @@ def _skip_configured_section( _OPENCLAW_SCRIPT = ( - PROJECT_ROOT - / "optional-skills" + get_optional_skills_dir(PROJECT_ROOT / "optional-skills") / "migration" / "openclaw-migration" / "scripts" diff --git a/hermes_constants.py b/hermes_constants.py index 2bfc0a8c7..c28f6dc8f 100644 --- a/hermes_constants.py +++ b/hermes_constants.py @@ -17,6 +17,20 @@ def get_hermes_home() -> Path: return Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) +def get_optional_skills_dir(default: Path | None = None) -> Path: + """Return the optional-skills directory, honoring package-manager wrappers. + + Packaged installs may ship ``optional-skills`` outside the Python package + tree and expose it via ``HERMES_OPTIONAL_SKILLS``. + """ + override = os.getenv("HERMES_OPTIONAL_SKILLS", "").strip() + if override: + return Path(override) + if default is not None: + return default + return get_hermes_home() / "optional-skills" + + def get_hermes_dir(new_subpath: str, old_name: str) -> Path: """Resolve a Hermes subdirectory with backward compatibility. diff --git a/packaging/homebrew/README.md b/packaging/homebrew/README.md new file mode 100644 index 000000000..e53d3fd0b --- /dev/null +++ b/packaging/homebrew/README.md @@ -0,0 +1,14 @@ +Homebrew packaging notes for Hermes Agent. + +Use `packaging/homebrew/hermes-agent.rb` as a tap or `homebrew-core` starting point. 
+ +Key choices: +- Stable builds should target the semver-named sdist asset attached to each GitHub release, not the CalVer tag tarball. +- `faster-whisper` now lives in the `voice` extra, which keeps wheel-only transitive dependencies out of the base Homebrew formula. +- The wrapper exports `HERMES_BUNDLED_SKILLS`, `HERMES_OPTIONAL_SKILLS`, and `HERMES_MANAGED=homebrew` so packaged installs keep runtime assets and defer upgrades to Homebrew. + +Typical update flow: +1. Bump the formula `url`, `version`, and `sha256`. +2. Refresh Python resources with `brew update-python-resources --print-only hermes-agent`. +3. Keep `ignore_packages: %w[certifi cryptography pydantic]`. +4. Verify `brew audit --new --strict hermes-agent` and `brew test hermes-agent`. diff --git a/packaging/homebrew/hermes-agent.rb b/packaging/homebrew/hermes-agent.rb new file mode 100644 index 000000000..7c00fc6ac --- /dev/null +++ b/packaging/homebrew/hermes-agent.rb @@ -0,0 +1,48 @@ +class HermesAgent < Formula + include Language::Python::Virtualenv + + desc "Self-improving AI agent that creates skills from experience" + homepage "https://hermes-agent.nousresearch.com" + # Stable source should point at the semver-named sdist asset attached by + # scripts/release.py, not the CalVer tag tarball. 
+ url "https://github.com/NousResearch/hermes-agent/releases/download/v2026.3.30/hermes_agent-0.6.0.tar.gz" + sha256 "" + license "MIT" + + depends_on "certifi" => :no_linkage + depends_on "cryptography" => :no_linkage + depends_on "libyaml" + depends_on "python@3.14" + + pypi_packages ignore_packages: %w[certifi cryptography pydantic] + + # Refresh resource stanzas after bumping the source url/version: + # brew update-python-resources --print-only hermes-agent + + def install + venv = virtualenv_create(libexec, "python3.14") + venv.pip_install resources + venv.pip_install buildpath + + pkgshare.install "skills", "optional-skills" + + %w[hermes hermes-agent hermes-acp].each do |exe| + next unless (libexec/"bin"/exe).exist? + + (bin/exe).write_env_script( + libexec/"bin"/exe, + HERMES_BUNDLED_SKILLS: pkgshare/"skills", + HERMES_OPTIONAL_SKILLS: pkgshare/"optional-skills", + HERMES_MANAGED: "homebrew" + ) + end + end + + test do + assert_match "Hermes Agent v#{version}", shell_output("#{bin}/hermes version") + + managed = shell_output("#{bin}/hermes update 2>&1") + assert_match "managed by Homebrew", managed + assert_match "brew upgrade hermes-agent", managed + end +end diff --git a/pyproject.toml b/pyproject.toml index c3154d1ae..3cf339845 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,6 @@ dependencies = [ "fal-client>=0.13.1,<1", # Text-to-speech (Edge TTS is free, no API key needed) "edge-tts>=7.2.7,<8", - "faster-whisper>=1.0.0,<2", # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity) "PyJWT[crypto]>=2.12.0,<3", # CVE-2026-32597 ] @@ -47,7 +46,13 @@ slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"] matrix = ["matrix-nio[e2e]>=0.24.0,<1"] cli = ["simple-term-menu>=1.0,<2"] tts-premium = ["elevenlabs>=1.0,<2"] -voice = ["sounddevice>=0.4.6,<1", "numpy>=1.24.0,<3"] +voice = [ + # Local STT pulls in wheel-only transitive deps (ctranslate2, onnxruntime), + # so keep it out of the base install for source-build packagers 
like Homebrew. + "faster-whisper>=1.0.0,<2", + "sounddevice>=0.4.6,<1", + "numpy>=1.24.0,<3", +] pty = [ "ptyprocess>=0.7.0,<1; sys_platform != 'win32'", "pywinpty>=2.0.0,<3; sys_platform == 'win32'", diff --git a/scripts/release.py b/scripts/release.py index cafb30321..cfe360064 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -24,6 +24,7 @@ import argparse import json import os import re +import shutil import subprocess import sys from collections import defaultdict @@ -128,6 +129,16 @@ def git(*args, cwd=None): return result.stdout.strip() +def git_result(*args, cwd=None): + """Run a git command and return the full CompletedProcess.""" + return subprocess.run( + ["git"] + list(args), + capture_output=True, + text=True, + cwd=cwd or str(REPO_ROOT), + ) + + def get_last_tag(): """Get the most recent CalVer tag.""" tags = git("tag", "--list", "v20*", "--sort=-v:refname") @@ -136,6 +147,18 @@ def get_last_tag(): return None +def next_available_tag(base_tag: str) -> tuple[str, str]: + """Return a tag/calver pair, suffixing same-day releases when needed.""" + if not git("tag", "--list", base_tag): + return base_tag, base_tag.removeprefix("v") + + suffix = 2 + while git("tag", "--list", f"{base_tag}.{suffix}"): + suffix += 1 + tag_name = f"{base_tag}.{suffix}" + return tag_name, tag_name.removeprefix("v") + + def get_current_version(): """Read current semver from __init__.py.""" content = VERSION_FILE.read_text() @@ -192,6 +215,41 @@ def update_version_files(semver: str, calver_date: str): PYPROJECT_FILE.write_text(pyproject) +def build_release_artifacts(semver: str) -> list[Path]: + """Build sdist/wheel artifacts for the current release. + + Returns the artifact paths when the local environment has ``python -m build`` + available. If build tooling is missing or the build fails, returns an empty + list and lets the release proceed without attached Python artifacts. 
+ """ + dist_dir = REPO_ROOT / "dist" + shutil.rmtree(dist_dir, ignore_errors=True) + + result = subprocess.run( + [sys.executable, "-m", "build", "--sdist", "--wheel"], + cwd=str(REPO_ROOT), + capture_output=True, + text=True, + ) + if result.returncode != 0: + print(" ⚠ Could not build Python release artifacts.") + stderr = result.stderr.strip() + stdout = result.stdout.strip() + if stderr: + print(f" {stderr.splitlines()[-1]}") + elif stdout: + print(f" {stdout.splitlines()[-1]}") + print(" Install the 'build' package to attach semver-named sdist/wheel assets.") + return [] + + artifacts = sorted(p for p in dist_dir.iterdir() if p.is_file()) + matching = [p for p in artifacts if semver in p.name] + if not matching: + print(" ⚠ Built artifacts did not match the expected release version.") + return [] + return matching + + def resolve_author(name: str, email: str) -> str: """Resolve a git author to a GitHub @mention.""" # Try email lookup first @@ -424,18 +482,10 @@ def main(): now = datetime.now() calver_date = f"{now.year}.{now.month}.{now.day}" - tag_name = f"v{calver_date}" - - # Check for existing tag with same date - existing = git("tag", "--list", tag_name) - if existing and not args.publish: - # Append a suffix for same-day releases - suffix = 2 - while git("tag", "--list", f"{tag_name}.{suffix}"): - suffix += 1 - tag_name = f"{tag_name}.{suffix}" - calver_date = f"{calver_date}.{suffix}" - print(f"Note: Tag {tag_name[:-2]} already exists, using {tag_name}") + base_tag = f"v{calver_date}" + tag_name, calver_date = next_available_tag(base_tag) + if tag_name != base_tag: + print(f"Note: Tag {base_tag} already exists, using {tag_name}") # Determine semver current_version = get_current_version() @@ -494,41 +544,83 @@ def main(): print(f" ✓ Updated version files to v{new_version} ({calver_date})") # Commit version bump - git("add", str(VERSION_FILE), str(PYPROJECT_FILE)) - git("commit", "-m", f"chore: bump version to v{new_version} ({calver_date})") + 
add_result = git_result("add", str(VERSION_FILE), str(PYPROJECT_FILE)) + if add_result.returncode != 0: + print(f" ✗ Failed to stage version files: {add_result.stderr.strip()}") + return + + commit_result = git_result( + "commit", "-m", f"chore: bump version to v{new_version} ({calver_date})" + ) + if commit_result.returncode != 0: + print(f" ✗ Failed to commit version bump: {commit_result.stderr.strip()}") + return print(f" ✓ Committed version bump") # Create annotated tag - git("tag", "-a", tag_name, "-m", - f"Hermes Agent v{new_version} ({calver_date})\n\nWeekly release") + tag_result = git_result( + "tag", "-a", tag_name, "-m", + f"Hermes Agent v{new_version} ({calver_date})\n\nWeekly release" + ) + if tag_result.returncode != 0: + print(f" ✗ Failed to create tag {tag_name}: {tag_result.stderr.strip()}") + return print(f" ✓ Created tag {tag_name}") # Push - push_result = git("push", "origin", "HEAD", "--tags") - print(f" ✓ Pushed to origin") + push_result = git_result("push", "origin", "HEAD", "--tags") + if push_result.returncode == 0: + print(f" ✓ Pushed to origin") + else: + print(f" ✗ Failed to push to origin: {push_result.stderr.strip()}") + print(" Continue manually after fixing access:") + print(" git push origin HEAD --tags") + + # Build semver-named Python artifacts so downstream packagers + # (e.g. Homebrew) can target them without relying on CalVer tag names. 
+ artifacts = build_release_artifacts(new_version) + if artifacts: + print(" ✓ Built release artifacts:") + for artifact in artifacts: + print(f" - {artifact.relative_to(REPO_ROOT)}") # Create GitHub release changelog_file = REPO_ROOT / ".release_notes.md" changelog_file.write_text(changelog) - result = subprocess.run( - ["gh", "release", "create", tag_name, - "--title", f"Hermes Agent v{new_version} ({calver_date})", - "--notes-file", str(changelog_file)], - capture_output=True, text=True, - cwd=str(REPO_ROOT), - ) + gh_cmd = [ + "gh", "release", "create", tag_name, + "--title", f"Hermes Agent v{new_version} ({calver_date})", + "--notes-file", str(changelog_file), + ] + gh_cmd.extend(str(path) for path in artifacts) - changelog_file.unlink(missing_ok=True) - - if result.returncode == 0: - print(f" ✓ GitHub release created: {result.stdout.strip()}") + gh_bin = shutil.which("gh") + if gh_bin: + result = subprocess.run( + gh_cmd, + capture_output=True, text=True, + cwd=str(REPO_ROOT), + ) else: - print(f" ✗ GitHub release failed: {result.stderr}") - print(f" Tag was created. Create the release manually:") - print(f" gh release create {tag_name} --title 'Hermes Agent v{new_version} ({calver_date})'") + result = None - print(f"\n 🎉 Release v{new_version} ({tag_name}) published!") + if result and result.returncode == 0: + changelog_file.unlink(missing_ok=True) + print(f" ✓ GitHub release created: {result.stdout.strip()}") + print(f"\n 🎉 Release v{new_version} ({tag_name}) published!") + else: + if result is None: + print(" ✗ GitHub release skipped: `gh` CLI not found.") + else: + print(f" ✗ GitHub release failed: {result.stderr.strip()}") + print(f" Release notes kept at: {changelog_file}") + print(f" Tag was created locally. 
Create the release manually:") + print( + f" gh release create {tag_name} --title 'Hermes Agent v{new_version} ({calver_date})' " + f"--notes-file .release_notes.md {' '.join(str(path) for path in artifacts)}" + ) + print(f"\n ✓ Release artifacts prepared for manual publish: v{new_version} ({tag_name})") else: print(f"\n{'='*60}") print(f" Dry run complete. To publish, add --publish") diff --git a/tests/gateway/test_update_command.py b/tests/gateway/test_update_command.py index ac9beac1b..e8fb3ddc1 100644 --- a/tests/gateway/test_update_command.py +++ b/tests/gateway/test_update_command.py @@ -45,6 +45,17 @@ def _make_runner(): class TestHandleUpdateCommand: """Tests for GatewayRunner._handle_update_command.""" + @pytest.mark.asyncio + async def test_managed_install_returns_package_manager_guidance(self, monkeypatch): + runner = _make_runner() + event = _make_event() + monkeypatch.setenv("HERMES_MANAGED", "homebrew") + + result = await runner._handle_update_command(event) + + assert "managed by Homebrew" in result + assert "brew upgrade hermes-agent" in result + @pytest.mark.asyncio async def test_no_git_directory(self, tmp_path): """Returns an error when .git does not exist.""" diff --git a/tests/hermes_cli/test_managed_installs.py b/tests/hermes_cli/test_managed_installs.py new file mode 100644 index 000000000..c6b5d792c --- /dev/null +++ b/tests/hermes_cli/test_managed_installs.py @@ -0,0 +1,54 @@ +from types import SimpleNamespace +from unittest.mock import patch + +from hermes_cli.config import ( + format_managed_message, + get_managed_system, + recommended_update_command, +) +from hermes_cli.main import cmd_update +from tools.skills_hub import OptionalSkillSource + + +def test_get_managed_system_homebrew(monkeypatch): + monkeypatch.setenv("HERMES_MANAGED", "homebrew") + + assert get_managed_system() == "Homebrew" + assert recommended_update_command() == "brew upgrade hermes-agent" + + +def test_format_managed_message_homebrew(monkeypatch): + 
monkeypatch.setenv("HERMES_MANAGED", "homebrew") + + message = format_managed_message("update Hermes Agent") + + assert "managed by Homebrew" in message + assert "brew upgrade hermes-agent" in message + + +def test_recommended_update_command_defaults_to_hermes_update(monkeypatch): + monkeypatch.delenv("HERMES_MANAGED", raising=False) + + assert recommended_update_command() == "hermes update" + + +def test_cmd_update_blocks_managed_homebrew(monkeypatch, capsys): + monkeypatch.setenv("HERMES_MANAGED", "homebrew") + + with patch("hermes_cli.main.subprocess.run") as mock_run: + cmd_update(SimpleNamespace()) + + assert not mock_run.called + captured = capsys.readouterr() + assert "managed by Homebrew" in captured.err + assert "brew upgrade hermes-agent" in captured.err + + +def test_optional_skill_source_honors_env_override(monkeypatch, tmp_path): + optional_dir = tmp_path / "optional-skills" + optional_dir.mkdir() + monkeypatch.setenv("HERMES_OPTIONAL_SKILLS", str(optional_dir)) + + source = OptionalSkillSource() + + assert source._optional_dir == optional_dir diff --git a/tests/test_packaging_metadata.py b/tests/test_packaging_metadata.py new file mode 100644 index 000000000..ce6d4793f --- /dev/null +++ b/tests/test_packaging_metadata.py @@ -0,0 +1,22 @@ +from pathlib import Path +import tomllib + + +REPO_ROOT = Path(__file__).resolve().parents[1] + + +def test_faster_whisper_is_not_a_base_dependency(): + data = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) + deps = data["project"]["dependencies"] + + assert not any(dep.startswith("faster-whisper") for dep in deps) + + voice_extra = data["project"]["optional-dependencies"]["voice"] + assert any(dep.startswith("faster-whisper") for dep in voice_extra) + + +def test_manifest_includes_bundled_skills(): + manifest = (REPO_ROOT / "MANIFEST.in").read_text(encoding="utf-8") + + assert "graft skills" in manifest + assert "graft optional-skills" in manifest diff --git a/tools/skills_hub.py 
b/tools/skills_hub.py index a824c3e3b..c818261d7 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -2115,7 +2115,11 @@ class OptionalSkillSource(SkillSource): """ def __init__(self): - self._optional_dir = Path(__file__).parent.parent / "optional-skills" + from hermes_constants import get_optional_skills_dir + + self._optional_dir = get_optional_skills_dir( + Path(__file__).parent.parent / "optional-skills" + ) def source_id(self) -> str: return "official" -- 2.43.0 From 8a794d029d3238b26c781888eafa4c8cb60583c7 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:38:32 -0700 Subject: [PATCH 042/385] fix(ci): add repo conditionals to prevent fork workflow failures (#4107) Add github.repository checks to docker-publish and deploy-site workflows so they skip on forks where upstream-specific resources (Docker Hub org, custom domain) are unavailable. Co-authored-by: StreamOfRon --- .github/workflows/deploy-site.yml | 2 ++ .github/workflows/docker-publish.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/deploy-site.yml b/.github/workflows/deploy-site.yml index 89e031e58..3c21e8a00 100644 --- a/.github/workflows/deploy-site.yml +++ b/.github/workflows/deploy-site.yml @@ -19,6 +19,8 @@ concurrency: jobs: build-and-deploy: + # Only run on the upstream repository, not on forks + if: github.repository == 'NousResearch/hermes-agent' runs-on: ubuntu-latest environment: name: github-pages diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 11b98c3a9..0455c34d0 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -12,6 +12,8 @@ concurrency: jobs: build-and-push: + # Only run on the upstream repository, not on forks + if: github.repository == 'NousResearch/hermes-agent' runs-on: ubuntu-latest timeout-minutes: 30 steps: -- 2.43.0 From 720507efac6f3909b3450d949503addcf8550181 Mon Sep 17 00:00:00 2001 
From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:39:08 -0700 Subject: [PATCH 043/385] feat: add post-migration cleanup for OpenClaw directories (#4100) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After migrating from OpenClaw, leftover workspace directories contain state files (todo.json, sessions, logs) that confuse the agent — it discovers them and reads/writes to stale locations instead of the Hermes state directory, causing issues like cron jobs reading a different todo list than interactive sessions. Changes: - hermes claw migrate now offers to archive the source directory after successful migration (rename to .pre-migration, not delete) - New `hermes claw cleanup` subcommand for users who already migrated and need to archive leftover OpenClaw directories - Migration notes updated with explicit cleanup guidance - 42 tests covering all new functionality Reported by SteveSkedasticity — multiple todo.json files across ~/.hermes/, ~/.openclaw/workspace/, and ~/.openclaw/workspace-assistant/ caused cron jobs to read from wrong locations. 
--- hermes_cli/claw.py | 252 +++++++++++- hermes_cli/main.py | 22 ++ .../scripts/openclaw_to_hermes.py | 15 + tests/hermes_cli/test_claw.py | 362 ++++++++++++++++++ 4 files changed, 649 insertions(+), 2 deletions(-) diff --git a/hermes_cli/claw.py b/hermes_cli/claw.py index b3b624dc5..87735f931 100644 --- a/hermes_cli/claw.py +++ b/hermes_cli/claw.py @@ -4,11 +4,15 @@ Usage: hermes claw migrate # Interactive migration from ~/.openclaw hermes claw migrate --dry-run # Preview what would be migrated hermes claw migrate --preset full --overwrite # Full migration, overwrite conflicts + hermes claw cleanup # Archive leftover OpenClaw directories + hermes claw cleanup --dry-run # Preview what would be archived """ import importlib.util import logging +import shutil import sys +from datetime import datetime from pathlib import Path from hermes_cli.config import get_hermes_home, get_config_path, load_config, save_config @@ -20,6 +24,7 @@ from hermes_cli.setup import ( print_info, print_success, print_error, + print_warning, prompt_yes_no, ) @@ -45,6 +50,18 @@ _OPENCLAW_SCRIPT_INSTALLED = ( / "openclaw_to_hermes.py" ) +# Known OpenClaw directory names (current + legacy) +_OPENCLAW_DIR_NAMES = (".openclaw", ".clawdbot", ".moldbot") + +# State files commonly found in OpenClaw workspace directories that cause +# confusion after migration (the agent discovers them and writes to them) +_WORKSPACE_STATE_GLOBS = ( + "*/todo.json", + "*/sessions/*", + "*/memory/*.json", + "*/logs/*", +) + def _find_migration_script() -> Path | None: """Find the openclaw_to_hermes.py script in known locations.""" @@ -71,19 +88,88 @@ def _load_migration_module(script_path: Path): return mod +def _find_openclaw_dirs() -> list[Path]: + """Find all OpenClaw directories on disk.""" + found = [] + for name in _OPENCLAW_DIR_NAMES: + candidate = Path.home() / name + if candidate.is_dir(): + found.append(candidate) + return found + + +def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]: + 
"""Scan an OpenClaw directory for workspace state files that cause confusion. + + Returns a list of (path, description) tuples. + """ + findings: list[tuple[Path, str]] = [] + + # Direct state files in the root + for name in ("todo.json", "sessions", "logs"): + candidate = source_dir / name + if candidate.exists(): + kind = "directory" if candidate.is_dir() else "file" + findings.append((candidate, f"Root {kind}: {name}")) + + # State files inside workspace directories + for child in sorted(source_dir.iterdir()): + if not child.is_dir() or child.name.startswith("."): + continue + # Check for workspace-like subdirectories + for state_name in ("todo.json", "sessions", "logs", "memory"): + state_path = child / state_name + if state_path.exists(): + kind = "directory" if state_path.is_dir() else "file" + rel = state_path.relative_to(source_dir) + findings.append((state_path, f"Workspace {kind}: {rel}")) + + return findings + + +def _archive_directory(source_dir: Path, dry_run: bool = False) -> Path: + """Rename an OpenClaw directory to .pre-migration. + + Returns the archive path. 
+ """ + timestamp = datetime.now().strftime("%Y%m%d") + archive_name = f"{source_dir.name}.pre-migration" + archive_path = source_dir.parent / archive_name + + # If archive already exists, add timestamp + if archive_path.exists(): + archive_name = f"{source_dir.name}.pre-migration-{timestamp}" + archive_path = source_dir.parent / archive_name + + # If still exists (multiple runs same day), add counter + counter = 2 + while archive_path.exists(): + archive_name = f"{source_dir.name}.pre-migration-{timestamp}-{counter}" + archive_path = source_dir.parent / archive_name + counter += 1 + + if not dry_run: + source_dir.rename(archive_path) + + return archive_path + + def claw_command(args): """Route hermes claw subcommands.""" action = getattr(args, "claw_action", None) if action == "migrate": _cmd_migrate(args) + elif action in ("cleanup", "clean"): + _cmd_cleanup(args) else: - print("Usage: hermes claw migrate [options]") + print("Usage: hermes claw [options]") print() print("Commands:") print(" migrate Migrate settings from OpenClaw to Hermes") + print(" cleanup Archive leftover OpenClaw directories after migration") print() - print("Run 'hermes claw migrate --help' for migration options.") + print("Run 'hermes claw --help' for options.") def _cmd_migrate(args): @@ -210,6 +296,168 @@ def _cmd_migrate(args): # Print results _print_migration_report(report, dry_run) + # After successful non-dry-run migration, offer to archive the source directory + if not dry_run and report.get("summary", {}).get("migrated", 0) > 0: + _offer_source_archival(source_dir, getattr(args, "yes", False)) + + +def _offer_source_archival(source_dir: Path, auto_yes: bool = False): + """After migration, offer to rename the source directory to prevent state fragmentation. + + OpenClaw workspace directories contain state files (todo.json, sessions, etc.) + that the agent may discover and write to, causing confusion. Renaming the + directory prevents this. 
+ """ + if not source_dir.is_dir(): + return + + # Scan for state files that could cause problems + state_files = _scan_workspace_state(source_dir) + + print() + print_header("Post-Migration Cleanup") + print_info("The OpenClaw directory still exists and contains workspace state files") + print_info("that can confuse the agent (todo lists, sessions, logs).") + if state_files: + print() + print(color(" Found state files:", Colors.YELLOW)) + # Show up to 10 most relevant findings + for path, desc in state_files[:10]: + print(f" {desc}") + if len(state_files) > 10: + print(f" ... and {len(state_files) - 10} more") + print() + print_info(f"Recommend: rename {source_dir.name}/ to {source_dir.name}.pre-migration/") + print_info("This prevents the agent from discovering old workspace directories.") + print_info("You can always rename it back if needed.") + print() + + if auto_yes or prompt_yes_no(f"Archive {source_dir} now?", default=True): + try: + archive_path = _archive_directory(source_dir) + print_success(f"Archived: {source_dir} → {archive_path}") + print_info("The original directory has been renamed, not deleted.") + print_info(f"To undo: mv {archive_path} {source_dir}") + except OSError as e: + print_error(f"Could not archive: {e}") + print_info(f"You can do it manually: mv {source_dir} {source_dir}.pre-migration") + else: + print_info("Skipped. You can archive later with: hermes claw cleanup") + + +def _cmd_cleanup(args): + """Archive leftover OpenClaw directories after migration. + + Scans for OpenClaw directories that still exist after migration and offers + to rename them to .pre-migration to prevent state fragmentation. 
+ """ + dry_run = getattr(args, "dry_run", False) + auto_yes = getattr(args, "yes", False) + explicit_source = getattr(args, "source", None) + + print() + print( + color( + "┌─────────────────────────────────────────────────────────┐", + Colors.MAGENTA, + ) + ) + print( + color( + "│ ⚕ Hermes — OpenClaw Cleanup │", + Colors.MAGENTA, + ) + ) + print( + color( + "└─────────────────────────────────────────────────────────┘", + Colors.MAGENTA, + ) + ) + + # Find OpenClaw directories + if explicit_source: + dirs_to_check = [Path(explicit_source)] + else: + dirs_to_check = _find_openclaw_dirs() + + if not dirs_to_check: + print() + print_success("No OpenClaw directories found. Nothing to clean up.") + return + + total_archived = 0 + + for source_dir in dirs_to_check: + print() + print_header(f"Found: {source_dir}") + + # Scan for state files + state_files = _scan_workspace_state(source_dir) + + # Show directory stats + try: + workspace_dirs = [ + d for d in source_dir.iterdir() + if d.is_dir() and not d.name.startswith(".") + and any((d / name).exists() for name in ("todo.json", "SOUL.md", "MEMORY.md", "USER.md")) + ] + except OSError: + workspace_dirs = [] + + if workspace_dirs: + print_info(f"Workspace directories: {len(workspace_dirs)}") + for ws in workspace_dirs[:5]: + items = [] + if (ws / "todo.json").exists(): + items.append("todo.json") + if (ws / "sessions").is_dir(): + items.append("sessions/") + if (ws / "SOUL.md").exists(): + items.append("SOUL.md") + if (ws / "MEMORY.md").exists(): + items.append("MEMORY.md") + detail = ", ".join(items) if items else "empty" + print(f" {ws.name}/ ({detail})") + if len(workspace_dirs) > 5: + print(f" ... and {len(workspace_dirs) - 5} more") + + if state_files: + print() + print(color(f" {len(state_files)} state file(s) that could cause confusion:", Colors.YELLOW)) + for path, desc in state_files[:8]: + print(f" {desc}") + if len(state_files) > 8: + print(f" ... 
and {len(state_files) - 8} more") + + print() + + if dry_run: + archive_path = _archive_directory(source_dir, dry_run=True) + print_info(f"Would archive: {source_dir} → {archive_path}") + else: + if auto_yes or prompt_yes_no(f"Archive {source_dir}?", default=True): + try: + archive_path = _archive_directory(source_dir) + print_success(f"Archived: {source_dir} → {archive_path}") + total_archived += 1 + except OSError as e: + print_error(f"Could not archive: {e}") + print_info(f"Try manually: mv {source_dir} {source_dir}.pre-migration") + else: + print_info("Skipped.") + + # Summary + print() + if dry_run: + print_info(f"Dry run complete. {len(dirs_to_check)} directory(ies) would be archived.") + print_info("Run without --dry-run to archive them.") + elif total_archived: + print_success(f"Cleaned up {total_archived} OpenClaw directory(ies).") + print_info("Directories were renamed, not deleted. You can undo by renaming them back.") + else: + print_info("No directories were archived.") + def _print_migration_report(report: dict, dry_run: bool): """Print a formatted migration report.""" diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 64fc455cd..763bcea4e 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -4712,6 +4712,28 @@ For more help on a command: help="Skip confirmation prompts" ) + # claw cleanup + claw_cleanup = claw_subparsers.add_parser( + "cleanup", + aliases=["clean"], + help="Archive leftover OpenClaw directories after migration", + description="Scan for and archive leftover OpenClaw directories to prevent state fragmentation" + ) + claw_cleanup.add_argument( + "--source", + help="Path to a specific OpenClaw directory to clean up" + ) + claw_cleanup.add_argument( + "--dry-run", + action="store_true", + help="Preview what would be archived without making changes" + ) + claw_cleanup.add_argument( + "--yes", "-y", + action="store_true", + help="Skip confirmation prompts" + ) + def cmd_claw(args): from hermes_cli.claw import claw_command 
claw_command(args) diff --git a/optional-skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py b/optional-skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py index ac99e2a6f..74e9d7dac 100644 --- a/optional-skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py +++ b/optional-skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py @@ -2455,9 +2455,24 @@ class Migrator: notes.append("") notes.extend([ + "## IMPORTANT: Archive the OpenClaw Directory", + "", + "After migration, your OpenClaw directory still exists on disk with workspace", + "state files (todo.json, sessions, logs). If the Hermes agent discovers these", + "directories, it may read/write to them instead of the Hermes state, causing", + "confusion (e.g., cron jobs reading a different todo list than interactive sessions).", + "", + "**Strongly recommended:** Run `hermes claw cleanup` to rename the OpenClaw", + "directory to `.openclaw.pre-migration`. This prevents the agent from finding it.", + "The directory is renamed, not deleted — you can undo this at any time.", + "", + "If you skip this step and notice the agent getting confused about workspaces", + "or todo lists, run `hermes claw cleanup` to fix it.", + "", "## Hermes-Specific Setup", "", "After migration, you may want to:", + "- Run `hermes claw cleanup` to archive the OpenClaw directory (prevents state confusion)", "- Run `hermes setup` to configure any remaining settings", "- Run `hermes mcp list` to verify MCP servers were imported correctly", "- Run `hermes cron` to recreate scheduled tasks (see archive/cron-config.json)", diff --git a/tests/hermes_cli/test_claw.py b/tests/hermes_cli/test_claw.py index a9788db93..138b21e9d 100644 --- a/tests/hermes_cli/test_claw.py +++ b/tests/hermes_cli/test_claw.py @@ -40,6 +40,119 @@ class TestFindMigrationScript: assert claw_mod._find_migration_script() is None +# --------------------------------------------------------------------------- +# 
_find_openclaw_dirs +# --------------------------------------------------------------------------- + + +class TestFindOpenclawDirs: + """Test discovery of OpenClaw directories.""" + + def test_finds_openclaw_dir(self, tmp_path): + openclaw = tmp_path / ".openclaw" + openclaw.mkdir() + with patch("pathlib.Path.home", return_value=tmp_path): + found = claw_mod._find_openclaw_dirs() + assert openclaw in found + + def test_finds_legacy_dirs(self, tmp_path): + clawdbot = tmp_path / ".clawdbot" + clawdbot.mkdir() + moldbot = tmp_path / ".moldbot" + moldbot.mkdir() + with patch("pathlib.Path.home", return_value=tmp_path): + found = claw_mod._find_openclaw_dirs() + assert len(found) == 2 + assert clawdbot in found + assert moldbot in found + + def test_returns_empty_when_none_exist(self, tmp_path): + with patch("pathlib.Path.home", return_value=tmp_path): + found = claw_mod._find_openclaw_dirs() + assert found == [] + + +# --------------------------------------------------------------------------- +# _scan_workspace_state +# --------------------------------------------------------------------------- + + +class TestScanWorkspaceState: + """Test scanning for workspace state files.""" + + def test_finds_root_state_files(self, tmp_path): + (tmp_path / "todo.json").write_text("{}") + (tmp_path / "sessions").mkdir() + findings = claw_mod._scan_workspace_state(tmp_path) + descs = [desc for _, desc in findings] + assert any("todo.json" in d for d in descs) + assert any("sessions" in d for d in descs) + + def test_finds_workspace_state_files(self, tmp_path): + ws = tmp_path / "workspace" + ws.mkdir() + (ws / "todo.json").write_text("{}") + (ws / "sessions").mkdir() + findings = claw_mod._scan_workspace_state(tmp_path) + descs = [desc for _, desc in findings] + assert any("workspace/todo.json" in d for d in descs) + assert any("workspace/sessions" in d for d in descs) + + def test_ignores_hidden_dirs(self, tmp_path): + scan_dir = tmp_path / "scan_target" + scan_dir.mkdir() + hidden 
= scan_dir / ".git" + hidden.mkdir() + (hidden / "todo.json").write_text("{}") + findings = claw_mod._scan_workspace_state(scan_dir) + assert len(findings) == 0 + + def test_empty_dir_returns_empty(self, tmp_path): + scan_dir = tmp_path / "scan_target" + scan_dir.mkdir() + findings = claw_mod._scan_workspace_state(scan_dir) + assert findings == [] + + +# --------------------------------------------------------------------------- +# _archive_directory +# --------------------------------------------------------------------------- + + +class TestArchiveDirectory: + """Test directory archival (rename).""" + + def test_renames_to_pre_migration(self, tmp_path): + source = tmp_path / ".openclaw" + source.mkdir() + (source / "test.txt").write_text("data") + + archive_path = claw_mod._archive_directory(source) + assert archive_path == tmp_path / ".openclaw.pre-migration" + assert archive_path.is_dir() + assert not source.exists() + assert (archive_path / "test.txt").read_text() == "data" + + def test_adds_timestamp_when_archive_exists(self, tmp_path): + source = tmp_path / ".openclaw" + source.mkdir() + # Pre-existing archive + (tmp_path / ".openclaw.pre-migration").mkdir() + + archive_path = claw_mod._archive_directory(source) + assert ".pre-migration-" in archive_path.name + assert archive_path.is_dir() + assert not source.exists() + + def test_dry_run_does_not_rename(self, tmp_path): + source = tmp_path / ".openclaw" + source.mkdir() + + archive_path = claw_mod._archive_directory(source, dry_run=True) + assert archive_path == tmp_path / ".openclaw.pre-migration" + assert source.is_dir() # Still exists + + # --------------------------------------------------------------------------- # claw_command routing # --------------------------------------------------------------------------- @@ -56,11 +169,24 @@ class TestClawCommand: claw_mod.claw_command(args) mock.assert_called_once_with(args) + def test_routes_to_cleanup(self): + args = Namespace(claw_action="cleanup", 
source=None, dry_run=False, yes=False) + with patch.object(claw_mod, "_cmd_cleanup") as mock: + claw_mod.claw_command(args) + mock.assert_called_once_with(args) + + def test_routes_clean_alias(self): + args = Namespace(claw_action="clean", source=None, dry_run=False, yes=False) + with patch.object(claw_mod, "_cmd_cleanup") as mock: + claw_mod.claw_command(args) + mock.assert_called_once_with(args) + def test_shows_help_for_no_action(self, capsys): args = Namespace(claw_action=None) claw_mod.claw_command(args) captured = capsys.readouterr() assert "migrate" in captured.out + assert "cleanup" in captured.out # --------------------------------------------------------------------------- @@ -168,6 +294,7 @@ class TestCmdMigrate: patch.object(claw_mod, "_load_migration_module", return_value=fake_mod), patch.object(claw_mod, "get_config_path", return_value=config_path), patch.object(claw_mod, "prompt_yes_no", return_value=True), + patch.object(claw_mod, "_offer_source_archival"), ): claw_mod._cmd_migrate(args) @@ -175,6 +302,75 @@ class TestCmdMigrate: assert "Migration Results" in captured.out assert "Migration complete!" 
in captured.out + def test_execute_offers_archival_on_success(self, tmp_path, capsys): + """After successful migration, _offer_source_archival should be called.""" + openclaw_dir = tmp_path / ".openclaw" + openclaw_dir.mkdir() + + fake_mod = ModuleType("openclaw_to_hermes") + fake_mod.resolve_selected_options = MagicMock(return_value={"soul"}) + fake_migrator = MagicMock() + fake_migrator.migrate.return_value = { + "summary": {"migrated": 3, "skipped": 0, "conflict": 0, "error": 0}, + "items": [ + {"kind": "soul", "status": "migrated", "destination": str(tmp_path / "SOUL.md")}, + ], + } + fake_mod.Migrator = MagicMock(return_value=fake_migrator) + + args = Namespace( + source=str(openclaw_dir), + dry_run=False, preset="full", overwrite=False, + migrate_secrets=False, workspace_target=None, + skill_conflict="skip", yes=True, + ) + + with ( + patch.object(claw_mod, "_find_migration_script", return_value=tmp_path / "s.py"), + patch.object(claw_mod, "_load_migration_module", return_value=fake_mod), + patch.object(claw_mod, "get_config_path", return_value=tmp_path / "config.yaml"), + patch.object(claw_mod, "save_config"), + patch.object(claw_mod, "load_config", return_value={}), + patch.object(claw_mod, "_offer_source_archival") as mock_archival, + ): + claw_mod._cmd_migrate(args) + + mock_archival.assert_called_once_with(openclaw_dir, True) + + def test_dry_run_skips_archival(self, tmp_path, capsys): + """Dry run should not offer archival.""" + openclaw_dir = tmp_path / ".openclaw" + openclaw_dir.mkdir() + + fake_mod = ModuleType("openclaw_to_hermes") + fake_mod.resolve_selected_options = MagicMock(return_value=set()) + fake_migrator = MagicMock() + fake_migrator.migrate.return_value = { + "summary": {"migrated": 2, "skipped": 0, "conflict": 0, "error": 0}, + "items": [], + "preset": "full", + } + fake_mod.Migrator = MagicMock(return_value=fake_migrator) + + args = Namespace( + source=str(openclaw_dir), + dry_run=True, preset="full", overwrite=False, + 
migrate_secrets=False, workspace_target=None, + skill_conflict="skip", yes=False, + ) + + with ( + patch.object(claw_mod, "_find_migration_script", return_value=tmp_path / "s.py"), + patch.object(claw_mod, "_load_migration_module", return_value=fake_mod), + patch.object(claw_mod, "get_config_path", return_value=tmp_path / "config.yaml"), + patch.object(claw_mod, "save_config"), + patch.object(claw_mod, "load_config", return_value={}), + patch.object(claw_mod, "_offer_source_archival") as mock_archival, + ): + claw_mod._cmd_migrate(args) + + mock_archival.assert_not_called() + def test_execute_cancelled_by_user(self, tmp_path, capsys): openclaw_dir = tmp_path / ".openclaw" openclaw_dir.mkdir() @@ -290,6 +486,172 @@ class TestCmdMigrate: assert call_kwargs["migrate_secrets"] is True +# --------------------------------------------------------------------------- +# _offer_source_archival +# --------------------------------------------------------------------------- + + +class TestOfferSourceArchival: + """Test the post-migration archival offer.""" + + def test_archives_with_auto_yes(self, tmp_path, capsys): + source = tmp_path / ".openclaw" + source.mkdir() + (source / "workspace").mkdir() + (source / "workspace" / "todo.json").write_text("{}") + + claw_mod._offer_source_archival(source, auto_yes=True) + + captured = capsys.readouterr() + assert "Archived" in captured.out + assert not source.exists() + assert (tmp_path / ".openclaw.pre-migration").is_dir() + + def test_skips_when_user_declines(self, tmp_path, capsys): + source = tmp_path / ".openclaw" + source.mkdir() + + with patch.object(claw_mod, "prompt_yes_no", return_value=False): + claw_mod._offer_source_archival(source, auto_yes=False) + + captured = capsys.readouterr() + assert "Skipped" in captured.out + assert source.is_dir() # Still exists + + def test_noop_when_source_missing(self, tmp_path, capsys): + claw_mod._offer_source_archival(tmp_path / "nonexistent", auto_yes=True) + captured = capsys.readouterr() 
+ assert captured.out == "" # No output + + def test_shows_state_files(self, tmp_path, capsys): + source = tmp_path / ".openclaw" + source.mkdir() + ws = source / "workspace" + ws.mkdir() + (ws / "todo.json").write_text("{}") + + with patch.object(claw_mod, "prompt_yes_no", return_value=False): + claw_mod._offer_source_archival(source, auto_yes=False) + + captured = capsys.readouterr() + assert "todo.json" in captured.out + + def test_handles_archive_error(self, tmp_path, capsys): + source = tmp_path / ".openclaw" + source.mkdir() + + with patch.object(claw_mod, "_archive_directory", side_effect=OSError("permission denied")): + claw_mod._offer_source_archival(source, auto_yes=True) + + captured = capsys.readouterr() + assert "Could not archive" in captured.out + + +# --------------------------------------------------------------------------- +# _cmd_cleanup +# --------------------------------------------------------------------------- + + +class TestCmdCleanup: + """Test the cleanup command handler.""" + + def test_no_dirs_found(self, tmp_path, capsys): + args = Namespace(source=None, dry_run=False, yes=False) + with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[]): + claw_mod._cmd_cleanup(args) + captured = capsys.readouterr() + assert "No OpenClaw directories found" in captured.out + + def test_dry_run_lists_dirs(self, tmp_path, capsys): + openclaw = tmp_path / ".openclaw" + openclaw.mkdir() + ws = openclaw / "workspace" + ws.mkdir() + (ws / "todo.json").write_text("{}") + + args = Namespace(source=None, dry_run=True, yes=False) + with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[openclaw]): + claw_mod._cmd_cleanup(args) + + captured = capsys.readouterr() + assert "Would archive" in captured.out + assert openclaw.is_dir() # Not actually archived + + def test_archives_with_yes(self, tmp_path, capsys): + openclaw = tmp_path / ".openclaw" + openclaw.mkdir() + (openclaw / "workspace").mkdir() + (openclaw / "workspace" / 
"todo.json").write_text("{}") + + args = Namespace(source=None, dry_run=False, yes=True) + with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[openclaw]): + claw_mod._cmd_cleanup(args) + + captured = capsys.readouterr() + assert "Archived" in captured.out + assert "Cleaned up 1" in captured.out + assert not openclaw.exists() + assert (tmp_path / ".openclaw.pre-migration").is_dir() + + def test_skips_when_user_declines(self, tmp_path, capsys): + openclaw = tmp_path / ".openclaw" + openclaw.mkdir() + + args = Namespace(source=None, dry_run=False, yes=False) + with ( + patch.object(claw_mod, "_find_openclaw_dirs", return_value=[openclaw]), + patch.object(claw_mod, "prompt_yes_no", return_value=False), + ): + claw_mod._cmd_cleanup(args) + + captured = capsys.readouterr() + assert "Skipped" in captured.out + assert openclaw.is_dir() + + def test_explicit_source(self, tmp_path, capsys): + custom_dir = tmp_path / "my-openclaw" + custom_dir.mkdir() + (custom_dir / "todo.json").write_text("{}") + + args = Namespace(source=str(custom_dir), dry_run=False, yes=True) + claw_mod._cmd_cleanup(args) + + captured = capsys.readouterr() + assert "Archived" in captured.out + assert not custom_dir.exists() + + def test_shows_workspace_details(self, tmp_path, capsys): + openclaw = tmp_path / ".openclaw" + openclaw.mkdir() + ws = openclaw / "workspace" + ws.mkdir() + (ws / "todo.json").write_text("{}") + (ws / "SOUL.md").write_text("# Soul") + + args = Namespace(source=None, dry_run=True, yes=False) + with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[openclaw]): + claw_mod._cmd_cleanup(args) + + captured = capsys.readouterr() + assert "workspace/" in captured.out + assert "todo.json" in captured.out + + def test_handles_multiple_dirs(self, tmp_path, capsys): + openclaw = tmp_path / ".openclaw" + openclaw.mkdir() + clawdbot = tmp_path / ".clawdbot" + clawdbot.mkdir() + + args = Namespace(source=None, dry_run=False, yes=True) + with patch.object(claw_mod, 
"_find_openclaw_dirs", return_value=[openclaw, clawdbot]): + claw_mod._cmd_cleanup(args) + + captured = capsys.readouterr() + assert "Cleaned up 2" in captured.out + assert not openclaw.exists() + assert not clawdbot.exists() + + # --------------------------------------------------------------------------- # _print_migration_report # --------------------------------------------------------------------------- -- 2.43.0 From ffd5d37f9b50febb2a85343a2052fec08950f199 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:41:13 -0700 Subject: [PATCH 044/385] fix: treat non-sk-ant- keys as regular API keys, not OAuth tokens (#4093) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: treat non-sk-ant- prefixed keys (Azure AI Foundry) as regular API keys, not OAuth tokens * fix: treat non-sk-ant- keys as regular API keys, not OAuth tokens _is_oauth_token() returned True for any key not starting with sk-ant-api, misclassifying Azure AI Foundry keys as OAuth tokens and sending Bearer auth instead of x-api-key → 401 rejection. Real Anthropic OAuth tokens all start with sk-ant-oat (confirmed from live .credentials.json). Non-sk-ant- keys are third-party provider keys that should use x-api-key. Test fixtures updated to use realistic sk-ant-oat01- prefixed tokens instead of fake strings. Salvaged from PR #4075 by @HangGlidersRule. --------- Co-authored-by: Clawdbot --- agent/anthropic_adapter.py | 6 +++++- tests/agent/test_auxiliary_client.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index a81736496..74539cbc2 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -152,13 +152,17 @@ def _is_oauth_token(key: str) -> bool: Regular API keys start with 'sk-ant-api'. Everything else (setup-tokens starting with 'sk-ant-oat', managed keys, JWTs, etc.) needs Bearer auth. 
+ Azure AI Foundry keys (non sk-ant- prefixed) should use x-api-key, not Bearer. """ if not key: return False # Regular Console API keys use x-api-key header if key.startswith("sk-ant-api"): return False - # Everything else (setup-tokens, managed keys, JWTs) uses Bearer auth + # Azure AI Foundry keys don't start with sk-ant- at all — treat as regular API key + if not key.startswith("sk-ant-"): + return False + # Everything else (setup-tokens sk-ant-oat, managed keys, JWTs) uses Bearer auth return True diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 35dcee7ad..28ef57289 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -310,7 +310,7 @@ class TestExpiredCodexFallback: def test_hermes_oauth_file_sets_oauth_flag(self, monkeypatch): """OAuth-style tokens should get is_oauth=True (token is not sk-ant-api-*).""" # Mock resolve_anthropic_token to return an OAuth-style token - with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="hermes-oauth-jwt-token"), \ + with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat01-hermes-oauth-test"), \ patch("agent.anthropic_adapter.build_anthropic_client") as mock_build: mock_build.return_value = MagicMock() from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient @@ -364,7 +364,7 @@ class TestExpiredCodexFallback: def test_claude_code_oauth_env_sets_flag(self, monkeypatch): """CLAUDE_CODE_OAUTH_TOKEN env var should get is_oauth=True.""" - monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "cc-oauth-token-test") + monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "sk-ant-oat01-cc-oauth-test") monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False) with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build: mock_build.return_value = MagicMock() -- 2.43.0 From b2e1a095f8ec90db545acfc81328939a3a90fb5f Mon Sep 17 00:00:00 2001 From: Teknium 
<127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 18:35:16 -0700 Subject: [PATCH 045/385] fix(anthropic): write scopes field to Claude Code credentials on token refresh (#4126) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude Code >=2.1.81 checks for a 'scopes' array containing 'user:inference' in ~/.claude/.credentials.json before accepting stored OAuth tokens as valid. When Hermes refreshes the token, it writes only accessToken, refreshToken, and expiresAt — omitting the scopes field. This causes Claude Code to report 'loggedIn: false' and refuse to start, even though the token is valid. This commit: - Parses the 'scope' field from the OAuth refresh response - Passes it to _write_claude_code_credentials() as a keyword argument - Persists the scopes array in the claudeAiOauth credential store - Preserves existing scopes when the refresh response omits the field Tested against Claude Code v2.1.87 on Linux — auth status correctly reports loggedIn: true and claude --print works after this fix. Co-authored-by: Nick --- agent/anthropic_adapter.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 74539cbc2..879d1b34b 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -342,7 +342,14 @@ def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]: if new_access: new_expires_ms = int(time.time() * 1000) + (expires_in * 1000) - _write_claude_code_credentials(new_access, new_refresh, new_expires_ms) + # Parse scopes from refresh response — Claude Code >=2.1.81 + # requires a "scopes" field in the credential store and checks + # for "user:inference" before accepting the token as valid. 
+ scope_str = result.get("scope", "") + scopes = scope_str.split() if scope_str else None + _write_claude_code_credentials( + new_access, new_refresh, new_expires_ms, scopes=scopes, + ) logger.debug("Refreshed Claude Code OAuth token via %s", endpoint) return new_access except Exception as e: @@ -351,8 +358,20 @@ def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]: return None -def _write_claude_code_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None: - """Write refreshed credentials back to ~/.claude/.credentials.json.""" +def _write_claude_code_credentials( + access_token: str, + refresh_token: str, + expires_at_ms: int, + *, + scopes: Optional[list] = None, +) -> None: + """Write refreshed credentials back to ~/.claude/.credentials.json. + + The optional *scopes* list (e.g. ``["user:inference", "user:profile", ...]``) + is persisted so that Claude Code's own auth check recognises the credential + as valid. Claude Code >=2.1.81 gates on the presence of ``"user:inference"`` + in the stored scopes before it will use the token. + """ cred_path = Path.home() / ".claude" / ".credentials.json" try: # Read existing file to preserve other fields @@ -360,11 +379,19 @@ def _write_claude_code_credentials(access_token: str, refresh_token: str, expire if cred_path.exists(): existing = json.loads(cred_path.read_text(encoding="utf-8")) - existing["claudeAiOauth"] = { + oauth_data: Dict[str, Any] = { "accessToken": access_token, "refreshToken": refresh_token, "expiresAt": expires_at_ms, } + if scopes is not None: + oauth_data["scopes"] = scopes + elif "claudeAiOauth" in existing and "scopes" in existing["claudeAiOauth"]: + # Preserve previously-stored scopes when the refresh response + # does not include a scope field. 
+ oauth_data["scopes"] = existing["claudeAiOauth"]["scopes"] + + existing["claudeAiOauth"] = oauth_data cred_path.parent.mkdir(parents=True, exist_ok=True) cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8") -- 2.43.0 From 44d02f35d234087997797c29db56e9fe50f2e982 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 18:39:51 -0700 Subject: [PATCH 046/385] =?UTF-8?q?docs:=20restructure=20site=20navigation?= =?UTF-8?q?=20=E2=80=94=20promote=20features=20and=20platforms=20to=20top-?= =?UTF-8?q?level=20(#4116)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Major reorganization of the documentation site for better discoverability and navigation. 94 pages across 8 top-level sections (was 5). Structural changes: - Promote Features from 3-level-deep subcategory to top-level section with new Overview hub page categorizing all 26 feature pages - Promote Messaging Platforms from User Guide subcategory to top-level section, add platform comparison matrix (13 platforms x 7 features) - Create new Integrations section with hub page, grouping MCP, ACP, API Server, Honcho, Provider Routing, Fallback Providers - Extract AI provider content (626 lines) from configuration.md into dedicated integrations/providers.md — configuration.md drops from 1803 to 1178 lines - Subcategorize Developer Guide into Architecture, Extending, Internals - Rename "User Guide" to "Using Hermes" for top-level items Orphan fixes (7 pages now reachable via sidebar): - build-a-hermes-plugin.md added to Guides - sms.md added to Messaging Platforms - context-references.md added to Features > Core - plugins.md added to Features > Core - git-worktrees.md added to Using Hermes - checkpoints-and-rollback.md added to Using Hermes - checkpoints.md (30-line stub) deleted, superseded by checkpoints-and-rollback.md (203 lines) New files: - integrations/index.md — Integrations hub page - 
integrations/providers.md — AI provider setup (extracted) - user-guide/features/overview.md — Features hub page Broken link fixes: - quickstart.md, faq.md: update context-length-detection anchors - configuration.md: update checkpoints link - overview.md: fix checkpoint link path Docusaurus build verified clean (zero broken links/anchors). --- website/docs/getting-started/quickstart.md | 2 +- website/docs/guides/build-a-hermes-plugin.md | 5 +- website/docs/integrations/index.md | 25 + website/docs/integrations/providers.md | 643 ++++++++++++++++++ website/docs/reference/faq.md | 2 +- .../user-guide/checkpoints-and-rollback.md | 1 + website/docs/user-guide/configuration.md | 631 +---------------- .../docs/user-guide/features/checkpoints.md | 30 - .../user-guide/features/context-references.md | 1 + website/docs/user-guide/features/overview.md | 40 ++ website/docs/user-guide/features/plugins.md | 5 +- website/docs/user-guide/git-worktrees.md | 3 +- website/docs/user-guide/messaging/index.md | 20 + website/docs/user-guide/messaging/sms.md | 1 + website/docs/user-guide/skills/godmode.md | 2 + website/sidebars.ts | 183 +++-- 16 files changed, 858 insertions(+), 736 deletions(-) create mode 100644 website/docs/integrations/index.md create mode 100644 website/docs/integrations/providers.md delete mode 100644 website/docs/user-guide/features/checkpoints.md create mode 100644 website/docs/user-guide/features/overview.md diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md index bc182f655..7ed83e819 100644 --- a/website/docs/getting-started/quickstart.md +++ b/website/docs/getting-started/quickstart.md @@ -61,7 +61,7 @@ hermes setup # Or configure everything at once | **Custom Endpoint** | VLLM, SGLang, Ollama, or any OpenAI-compatible API | Set base URL + API key | :::tip -You can switch providers at any time with `hermes model` — no code changes, no lock-in. 
When configuring a custom endpoint, Hermes will prompt for the context window size and auto-detect it when possible. See [Context Length Detection](../user-guide/configuration.md#context-length-detection) for details. +You can switch providers at any time with `hermes model` — no code changes, no lock-in. When configuring a custom endpoint, Hermes will prompt for the context window size and auto-detect it when possible. See [Context Length Detection](../integrations/providers.md#context-length-detection) for details. ::: ## 3. Start Chatting diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md index abe1e3424..b3f6df959 100644 --- a/website/docs/guides/build-a-hermes-plugin.md +++ b/website/docs/guides/build-a-hermes-plugin.md @@ -1,5 +1,8 @@ --- -sidebar_position: 10 +sidebar_position: 8 +sidebar_label: "Build a Plugin" +title: "Build a Hermes Plugin" +description: "Step-by-step guide to building a complete Hermes plugin with tools, hooks, data files, and skills" --- # Build a Hermes Plugin diff --git a/website/docs/integrations/index.md b/website/docs/integrations/index.md new file mode 100644 index 000000000..829c1c67d --- /dev/null +++ b/website/docs/integrations/index.md @@ -0,0 +1,25 @@ +--- +title: "Integrations" +sidebar_label: "Overview" +sidebar_position: 0 +--- + +# Integrations + +Hermes Agent connects to external systems for AI inference, tool servers, IDE workflows, programmatic access, and more. These integrations extend what Hermes can do and where it can run. + +## Available Integrations + +- **[AI Providers](/docs/user-guide/features/provider-routing)** — Set up and configure inference providers. Hermes works with OpenRouter, Anthropic, OpenAI, Google, and any OpenAI-compatible endpoint. Use `hermes model` to configure interactively. + +- **[MCP Servers](/docs/user-guide/features/mcp)** — Connect Hermes to external tool servers via Model Context Protocol. 
Access tools from GitHub, databases, file systems, browser stacks, internal APIs, and more without writing native Hermes tools. + +- **[IDE Integration (ACP)](/docs/user-guide/features/acp)** — Use Hermes Agent inside ACP-compatible editors such as VS Code, Zed, and JetBrains. Hermes runs as an ACP server, rendering chat messages, tool activity, file diffs, and terminal commands inside your editor. + +- **[API Server](/docs/user-guide/features/api-server)** — Expose Hermes as an OpenAI-compatible HTTP endpoint. Any frontend that speaks the OpenAI format — Open WebUI, LobeChat, LibreChat, NextChat, ChatBox — can connect and use Hermes as a backend with its full toolset. + +- **[Honcho Memory](/docs/user-guide/features/honcho)** — AI-native persistent memory for cross-session user modeling and personalization. Honcho adds deep user modeling via dialectic reasoning on top of Hermes's built-in memory system. + +- **[Provider Routing](/docs/user-guide/features/provider-routing)** — Fine-grained control over which underlying AI providers handle your OpenRouter requests. Optimize for cost, speed, or quality with sorting, whitelists, blacklists, and explicit priority ordering. + +- **[Fallback Providers](/docs/user-guide/features/fallback-providers)** — Automatic failover to backup LLM providers when your primary model encounters errors. Includes primary model fallback and independent auxiliary task fallback for vision, compression, and web extraction. 
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md new file mode 100644 index 000000000..ab4c8f354 --- /dev/null +++ b/website/docs/integrations/providers.md @@ -0,0 +1,643 @@ +--- +title: "AI Providers" +sidebar_label: "AI Providers" +sidebar_position: 1 +--- + +# AI Providers + +This page covers setting up inference providers for Hermes Agent — from cloud APIs like OpenRouter and Anthropic, to self-hosted endpoints like Ollama and vLLM, to advanced routing and fallback configurations. You need at least one provider configured to use Hermes. + +## Inference Providers + +You need at least one way to connect to an LLM. Use `hermes model` to switch providers and models interactively, or configure directly: + +| Provider | Setup | +|----------|-------| +| **Nous Portal** | `hermes model` (OAuth, subscription-based) | +| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) | +| **GitHub Copilot** | `hermes model` (OAuth device code flow, `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, or `gh auth token`) | +| **GitHub Copilot ACP** | `hermes model` (spawns local `copilot --acp --stdio`) | +| **Anthropic** | `hermes model` (Claude Pro/Max via Claude Code auth, Anthropic API key, or manual setup-token) | +| **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` | +| **AI Gateway** | `AI_GATEWAY_API_KEY` in `~/.hermes/.env` (provider: `ai-gateway`) | +| **z.ai / GLM** | `GLM_API_KEY` in `~/.hermes/.env` (provider: `zai`) | +| **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) | +| **MiniMax** | `MINIMAX_API_KEY` in `~/.hermes/.env` (provider: `minimax`) | +| **MiniMax China** | `MINIMAX_CN_API_KEY` in `~/.hermes/.env` (provider: `minimax-cn`) | +| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`, aliases: `dashscope`, `qwen`) | +| **Kilo Code** | `KILOCODE_API_KEY` in `~/.hermes/.env` (provider: `kilocode`) | +| **OpenCode Zen** | `OPENCODE_ZEN_API_KEY` in 
`~/.hermes/.env` (provider: `opencode-zen`) | +| **OpenCode Go** | `OPENCODE_GO_API_KEY` in `~/.hermes/.env` (provider: `opencode-go`) | +| **DeepSeek** | `DEEPSEEK_API_KEY` in `~/.hermes/.env` (provider: `deepseek`) | +| **Hugging Face** | `HF_TOKEN` in `~/.hermes/.env` (provider: `huggingface`, aliases: `hf`) | +| **Custom Endpoint** | `hermes model` (saved in `config.yaml`) or `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | + +:::tip Model key alias +In the `model:` config section, you can use either `default:` or `model:` as the key name for your model ID. Both `model: { default: my-model }` and `model: { model: my-model }` work identically. +::: + +:::info Codex Note +The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Hermes stores the resulting credentials in its own auth store under `~/.hermes/auth.json` and can import existing Codex CLI credentials from `~/.codex/auth.json` when present. No Codex CLI installation is required. +::: + +:::warning +Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use a separate "auxiliary" model — by default Gemini Flash via OpenRouter. An `OPENROUTER_API_KEY` enables these tools automatically. You can also configure which model and provider these tools use — see [Auxiliary Models](/docs/user-guide/configuration#auxiliary-models). +::: + +### Anthropic (Native) + +Use Claude models directly through the Anthropic API — no OpenRouter proxy needed. 
Supports three auth methods: + +```bash +# With an API key (pay-per-token) +export ANTHROPIC_API_KEY=*** +hermes chat --provider anthropic --model claude-sonnet-4-6 + +# Preferred: authenticate through `hermes model` +# Hermes will use Claude Code's credential store directly when available +hermes model + +# Manual override with a setup-token (fallback / legacy) +export ANTHROPIC_TOKEN=*** # setup-token or manual OAuth token +hermes chat --provider anthropic + +# Auto-detect Claude Code credentials (if you already use Claude Code) +hermes chat --provider anthropic # reads Claude Code credential files automatically +``` + +When you choose Anthropic OAuth through `hermes model`, Hermes prefers Claude Code's own credential store over copying the token into `~/.hermes/.env`. That keeps refreshable Claude credentials refreshable. + +Or set it permanently: +```yaml +model: + provider: "anthropic" + default: "claude-sonnet-4-6" +``` + +:::tip Aliases +`--provider claude` and `--provider claude-code` also work as shorthand for `--provider anthropic`. +::: + +### GitHub Copilot + +Hermes supports GitHub Copilot as a first-class provider with two modes: + +**`copilot` — Direct Copilot API** (recommended). Uses your GitHub Copilot subscription to access GPT-5.x, Claude, Gemini, and other models through the Copilot API. + +```bash +hermes chat --provider copilot --model gpt-5.4 +``` + +**Authentication options** (checked in this order): + +1. `COPILOT_GITHUB_TOKEN` environment variable +2. `GH_TOKEN` environment variable +3. `GITHUB_TOKEN` environment variable +4. `gh auth token` CLI fallback + +If no token is found, `hermes model` offers an **OAuth device code login** — the same flow used by the Copilot CLI and opencode. + +:::warning Token types +The Copilot API does **not** support classic Personal Access Tokens (`ghp_*`). 
Supported token types: + +| Type | Prefix | How to get | +|------|--------|------------| +| OAuth token | `gho_` | `hermes model` → GitHub Copilot → Login with GitHub | +| Fine-grained PAT | `github_pat_` | GitHub Settings → Developer settings → Fine-grained tokens (needs **Copilot Requests** permission) | +| GitHub App token | `ghu_` | Via GitHub App installation | + +If your `gh auth token` returns a `ghp_*` token, use `hermes model` to authenticate via OAuth instead. +::: + +**API routing**: GPT-5+ models (except `gpt-5-mini`) automatically use the Responses API. All other models (GPT-4o, Claude, Gemini, etc.) use Chat Completions. Models are auto-detected from the live Copilot catalog. + +**`copilot-acp` — Copilot ACP agent backend**. Spawns the local Copilot CLI as a subprocess: + +```bash +hermes chat --provider copilot-acp --model copilot-acp +# Requires the GitHub Copilot CLI in PATH and an existing `copilot login` session +``` + +**Permanent config:** +```yaml +model: + provider: "copilot" + default: "gpt-5.4" +``` + +| Environment variable | Description | +|---------------------|-------------| +| `COPILOT_GITHUB_TOKEN` | GitHub token for Copilot API (first priority) | +| `HERMES_COPILOT_ACP_COMMAND` | Override the Copilot CLI binary path (default: `copilot`) | +| `HERMES_COPILOT_ACP_ARGS` | Override ACP args (default: `--acp --stdio`) | + +### First-Class Chinese AI Providers + +These providers have built-in support with dedicated provider IDs. 
Set the API key and use `--provider` to select: + +```bash +# z.ai / ZhipuAI GLM +hermes chat --provider zai --model glm-4-plus +# Requires: GLM_API_KEY in ~/.hermes/.env + +# Kimi / Moonshot AI +hermes chat --provider kimi-coding --model moonshot-v1-auto +# Requires: KIMI_API_KEY in ~/.hermes/.env + +# MiniMax (global endpoint) +hermes chat --provider minimax --model MiniMax-M2.7 +# Requires: MINIMAX_API_KEY in ~/.hermes/.env + +# MiniMax (China endpoint) +hermes chat --provider minimax-cn --model MiniMax-M2.7 +# Requires: MINIMAX_CN_API_KEY in ~/.hermes/.env + +# Alibaba Cloud / DashScope (Qwen models) +hermes chat --provider alibaba --model qwen3.5-plus +# Requires: DASHSCOPE_API_KEY in ~/.hermes/.env +``` + +Or set the provider permanently in `config.yaml`: +```yaml +model: + provider: "zai" # or: kimi-coding, minimax, minimax-cn, alibaba + default: "glm-4-plus" +``` + +Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, or `DASHSCOPE_BASE_URL` environment variables. + +### Hugging Face Inference Providers + +[Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover. + +```bash +# Use any available model +hermes chat --provider huggingface --model Qwen/Qwen3-235B-A22B-Thinking-2507 +# Requires: HF_TOKEN in ~/.hermes/.env + +# Short alias +hermes chat --provider hf --model deepseek-ai/DeepSeek-V3.2 +``` + +Or set it permanently in `config.yaml`: +```yaml +model: + provider: "huggingface" + default: "Qwen/Qwen3-235B-A22B-Thinking-2507" +``` + +Get your token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) — make sure to enable the "Make calls to Inference Providers" permission. 
Free tier included ($0.10/month credit, no markup on provider rates). + +You can append routing suffixes to model names: `:fastest` (default), `:cheapest`, or `:provider_name` to force a specific backend. + +The base URL can be overridden with `HF_BASE_URL`. + +## Custom & Self-Hosted LLM Providers + +Hermes Agent works with **any OpenAI-compatible API endpoint**. If a server implements `/v1/chat/completions`, you can point Hermes at it. This means you can use local models, GPU inference servers, multi-provider routers, or any third-party API. + +### General Setup + +Three ways to configure a custom endpoint: + +**Interactive setup (recommended):** +```bash +hermes model +# Select "Custom endpoint (self-hosted / VLLM / etc.)" +# Enter: API base URL, API key, Model name +``` + +**Manual config (`config.yaml`):** +```yaml +# In ~/.hermes/config.yaml +model: + default: your-model-name + provider: custom + base_url: http://localhost:8000/v1 + api_key: your-key-or-leave-empty-for-local +``` + +**Environment variables (`.env` file):** +```bash +# Add to ~/.hermes/.env +OPENAI_BASE_URL=http://localhost:8000/v1 +OPENAI_API_KEY=your-key # Any non-empty string for local servers +LLM_MODEL=your-model-name +``` + +All three approaches end up in the same runtime path. `hermes model` persists provider, model, and base URL to `config.yaml` so later sessions keep using that endpoint even if env vars are not set. 
+ +### Switching Models with `/model` + +Once a custom endpoint is configured, you can switch models mid-session: + +``` +/model custom:qwen-2.5 # Switch to a model on your custom endpoint +/model custom # Auto-detect the model from the endpoint +/model openrouter:claude-sonnet-4 # Switch back to a cloud provider +``` + +If you have **named custom providers** configured (see below), use the triple syntax: + +``` +/model custom:local:qwen-2.5 # Use the "local" custom provider with model qwen-2.5 +/model custom:work:llama3 # Use the "work" custom provider with llama3 +``` + +When switching providers, Hermes persists the base URL and provider to config so the change survives restarts. When switching away from a custom endpoint to a built-in provider, the stale base URL is automatically cleared. + +:::tip +`/model custom` (bare, no model name) queries your endpoint's `/models` API and auto-selects the model if exactly one is loaded. Useful for local servers running a single model. +::: + +Everything below follows this same pattern — just change the URL, key, and model name. + +--- + +### Ollama — Local Models, Zero Config + +[Ollama](https://ollama.com/) runs open-weight models locally with one command. Best for: quick local experimentation, privacy-sensitive work, offline use. + +```bash +# Install and run a model +ollama pull llama3.1:70b +ollama serve # Starts on port 11434 + +# Configure Hermes +OPENAI_BASE_URL=http://localhost:11434/v1 +OPENAI_API_KEY=ollama # Any non-empty string +LLM_MODEL=llama3.1:70b +``` + +Ollama's OpenAI-compatible endpoint supports chat completions, streaming, and tool calling (for supported models). No GPU required for smaller models — Ollama handles CPU inference automatically. + +:::tip +List available models with `ollama list`. Pull any model from the [Ollama library](https://ollama.com/library) with `ollama pull `. 
+::: + +--- + +### vLLM — High-Performance GPU Inference + +[vLLM](https://docs.vllm.ai/) is the standard for production LLM serving. Best for: maximum throughput on GPU hardware, serving large models, continuous batching. + +```bash +# Start vLLM server +pip install vllm +vllm serve meta-llama/Llama-3.1-70B-Instruct \ + --port 8000 \ + --tensor-parallel-size 2 # Multi-GPU + +# Configure Hermes +OPENAI_BASE_URL=http://localhost:8000/v1 +OPENAI_API_KEY=dummy +LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct +``` + +vLLM supports tool calling, structured output, and multi-modal models. Use `--enable-auto-tool-choice` and `--tool-call-parser hermes` for Hermes-format tool calling with NousResearch models. + +--- + +### SGLang — Fast Serving with RadixAttention + +[SGLang](https://github.com/sgl-project/sglang) is an alternative to vLLM with RadixAttention for KV cache reuse. Best for: multi-turn conversations (prefix caching), constrained decoding, structured output. + +```bash +# Start SGLang server +pip install "sglang[all]" +python -m sglang.launch_server \ + --model meta-llama/Llama-3.1-70B-Instruct \ + --port 8000 \ + --tp 2 + +# Configure Hermes +OPENAI_BASE_URL=http://localhost:8000/v1 +OPENAI_API_KEY=dummy +LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct +``` + +--- + +### llama.cpp / llama-server — CPU & Metal Inference + +[llama.cpp](https://github.com/ggml-org/llama.cpp) runs quantized models on CPU, Apple Silicon (Metal), and consumer GPUs. Best for: running models without a datacenter GPU, Mac users, edge deployment. + +```bash +# Build and start llama-server +cmake -B build && cmake --build build --config Release +./build/bin/llama-server \ + -m models/llama-3.1-8b-instruct-Q4_K_M.gguf \ + --port 8080 --host 0.0.0.0 + +# Configure Hermes +OPENAI_BASE_URL=http://localhost:8080/v1 +OPENAI_API_KEY=dummy +LLM_MODEL=llama-3.1-8b-instruct +``` + +:::tip +Download GGUF models from [Hugging Face](https://huggingface.co/models?library=gguf). 
Q4_K_M quantization offers the best balance of quality vs. memory usage. +::: + +--- + +### LiteLLM Proxy — Multi-Provider Gateway + +[LiteLLM](https://docs.litellm.ai/) is an OpenAI-compatible proxy that unifies 100+ LLM providers behind a single API. Best for: switching between providers without config changes, load balancing, fallback chains, budget controls. + +```bash +# Install and start +pip install "litellm[proxy]" +litellm --model anthropic/claude-sonnet-4 --port 4000 + +# Or with a config file for multiple models: +litellm --config litellm_config.yaml --port 4000 + +# Configure Hermes +OPENAI_BASE_URL=http://localhost:4000/v1 +OPENAI_API_KEY=sk-your-litellm-key +LLM_MODEL=anthropic/claude-sonnet-4 +``` + +Example `litellm_config.yaml` with fallback: +```yaml +model_list: + - model_name: "best" + litellm_params: + model: anthropic/claude-sonnet-4 + api_key: sk-ant-... + - model_name: "best" + litellm_params: + model: openai/gpt-4o + api_key: sk-... +router_settings: + routing_strategy: "latency-based-routing" +``` + +--- + +### ClawRouter — Cost-Optimized Routing + +[ClawRouter](https://github.com/BlockRunAI/ClawRouter) by BlockRunAI is a local routing proxy that auto-selects models based on query complexity. It classifies requests across 14 dimensions and routes to the cheapest model that can handle the task. Payment is via USDC cryptocurrency (no API keys). 
+ +```bash +# Install and start +npx @blockrun/clawrouter # Starts on port 8402 + +# Configure Hermes +OPENAI_BASE_URL=http://localhost:8402/v1 +OPENAI_API_KEY=dummy +LLM_MODEL=blockrun/auto # or: blockrun/eco, blockrun/premium, blockrun/agentic +``` + +Routing profiles: +| Profile | Strategy | Savings | +|---------|----------|---------| +| `blockrun/auto` | Balanced quality/cost | 74-100% | +| `blockrun/eco` | Cheapest possible | 95-100% | +| `blockrun/premium` | Best quality models | 0% | +| `blockrun/free` | Free models only | 100% | +| `blockrun/agentic` | Optimized for tool use | varies | + +:::note +ClawRouter requires a USDC-funded wallet on Base or Solana for payment. All requests route through BlockRun's backend API. Run `npx @blockrun/clawrouter doctor` to check wallet status. +::: + +--- + +### Other Compatible Providers + +Any service with an OpenAI-compatible API works. Some popular options: + +| Provider | Base URL | Notes | +|----------|----------|-------| +| [Together AI](https://together.ai) | `https://api.together.xyz/v1` | Cloud-hosted open models | +| [Groq](https://groq.com) | `https://api.groq.com/openai/v1` | Ultra-fast inference | +| [DeepSeek](https://deepseek.com) | `https://api.deepseek.com/v1` | DeepSeek models | +| [Fireworks AI](https://fireworks.ai) | `https://api.fireworks.ai/inference/v1` | Fast open model hosting | +| [Cerebras](https://cerebras.ai) | `https://api.cerebras.ai/v1` | Wafer-scale chip inference | +| [Mistral AI](https://mistral.ai) | `https://api.mistral.ai/v1` | Mistral models | +| [OpenAI](https://openai.com) | `https://api.openai.com/v1` | Direct OpenAI access | +| [Azure OpenAI](https://azure.microsoft.com) | `https://YOUR.openai.azure.com/` | Enterprise OpenAI | +| [LocalAI](https://localai.io) | `http://localhost:8080/v1` | Self-hosted, multi-model | +| [Jan](https://jan.ai) | `http://localhost:1337/v1` | Desktop app with local models | + +```bash +# Example: Together AI 
+OPENAI_BASE_URL=https://api.together.xyz/v1 +OPENAI_API_KEY=your-together-key +LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct-Turbo +``` + +--- + +### Context Length Detection + +Hermes uses a multi-source resolution chain to detect the correct context window for your model and provider: + +1. **Config override** — `model.context_length` in config.yaml (highest priority) +2. **Custom provider per-model** — `custom_providers[].models..context_length` +3. **Persistent cache** — previously discovered values (survives restarts) +4. **Endpoint `/models`** — queries your server's API (local/custom endpoints) +5. **Anthropic `/v1/models`** — queries Anthropic's API for `max_input_tokens` (API-key users only) +6. **OpenRouter API** — live model metadata from OpenRouter +7. **Nous Portal** — suffix-matches Nous model IDs against OpenRouter metadata +8. **[models.dev](https://models.dev)** — community-maintained registry with provider-specific context lengths for 3800+ models across 100+ providers +9. **Fallback defaults** — broad model family patterns (128K default) + +For most setups this works out of the box. The system is provider-aware — the same model can have different context limits depending on who serves it (e.g., `claude-opus-4.6` is 1M on Anthropic direct but 128K on GitHub Copilot). + +To set the context length explicitly, add `context_length` to your model config: + +```yaml +model: + default: "qwen3.5:9b" + base_url: "http://localhost:8080/v1" + context_length: 131072 # tokens +``` + +For custom endpoints, you can also set context length per model: + +```yaml +custom_providers: + - name: "My Local LLM" + base_url: "http://localhost:11434/v1" + models: + qwen3.5:27b: + context_length: 32768 + deepseek-r1:70b: + context_length: 65536 +``` + +`hermes model` will prompt for context length when configuring a custom endpoint. Leave it blank for auto-detection. 
+ +:::tip When to set this manually +- You're using Ollama with a custom `num_ctx` that's lower than the model's maximum +- You want to limit context below the model's maximum (e.g., 8k on a 128k model to save VRAM) +- You're running behind a proxy that doesn't expose `/v1/models` +::: + +--- + +### Named Custom Providers + +If you work with multiple custom endpoints (e.g., a local dev server and a remote GPU server), you can define them as named custom providers in `config.yaml`: + +```yaml +custom_providers: + - name: local + base_url: http://localhost:8080/v1 + # api_key omitted — Hermes uses "no-key-required" for keyless local servers + - name: work + base_url: https://gpu-server.internal.corp/v1 + api_key: corp-api-key + api_mode: chat_completions # optional, auto-detected from URL + - name: anthropic-proxy + base_url: https://proxy.example.com/anthropic + api_key: proxy-key + api_mode: anthropic_messages # for Anthropic-compatible proxies +``` + +Switch between them mid-session with the triple syntax: + +``` +/model custom:local:qwen-2.5 # Use the "local" endpoint with qwen-2.5 +/model custom:work:llama3-70b # Use the "work" endpoint with llama3-70b +/model custom:anthropic-proxy:claude-sonnet-4 # Use the proxy +``` + +You can also select named custom providers from the interactive `hermes model` menu. 
+ +--- + +### Choosing the Right Setup + +| Use Case | Recommended | +|----------|-------------| +| **Just want it to work** | OpenRouter (default) or Nous Portal | +| **Local models, easy setup** | Ollama | +| **Production GPU serving** | vLLM or SGLang | +| **Mac / no GPU** | Ollama or llama.cpp | +| **Multi-provider routing** | LiteLLM Proxy or OpenRouter | +| **Cost optimization** | ClawRouter or OpenRouter with `sort: "price"` | +| **Maximum privacy** | Ollama, vLLM, or llama.cpp (fully local) | +| **Enterprise / Azure** | Azure OpenAI with custom endpoint | +| **Chinese AI models** | z.ai (GLM), Kimi/Moonshot, or MiniMax (first-class providers) | + +:::tip +You can switch between providers at any time with `hermes model` — no restart required. Your conversation history, memory, and skills carry over regardless of which provider you use. +::: + +## Optional API Keys + +| Feature | Provider | Env Variable | +|---------|----------|--------------| +| Web scraping | [Firecrawl](https://firecrawl.dev/) | `FIRECRAWL_API_KEY`, `FIRECRAWL_API_URL` | +| Browser automation | [Browserbase](https://browserbase.com/) | `BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID` | +| Image generation | [FAL](https://fal.ai/) | `FAL_KEY` | +| Premium TTS voices | [ElevenLabs](https://elevenlabs.io/) | `ELEVENLABS_API_KEY` | +| OpenAI TTS + voice transcription | [OpenAI](https://platform.openai.com/api-keys) | `VOICE_TOOLS_OPENAI_KEY` | +| RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` | +| Cross-session user modeling | [Honcho](https://honcho.dev/) | `HONCHO_API_KEY` | + +### Self-Hosting Firecrawl + +By default, Hermes uses the [Firecrawl cloud API](https://firecrawl.dev/) for web search and scraping. If you prefer to run Firecrawl locally, you can point Hermes at a self-hosted instance instead. 
See Firecrawl's [SELF_HOST.md](https://github.com/firecrawl/firecrawl/blob/main/SELF_HOST.md) for complete setup instructions. + +**What you get:** No API key required, no rate limits, no per-page costs, full data sovereignty. + +**What you lose:** The cloud version uses Firecrawl's proprietary "Fire-engine" for advanced anti-bot bypassing (Cloudflare, CAPTCHAs, IP rotation). Self-hosted uses basic fetch + Playwright, so some protected sites may fail. Search uses DuckDuckGo instead of Google. + +**Setup:** + +1. Clone and start the Firecrawl Docker stack (5 containers: API, Playwright, Redis, RabbitMQ, PostgreSQL — requires ~4-8 GB RAM): + ```bash + git clone https://github.com/firecrawl/firecrawl + cd firecrawl + # In .env, set: USE_DB_AUTHENTICATION=false, HOST=0.0.0.0, PORT=3002 + docker compose up -d + ``` + +2. Point Hermes at your instance (no API key needed): + ```bash + hermes config set FIRECRAWL_API_URL http://localhost:3002 + ``` + +You can also set both `FIRECRAWL_API_KEY` and `FIRECRAWL_API_URL` if your self-hosted instance has authentication enabled. + +## OpenRouter Provider Routing + +When using OpenRouter, you can control how requests are routed across providers. Add a `provider_routing` section to `~/.hermes/config.yaml`: + +```yaml +provider_routing: + sort: "throughput" # "price" (default), "throughput", or "latency" + # only: ["anthropic"] # Only use these providers + # ignore: ["deepinfra"] # Skip these providers + # order: ["anthropic", "google"] # Try providers in this order + # require_parameters: true # Only use providers that support all request params + # data_collection: "deny" # Exclude providers that may store/train on data +``` + +**Shortcuts:** Append `:nitro` to any model name for throughput sorting (e.g., `anthropic/claude-sonnet-4:nitro`), or `:floor` for price sorting. 
+ +## Fallback Model + +Configure a backup provider:model that Hermes switches to automatically when your primary model fails (rate limits, server errors, auth failures): + +```yaml +fallback_model: + provider: openrouter # required + model: anthropic/claude-sonnet-4 # required + # base_url: http://localhost:8000/v1 # optional, for custom endpoints + # api_key_env: MY_CUSTOM_KEY # optional, env var name for custom endpoint API key +``` + +When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session. + +Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `custom`. + +:::tip +Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers). +::: + +## Smart Model Routing + +Optional cheap-vs-strong routing lets Hermes keep your main model for complex work while sending very short/simple turns to a cheaper model. + +```yaml +smart_model_routing: + enabled: true + max_simple_chars: 160 + max_simple_words: 28 + cheap_model: + provider: openrouter + model: google/gemini-2.5-flash + # base_url: http://localhost:8000/v1 # optional custom endpoint + # api_key_env: MY_CUSTOM_KEY # optional env var name for that endpoint's API key +``` + +How it works: +- If a turn is short, single-line, and does not look code/tool/debug heavy, Hermes may route it to `cheap_model` +- If the turn looks complex, Hermes stays on your primary model/provider +- If the cheap route cannot be resolved cleanly, Hermes falls back to the primary model automatically + +This is intentionally conservative. 
It is meant for quick, low-stakes turns like: +- short factual questions +- quick rewrites +- lightweight summaries + +It will avoid routing prompts that look like: +- coding/debugging work +- tool-heavy requests +- long or multi-line analysis asks + +Use this when you want lower latency or cost without fully changing your default model. + +--- + +## See Also + +- [Configuration](/docs/user-guide/configuration) — General configuration (directory structure, config precedence, terminal backends, memory, compression, and more) +- [Environment Variables](/docs/reference/environment-variables) — Complete reference of all environment variables diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md index e207420f8..50302dae8 100644 --- a/website/docs/reference/faq.md +++ b/website/docs/reference/faq.md @@ -254,7 +254,7 @@ custom_providers: context_length: 32768 ``` -See [Context Length Detection](../user-guide/configuration.md#context-length-detection) for how auto-detection works and all override options. +See [Context Length Detection](../integrations/providers.md#context-length-detection) for how auto-detection works and all override options. 
--- diff --git a/website/docs/user-guide/checkpoints-and-rollback.md b/website/docs/user-guide/checkpoints-and-rollback.md index f81a7d4f8..1c31acdae 100644 --- a/website/docs/user-guide/checkpoints-and-rollback.md +++ b/website/docs/user-guide/checkpoints-and-rollback.md @@ -1,5 +1,6 @@ --- sidebar_position: 8 +sidebar_label: "Checkpoints & Rollback" title: "Checkpoints and /rollback" description: "Filesystem safety nets for destructive operations using shadow git repos and automatic snapshots" --- diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index b0ea0482d..d3c2ca23e 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -71,632 +71,7 @@ delegation: Multiple references in a single value work: `url: "${HOST}:${PORT}"`. If a referenced variable is not set, the placeholder is kept verbatim (`${UNDEFINED_VAR}` stays as-is). Only the `${VAR}` syntax is supported — bare `$VAR` is not expanded. -## Inference Providers - -You need at least one way to connect to an LLM. 
Use `hermes model` to switch providers and models interactively, or configure directly: - -| Provider | Setup | -|----------|-------| -| **Nous Portal** | `hermes model` (OAuth, subscription-based) | -| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) | -| **GitHub Copilot** | `hermes model` (OAuth device code flow, `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, or `gh auth token`) | -| **GitHub Copilot ACP** | `hermes model` (spawns local `copilot --acp --stdio`) | -| **Anthropic** | `hermes model` (Claude Pro/Max via Claude Code auth, Anthropic API key, or manual setup-token) | -| **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` | -| **AI Gateway** | `AI_GATEWAY_API_KEY` in `~/.hermes/.env` (provider: `ai-gateway`) | -| **z.ai / GLM** | `GLM_API_KEY` in `~/.hermes/.env` (provider: `zai`) | -| **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) | -| **MiniMax** | `MINIMAX_API_KEY` in `~/.hermes/.env` (provider: `minimax`) | -| **MiniMax China** | `MINIMAX_CN_API_KEY` in `~/.hermes/.env` (provider: `minimax-cn`) | -| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`, aliases: `dashscope`, `qwen`) | -| **Kilo Code** | `KILOCODE_API_KEY` in `~/.hermes/.env` (provider: `kilocode`) | -| **OpenCode Zen** | `OPENCODE_ZEN_API_KEY` in `~/.hermes/.env` (provider: `opencode-zen`) | -| **OpenCode Go** | `OPENCODE_GO_API_KEY` in `~/.hermes/.env` (provider: `opencode-go`) | -| **DeepSeek** | `DEEPSEEK_API_KEY` in `~/.hermes/.env` (provider: `deepseek`) | -| **Hugging Face** | `HF_TOKEN` in `~/.hermes/.env` (provider: `huggingface`, aliases: `hf`) | -| **Custom Endpoint** | `hermes model` (saved in `config.yaml`) or `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | - -:::tip Model key alias -In the `model:` config section, you can use either `default:` or `model:` as the key name for your model ID. Both `model: { default: my-model }` and `model: { model: my-model }` work identically. 
-::: - -:::info Codex Note -The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Hermes stores the resulting credentials in its own auth store under `~/.hermes/auth.json` and can import existing Codex CLI credentials from `~/.codex/auth.json` when present. No Codex CLI installation is required. -::: - -:::warning -Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use a separate "auxiliary" model — by default Gemini Flash via OpenRouter. An `OPENROUTER_API_KEY` enables these tools automatically. You can also configure which model and provider these tools use — see [Auxiliary Models](#auxiliary-models) below. -::: - -### Anthropic (Native) - -Use Claude models directly through the Anthropic API — no OpenRouter proxy needed. Supports three auth methods: - -```bash -# With an API key (pay-per-token) -export ANTHROPIC_API_KEY=*** -hermes chat --provider anthropic --model claude-sonnet-4-6 - -# Preferred: authenticate through `hermes model` -# Hermes will use Claude Code's credential store directly when available -hermes model - -# Manual override with a setup-token (fallback / legacy) -export ANTHROPIC_TOKEN=*** # setup-token or manual OAuth token -hermes chat --provider anthropic - -# Auto-detect Claude Code credentials (if you already use Claude Code) -hermes chat --provider anthropic # reads Claude Code credential files automatically -``` - -When you choose Anthropic OAuth through `hermes model`, Hermes prefers Claude Code's own credential store over copying the token into `~/.hermes/.env`. That keeps refreshable Claude credentials refreshable. - -Or set it permanently: -```yaml -model: - provider: "anthropic" - default: "claude-sonnet-4-6" -``` - -:::tip Aliases -`--provider claude` and `--provider claude-code` also work as shorthand for `--provider anthropic`. 
-::: - -### GitHub Copilot - -Hermes supports GitHub Copilot as a first-class provider with two modes: - -**`copilot` — Direct Copilot API** (recommended). Uses your GitHub Copilot subscription to access GPT-5.x, Claude, Gemini, and other models through the Copilot API. - -```bash -hermes chat --provider copilot --model gpt-5.4 -``` - -**Authentication options** (checked in this order): - -1. `COPILOT_GITHUB_TOKEN` environment variable -2. `GH_TOKEN` environment variable -3. `GITHUB_TOKEN` environment variable -4. `gh auth token` CLI fallback - -If no token is found, `hermes model` offers an **OAuth device code login** — the same flow used by the Copilot CLI and opencode. - -:::warning Token types -The Copilot API does **not** support classic Personal Access Tokens (`ghp_*`). Supported token types: - -| Type | Prefix | How to get | -|------|--------|------------| -| OAuth token | `gho_` | `hermes model` → GitHub Copilot → Login with GitHub | -| Fine-grained PAT | `github_pat_` | GitHub Settings → Developer settings → Fine-grained tokens (needs **Copilot Requests** permission) | -| GitHub App token | `ghu_` | Via GitHub App installation | - -If your `gh auth token` returns a `ghp_*` token, use `hermes model` to authenticate via OAuth instead. -::: - -**API routing**: GPT-5+ models (except `gpt-5-mini`) automatically use the Responses API. All other models (GPT-4o, Claude, Gemini, etc.) use Chat Completions. Models are auto-detected from the live Copilot catalog. - -**`copilot-acp` — Copilot ACP agent backend**. 
Spawns the local Copilot CLI as a subprocess: - -```bash -hermes chat --provider copilot-acp --model copilot-acp -# Requires the GitHub Copilot CLI in PATH and an existing `copilot login` session -``` - -**Permanent config:** -```yaml -model: - provider: "copilot" - default: "gpt-5.4" -``` - -| Environment variable | Description | -|---------------------|-------------| -| `COPILOT_GITHUB_TOKEN` | GitHub token for Copilot API (first priority) | -| `HERMES_COPILOT_ACP_COMMAND` | Override the Copilot CLI binary path (default: `copilot`) | -| `HERMES_COPILOT_ACP_ARGS` | Override ACP args (default: `--acp --stdio`) | - -### First-Class Chinese AI Providers - -These providers have built-in support with dedicated provider IDs. Set the API key and use `--provider` to select: - -```bash -# z.ai / ZhipuAI GLM -hermes chat --provider zai --model glm-4-plus -# Requires: GLM_API_KEY in ~/.hermes/.env - -# Kimi / Moonshot AI -hermes chat --provider kimi-coding --model moonshot-v1-auto -# Requires: KIMI_API_KEY in ~/.hermes/.env - -# MiniMax (global endpoint) -hermes chat --provider minimax --model MiniMax-M2.7 -# Requires: MINIMAX_API_KEY in ~/.hermes/.env - -# MiniMax (China endpoint) -hermes chat --provider minimax-cn --model MiniMax-M2.7 -# Requires: MINIMAX_CN_API_KEY in ~/.hermes/.env - -# Alibaba Cloud / DashScope (Qwen models) -hermes chat --provider alibaba --model qwen3.5-plus -# Requires: DASHSCOPE_API_KEY in ~/.hermes/.env -``` - -Or set the provider permanently in `config.yaml`: -```yaml -model: - provider: "zai" # or: kimi-coding, minimax, minimax-cn, alibaba - default: "glm-4-plus" -``` - -Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, or `DASHSCOPE_BASE_URL` environment variables. 
- -### Hugging Face Inference Providers - -[Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover. - -```bash -# Use any available model -hermes chat --provider huggingface --model Qwen/Qwen3-235B-A22B-Thinking-2507 -# Requires: HF_TOKEN in ~/.hermes/.env - -# Short alias -hermes chat --provider hf --model deepseek-ai/DeepSeek-V3.2 -``` - -Or set it permanently in `config.yaml`: -```yaml -model: - provider: "huggingface" - default: "Qwen/Qwen3-235B-A22B-Thinking-2507" -``` - -Get your token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) — make sure to enable the "Make calls to Inference Providers" permission. Free tier included ($0.10/month credit, no markup on provider rates). - -You can append routing suffixes to model names: `:fastest` (default), `:cheapest`, or `:provider_name` to force a specific backend. - -The base URL can be overridden with `HF_BASE_URL`. - -## Custom & Self-Hosted LLM Providers - -Hermes Agent works with **any OpenAI-compatible API endpoint**. If a server implements `/v1/chat/completions`, you can point Hermes at it. This means you can use local models, GPU inference servers, multi-provider routers, or any third-party API. 
- -### General Setup - -Three ways to configure a custom endpoint: - -**Interactive setup (recommended):** -```bash -hermes model -# Select "Custom endpoint (self-hosted / VLLM / etc.)" -# Enter: API base URL, API key, Model name -``` - -**Manual config (`config.yaml`):** -```yaml -# In ~/.hermes/config.yaml -model: - default: your-model-name - provider: custom - base_url: http://localhost:8000/v1 - api_key: your-key-or-leave-empty-for-local -``` - -**Environment variables (`.env` file):** -```bash -# Add to ~/.hermes/.env -OPENAI_BASE_URL=http://localhost:8000/v1 -OPENAI_API_KEY=your-key # Any non-empty string for local servers -LLM_MODEL=your-model-name -``` - -All three approaches end up in the same runtime path. `hermes model` persists provider, model, and base URL to `config.yaml` so later sessions keep using that endpoint even if env vars are not set. - -### Switching Models with `/model` - -Once a custom endpoint is configured, you can switch models mid-session: - -``` -/model custom:qwen-2.5 # Switch to a model on your custom endpoint -/model custom # Auto-detect the model from the endpoint -/model openrouter:claude-sonnet-4 # Switch back to a cloud provider -``` - -If you have **named custom providers** configured (see below), use the triple syntax: - -``` -/model custom:local:qwen-2.5 # Use the "local" custom provider with model qwen-2.5 -/model custom:work:llama3 # Use the "work" custom provider with llama3 -``` - -When switching providers, Hermes persists the base URL and provider to config so the change survives restarts. When switching away from a custom endpoint to a built-in provider, the stale base URL is automatically cleared. - -:::tip -`/model custom` (bare, no model name) queries your endpoint's `/models` API and auto-selects the model if exactly one is loaded. Useful for local servers running a single model. -::: - -Everything below follows this same pattern — just change the URL, key, and model name. 
- ---- - -### Ollama — Local Models, Zero Config - -[Ollama](https://ollama.com/) runs open-weight models locally with one command. Best for: quick local experimentation, privacy-sensitive work, offline use. - -```bash -# Install and run a model -ollama pull llama3.1:70b -ollama serve # Starts on port 11434 - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:11434/v1 -OPENAI_API_KEY=ollama # Any non-empty string -LLM_MODEL=llama3.1:70b -``` - -Ollama's OpenAI-compatible endpoint supports chat completions, streaming, and tool calling (for supported models). No GPU required for smaller models — Ollama handles CPU inference automatically. - -:::tip -List available models with `ollama list`. Pull any model from the [Ollama library](https://ollama.com/library) with `ollama pull `. -::: - ---- - -### vLLM — High-Performance GPU Inference - -[vLLM](https://docs.vllm.ai/) is the standard for production LLM serving. Best for: maximum throughput on GPU hardware, serving large models, continuous batching. - -```bash -# Start vLLM server -pip install vllm -vllm serve meta-llama/Llama-3.1-70B-Instruct \ - --port 8000 \ - --tensor-parallel-size 2 # Multi-GPU - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:8000/v1 -OPENAI_API_KEY=dummy -LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct -``` - -vLLM supports tool calling, structured output, and multi-modal models. Use `--enable-auto-tool-choice` and `--tool-call-parser hermes` for Hermes-format tool calling with NousResearch models. - ---- - -### SGLang — Fast Serving with RadixAttention - -[SGLang](https://github.com/sgl-project/sglang) is an alternative to vLLM with RadixAttention for KV cache reuse. Best for: multi-turn conversations (prefix caching), constrained decoding, structured output. 
- -```bash -# Start SGLang server -pip install "sglang[all]" -python -m sglang.launch_server \ - --model meta-llama/Llama-3.1-70B-Instruct \ - --port 8000 \ - --tp 2 - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:8000/v1 -OPENAI_API_KEY=dummy -LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct -``` - ---- - -### llama.cpp / llama-server — CPU & Metal Inference - -[llama.cpp](https://github.com/ggml-org/llama.cpp) runs quantized models on CPU, Apple Silicon (Metal), and consumer GPUs. Best for: running models without a datacenter GPU, Mac users, edge deployment. - -```bash -# Build and start llama-server -cmake -B build && cmake --build build --config Release -./build/bin/llama-server \ - -m models/llama-3.1-8b-instruct-Q4_K_M.gguf \ - --port 8080 --host 0.0.0.0 - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:8080/v1 -OPENAI_API_KEY=dummy -LLM_MODEL=llama-3.1-8b-instruct -``` - -:::tip -Download GGUF models from [Hugging Face](https://huggingface.co/models?library=gguf). Q4_K_M quantization offers the best balance of quality vs. memory usage. -::: - ---- - -### LiteLLM Proxy — Multi-Provider Gateway - -[LiteLLM](https://docs.litellm.ai/) is an OpenAI-compatible proxy that unifies 100+ LLM providers behind a single API. Best for: switching between providers without config changes, load balancing, fallback chains, budget controls. - -```bash -# Install and start -pip install "litellm[proxy]" -litellm --model anthropic/claude-sonnet-4 --port 4000 - -# Or with a config file for multiple models: -litellm --config litellm_config.yaml --port 4000 - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:4000/v1 -OPENAI_API_KEY=sk-your-litellm-key -LLM_MODEL=anthropic/claude-sonnet-4 -``` - -Example `litellm_config.yaml` with fallback: -```yaml -model_list: - - model_name: "best" - litellm_params: - model: anthropic/claude-sonnet-4 - api_key: sk-ant-... - - model_name: "best" - litellm_params: - model: openai/gpt-4o - api_key: sk-... 
-router_settings: - routing_strategy: "latency-based-routing" -``` - ---- - -### ClawRouter — Cost-Optimized Routing - -[ClawRouter](https://github.com/BlockRunAI/ClawRouter) by BlockRunAI is a local routing proxy that auto-selects models based on query complexity. It classifies requests across 14 dimensions and routes to the cheapest model that can handle the task. Payment is via USDC cryptocurrency (no API keys). - -```bash -# Install and start -npx @blockrun/clawrouter # Starts on port 8402 - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:8402/v1 -OPENAI_API_KEY=dummy -LLM_MODEL=blockrun/auto # or: blockrun/eco, blockrun/premium, blockrun/agentic -``` - -Routing profiles: -| Profile | Strategy | Savings | -|---------|----------|---------| -| `blockrun/auto` | Balanced quality/cost | 74-100% | -| `blockrun/eco` | Cheapest possible | 95-100% | -| `blockrun/premium` | Best quality models | 0% | -| `blockrun/free` | Free models only | 100% | -| `blockrun/agentic` | Optimized for tool use | varies | - -:::note -ClawRouter requires a USDC-funded wallet on Base or Solana for payment. All requests route through BlockRun's backend API. Run `npx @blockrun/clawrouter doctor` to check wallet status. -::: - ---- - -### Other Compatible Providers - -Any service with an OpenAI-compatible API works. 
Some popular options: - -| Provider | Base URL | Notes | -|----------|----------|-------| -| [Together AI](https://together.ai) | `https://api.together.xyz/v1` | Cloud-hosted open models | -| [Groq](https://groq.com) | `https://api.groq.com/openai/v1` | Ultra-fast inference | -| [DeepSeek](https://deepseek.com) | `https://api.deepseek.com/v1` | DeepSeek models | -| [Fireworks AI](https://fireworks.ai) | `https://api.fireworks.ai/inference/v1` | Fast open model hosting | -| [Cerebras](https://cerebras.ai) | `https://api.cerebras.ai/v1` | Wafer-scale chip inference | -| [Mistral AI](https://mistral.ai) | `https://api.mistral.ai/v1` | Mistral models | -| [OpenAI](https://openai.com) | `https://api.openai.com/v1` | Direct OpenAI access | -| [Azure OpenAI](https://azure.microsoft.com) | `https://YOUR.openai.azure.com/` | Enterprise OpenAI | -| [LocalAI](https://localai.io) | `http://localhost:8080/v1` | Self-hosted, multi-model | -| [Jan](https://jan.ai) | `http://localhost:1337/v1` | Desktop app with local models | - -```bash -# Example: Together AI -OPENAI_BASE_URL=https://api.together.xyz/v1 -OPENAI_API_KEY=your-together-key -LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct-Turbo -``` - ---- - -### Context Length Detection - -Hermes uses a multi-source resolution chain to detect the correct context window for your model and provider: - -1. **Config override** — `model.context_length` in config.yaml (highest priority) -2. **Custom provider per-model** — `custom_providers[].models..context_length` -3. **Persistent cache** — previously discovered values (survives restarts) -4. **Endpoint `/models`** — queries your server's API (local/custom endpoints) -5. **Anthropic `/v1/models`** — queries Anthropic's API for `max_input_tokens` (API-key users only) -6. **OpenRouter API** — live model metadata from OpenRouter -7. **Nous Portal** — suffix-matches Nous model IDs against OpenRouter metadata -8. 
**[models.dev](https://models.dev)** — community-maintained registry with provider-specific context lengths for 3800+ models across 100+ providers -9. **Fallback defaults** — broad model family patterns (128K default) - -For most setups this works out of the box. The system is provider-aware — the same model can have different context limits depending on who serves it (e.g., `claude-opus-4.6` is 1M on Anthropic direct but 128K on GitHub Copilot). - -To set the context length explicitly, add `context_length` to your model config: - -```yaml -model: - default: "qwen3.5:9b" - base_url: "http://localhost:8080/v1" - context_length: 131072 # tokens -``` - -For custom endpoints, you can also set context length per model: - -```yaml -custom_providers: - - name: "My Local LLM" - base_url: "http://localhost:11434/v1" - models: - qwen3.5:27b: - context_length: 32768 - deepseek-r1:70b: - context_length: 65536 -``` - -`hermes model` will prompt for context length when configuring a custom endpoint. Leave it blank for auto-detection. 
- -:::tip When to set this manually -- You're using Ollama with a custom `num_ctx` that's lower than the model's maximum -- You want to limit context below the model's maximum (e.g., 8k on a 128k model to save VRAM) -- You're running behind a proxy that doesn't expose `/v1/models` -::: - ---- - -### Named Custom Providers - -If you work with multiple custom endpoints (e.g., a local dev server and a remote GPU server), you can define them as named custom providers in `config.yaml`: - -```yaml -custom_providers: - - name: local - base_url: http://localhost:8080/v1 - # api_key omitted — Hermes uses "no-key-required" for keyless local servers - - name: work - base_url: https://gpu-server.internal.corp/v1 - api_key: corp-api-key - api_mode: chat_completions # optional, auto-detected from URL - - name: anthropic-proxy - base_url: https://proxy.example.com/anthropic - api_key: proxy-key - api_mode: anthropic_messages # for Anthropic-compatible proxies -``` - -Switch between them mid-session with the triple syntax: - -``` -/model custom:local:qwen-2.5 # Use the "local" endpoint with qwen-2.5 -/model custom:work:llama3-70b # Use the "work" endpoint with llama3-70b -/model custom:anthropic-proxy:claude-sonnet-4 # Use the proxy -``` - -You can also select named custom providers from the interactive `hermes model` menu. 
- ---- - -### Choosing the Right Setup - -| Use Case | Recommended | -|----------|-------------| -| **Just want it to work** | OpenRouter (default) or Nous Portal | -| **Local models, easy setup** | Ollama | -| **Production GPU serving** | vLLM or SGLang | -| **Mac / no GPU** | Ollama or llama.cpp | -| **Multi-provider routing** | LiteLLM Proxy or OpenRouter | -| **Cost optimization** | ClawRouter or OpenRouter with `sort: "price"` | -| **Maximum privacy** | Ollama, vLLM, or llama.cpp (fully local) | -| **Enterprise / Azure** | Azure OpenAI with custom endpoint | -| **Chinese AI models** | z.ai (GLM), Kimi/Moonshot, or MiniMax (first-class providers) | - -:::tip -You can switch between providers at any time with `hermes model` — no restart required. Your conversation history, memory, and skills carry over regardless of which provider you use. -::: - -## Optional API Keys - -| Feature | Provider | Env Variable | -|---------|----------|--------------| -| Web scraping | [Firecrawl](https://firecrawl.dev/) | `FIRECRAWL_API_KEY`, `FIRECRAWL_API_URL` | -| Browser automation | [Browserbase](https://browserbase.com/) | `BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID` | -| Image generation | [FAL](https://fal.ai/) | `FAL_KEY` | -| Premium TTS voices | [ElevenLabs](https://elevenlabs.io/) | `ELEVENLABS_API_KEY` | -| OpenAI TTS + voice transcription | [OpenAI](https://platform.openai.com/api-keys) | `VOICE_TOOLS_OPENAI_KEY` | -| RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` | -| Cross-session user modeling | [Honcho](https://honcho.dev/) | `HONCHO_API_KEY` | - -### Self-Hosting Firecrawl - -By default, Hermes uses the [Firecrawl cloud API](https://firecrawl.dev/) for web search and scraping. If you prefer to run Firecrawl locally, you can point Hermes at a self-hosted instance instead. 
See Firecrawl's [SELF_HOST.md](https://github.com/firecrawl/firecrawl/blob/main/SELF_HOST.md) for complete setup instructions. - -**What you get:** No API key required, no rate limits, no per-page costs, full data sovereignty. - -**What you lose:** The cloud version uses Firecrawl's proprietary "Fire-engine" for advanced anti-bot bypassing (Cloudflare, CAPTCHAs, IP rotation). Self-hosted uses basic fetch + Playwright, so some protected sites may fail. Search uses DuckDuckGo instead of Google. - -**Setup:** - -1. Clone and start the Firecrawl Docker stack (5 containers: API, Playwright, Redis, RabbitMQ, PostgreSQL — requires ~4-8 GB RAM): - ```bash - git clone https://github.com/firecrawl/firecrawl - cd firecrawl - # In .env, set: USE_DB_AUTHENTICATION=false, HOST=0.0.0.0, PORT=3002 - docker compose up -d - ``` - -2. Point Hermes at your instance (no API key needed): - ```bash - hermes config set FIRECRAWL_API_URL http://localhost:3002 - ``` - -You can also set both `FIRECRAWL_API_KEY` and `FIRECRAWL_API_URL` if your self-hosted instance has authentication enabled. - -## OpenRouter Provider Routing - -When using OpenRouter, you can control how requests are routed across providers. Add a `provider_routing` section to `~/.hermes/config.yaml`: - -```yaml -provider_routing: - sort: "throughput" # "price" (default), "throughput", or "latency" - # only: ["anthropic"] # Only use these providers - # ignore: ["deepinfra"] # Skip these providers - # order: ["anthropic", "google"] # Try providers in this order - # require_parameters: true # Only use providers that support all request params - # data_collection: "deny" # Exclude providers that may store/train on data -``` - -**Shortcuts:** Append `:nitro` to any model name for throughput sorting (e.g., `anthropic/claude-sonnet-4:nitro`), or `:floor` for price sorting. 
- -## Fallback Model - -Configure a backup provider:model that Hermes switches to automatically when your primary model fails (rate limits, server errors, auth failures): - -```yaml -fallback_model: - provider: openrouter # required - model: anthropic/claude-sonnet-4 # required - # base_url: http://localhost:8000/v1 # optional, for custom endpoints - # api_key_env: MY_CUSTOM_KEY # optional, env var name for custom endpoint API key -``` - -When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session. - -Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `custom`. - -:::tip -Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers). -::: - -## Smart Model Routing - -Optional cheap-vs-strong routing lets Hermes keep your main model for complex work while sending very short/simple turns to a cheaper model. - -```yaml -smart_model_routing: - enabled: true - max_simple_chars: 160 - max_simple_words: 28 - cheap_model: - provider: openrouter - model: google/gemini-2.5-flash - # base_url: http://localhost:8000/v1 # optional custom endpoint - # api_key_env: MY_CUSTOM_KEY # optional env var name for that endpoint's API key -``` - -How it works: -- If a turn is short, single-line, and does not look code/tool/debug heavy, Hermes may route it to `cheap_model` -- If the turn looks complex, Hermes stays on your primary model/provider -- If the cheap route cannot be resolved cleanly, Hermes falls back to the primary model automatically - -This is intentionally conservative. 
It is meant for quick, low-stakes turns like: -- short factual questions -- quick rewrites -- lightweight summaries - -It will avoid routing prompts that look like: -- coding/debugging work -- tool-heavy requests -- long or multi-line analysis asks - -Use this when you want lower latency or cost without fully changing your default model. +For AI provider setup (OpenRouter, Anthropic, Copilot, custom endpoints, self-hosted LLMs, fallback models, etc.), see [AI Providers](/docs/integrations/providers). ## Terminal Backend Configuration @@ -1192,7 +567,7 @@ Each auxiliary task has a configurable `timeout` (in seconds). Defaults: vision ::: :::info -Context compression has its own top-level `compression:` block with `summary_provider`, `summary_model`, and `summary_base_url` — see [Context Compression](#context-compression) above. The fallback model uses a `fallback_model:` block — see [Fallback Model](#fallback-model) above. All three follow the same provider/model/base_url pattern. +Context compression has its own top-level `compression:` block with `summary_provider`, `summary_model`, and `summary_base_url` — see [Context Compression](#context-compression) above. The fallback model uses a `fallback_model:` block — see [Fallback Model](/docs/integrations/providers#fallback-model). All three follow the same provider/model/base_url pattern. ::: ### Changing the Vision Model @@ -1725,7 +1100,7 @@ Setting `approvals.mode: off` disables all safety checks for terminal commands. ## Checkpoints -Automatic filesystem snapshots before destructive file operations. See the [Checkpoints feature page](/docs/user-guide/features/checkpoints) for details. +Automatic filesystem snapshots before destructive file operations. See [Checkpoints & Rollback](/docs/user-guide/checkpoints-and-rollback) for details.
```yaml checkpoints: diff --git a/website/docs/user-guide/features/checkpoints.md b/website/docs/user-guide/features/checkpoints.md deleted file mode 100644 index aed879fc2..000000000 --- a/website/docs/user-guide/features/checkpoints.md +++ /dev/null @@ -1,30 +0,0 @@ -# Filesystem Checkpoints - -Hermes automatically snapshots your working directory before making file changes, giving you a safety net to roll back if something goes wrong. Checkpoints are **enabled by default**. - -## Quick Reference - -| Command | Description | -|---------|-------------| -| `/rollback` | List all checkpoints with change stats | -| `/rollback ` | Restore to checkpoint N (also undoes last chat turn) | -| `/rollback diff ` | Preview diff between checkpoint N and current state | -| `/rollback ` | Restore a single file from checkpoint N | - -## What Triggers Checkpoints - -- **File tools** — `write_file` and `patch` -- **Destructive terminal commands** — `rm`, `mv`, `sed -i`, output redirects (`>`), `git reset`/`clean` - -## Configuration - -```yaml -# ~/.hermes/config.yaml -checkpoints: - enabled: true # default: true - max_snapshots: 50 # max checkpoints per directory -``` - -## Learn More - -For the full guide — how shadow repos work, diff previews, file-level restore, conversation undo, safety guards, and best practices — see **[Checkpoints and /rollback](../checkpoints-and-rollback.md)**. 
diff --git a/website/docs/user-guide/features/context-references.md b/website/docs/user-guide/features/context-references.md index 2b58f80ca..18624150e 100644 --- a/website/docs/user-guide/features/context-references.md +++ b/website/docs/user-guide/features/context-references.md @@ -1,5 +1,6 @@ --- sidebar_position: 9 +sidebar_label: "Context References" title: "Context References" description: "Inline @-syntax for attaching files, folders, git diffs, and URLs directly into your messages" --- diff --git a/website/docs/user-guide/features/overview.md b/website/docs/user-guide/features/overview.md new file mode 100644 index 000000000..984758f66 --- /dev/null +++ b/website/docs/user-guide/features/overview.md @@ -0,0 +1,40 @@ +--- +title: "Features Overview" +sidebar_label: "Overview" +sidebar_position: 1 +--- + +# Features Overview + +Hermes Agent includes a rich set of capabilities that extend far beyond basic chat. From persistent memory and file-aware context to browser automation and voice conversations, these features work together to make Hermes a powerful autonomous assistant. + +## Core + +- **[Tools & Toolsets](tools.md)** — Tools are functions that extend the agent's capabilities. They're organized into logical toolsets that can be enabled or disabled per platform, covering web search, terminal execution, file editing, memory, delegation, and more. +- **[Skills System](skills.md)** — On-demand knowledge documents the agent can load when needed. Skills follow a progressive disclosure pattern to minimize token usage and are compatible with the [agentskills.io](https://agentskills.io/specification) open standard. +- **[Persistent Memory](memory.md)** — Bounded, curated memory that persists across sessions. Hermes remembers your preferences, projects, environment, and things it has learned via `MEMORY.md` and `USER.md`. 
+- **[Context Files](context-files.md)** — Hermes automatically discovers and loads project context files (`.hermes.md`, `AGENTS.md`, `CLAUDE.md`, `SOUL.md`, `.cursorrules`) that shape how it behaves in your project. +- **[Context References](context-references.md)** — Type `@` followed by a reference to inject files, folders, git diffs, and URLs directly into your messages. Hermes expands the reference inline and appends the content automatically. +- **[Checkpoints](../checkpoints-and-rollback.md)** — Hermes automatically snapshots your working directory before making file changes, giving you a safety net to roll back with `/rollback` if something goes wrong. + +## Automation + +- **[Scheduled Tasks (Cron)](cron.md)** — Schedule tasks to run automatically with natural language or cron expressions. Jobs can attach skills, deliver results to any platform, and support pause/resume/edit operations. +- **[Subagent Delegation](delegation.md)** — The `delegate_task` tool spawns child agent instances with isolated context, restricted toolsets, and their own terminal sessions. Run up to 3 concurrent subagents for parallel workstreams. +- **[Code Execution](code-execution.md)** — The `execute_code` tool lets the agent write Python scripts that call Hermes tools programmatically, collapsing multi-step workflows into a single LLM turn via sandboxed RPC execution. +- **[Event Hooks](hooks.md)** — Run custom code at key lifecycle points. Gateway hooks handle logging, alerts, and webhooks; plugin hooks handle tool interception, metrics, and guardrails. +- **[Batch Processing](batch-processing.md)** — Run the Hermes agent across hundreds or thousands of prompts in parallel, generating structured ShareGPT-format trajectory data for training data generation or evaluation. + +## Media & Web + +- **[Voice Mode](voice-mode.md)** — Full voice interaction across CLI and messaging platforms. 
Talk to the agent using your microphone, hear spoken replies, and have live voice conversations in Discord voice channels. +- **[Browser Automation](browser.md)** — Full browser automation with multiple backends: Browserbase cloud, Browser Use cloud, local Chrome via CDP, or local Chromium. Navigate websites, fill forms, and extract information. +- **[Vision & Image Paste](vision.md)** — Multimodal vision support. Paste images from your clipboard into the CLI and ask the agent to analyze, describe, or work with them using any vision-capable model. +- **[Image Generation](image-generation.md)** — Generate images from text prompts using FAL.ai's FLUX 2 Pro model with automatic 2x upscaling via the Clarity Upscaler. +- **[Voice & TTS](tts.md)** — Text-to-speech output and voice message transcription across all messaging platforms, with four provider options: Edge TTS (free), ElevenLabs, OpenAI TTS, and NeuTTS. + +## Customization + +- **[Personality & SOUL.md](personality.md)** — Fully customizable agent personality. `SOUL.md` is the primary identity file — the first thing in the system prompt — and you can swap in built-in or custom `/personality` presets per session. +- **[Skins & Themes](skins.md)** — Customize the CLI's visual presentation: banner colors, spinner faces and verbs, response-box labels, branding text, and the tool activity prefix. +- **[Plugins](plugins.md)** — Add custom tools, hooks, and integrations without modifying core code. Drop a directory into `~/.hermes/plugins/` with a `plugin.yaml` and Python code. 
diff --git a/website/docs/user-guide/features/plugins.md b/website/docs/user-guide/features/plugins.md index 28fc8041e..e13f7aef4 100644 --- a/website/docs/user-guide/features/plugins.md +++ b/website/docs/user-guide/features/plugins.md @@ -1,5 +1,8 @@ --- -sidebar_position: 20 +sidebar_position: 11 +sidebar_label: "Plugins" +title: "Plugins" +description: "Extend Hermes with custom tools, hooks, and integrations via the plugin system" --- # Plugins diff --git a/website/docs/user-guide/git-worktrees.md b/website/docs/user-guide/git-worktrees.md index 708170622..33d29506e 100644 --- a/website/docs/user-guide/git-worktrees.md +++ b/website/docs/user-guide/git-worktrees.md @@ -1,5 +1,6 @@ --- -sidebar_position: 9 +sidebar_position: 3 +sidebar_label: "Git Worktrees" title: "Git Worktrees" description: "Run multiple Hermes agents safely on the same repository using git worktrees and isolated checkouts" --- diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md index 9073e45ff..fa662305b 100644 --- a/website/docs/user-guide/messaging/index.md +++ b/website/docs/user-guide/messaging/index.md @@ -10,6 +10,26 @@ Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, SMS, Email, Ho For the full voice feature set — including CLI microphone mode, spoken replies in messaging, and Discord voice-channel conversations — see [Voice Mode](/docs/user-guide/features/voice-mode) and [Use Voice Mode with Hermes](/docs/guides/use-voice-mode-with-hermes). 
+## Platform Comparison + +| Platform | Voice | Images | Files | Threads | Reactions | Typing | Streaming | +|----------|:-----:|:------:|:-----:|:-------:|:---------:|:------:|:---------:| +| Telegram | ✅ | ✅ | ✅ | ✅ | — | ✅ | ✅ | +| Discord | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Slack | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| WhatsApp | — | ✅ | ✅ | — | — | ✅ | ✅ | +| Signal | — | ✅ | ✅ | — | — | ✅ | ✅ | +| SMS | — | — | — | — | — | — | — | +| Email | — | ✅ | ✅ | ✅ | — | — | — | +| Home Assistant | — | — | — | — | — | — | — | +| Mattermost | ✅ | ✅ | ✅ | ✅ | — | ✅ | ✅ | +| Matrix | ✅ | ✅ | ✅ | ✅ | — | ✅ | ✅ | +| DingTalk | — | — | — | — | — | ✅ | ✅ | +| Feishu/Lark | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| WeCom | ✅ | ✅ | ✅ | — | — | ✅ | ✅ | + +**Voice** = TTS audio replies and/or voice message transcription. **Images** = send/receive images. **Files** = send/receive file attachments. **Threads** = threaded conversations. **Reactions** = emoji reactions on messages. **Typing** = typing indicator while processing. **Streaming** = progressive message updates via editing. 
+ ## Architecture ```mermaid diff --git a/website/docs/user-guide/messaging/sms.md b/website/docs/user-guide/messaging/sms.md index 0aa835ffe..84a3b8fa2 100644 --- a/website/docs/user-guide/messaging/sms.md +++ b/website/docs/user-guide/messaging/sms.md @@ -1,5 +1,6 @@ --- sidebar_position: 8 +sidebar_label: "SMS (Twilio)" title: "SMS (Twilio)" description: "Set up Hermes Agent as an SMS chatbot via Twilio" --- diff --git a/website/docs/user-guide/skills/godmode.md b/website/docs/user-guide/skills/godmode.md index 419478ba1..c95dc54c8 100644 --- a/website/docs/user-guide/skills/godmode.md +++ b/website/docs/user-guide/skills/godmode.md @@ -1,4 +1,6 @@ --- +sidebar_position: 1 +sidebar_label: "G0DM0D3 (Godmode)" title: "G0DM0D3 — Godmode Jailbreaking" description: "Automated LLM jailbreaking using G0DM0D3 techniques — system prompt templates, input obfuscation, and multi-model racing" --- diff --git a/website/sidebars.ts b/website/sidebars.ts index 4c7bfc2e2..fa76f4ce3 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -16,61 +16,37 @@ const sidebars: SidebarsConfig = { }, { type: 'category', - label: 'Guides & Tutorials', - collapsed: true, - items: [ - 'guides/tips', - 'guides/daily-briefing-bot', - 'guides/team-telegram-assistant', - 'guides/python-library', - 'guides/use-mcp-with-hermes', - 'guides/use-soul-with-hermes', - 'guides/use-voice-mode-with-hermes', - 'guides/migrate-from-openclaw', - ], - }, - { - type: 'category', - label: 'User Guide', + label: 'Using Hermes', collapsed: true, items: [ 'user-guide/cli', 'user-guide/configuration', 'user-guide/sessions', - 'user-guide/security', - 'user-guide/docker', 'user-guide/profiles', + 'user-guide/git-worktrees', + 'user-guide/docker', + 'user-guide/security', + 'user-guide/checkpoints-and-rollback', + ], + }, + { + type: 'category', + label: 'Features', + collapsed: true, + items: [ + 'user-guide/features/overview', { type: 'category', - label: 'Messaging Gateway', - items: [ - 
'user-guide/messaging/index', - 'user-guide/messaging/telegram', - 'user-guide/messaging/discord', - 'user-guide/messaging/slack', - 'user-guide/messaging/whatsapp', - 'user-guide/messaging/signal', - 'user-guide/messaging/email', - 'user-guide/messaging/homeassistant', - 'user-guide/messaging/mattermost', - 'user-guide/messaging/matrix', - 'user-guide/messaging/dingtalk', - 'user-guide/messaging/feishu', - 'user-guide/messaging/wecom', - 'user-guide/messaging/open-webui', - 'user-guide/messaging/webhooks', - ], - }, - { - type: 'category', - label: 'Core Features', + label: 'Core', items: [ 'user-guide/features/tools', 'user-guide/features/skills', 'user-guide/features/memory', 'user-guide/features/context-files', + 'user-guide/features/context-references', 'user-guide/features/personality', 'user-guide/features/skins', + 'user-guide/features/plugins', ], }, { @@ -81,11 +57,12 @@ const sidebars: SidebarsConfig = { 'user-guide/features/delegation', 'user-guide/features/code-execution', 'user-guide/features/hooks', + 'user-guide/features/batch-processing', ], }, { type: 'category', - label: 'Web & Media', + label: 'Media & Web', items: [ 'user-guide/features/voice-mode', 'user-guide/features/browser', @@ -94,23 +71,10 @@ const sidebars: SidebarsConfig = { 'user-guide/features/tts', ], }, - { - type: 'category', - label: 'Integrations', - items: [ - 'user-guide/features/api-server', - 'user-guide/features/acp', - 'user-guide/features/mcp', - 'user-guide/features/honcho', - 'user-guide/features/provider-routing', - 'user-guide/features/fallback-providers', - ], - }, { type: 'category', label: 'Advanced', items: [ - 'user-guide/features/batch-processing', 'user-guide/features/rl-training', ], }, @@ -125,25 +89,98 @@ const sidebars: SidebarsConfig = { }, { type: 'category', - label: 'Developer Guide', + label: 'Messaging Platforms', + collapsed: true, + items: [ + 'user-guide/messaging/index', + 'user-guide/messaging/telegram', + 'user-guide/messaging/discord', + 
'user-guide/messaging/slack', + 'user-guide/messaging/whatsapp', + 'user-guide/messaging/signal', + 'user-guide/messaging/email', + 'user-guide/messaging/sms', + 'user-guide/messaging/homeassistant', + 'user-guide/messaging/mattermost', + 'user-guide/messaging/matrix', + 'user-guide/messaging/dingtalk', + 'user-guide/messaging/feishu', + 'user-guide/messaging/wecom', + 'user-guide/messaging/open-webui', + 'user-guide/messaging/webhooks', + ], + }, + { + type: 'category', + label: 'Integrations', + collapsed: true, + items: [ + 'integrations/index', + 'integrations/providers', + 'user-guide/features/mcp', + 'user-guide/features/acp', + 'user-guide/features/api-server', + 'user-guide/features/honcho', + 'user-guide/features/provider-routing', + 'user-guide/features/fallback-providers', + ], + }, + { + type: 'category', + label: 'Guides & Tutorials', + collapsed: true, + items: [ + 'guides/tips', + 'guides/build-a-hermes-plugin', + 'guides/daily-briefing-bot', + 'guides/team-telegram-assistant', + 'guides/python-library', + 'guides/use-mcp-with-hermes', + 'guides/use-soul-with-hermes', + 'guides/use-voice-mode-with-hermes', + 'guides/migrate-from-openclaw', + ], + }, + { + type: 'category', + label: 'Developer Guide', + collapsed: true, items: [ - 'developer-guide/architecture', - 'developer-guide/agent-loop', - 'developer-guide/provider-runtime', - 'developer-guide/adding-providers', - 'developer-guide/prompt-assembly', - 'developer-guide/context-compression-and-caching', - 'developer-guide/gateway-internals', - 'developer-guide/session-storage', - 'developer-guide/tools-runtime', - 'developer-guide/acp-internals', - 'developer-guide/trajectory-format', - 'developer-guide/cron-internals', - 'developer-guide/environments', - 'developer-guide/adding-tools', - 'developer-guide/creating-skills', - 'developer-guide/extending-the-cli', 'developer-guide/contributing', + { + type: 'category', + label: 'Architecture', + items: [ + 'developer-guide/architecture', + 
'developer-guide/agent-loop', + 'developer-guide/prompt-assembly', + 'developer-guide/context-compression-and-caching', + 'developer-guide/gateway-internals', + 'developer-guide/session-storage', + 'developer-guide/provider-runtime', + ], + }, + { + type: 'category', + label: 'Extending', + items: [ + 'developer-guide/adding-tools', + 'developer-guide/adding-providers', + 'developer-guide/creating-skills', + 'developer-guide/extending-the-cli', + ], + }, + { + type: 'category', + label: 'Internals', + items: [ + 'developer-guide/tools-runtime', + 'developer-guide/acp-internals', + 'developer-guide/cron-internals', + 'developer-guide/environments', + 'developer-guide/trajectory-format', + ], + }, ], }, { @@ -152,13 +189,13 @@ const sidebars: SidebarsConfig = { items: [ 'reference/cli-commands', 'reference/slash-commands', + 'reference/profile-commands', + 'reference/environment-variables', 'reference/tools-reference', 'reference/toolsets-reference', 'reference/mcp-config-reference', 'reference/skills-catalog', 'reference/optional-skills-catalog', - 'reference/profile-commands', - 'reference/environment-variables', 'reference/faq', ], }, -- 2.43.0 From f776191650c9867c8d8cd370d19b5c4d0a100185 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 18:49:14 -0700 Subject: [PATCH 047/385] fix: persist compressed context to gateway session after mid-run compression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When context compression fires during run_conversation() in the gateway, the compressed messages were silently lost on the next turn. Two bugs: 1. Agent-side: _flush_messages_to_session_db() calculated flush_from = max(len(conversation_history), _last_flushed_db_idx). After compression, _last_flushed_db_idx was correctly reset to 0, but conversation_history still had its original pre-compression length (e.g. 200). 
Since compressed messages are shorter (~30), messages[200:] was empty — nothing written to the new session's SQLite. Fix: Set conversation_history = None after each _compress_context() call so start_idx = 0 and all compressed messages are flushed. 2. Gateway-side: history_offset was always len(agent_history) — the original pre-compression length. After compression shortened the message list, agent_messages[200:] was empty, causing the gateway to fall back to writing only a user/assistant pair, losing the compressed summary and tail context. Fix: Detect session splits (agent.session_id != original) and set history_offset = 0 so all compressed messages are written to JSONL. --- gateway/run.py | 11 +- run_agent.py | 10 ++ tests/test_compression_persistence.py | 202 ++++++++++++++++++++++++++ 3 files changed, 222 insertions(+), 1 deletion(-) create mode 100644 tests/test_compression_persistence.py diff --git a/gateway/run.py b/gateway/run.py index 0b5e3a1b4..c094fddd6 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -5739,7 +5739,9 @@ class GatewayRunner: # If so, update the session store entry so the NEXT message loads # the compressed transcript, not the stale pre-compression one. agent = agent_holder[0] + _session_was_split = False if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id: + _session_was_split = True logger.info( "Session split detected: %s → %s (compression)", session_id, agent.session_id, @@ -5751,6 +5753,13 @@ class GatewayRunner: effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id + # When compression created a new session, the messages list was + # shortened. Using the original history offset would produce an + # empty new_messages slice, causing the gateway to write only a + # user/assistant pair — losing the compressed summary and tail. + # Reset to 0 so the gateway writes ALL compressed messages. 
+ _effective_history_offset = 0 if _session_was_split else len(agent_history) + # Auto-generate session title after first exchange (non-blocking) if final_response and self._session_db: try: @@ -5772,7 +5781,7 @@ class GatewayRunner: "messages": result_holder[0].get("messages", []) if result_holder[0] else [], "api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0, "tools": tools_holder[0] or [], - "history_offset": len(agent_history), + "history_offset": _effective_history_offset, "last_prompt_tokens": _last_prompt_toks, "input_tokens": _input_toks, "output_tokens": _output_toks, diff --git a/run_agent.py b/run_agent.py index 794c9f67a..fad7fca5a 100644 --- a/run_agent.py +++ b/run_agent.py @@ -6250,6 +6250,12 @@ class AIAgent: ) if len(messages) >= _orig_len: break # Cannot compress further + # Compression created a new session — clear the history + # reference so _flush_messages_to_session_db writes ALL + # compressed messages to the new session's SQLite, not + # skipping them because conversation_history is still the + # pre-compression length. + conversation_history = None # Re-estimate after compression _preflight_tokens = estimate_request_tokens_rough( messages, @@ -7765,6 +7771,10 @@ class AIAgent: approx_tokens=self.context_compressor.last_prompt_tokens, task_id=effective_task_id, ) + # Compression created a new session — clear history so + # _flush_messages_to_session_db writes compressed messages + # to the new session (see preflight compression comment). + conversation_history = None # Save session log incrementally (so progress is visible even if interrupted) self._session_messages = messages diff --git a/tests/test_compression_persistence.py b/tests/test_compression_persistence.py new file mode 100644 index 000000000..272b39bfe --- /dev/null +++ b/tests/test_compression_persistence.py @@ -0,0 +1,202 @@ +"""Tests for context compression persistence in the gateway. 
+ +Verifies that when context compression fires during run_conversation(), +the compressed messages are properly persisted to both SQLite (via the +agent) and JSONL (via the gateway). + +Bug scenario (pre-fix): + 1. Gateway loads 200-message history, passes to agent + 2. Agent's run_conversation() compresses to ~30 messages mid-run + 3. _compress_context() resets _last_flushed_db_idx = 0 + 4. On exit, _flush_messages_to_session_db() calculates: + flush_from = max(len(conversation_history=200), _last_flushed_db_idx=0) = 200 + 5. messages[200:] is empty (only ~30 messages after compression) + 6. Nothing written to new session's SQLite — compressed context lost + 7. Gateway's history_offset was still 200, producing empty new_messages + 8. Fallback wrote only user/assistant pair — summary lost +""" + +import os +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# Part 1: Agent-side — _flush_messages_to_session_db after compression +# --------------------------------------------------------------------------- + +class TestFlushAfterCompression: + """Verify that compressed messages are flushed to the new session's SQLite + even when conversation_history (from the original session) is longer than + the compressed messages list.""" + + def _make_agent(self, session_db): + with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}): + from run_agent import AIAgent + agent = AIAgent( + model="test/model", + quiet_mode=True, + session_db=session_db, + session_id="original-session", + skip_context_files=True, + skip_memory=True, + ) + return agent + + def test_flush_after_compression_with_long_history(self): + """The actual bug: conversation_history longer than compressed messages. + + Before the fix, flush_from = max(len(conversation_history), 0) = 200, + but messages only has ~30 entries, so messages[200:] is empty. 
+ After the fix, conversation_history is cleared to None after compression, + so flush_from = max(0, 0) = 0, and ALL compressed messages are written. + """ + from hermes_state import SessionDB + + with tempfile.TemporaryDirectory() as tmpdir: + db_path = Path(tmpdir) / "test.db" + db = SessionDB(db_path=db_path) + + agent = self._make_agent(db) + + # Simulate the original long history (200 messages) + original_history = [ + {"role": "user" if i % 2 == 0 else "assistant", + "content": f"message {i}"} + for i in range(200) + ] + + # First, flush original messages to the original session + agent._flush_messages_to_session_db(original_history, []) + original_rows = db.get_messages("original-session") + assert len(original_rows) == 200 + + # Now simulate compression: new session, reset idx, shorter messages + agent.session_id = "compressed-session" + db.create_session(session_id="compressed-session", source="test") + agent._last_flushed_db_idx = 0 + + # The compressed messages (summary + tail + new turn) + compressed_messages = [ + {"role": "user", "content": "[CONTEXT COMPACTION] Summary of work..."}, + {"role": "user", "content": "What should we do next?"}, + {"role": "assistant", "content": "Let me check..."}, + {"role": "user", "content": "new question"}, + {"role": "assistant", "content": "new answer"}, + ] + + # THE BUG: passing the original history as conversation_history + # causes flush_from = max(200, 0) = 200, skipping everything. + # After the fix, conversation_history should be None. + agent._flush_messages_to_session_db(compressed_messages, None) + + new_rows = db.get_messages("compressed-session") + assert len(new_rows) == 5, ( + f"Expected 5 compressed messages in new session, got {len(new_rows)}. " + f"Compression persistence bug: messages not written to SQLite." 
+ ) + + def test_flush_with_stale_history_loses_messages(self): + """Demonstrates the bug condition: stale conversation_history causes data loss.""" + from hermes_state import SessionDB + + with tempfile.TemporaryDirectory() as tmpdir: + db_path = Path(tmpdir) / "test.db" + db = SessionDB(db_path=db_path) + + agent = self._make_agent(db) + + # Simulate compression reset + agent.session_id = "new-session" + db.create_session(session_id="new-session", source="test") + agent._last_flushed_db_idx = 0 + + compressed = [ + {"role": "user", "content": "summary"}, + {"role": "assistant", "content": "continuing..."}, + ] + + # Bug: passing a conversation_history longer than compressed messages + stale_history = [{"role": "user", "content": f"msg{i}"} for i in range(100)] + agent._flush_messages_to_session_db(compressed, stale_history) + + rows = db.get_messages("new-session") + # With the stale history, flush_from = max(100, 0) = 100 + # But compressed only has 2 entries → messages[100:] = empty + assert len(rows) == 0, ( + "Expected 0 messages with stale conversation_history " + "(this test verifies the bug condition exists)" + ) + + +# --------------------------------------------------------------------------- +# Part 2: Gateway-side — history_offset after session split +# --------------------------------------------------------------------------- + +class TestGatewayHistoryOffsetAfterSplit: + """Verify that when the agent creates a new session during compression, + the gateway uses history_offset=0 so all compressed messages are written + to the JSONL transcript.""" + + def test_history_offset_zero_on_session_split(self): + """When agent.session_id differs from the original, history_offset must be 0.""" + # This tests the logic in gateway/run.py run_sync(): + # _session_was_split = agent.session_id != session_id + # _effective_history_offset = 0 if _session_was_split else len(agent_history) + + original_session_id = "session-abc" + agent_session_id = 
"session-compressed-xyz" # Different = compression happened + agent_history_len = 200 + + # Simulate the gateway's offset calculation (post-fix) + _session_was_split = (agent_session_id != original_session_id) + _effective_history_offset = 0 if _session_was_split else agent_history_len + + assert _session_was_split is True + assert _effective_history_offset == 0 + + def test_history_offset_preserved_without_split(self): + """When no compression happened, history_offset is the original length.""" + session_id = "session-abc" + agent_session_id = "session-abc" # Same = no compression + agent_history_len = 200 + + _session_was_split = (agent_session_id != session_id) + _effective_history_offset = 0 if _session_was_split else agent_history_len + + assert _session_was_split is False + assert _effective_history_offset == 200 + + def test_new_messages_extraction_after_split(self): + """After compression with offset=0, new_messages should be ALL agent messages.""" + # Simulates the gateway's new_messages calculation + agent_messages = [ + {"role": "user", "content": "[CONTEXT COMPACTION] Summary..."}, + {"role": "user", "content": "recent question"}, + {"role": "assistant", "content": "recent answer"}, + {"role": "user", "content": "new question"}, + {"role": "assistant", "content": "new answer"}, + ] + history_offset = 0 # After fix: 0 on session split + + new_messages = agent_messages[history_offset:] if len(agent_messages) > history_offset else [] + assert len(new_messages) == 5, ( + f"Expected all 5 messages with offset=0, got {len(new_messages)}" + ) + + def test_new_messages_empty_with_stale_offset(self): + """Demonstrates the bug: stale offset produces empty new_messages.""" + agent_messages = [ + {"role": "user", "content": "summary"}, + {"role": "assistant", "content": "answer"}, + ] + # Bug: offset is the pre-compression history length + history_offset = 200 + + new_messages = agent_messages[history_offset:] if len(agent_messages) > history_offset else [] + 
assert len(new_messages) == 0, ( + "Expected 0 messages with stale offset=200 (demonstrates the bug)" + ) -- 2.43.0 From 1e59d4813c620f1f53f4380bceba8cdb0c29e1e1 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 18:50:27 -0700 Subject: [PATCH 048/385] feat(api_server): stream tool progress to Open WebUI (#4092) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the existing tool_progress_callback through the API server's streaming handler so Open WebUI users see what tool is running. Uses the existing 3-arg callback signature (name, preview, args) that fires at tool start — no changes to run_agent.py needed. Progress appears as inline markdown in the SSE content stream. Inspired by PR #4032 by sroecker, reimplemented to avoid breaking the callback signature used by CLI and gateway consumers. --- gateway/platforms/api_server.py | 14 ++++++ tests/gateway/test_api_server.py | 75 ++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 19fa5f60d..a27408f4c 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -380,6 +380,7 @@ class APIServerAdapter(BasePlatformAdapter): ephemeral_system_prompt: Optional[str] = None, session_id: Optional[str] = None, stream_delta_callback=None, + tool_progress_callback=None, ) -> Any: """ Create an AIAgent instance using the gateway's runtime config. 
@@ -412,6 +413,7 @@ class APIServerAdapter(BasePlatformAdapter): session_id=session_id, platform="api_server", stream_delta_callback=stream_delta_callback, + tool_progress_callback=tool_progress_callback, ) return agent @@ -514,6 +516,15 @@ class APIServerAdapter(BasePlatformAdapter): if delta is not None: _stream_q.put(delta) + def _on_tool_progress(name, preview, args): + """Inject tool progress into the SSE stream for Open WebUI.""" + if name.startswith("_"): + return # Skip internal events (_thinking) + from agent.display import get_tool_emoji + emoji = get_tool_emoji(name) + label = preview or name + _stream_q.put(f"\n`{emoji} {label}`\n") + # Start agent in background. agent_ref is a mutable container # so the SSE writer can interrupt the agent on client disconnect. agent_ref = [None] @@ -523,6 +534,7 @@ class APIServerAdapter(BasePlatformAdapter): ephemeral_system_prompt=system_prompt, session_id=session_id, stream_delta_callback=_on_delta, + tool_progress_callback=_on_tool_progress, agent_ref=agent_ref, )) @@ -1194,6 +1206,7 @@ class APIServerAdapter(BasePlatformAdapter): ephemeral_system_prompt: Optional[str] = None, session_id: Optional[str] = None, stream_delta_callback=None, + tool_progress_callback=None, agent_ref: Optional[list] = None, ) -> tuple: """ @@ -1214,6 +1227,7 @@ class APIServerAdapter(BasePlatformAdapter): ephemeral_system_prompt=ephemeral_system_prompt, session_id=session_id, stream_delta_callback=stream_delta_callback, + tool_progress_callback=tool_progress_callback, ) if agent_ref is not None: agent_ref[0] = agent diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index 772dd8b1c..b48ac1af7 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -427,6 +427,81 @@ class TestChatCompletionsEndpoint: assert "Thinking" in body assert " about it..." 
in body + @pytest.mark.asyncio + async def test_stream_includes_tool_progress(self, adapter): + """tool_progress_callback fires → progress appears in the SSE stream.""" + import asyncio + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + async def _mock_run_agent(**kwargs): + cb = kwargs.get("stream_delta_callback") + tp_cb = kwargs.get("tool_progress_callback") + # Simulate tool progress before streaming content + if tp_cb: + tp_cb("terminal", "ls -la", {"command": "ls -la"}) + if cb: + await asyncio.sleep(0.05) + cb("Here are the files.") + return ( + {"final_response": "Here are the files.", "messages": [], "api_calls": 1}, + {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, + ) + + with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent): + resp = await cli.post( + "/v1/chat/completions", + json={ + "model": "test", + "messages": [{"role": "user", "content": "list files"}], + "stream": True, + }, + ) + assert resp.status == 200 + body = await resp.text() + assert "[DONE]" in body + # Tool progress message must appear in the stream + assert "ls -la" in body + # Final content must also be present + assert "Here are the files." 
in body + + @pytest.mark.asyncio + async def test_stream_tool_progress_skips_internal_events(self, adapter): + """Internal events (name starting with _) are not streamed.""" + import asyncio + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + async def _mock_run_agent(**kwargs): + cb = kwargs.get("stream_delta_callback") + tp_cb = kwargs.get("tool_progress_callback") + if tp_cb: + tp_cb("_thinking", "some internal state", {}) + tp_cb("web_search", "Python docs", {"query": "Python docs"}) + if cb: + await asyncio.sleep(0.05) + cb("Found it.") + return ( + {"final_response": "Found it.", "messages": [], "api_calls": 1}, + {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, + ) + + with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent): + resp = await cli.post( + "/v1/chat/completions", + json={ + "model": "test", + "messages": [{"role": "user", "content": "search"}], + "stream": True, + }, + ) + assert resp.status == 200 + body = await resp.text() + # Internal _thinking event should NOT appear + assert "some internal state" not in body + # Real tool progress should appear + assert "Python docs" in body + @pytest.mark.asyncio async def test_no_user_message_returns_400(self, adapter): app = _create_app(adapter) -- 2.43.0 From cdb64a869aa99f4713edbe02bbfbc6de1d1f2d9b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 18:53:24 -0700 Subject: [PATCH 049/385] fix(security): reject private and loopback IPs in Telegram DoH fallback (#4129) Co-authored-by: Maymun <139681654+maymuneth@users.noreply.github.com> --- gateway/platforms/telegram_network.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gateway/platforms/telegram_network.py b/gateway/platforms/telegram_network.py index 93f1f0fb5..9f6d8bb46 100644 --- a/gateway/platforms/telegram_network.py +++ b/gateway/platforms/telegram_network.py @@ -135,6 +135,9 @@ def _normalize_fallback_ips(values: 
Iterable[str]) -> list[str]: if addr.version != 4: logger.warning("Ignoring non-IPv4 Telegram fallback IP: %s", raw) continue + if addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_unspecified: + logger.warning("Ignoring private/internal Telegram fallback IP: %s", raw) + continue normalized.append(str(addr)) return normalized -- 2.43.0 From 04367e2fac18dcb5f0beb3ce1320c397ea02d321 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 19:05:34 -0700 Subject: [PATCH 050/385] fix(cron): stop truncating job IDs in list view (#4132) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove [:8] truncation from hermes cron list output. Job IDs are 12 hex chars — truncating to 8 makes them unusable for cron run/pause/remove which require the full ID. Co-authored-by: vitobotta --- hermes_cli/cron.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py index 97a225794..f6da8a2d2 100644 --- a/hermes_cli/cron.py +++ b/hermes_cli/cron.py @@ -56,7 +56,7 @@ def cron_list(show_all: bool = False): print() for job in jobs: - job_id = job.get("id", "?")[:8] + job_id = job.get("id", "?") name = job.get("name", "(unnamed)") schedule = job.get("schedule_display", job.get("schedule", {}).get("value", "?")) state = job.get("state", "scheduled" if job.get("enabled", True) else "paused") -- 2.43.0 From 45396aaa9272104313f33df2d0c99c6fc81edb44 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 19:06:30 -0700 Subject: [PATCH 051/385] fix(alibaba): use standard DashScope international endpoint (#4133) * fix(alibaba): use standard DashScope international endpoint The Alibaba Cloud provider was hardcoded to the coding-intl endpoint (https://coding-intl.dashscope.aliyuncs.com/v1) which only accepts Alibaba Coding Plan API keys. 
Standard DashScope API keys fail with invalid_api_key error against this endpoint. Changed to the international compatible-mode endpoint (https://dashscope-intl.aliyuncs.com/compatible-mode/v1) which works with standard DashScope keys. Users with Coding Plan keys or China-region keys can still override via DASHSCOPE_BASE_URL or config.yaml base_url. Fixes #3912 * fix: update test to match new DashScope default endpoint --------- Co-authored-by: kagura-agent --- hermes_cli/auth.py | 2 +- tests/test_runtime_provider_resolution.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 940a15564..add83eff8 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -160,7 +160,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { id="alibaba", name="Alibaba Cloud (DashScope)", auth_type="api_key", - inference_base_url="https://coding-intl.dashscope.aliyuncs.com/v1", + inference_base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1", api_key_env_vars=("DASHSCOPE_API_KEY",), base_url_env_var="DASHSCOPE_BASE_URL", ), diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py index 84b018333..6976d071a 100644 --- a/tests/test_runtime_provider_resolution.py +++ b/tests/test_runtime_provider_resolution.py @@ -545,7 +545,7 @@ def test_alibaba_default_coding_intl_endpoint_uses_chat_completions(monkeypatch) assert resolved["provider"] == "alibaba" assert resolved["api_mode"] == "chat_completions" - assert resolved["base_url"] == "https://coding-intl.dashscope.aliyuncs.com/v1" + assert resolved["base_url"] == "https://dashscope-intl.aliyuncs.com/compatible-mode/v1" def test_alibaba_anthropic_endpoint_override_uses_anthropic_messages(monkeypatch): -- 2.43.0 From cc63b2d1cd817b1c67e08d2afdaedcecd04a6859 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 19:17:07 -0700 Subject: [PATCH 052/385] fix(gateway): remove 
user-facing compression warnings (#4139) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-compression still runs silently in the background with server-side logging, but no longer sends messages to the user's chat about it. Removed: - 'Session is large... Auto-compressing' pre-compression notification - 'Compressed: N → M messages' post-compression notification - 'Session is still very large after compression' warning - 'Auto-compression failed' warning - Rate-limit tracking (only existed for these warnings) --- gateway/run.py | 73 +-------------------------- tests/gateway/test_session_hygiene.py | 50 ++---------------- 2 files changed, 5 insertions(+), 118 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index c094fddd6..3428c59f7 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -476,12 +476,7 @@ class GatewayRunner: self._honcho_managers: Dict[str, Any] = {} self._honcho_configs: Dict[str, Any] = {} - # Rate-limit compression warning messages sent to users. - # Keyed by chat_id — value is the timestamp of the last warning sent. - # Prevents the warning from firing on every message when a session - # remains above the threshold after compression. - self._compression_warn_sent: Dict[str, float] = {} - self._compression_warn_cooldown: int = 3600 # seconds (1 hour) + # Ensure tirith security scanner is available (downloads if needed) try: @@ -2354,18 +2349,7 @@ class GatewayRunner: f"{_compress_token_threshold:,}", ) - _hyg_adapter = self.adapters.get(source.platform) _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None - if _hyg_adapter: - try: - await _hyg_adapter.send( - source.chat_id, - f"🗜️ Session is large ({_msg_count} messages, " - f"~{_approx_tokens:,} tokens). 
Auto-compressing...", - metadata=_hyg_meta, - ) - except Exception: - pass try: from run_agent import AIAgent @@ -2426,70 +2410,17 @@ class GatewayRunner: f"{_approx_tokens:,}", f"{_new_tokens:,}", ) - if _hyg_adapter: - try: - await _hyg_adapter.send( - source.chat_id, - f"🗜️ Compressed: {_msg_count} → " - f"{_new_count} messages, " - f"~{_approx_tokens:,} → " - f"~{_new_tokens:,} tokens", - metadata=_hyg_meta, - ) - except Exception: - pass - - # Still too large after compression — warn user - # Rate-limited to once per cooldown period per - # chat to avoid spamming on every message. if _new_tokens >= _warn_token_threshold: logger.warning( "Session hygiene: still ~%s tokens after " - "compression — suggesting /reset", + "compression", f"{_new_tokens:,}", ) - _now = time.time() - _last_warn = self._compression_warn_sent.get(source.chat_id, 0) - if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown: - self._compression_warn_sent[source.chat_id] = _now - try: - await _hyg_adapter.send( - source.chat_id, - "⚠️ Session is still very large " - "after compression " - f"(~{_new_tokens:,} tokens). " - "Consider using /reset to start " - "fresh if you experience issues.", - metadata=_hyg_meta, - ) - except Exception: - pass except Exception as e: logger.warning( "Session hygiene auto-compress failed: %s", e ) - # Compression failed and session is dangerously large - if _approx_tokens >= _warn_token_threshold: - _hyg_adapter = self.adapters.get(source.platform) - _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None - _now = time.time() - _last_warn = self._compression_warn_sent.get(source.chat_id, 0) - if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown: - self._compression_warn_sent[source.chat_id] = _now - try: - await _hyg_adapter.send( - source.chat_id, - f"⚠️ Session is very large " - f"({_msg_count} messages, " - f"~{_approx_tokens:,} tokens) and " - "auto-compression failed. 
Consider " - "using /compress or /reset to avoid " - "issues.", - metadata=_hyg_meta, - ) - except Exception: - pass # First-message onboarding -- only on the very first interaction ever if not history and not self.session_store.has_any_sessions(): diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py index 843c0d416..5488296f6 100644 --- a/tests/gateway/test_session_hygiene.py +++ b/tests/gateway/test_session_hygiene.py @@ -212,47 +212,7 @@ class TestSessionHygieneWarnThreshold: assert post_compress_tokens < warn_threshold -class TestCompressionWarnRateLimit: - """Compression warning messages must be rate-limited per chat_id.""" - def _make_runner(self): - from unittest.mock import MagicMock, patch - with patch("gateway.run.load_gateway_config"), \ - patch("gateway.run.SessionStore"), \ - patch("gateway.run.DeliveryRouter"): - from gateway.run import GatewayRunner - runner = GatewayRunner.__new__(GatewayRunner) - runner._compression_warn_sent = {} - runner._compression_warn_cooldown = 3600 - return runner - - def test_first_warn_is_sent(self): - runner = self._make_runner() - now = 1_000_000.0 - last = runner._compression_warn_sent.get("chat:1", 0) - assert now - last >= runner._compression_warn_cooldown - - def test_second_warn_suppressed_within_cooldown(self): - runner = self._make_runner() - now = 1_000_000.0 - runner._compression_warn_sent["chat:1"] = now - 60 # 1 minute ago - last = runner._compression_warn_sent.get("chat:1", 0) - assert now - last < runner._compression_warn_cooldown - - def test_warn_allowed_after_cooldown(self): - runner = self._make_runner() - now = 1_000_000.0 - runner._compression_warn_sent["chat:1"] = now - 3601 # just past cooldown - last = runner._compression_warn_sent.get("chat:1", 0) - assert now - last >= runner._compression_warn_cooldown - - def test_rate_limit_is_per_chat(self): - """Rate-limiting one chat must not suppress warnings for another.""" - runner = self._make_runner() - now = 
1_000_000.0 - runner._compression_warn_sent["chat:1"] = now - 60 # suppressed - last_other = runner._compression_warn_sent.get("chat:2", 0) - assert now - last_other >= runner._compression_warn_cooldown class TestEstimatedTokenThreshold: @@ -421,10 +381,6 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t result = await runner._handle_message(event) assert result == "ok" - assert len(adapter.sent) == 2 - assert adapter.sent[0]["chat_id"] == "-1001" - assert "Session is large" in adapter.sent[0]["content"] - assert adapter.sent[0]["metadata"] == {"thread_id": "17585"} - assert adapter.sent[1]["chat_id"] == "-1001" - assert "Compressed:" in adapter.sent[1]["content"] - assert adapter.sent[1]["metadata"] == {"thread_id": "17585"} + # Compression warnings are no longer sent to users — compression + # happens silently with server-side logging only. + assert len(adapter.sent) == 0 -- 2.43.0 From fb2af3bd1d10a13c9498372023dd67bdbe86b48d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 19:40:39 -0700 Subject: [PATCH 053/385] docs: document tool progress streaming in API server and Open WebUI (#4138) Update docs to reflect that tool progress now streams inline during SSE responses. Previously docs said tool calls were invisible. 
- api-server.md: add 'Tool progress in streams' note to streaming docs - open-webui.md: update 'How It Works' steps, add Tool Progress tip --- website/docs/user-guide/features/api-server.md | 4 +++- website/docs/user-guide/messaging/open-webui.md | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/website/docs/user-guide/features/api-server.md b/website/docs/user-guide/features/api-server.md index 6739ad7ab..71732285e 100644 --- a/website/docs/user-guide/features/api-server.md +++ b/website/docs/user-guide/features/api-server.md @@ -8,7 +8,7 @@ description: "Expose hermes-agent as an OpenAI-compatible API for any frontend" The API server exposes hermes-agent as an OpenAI-compatible HTTP endpoint. Any frontend that speaks the OpenAI format — Open WebUI, LobeChat, LibreChat, NextChat, ChatBox, and hundreds more — can connect to hermes-agent and use it as a backend. -Your agent handles requests with its full toolset (terminal, file operations, web search, memory, skills) and returns the final response. Tool calls execute invisibly server-side. +Your agent handles requests with its full toolset (terminal, file operations, web search, memory, skills) and returns the final response. When streaming, tool progress indicators appear inline so frontends can show what the agent is doing. ## Quick Start @@ -85,6 +85,8 @@ Standard OpenAI Chat Completions format. Stateless — the full conversation is **Streaming** (`"stream": true`): Returns Server-Sent Events (SSE) with token-by-token response chunks. When streaming is enabled in config, tokens are emitted live as the LLM generates them. When disabled, the full response is sent as a single SSE chunk. +**Tool progress in streams**: When the agent calls tools during a streaming request, brief progress indicators are injected into the content stream as the tools start executing (e.g. `` `💻 pwd` ``, `` `🔍 Python docs` ``). 
These appear as inline markdown before the agent's response text, giving frontends like Open WebUI real-time visibility into tool execution. + ### POST /v1/responses OpenAI Responses API format. Supports server-side conversation state via `previous_response_id` — the server stores full conversation history (including tool calls and results) so multi-turn context is preserved without the client managing it. diff --git a/website/docs/user-guide/messaging/open-webui.md b/website/docs/user-guide/messaging/open-webui.md index a3eb5fbc0..7d4eaee36 100644 --- a/website/docs/user-guide/messaging/open-webui.md +++ b/website/docs/user-guide/messaging/open-webui.md @@ -147,12 +147,16 @@ When you send a message in Open WebUI: 1. Open WebUI sends a `POST /v1/chat/completions` request with your message and conversation history 2. Hermes Agent creates an AIAgent instance with its full toolset 3. The agent processes your request — it may call tools (terminal, file operations, web search, etc.) -4. Tool calls happen invisibly server-side -5. The agent's final text response is returned to Open WebUI +4. As tools execute, **inline progress messages stream to the UI** so you can see what the agent is doing (e.g. `` `💻 ls -la` ``, `` `🔍 Python 3.12 release` ``) +5. The agent's final text response streams back to Open WebUI 6. Open WebUI displays the response in its chat interface Your agent has access to all the same tools and capabilities as when using the CLI or Telegram — the only difference is the frontend. +:::tip Tool Progress +With streaming enabled (the default), you'll see brief inline indicators as tools run — the tool emoji and its key argument. These appear in the response stream before the agent's final answer, giving you visibility into what's happening behind the scenes. 
+::: + ## Configuration Reference ### Hermes Agent (API server) -- 2.43.0 From 83e5249be65b2ba4afdaf19ef5f7a3b1cb4f2d0c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:22:09 -0700 Subject: [PATCH 054/385] fix(gateway): use setsid instead of systemd-run --user for /update (salvage #4024) (#4104) Salvaged from PR #4024 by @Sertug17. Fixes #4017. - Replace systemd-run --user --scope with setsid for portable session detach - Add system-level service detection to cmd_update gateway restart - Falls back to start_new_session=True on systems without setsid (macOS, minimal containers) --- gateway/run.py | 22 ++-- hermes_cli/main.py | 31 ++++- tests/gateway/test_update_command.py | 29 ++--- .../hermes_cli/test_update_gateway_restart.py | 109 +++++++++++++++++- 4 files changed, 161 insertions(+), 30 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 3428c59f7..3e6f39be3 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -4617,8 +4617,8 @@ class GatewayRunner: async def _handle_update_command(self, event: MessageEvent) -> str: """Handle /update command — update Hermes Agent to the latest version. - Spawns ``hermes update`` in a separate systemd scope so it survives the - gateway restart that ``hermes update`` may trigger at the end. Marker + Spawns ``hermes update`` in a detached session (via ``setsid``) so it + survives the gateway restart that ``hermes update`` may trigger. Marker files are written so either the current gateway process or the next one can notify the user when the update finishes. """ @@ -4658,28 +4658,28 @@ class GatewayRunner: pending_path.write_text(json.dumps(pending)) exit_code_path.unlink(missing_ok=True) - # Spawn `hermes update` in a separate cgroup so it survives gateway - # restart. systemd-run --user --scope creates a transient scope unit. + # Spawn `hermes update` detached so it survives gateway restart. 
+ # Use setsid for portable session detach (works under system services + # where systemd-run --user fails due to missing D-Bus session). hermes_cmd_str = " ".join(shlex.quote(part) for part in hermes_cmd) update_cmd = ( f"{hermes_cmd_str} update > {shlex.quote(str(output_path))} 2>&1; " f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}" ) try: - systemd_run = shutil.which("systemd-run") - if systemd_run: + setsid_bin = shutil.which("setsid") + if setsid_bin: + # Preferred: setsid creates a new session, fully detached subprocess.Popen( - [systemd_run, "--user", "--scope", - "--unit=hermes-update", "--", - "bash", "-c", update_cmd], + [setsid_bin, "bash", "-c", update_cmd], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True, ) else: - # Fallback: best-effort detach with start_new_session + # Fallback: start_new_session=True calls os.setsid() in child subprocess.Popen( - ["bash", "-c", f"nohup {update_cmd} &"], + ["bash", "-c", update_cmd], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True, diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 763bcea4e..9dca21056 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -3165,6 +3165,7 @@ def cmd_update(args): _gw_service_name = get_service_name() existing_pid = get_running_pid() has_systemd_service = False + has_system_service = False has_launchd_service = False try: @@ -3177,6 +3178,19 @@ def cmd_update(args): except (FileNotFoundError, subprocess.TimeoutExpired): pass + # Also check for a system-level service (hermes gateway install --system). + # This covers gateways running under system systemd where --user + # fails due to missing D-Bus session. 
+ if not has_systemd_service and is_linux(): + try: + check = subprocess.run( + ["systemctl", "is-active", _gw_service_name], + capture_output=True, text=True, timeout=5, + ) + has_system_service = check.stdout.strip() == "active" + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + # Check for macOS launchd service if is_macos(): try: @@ -3191,7 +3205,7 @@ def cmd_update(args): except (FileNotFoundError, subprocess.TimeoutExpired): pass - if existing_pid or has_systemd_service or has_launchd_service: + if existing_pid or has_systemd_service or has_system_service or has_launchd_service: print() # When a service manager is handling the gateway, let it @@ -3232,6 +3246,21 @@ def cmd_update(args): print(" hermes gateway restart") else: print(" Try manually: hermes gateway restart") + elif has_system_service: + # System-level service (hermes gateway install --system). + # No D-Bus session needed — systemctl without --user talks + # directly to the system manager over /run/systemd/private. + print("→ Restarting system gateway service...") + restart = subprocess.run( + ["systemctl", "restart", _gw_service_name], + capture_output=True, text=True, timeout=15, + ) + if restart.returncode == 0: + print("✓ Gateway restarted (system service).") + else: + print(f"⚠ Gateway restart failed: {restart.stderr.strip()}") + print(" System services may require root. Try:") + print(f" sudo systemctl restart {_gw_service_name}") elif has_launchd_service: # Refresh the plist first (picks up --replace and other # changes from the update we just pulled). 
diff --git a/tests/gateway/test_update_command.py b/tests/gateway/test_update_command.py index e8fb3ddc1..0fc774a0a 100644 --- a/tests/gateway/test_update_command.py +++ b/tests/gateway/test_update_command.py @@ -202,7 +202,7 @@ class TestHandleUpdateCommand: with patch("gateway.run._hermes_home", hermes_home), \ patch("gateway.run.__file__", fake_file), \ - patch("shutil.which", side_effect=lambda x: "/usr/bin/hermes" if x == "hermes" else "/usr/bin/systemd-run"), \ + patch("shutil.which", side_effect=lambda x: "/usr/bin/hermes" if x == "hermes" else "/usr/bin/setsid"), \ patch("subprocess.Popen"): result = await runner._handle_update_command(event) @@ -215,8 +215,8 @@ class TestHandleUpdateCommand: assert not (hermes_home / ".update_exit_code").exists() @pytest.mark.asyncio - async def test_spawns_systemd_run(self, tmp_path): - """Uses systemd-run when available.""" + async def test_spawns_setsid(self, tmp_path): + """Uses setsid when available.""" runner = _make_runner() event = _make_event() @@ -236,16 +236,16 @@ class TestHandleUpdateCommand: patch("subprocess.Popen", mock_popen): result = await runner._handle_update_command(event) - # Verify systemd-run was used + # Verify setsid was used call_args = mock_popen.call_args[0][0] - assert call_args[0] == "/usr/bin/systemd-run" - assert "--scope" in call_args + assert call_args[0] == "/usr/bin/setsid" + assert call_args[1] == "bash" assert ".update_exit_code" in call_args[-1] assert "Starting Hermes update" in result @pytest.mark.asyncio - async def test_fallback_nohup_when_no_systemd_run(self, tmp_path): - """Falls back to nohup when systemd-run is not available.""" + async def test_fallback_when_no_setsid(self, tmp_path): + """Falls back to start_new_session=True when setsid is not available.""" runner = _make_runner() event = _make_event() @@ -260,24 +260,27 @@ class TestHandleUpdateCommand: mock_popen = MagicMock() - def which_no_systemd(x): + def which_no_setsid(x): if x == "hermes": return "/usr/bin/hermes" 
- if x == "systemd-run": + if x == "setsid": return None return None with patch("gateway.run._hermes_home", hermes_home), \ patch("gateway.run.__file__", fake_file), \ - patch("shutil.which", side_effect=which_no_systemd), \ + patch("shutil.which", side_effect=which_no_setsid), \ patch("subprocess.Popen", mock_popen): result = await runner._handle_update_command(event) - # Verify bash -c nohup fallback was used + # Verify plain bash -c fallback (no nohup, no setsid) call_args = mock_popen.call_args[0][0] assert call_args[0] == "bash" - assert "nohup" in call_args[2] + assert "nohup" not in call_args[2] assert ".update_exit_code" in call_args[2] + # start_new_session=True should be in kwargs + call_kwargs = mock_popen.call_args[1] + assert call_kwargs.get("start_new_session") is True assert "Starting Hermes update" in result @pytest.mark.asyncio diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py index 89ac84219..1d6b064af 100644 --- a/tests/hermes_cli/test_update_gateway_restart.py +++ b/tests/hermes_cli/test_update_gateway_restart.py @@ -25,6 +25,8 @@ def _make_run_side_effect( verify_ok=True, commit_count="3", systemd_active=False, + system_service_active=False, + system_restart_rc=0, launchctl_loaded=False, ): """Build a subprocess.run side_effect that simulates git + service commands.""" @@ -45,14 +47,23 @@ def _make_run_side_effect( if "rev-list" in joined: return subprocess.CompletedProcess(cmd, 0, stdout=f"{commit_count}\n", stderr="") - # systemctl --user is-active + # systemctl is-active — distinguish --user from system scope if "systemctl" in joined and "is-active" in joined: - if systemd_active: - return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="") - return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="") + if "--user" in joined: + if systemd_active: + return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="") + return 
subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="") + else: + # System-level check (no --user) + if system_service_active: + return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="") + return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="") - # systemctl --user restart + # systemctl restart — distinguish --user from system scope if "systemctl" in joined and "restart" in joined: + if "--user" not in joined and system_service_active: + stderr = "" if system_restart_rc == 0 else "Failed to restart: Permission denied" + return subprocess.CompletedProcess(cmd, system_restart_rc, stdout="", stderr=stderr) return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") # launchctl list ai.hermes.gateway @@ -393,3 +404,91 @@ class TestCmdUpdateLaunchdRestart: assert "Stopped gateway" not in captured assert "Gateway restarted" not in captured assert "Gateway restarted via launchd" not in captured + + +# --------------------------------------------------------------------------- +# cmd_update — system-level systemd service detection +# --------------------------------------------------------------------------- + + +class TestCmdUpdateSystemService: + """cmd_update detects system-level gateway services where --user fails.""" + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_detects_system_service_and_restarts( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """When user systemd is inactive but a system service exists, restart via system scope.""" + monkeypatch.setattr(gateway_cli, "is_macos", lambda: False) + monkeypatch.setattr(gateway_cli, "is_linux", lambda: True) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=False, + system_service_active=True, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"): + cmd_update(mock_args) + + captured = 
capsys.readouterr().out + assert "system gateway service" in captured.lower() + assert "Gateway restarted (system service)" in captured + # Verify systemctl restart (no --user) was called + restart_calls = [ + c for c in mock_run.call_args_list + if "restart" in " ".join(str(a) for a in c.args[0]) + and "systemctl" in " ".join(str(a) for a in c.args[0]) + and "--user" not in " ".join(str(a) for a in c.args[0]) + ] + assert len(restart_calls) == 1 + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_system_service_restart_failure_shows_sudo_hint( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """When system service restart fails (e.g. no root), show sudo hint.""" + monkeypatch.setattr(gateway_cli, "is_macos", lambda: False) + monkeypatch.setattr(gateway_cli, "is_linux", lambda: True) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=False, + system_service_active=True, + system_restart_rc=1, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "sudo systemctl restart" in captured + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_user_service_takes_priority_over_system( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """When both user and system services are active, user wins.""" + monkeypatch.setattr(gateway_cli, "is_macos", lambda: False) + monkeypatch.setattr(gateway_cli, "is_linux", lambda: True) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=True, + system_service_active=True, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"), \ + patch("os.kill"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + # Should restart via user service, not system + assert 
"Gateway restarted." in captured + assert "(system service)" not in captured -- 2.43.0 From 54b876a5c9120ab2e48ab425d9f97145e09899ff Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:23:28 -0700 Subject: [PATCH 055/385] fix: add actionable guidance to context-exceeded error messages (#4155) When context compression fails, users now see hints suggesting /new or /compress instead of a dead-end error. Covers all 4 error paths: payload-too-large, max compression attempts (2 paths), and context length exceeded. Closes #4061 Salvaged from PR #4076 by SHL0MS. Co-authored-by: SHL0MS --- run_agent.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index fad7fca5a..326f35654 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7056,6 +7056,7 @@ class AIAgent: compression_attempts += 1 if compression_attempts > max_compression_attempts: self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True) + self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True) logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.") self._persist_session(messages, conversation_history) return { @@ -7080,6 +7081,7 @@ class AIAgent: break else: self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.", force=True) + self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True) logging.error(f"{self.log_prefix}413 payload too large. 
Cannot compress further.") self._persist_session(messages, conversation_history) return { @@ -7156,6 +7158,7 @@ class AIAgent: compression_attempts += 1 if compression_attempts > max_compression_attempts: self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True) + self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True) logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.") self._persist_session(messages, conversation_history) return { @@ -7182,7 +7185,7 @@ class AIAgent: else: # Can't compress further and already at minimum tier self._vprint(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.", force=True) - self._vprint(f"{self.log_prefix} 💡 The conversation has accumulated too much content.", force=True) + self._vprint(f"{self.log_prefix} 💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.", force=True) logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. 
Cannot compress further.") self._persist_session(messages, conversation_history) return { -- 2.43.0 From 5b0243e6ad8002a6e8e129b5e2295cd01849b9d7 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:30:11 -0700 Subject: [PATCH 056/385] =?UTF-8?q?docs:=20deep=20quality=20pass=20?= =?UTF-8?q?=E2=80=94=20expand=2010=20thin=20pages,=20fix=20specific=20issu?= =?UTF-8?q?es=20(#4134)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Developer guide stubs expanded to full documentation: - trajectory-format.md: 56→233 lines (JSONL format, ShareGPT example, normalization rules, reasoning markup, replay code) - session-storage.md: 66→388 lines (SQLite schema, migration table, FTS5 search syntax, lineage queries, Python API examples) - context-compression-and-caching.md: 72→321 lines (dual compression system, config defaults, 4-phase algorithm, before/after example, prompt caching mechanics, cache-aware patterns) - tools-runtime.md: 65→246 lines (registry API, dispatch flow, availability checking, error wrapping, approval flow) - prompt-assembly.md: 89→246 lines (concrete assembled prompt example, SOUL.md injection, context file discovery table) User-facing pages expanded: - docker.md: 62→224 lines (volumes, env forwarding, docker-compose, resource limits, troubleshooting) - updating.md: 79→167 lines (update behavior, version checking, rollback instructions, Nix users) - skins.md: 80→206 lines (all color/spinner/branding keys, built-in skin descriptions, full custom skin YAML template) Hub pages improved: - integrations/index.md: 25→82 lines (web search backends table, TTS/browser providers, quick config example) - features/overview.md: added Integrations section with 6 missing links Specific fixes: - configuration.md: removed duplicate Gateway Streaming section - mcp.md: removed internal "PR work" language - plugins.md: added inline minimal plugin example (self-contained) 13 
files changed, ~1700 lines added. Docusaurus build verified clean. --- .../context-compression-and-caching.md | 335 ++++++++++++-- .../docs/developer-guide/prompt-assembly.md | 157 +++++++ .../docs/developer-guide/session-storage.md | 412 ++++++++++++++++-- website/docs/developer-guide/tools-runtime.md | 181 ++++++++ .../docs/developer-guide/trajectory-format.md | 251 +++++++++-- website/docs/getting-started/updating.md | 90 +++- website/docs/integrations/index.md | 67 ++- website/docs/user-guide/configuration.md | 22 +- website/docs/user-guide/docker.md | 170 +++++++- website/docs/user-guide/features/mcp.md | 4 +- website/docs/user-guide/features/overview.md | 9 + website/docs/user-guide/features/plugins.md | 50 +++ website/docs/user-guide/features/skins.md | 161 ++++++- 13 files changed, 1735 insertions(+), 174 deletions(-) diff --git a/website/docs/developer-guide/context-compression-and-caching.md b/website/docs/developer-guide/context-compression-and-caching.md index 92bf718cd..65c0911f4 100644 --- a/website/docs/developer-guide/context-compression-and-caching.md +++ b/website/docs/developer-guide/context-compression-and-caching.md @@ -1,72 +1,321 @@ ---- -sidebar_position: 6 -title: "Context Compression & Prompt Caching" -description: "How Hermes compresses long conversations and applies provider-side prompt caching" ---- +# Context Compression and Caching -# Context Compression & Prompt Caching +Hermes Agent uses a dual compression system and Anthropic prompt caching to +manage context window usage efficiently across long conversations. 
-Hermes manages long conversations with two complementary mechanisms: +Source files: `agent/context_compressor.py`, `agent/prompt_caching.py`, +`gateway/run.py` (session hygiene), `run_agent.py` (lines 1146-1204) -- prompt caching -- context compression -Primary files: +## Dual Compression System -- `agent/prompt_caching.py` -- `agent/context_compressor.py` -- `run_agent.py` +Hermes has two separate compression layers that operate independently: -## Prompt caching +``` + ┌──────────────────────────┐ + Incoming message │ Gateway Session Hygiene │ Fires at 85% of context + ─────────────────► │ (pre-agent, rough est.) │ Safety net for large sessions + └─────────────┬────────────┘ + │ + ▼ + ┌──────────────────────────┐ + │ Agent ContextCompressor │ Fires at 50% of context (default) + │ (in-loop, real tokens) │ Normal context management + └──────────────────────────┘ +``` -For Anthropic/native and Claude-via-OpenRouter flows, Hermes applies Anthropic-style cache markers. +### 1. Gateway Session Hygiene (85% threshold) -Current strategy: +Located in `gateway/run.py` (around line 2220). This is a **safety net** that +runs before the agent processes a message. It prevents API failures when sessions +grow too large between turns (e.g., overnight accumulation in Telegram/Discord). -- cache the system prompt -- cache the last 3 non-system messages -- default TTL is 5 minutes unless explicitly extended +- **Threshold**: Fixed at 85% of model context length +- **Token source**: Prefers actual API-reported tokens from last turn; falls back + to rough character-based estimate (`estimate_messages_tokens_rough`) +- **Fires**: Only when `len(history) >= 4` and compression is enabled +- **Purpose**: Catch sessions that escaped the agent's own compressor -This is implemented in `agent/prompt_caching.py`. +The gateway hygiene threshold is intentionally higher than the agent's compressor. 
+Setting it at 50% (same as the agent) caused premature compression on every turn +in long gateway sessions. -## Why prompt stability matters +### 2. Agent ContextCompressor (50% threshold, configurable) -Prompt caching only helps when the stable prefix remains stable. That is why Hermes avoids rebuilding or mutating the core system prompt mid-session unless it has to. +Located in `agent/context_compressor.py`. This is the **primary compression +system** that runs inside the agent's tool loop with access to accurate, +API-reported token counts. -## Compression trigger -Hermes can compress context when conversations become large. Configuration defaults live in `config.yaml`, and the compressor also has runtime checks based on actual prompt token counts. +## Configuration -## Compression algorithm +All compression settings are read from `config.yaml` under the `compression` key: -The compressor protects: +```yaml +compression: + enabled: true # Enable/disable compression (default: true) + threshold: 0.50 # Fraction of context window (default: 0.50 = 50%) + target_ratio: 0.20 # How much of threshold to keep as tail (default: 0.20) + protect_last_n: 20 # Minimum protected tail messages (default: 20) + summary_model: null # Override model for summaries (default: uses auxiliary) +``` -- the first N turns -- the last N turns +### Parameter Details -and summarizes the middle section. 
+| Parameter | Default | Range | Description | +|-----------|---------|-------|-------------| +| `threshold` | `0.50` | 0.0-1.0 | Compression triggers when prompt tokens ≥ `threshold × context_length` | +| `target_ratio` | `0.20` | 0.10-0.80 | Controls tail protection token budget: `threshold_tokens × target_ratio` | +| `protect_last_n` | `20` | ≥1 | Minimum number of recent messages always preserved | +| `protect_first_n` | `3` | (hardcoded) | System prompt + first exchange always preserved | -It also cleans up structural issues such as orphaned tool-call/result pairs so the API never receives invalid conversation structure after compression. +### Computed Values (for a 200K context model at defaults) -## Pre-compression memory flush +``` +context_length = 200,000 +threshold_tokens = 200,000 × 0.50 = 100,000 +tail_token_budget = 100,000 × 0.20 = 20,000 +max_summary_tokens = min(200,000 × 0.05, 12,000) = 10,000 +``` -Before compression, Hermes can give the model one last chance to persist memory so facts are not lost when middle turns are summarized away. -## Session lineage after compression +## Compression Algorithm -Compression can split the session into a new session ID while preserving parent lineage in the state DB. +The `ContextCompressor.compress()` method follows a 4-phase algorithm: -This lets Hermes continue operating with a smaller active context while retaining a searchable ancestry chain. +### Phase 1: Prune Old Tool Results (cheap, no LLM call) -## Re-injected state after compression +Old tool results (>200 chars) outside the protected tail are replaced with: +``` +[Old tool output cleared to save context space] +``` -After compression, Hermes may re-inject compact operational state such as: +This is a cheap pre-pass that saves significant tokens from verbose tool +outputs (file contents, terminal output, search results). 
-- todo snapshot -- prior-read-files summary +### Phase 2: Determine Boundaries -## Related docs +``` +┌─────────────────────────────────────────────────────────────┐ +│ Message list │ +│ │ +│ [0..2] ← protect_first_n (system + first exchange) │ +│ [3..N] ← middle turns → SUMMARIZED │ +│ [N..end] ← tail (by token budget OR protect_last_n) │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` -- [Prompt Assembly](./prompt-assembly.md) -- [Session Storage](./session-storage.md) -- [Agent Loop Internals](./agent-loop.md) +Tail protection is **token-budget based**: walks backward from the end, +accumulating tokens until the budget is exhausted. Falls back to the fixed +`protect_last_n` count if the budget would protect fewer messages. + +Boundaries are aligned to avoid splitting tool_call/tool_result groups. +The `_align_boundary_backward()` method walks past consecutive tool results +to find the parent assistant message, keeping groups intact. + +### Phase 3: Generate Structured Summary + +The middle turns are summarized using the auxiliary LLM with a structured +template: + +``` +## Goal +[What the user is trying to accomplish] + +## Constraints & Preferences +[User preferences, coding style, constraints, important decisions] + +## Progress +### Done +[Completed work — specific file paths, commands run, results] +### In Progress +[Work currently underway] +### Blocked +[Any blockers or issues encountered] + +## Key Decisions +[Important technical decisions and why] + +## Relevant Files +[Files read, modified, or created — with brief note on each] + +## Next Steps +[What needs to happen next] + +## Critical Context +[Specific values, error messages, configuration details] +``` + +Summary budget scales with the amount of content being compressed: +- Formula: `content_tokens × 0.20` (the `_SUMMARY_RATIO` constant) +- Minimum: 2,000 tokens +- Maximum: `min(context_length × 0.05, 12,000)` tokens + +### Phase 4: Assemble Compressed Messages + +The 
compressed message list is: +1. Head messages (with a note appended to system prompt on first compression) +2. Summary message (role chosen to avoid consecutive same-role violations) +3. Tail messages (unmodified) + +Orphaned tool_call/tool_result pairs are cleaned up by `_sanitize_tool_pairs()`: +- Tool results referencing removed calls → removed +- Tool calls whose results were removed → stub result injected + +### Iterative Re-compression + +On subsequent compressions, the previous summary is passed to the LLM with +instructions to **update** it rather than summarize from scratch. This preserves +information across multiple compactions — items move from "In Progress" to "Done", +new progress is added, and obsolete information is removed. + +The `_previous_summary` field on the compressor instance stores the last summary +text for this purpose. + + +## Before/After Example + +### Before Compression (45 messages, ~95K tokens) + +``` +[0] system: "You are a helpful assistant..." (system prompt) +[1] user: "Help me set up a FastAPI project" +[2] assistant: terminal: mkdir project +[3] tool: "directory created" +[4] assistant: write_file: main.py +[5] tool: "file written (2.3KB)" + ... 30 more turns of file editing, testing, debugging ... +[38] assistant: terminal: pytest +[39] tool: "8 passed, 2 failed\n..." (5KB output) +[40] user: "Fix the failing tests" +[41] assistant: read_file: tests/test_api.py +[42] tool: "import pytest\n..." (3KB) +[43] assistant: "I see the issue with the test fixtures..." +[44] user: "Great, also add error handling" +``` + +### After Compression (25 messages, ~45K tokens) + +``` +[0] system: "You are a helpful assistant... + [Note: Some earlier conversation turns have been compacted...]" +[1] user: "Help me set up a FastAPI project" +[2] assistant: "[CONTEXT COMPACTION] Earlier turns were compacted... 
+ + ## Goal + Set up a FastAPI project with tests and error handling + + ## Progress + ### Done + - Created project structure: main.py, tests/, requirements.txt + - Implemented 5 API endpoints in main.py + - Wrote 10 test cases in tests/test_api.py + - 8/10 tests passing + + ### In Progress + - Fixing 2 failing tests (test_create_user, test_delete_user) + + ## Relevant Files + - main.py — FastAPI app with 5 endpoints + - tests/test_api.py — 10 test cases + - requirements.txt — fastapi, pytest, httpx + + ## Next Steps + - Fix failing test fixtures + - Add error handling" +[3] user: "Fix the failing tests" +[4] assistant: read_file: tests/test_api.py +[5] tool: "import pytest\n..." +[6] assistant: "I see the issue with the test fixtures..." +[7] user: "Great, also add error handling" +``` + + +## Prompt Caching (Anthropic) + +Source: `agent/prompt_caching.py` + +Reduces input token costs by ~75% on multi-turn conversations by caching the +conversation prefix. Uses Anthropic's `cache_control` breakpoints. + +### Strategy: system_and_3 + +Anthropic allows a maximum of 4 `cache_control` breakpoints per request. 
Hermes +uses the "system_and_3" strategy: + +``` +Breakpoint 1: System prompt (stable across all turns) +Breakpoint 2: 3rd-to-last non-system message ─┐ +Breakpoint 3: 2nd-to-last non-system message ├─ Rolling window +Breakpoint 4: Last non-system message ─┘ +``` + +### How It Works + +`apply_anthropic_cache_control()` deep-copies the messages and injects +`cache_control` markers: + +```python +# Cache marker format +marker = {"type": "ephemeral"} +# Or for 1-hour TTL: +marker = {"type": "ephemeral", "ttl": "1h"} +``` + +The marker is applied differently based on content type: + +| Content Type | Where Marker Goes | +|-------------|-------------------| +| String content | Converted to `[{"type": "text", "text": ..., "cache_control": ...}]` | +| List content | Added to the last element's dict | +| None/empty | Added as `msg["cache_control"]` | +| Tool messages | Added as `msg["cache_control"]` (native Anthropic only) | + +### Cache-Aware Design Patterns + +1. **Stable system prompt**: The system prompt is breakpoint 1 and cached across + all turns. Avoid mutating it mid-conversation (compression appends a note + only on the first compaction). + +2. **Message ordering matters**: Cache hits require prefix matching. Adding or + removing messages in the middle invalidates the cache for everything after. + +3. **Compression cache interaction**: After compression, the cache is invalidated + for the compressed region but the system prompt cache survives. The rolling + 3-message window re-establishes caching within 1-2 turns. + +4. **TTL selection**: Default is `5m` (5 minutes). Use `1h` for long-running + sessions where the user takes breaks between turns. 
+ +### Enabling Prompt Caching + +Prompt caching is automatically enabled when: +- The model is an Anthropic Claude model (detected by model name) +- The provider supports `cache_control` (native Anthropic API or OpenRouter) + +```yaml +# config.yaml — TTL is configurable +model: + cache_ttl: "5m" # "5m" or "1h" +``` + +The CLI shows caching status at startup: +``` +💾 Prompt caching: ENABLED (Claude via OpenRouter, 5m TTL) +``` + + +## Context Pressure Warnings + +The agent emits context pressure warnings at 85% of the compression threshold +(not 85% of context — 85% of the threshold which is itself 50% of context): + +``` +⚠️ Context is 85% to compaction threshold (42,500/50,000 tokens) +``` + +After compression, if usage drops below 85% of threshold, the warning state +is cleared. If compression fails to reduce below the warning level (the +conversation is too dense), the warning persists but compression won't +re-trigger until the threshold is exceeded again. diff --git a/website/docs/developer-guide/prompt-assembly.md b/website/docs/developer-guide/prompt-assembly.md index 9fdb59256..858ac38ec 100644 --- a/website/docs/developer-guide/prompt-assembly.md +++ b/website/docs/developer-guide/prompt-assembly.md @@ -41,6 +41,163 @@ The cached system prompt is assembled in roughly this order: When `skip_context_files` is set (e.g., subagent delegation), SOUL.md is not loaded and the hardcoded `DEFAULT_AGENT_IDENTITY` is used instead. +### Concrete example: assembled system prompt + +Here is a simplified view of what the final system prompt looks like when all layers are present (comments show the source of each section): + +``` +# Layer 1: Agent Identity (from ~/.hermes/SOUL.md) +You are Hermes, an AI assistant created by Nous Research. +You are an expert software engineer and researcher. +You value correctness, clarity, and efficiency. +... + +# Layer 2: Tool-aware behavior guidance +You have persistent memory across sessions. 
Save durable facts using +the memory tool: user preferences, environment details, tool quirks, +and stable conventions. Memory is injected into every turn, so keep +it compact and focused on facts that will still matter later. +... +When the user references something from a past conversation or you +suspect relevant cross-session context exists, use session_search +to recall it before asking them to repeat themselves. + +# Tool-use enforcement (for GPT/Codex models only) +You MUST use your tools to take action — do not describe what you +would do or plan to do without actually doing it. +... + +# Layer 3: Honcho static block (when active) +[Honcho personality/context data] + +# Layer 4: Optional system message (from config or API) +[User-configured system message override] + +# Layer 5: Frozen MEMORY snapshot +## Persistent Memory +- User prefers Python 3.12, uses pyproject.toml +- Default editor is nvim +- Working on project "atlas" in ~/code/atlas +- Timezone: US/Pacific + +# Layer 6: Frozen USER profile snapshot +## User Profile +- Name: Alice +- GitHub: alice-dev + +# Layer 7: Skills index +## Skills (mandatory) +Before replying, scan the skills below. If one clearly matches +your task, load it with skill_view(name) and follow its instructions. +... + + software-development: + - code-review: Structured code review workflow + - test-driven-development: TDD methodology + research: + - arxiv: Search and summarize arXiv papers + + +# Layer 8: Context files (from project directory) +# Project Context +The following project context files have been loaded and should be followed: + +## AGENTS.md +This is the atlas project. Use pytest for testing. The main +entry point is src/atlas/main.py. Always run `make lint` before +committing. + +# Layer 9: Timestamp + session +Current time: 2026-03-30T14:30:00-07:00 +Session: abc123 + +# Layer 10: Platform hint +You are a CLI AI Agent. Try not to use markdown but simple text +renderable inside a terminal. 
+``` + +## How SOUL.md appears in the prompt + +`SOUL.md` lives at `~/.hermes/SOUL.md` and serves as the agent's identity — the very first section of the system prompt. The loading logic in `prompt_builder.py` works as follows: + +```python +# From agent/prompt_builder.py (simplified) +def load_soul_md() -> Optional[str]: + soul_path = get_hermes_home() / "SOUL.md" + if not soul_path.exists(): + return None + content = soul_path.read_text(encoding="utf-8").strip() + content = _scan_context_content(content, "SOUL.md") # Security scan + content = _truncate_content(content, "SOUL.md") # Cap at 20k chars + return content +``` + +When `load_soul_md()` returns content, it replaces the hardcoded `DEFAULT_AGENT_IDENTITY`. The `build_context_files_prompt()` function is then called with `skip_soul=True` to prevent SOUL.md from appearing twice (once as identity, once as a context file). + +If `SOUL.md` doesn't exist, the system falls back to: + +``` +You are Hermes Agent, an intelligent AI assistant created by Nous Research. +You are helpful, knowledgeable, and direct. You assist users with a wide +range of tasks including answering questions, writing and editing code, +analyzing information, creative work, and executing actions via your tools. +You communicate clearly, admit uncertainty when appropriate, and prioritize +being genuinely useful over being verbose unless otherwise directed below. +Be targeted and efficient in your exploration and investigations. +``` + +## How context files are injected + +`build_context_files_prompt()` uses a **priority system** — only one project context type is loaded (first match wins): + +```python +# From agent/prompt_builder.py (simplified) +def build_context_files_prompt(cwd=None, skip_soul=False): + cwd_path = Path(cwd).resolve() + + # Priority: first match wins — only ONE project context loaded + project_context = ( + _load_hermes_md(cwd_path) # 1. .hermes.md / HERMES.md (walks to git root) + or _load_agents_md(cwd_path) # 2. 
AGENTS.md (cwd only) + or _load_claude_md(cwd_path) # 3. CLAUDE.md (cwd only) + or _load_cursorrules(cwd_path) # 4. .cursorrules / .cursor/rules/*.mdc + ) + + sections = [] + if project_context: + sections.append(project_context) + + # SOUL.md from HERMES_HOME (independent of project context) + if not skip_soul: + soul_content = load_soul_md() + if soul_content: + sections.append(soul_content) + + if not sections: + return "" + + return ( + "# Project Context\n\n" + "The following project context files have been loaded " + "and should be followed:\n\n" + + "\n".join(sections) + ) +``` + +### Context file discovery details + +| Priority | Files | Search scope | Notes | +|----------|-------|-------------|-------| +| 1 | `.hermes.md`, `HERMES.md` | CWD up to git root | Hermes-native project config | +| 2 | `AGENTS.md` | CWD only | Common agent instruction file | +| 3 | `CLAUDE.md` | CWD only | Claude Code compatibility | +| 4 | `.cursorrules`, `.cursor/rules/*.mdc` | CWD only | Cursor compatibility | + +All context files are: +- **Security scanned** — checked for prompt injection patterns (invisible unicode, "ignore previous instructions", credential exfiltration attempts) +- **Truncated** — capped at 20,000 characters using 70/20 head/tail ratio with a truncation marker +- **YAML frontmatter stripped** — `.hermes.md` frontmatter is removed (reserved for future config overrides) + ## API-call-time-only layers These are intentionally *not* persisted as part of the cached system prompt: diff --git a/website/docs/developer-guide/session-storage.md b/website/docs/developer-guide/session-storage.md index 103a72b5d..c21401508 100644 --- a/website/docs/developer-guide/session-storage.md +++ b/website/docs/developer-guide/session-storage.md @@ -1,66 +1,388 @@ ---- -sidebar_position: 8 -title: "Session Storage" -description: "How Hermes stores sessions in SQLite, maintains lineage, and exposes recall/search" ---- - # Session Storage -Hermes uses a SQLite-backed session store 
as the main source of truth for historical conversation state. +Hermes Agent uses a SQLite database (`~/.hermes/state.db`) to persist session +metadata, full message history, and model configuration across CLI and gateway +sessions. This replaces the earlier per-session JSONL file approach. -Primary files: +Source file: `hermes_state.py` -- `hermes_state.py` -- `gateway/session.py` -- `tools/session_search_tool.py` -## Main database +## Architecture Overview -The primary store lives at: - -```text -~/.hermes/state.db +``` +~/.hermes/state.db (SQLite, WAL mode) +├── sessions — Session metadata, token counts, billing +├── messages — Full message history per session +├── messages_fts — FTS5 virtual table for full-text search +└── schema_version — Single-row table tracking migration state ``` -It contains: +Key design decisions: +- **WAL mode** for concurrent readers + one writer (gateway multi-platform) +- **FTS5 virtual table** for fast text search across all session messages +- **Session lineage** via `parent_session_id` chains (compression-triggered splits) +- **Source tagging** (`cli`, `telegram`, `discord`, etc.) 
for platform filtering +- Batch runner and RL trajectories are NOT stored here (separate systems) -- sessions -- messages -- metadata such as token counts and titles -- lineage relationships -- full-text search indexes -## What is stored per session +## SQLite Schema -Examples of important session metadata: +### Sessions Table -- session ID -- source/platform -- title -- created/updated timestamps -- token counts -- tool call counts -- stored system prompt snapshot -- parent session ID after compression splits +```sql +CREATE TABLE IF NOT EXISTS sessions ( + id TEXT PRIMARY KEY, + source TEXT NOT NULL, + user_id TEXT, + model TEXT, + model_config TEXT, + system_prompt TEXT, + parent_session_id TEXT, + started_at REAL NOT NULL, + ended_at REAL, + end_reason TEXT, + message_count INTEGER DEFAULT 0, + tool_call_count INTEGER DEFAULT 0, + input_tokens INTEGER DEFAULT 0, + output_tokens INTEGER DEFAULT 0, + cache_read_tokens INTEGER DEFAULT 0, + cache_write_tokens INTEGER DEFAULT 0, + reasoning_tokens INTEGER DEFAULT 0, + billing_provider TEXT, + billing_base_url TEXT, + billing_mode TEXT, + estimated_cost_usd REAL, + actual_cost_usd REAL, + cost_status TEXT, + cost_source TEXT, + pricing_version TEXT, + title TEXT, + FOREIGN KEY (parent_session_id) REFERENCES sessions(id) +); -## Lineage +CREATE INDEX IF NOT EXISTS idx_sessions_source ON sessions(source); +CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id); +CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC); +CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique + ON sessions(title) WHERE title IS NOT NULL; +``` -When Hermes compresses a conversation, it can continue in a new session ID while preserving ancestry via `parent_session_id`. +### Messages Table -This means resuming/searching can follow session families instead of treating each compressed shard as unrelated. 
+```sql +CREATE TABLE IF NOT EXISTS messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL REFERENCES sessions(id), + role TEXT NOT NULL, + content TEXT, + tool_call_id TEXT, + tool_calls TEXT, + tool_name TEXT, + timestamp REAL NOT NULL, + token_count INTEGER, + finish_reason TEXT, + reasoning TEXT, + reasoning_details TEXT, + codex_reasoning_items TEXT +); -## Gateway vs CLI persistence +CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id, timestamp); +``` -- CLI uses the state DB directly for resume/history/search -- gateway keeps active-session mappings and may also maintain additional platform transcript/state files -- some legacy JSON/JSONL artifacts still exist for compatibility, but SQLite is the main historical store +Notes: +- `tool_calls` is stored as a JSON string (serialized list of tool call objects) +- `reasoning_details` and `codex_reasoning_items` are stored as JSON strings +- `reasoning` stores the raw reasoning text for providers that expose it +- Timestamps are Unix epoch floats (`time.time()`) -## Session search +### FTS5 Full-Text Search -The `session_search` tool uses the session DB's search features to retrieve and summarize relevant past work. 
+```sql +CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5( + content, + content=messages, + content_rowid=id +); +``` -## Related docs +The FTS5 table is kept in sync via three triggers that fire on INSERT, UPDATE, +and DELETE of the `messages` table: -- [Gateway Internals](./gateway-internals.md) -- [Prompt Assembly](./prompt-assembly.md) -- [Context Compression & Prompt Caching](./context-compression-and-caching.md) +```sql +CREATE TRIGGER IF NOT EXISTS messages_fts_insert AFTER INSERT ON messages BEGIN + INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content); +END; + +CREATE TRIGGER IF NOT EXISTS messages_fts_delete AFTER DELETE ON messages BEGIN + INSERT INTO messages_fts(messages_fts, rowid, content) + VALUES('delete', old.id, old.content); +END; + +CREATE TRIGGER IF NOT EXISTS messages_fts_update AFTER UPDATE ON messages BEGIN + INSERT INTO messages_fts(messages_fts, rowid, content) + VALUES('delete', old.id, old.content); + INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content); +END; +``` + + +## Schema Version and Migrations + +Current schema version: **6** + +The `schema_version` table stores a single integer. 
On initialization, +`_init_schema()` checks the current version and applies migrations sequentially: + +| Version | Change | +|---------|--------| +| 1 | Initial schema (sessions, messages, FTS5) | +| 2 | Add `finish_reason` column to messages | +| 3 | Add `title` column to sessions | +| 4 | Add unique index on `title` (NULLs allowed, non-NULL must be unique) | +| 5 | Add billing columns: `cache_read_tokens`, `cache_write_tokens`, `reasoning_tokens`, `billing_provider`, `billing_base_url`, `billing_mode`, `estimated_cost_usd`, `actual_cost_usd`, `cost_status`, `cost_source`, `pricing_version` | +| 6 | Add reasoning columns to messages: `reasoning`, `reasoning_details`, `codex_reasoning_items` | + +Each migration uses `ALTER TABLE ADD COLUMN` wrapped in try/except to handle +the column-already-exists case (idempotent). The version number is bumped after +each successful migration block. + + +## Write Contention Handling + +Multiple hermes processes (gateway + CLI sessions + worktree agents) share one +`state.db`. The `SessionDB` class handles write contention with: + +- **Short SQLite timeout** (1 second) instead of the default 30s +- **Application-level retry** with random jitter (20-150ms, up to 15 retries) +- **BEGIN IMMEDIATE** transactions to surface lock contention at transaction start +- **Periodic WAL checkpoints** every 50 successful writes (PASSIVE mode) + +This avoids the "convoy effect" where SQLite's deterministic internal backoff +causes all competing writers to retry at the same intervals. 
+ +``` +_WRITE_MAX_RETRIES = 15 +_WRITE_RETRY_MIN_S = 0.020 # 20ms +_WRITE_RETRY_MAX_S = 0.150 # 150ms +_CHECKPOINT_EVERY_N_WRITES = 50 +``` + + +## Common Operations + +### Initialize + +```python +from hermes_state import SessionDB + +db = SessionDB() # Default: ~/.hermes/state.db +db = SessionDB(db_path=Path("/tmp/test.db")) # Custom path +``` + +### Create and Manage Sessions + +```python +# Create a new session +db.create_session( + session_id="sess_abc123", + source="cli", + model="anthropic/claude-sonnet-4.6", + user_id="user_1", + parent_session_id=None, # or previous session ID for lineage +) + +# End a session +db.end_session("sess_abc123", end_reason="user_exit") + +# Reopen a session (clear ended_at/end_reason) +db.reopen_session("sess_abc123") +``` + +### Store Messages + +```python +msg_id = db.append_message( + session_id="sess_abc123", + role="assistant", + content="Here's the answer...", + tool_calls=[{"id": "call_1", "function": {"name": "terminal", "arguments": "{}"}}], + token_count=150, + finish_reason="stop", + reasoning="Let me think about this...", +) +``` + +### Retrieve Messages + +```python +# Raw messages with all metadata +messages = db.get_messages("sess_abc123") + +# OpenAI conversation format (for API replay) +conversation = db.get_messages_as_conversation("sess_abc123") +# Returns: [{"role": "user", "content": "..."}, {"role": "assistant", ...}] +``` + +### Session Titles + +```python +# Set a title (must be unique among non-NULL titles) +db.set_session_title("sess_abc123", "Fix Docker Build") + +# Resolve by title (returns most recent in lineage) +session_id = db.resolve_session_by_title("Fix Docker Build") + +# Auto-generate next title in lineage +next_title = db.get_next_title_in_lineage("Fix Docker Build") +# Returns: "Fix Docker Build #2" +``` + + +## Full-Text Search + +The `search_messages()` method supports FTS5 query syntax with automatic +sanitization of user input. 
+ +### Basic Search + +```python +results = db.search_messages("docker deployment") +``` + +### FTS5 Query Syntax + +| Syntax | Example | Meaning | +|--------|---------|---------| +| Keywords | `docker deployment` | Both terms (implicit AND) | +| Quoted phrase | `"exact phrase"` | Exact phrase match | +| Boolean OR | `docker OR kubernetes` | Either term | +| Boolean NOT | `python NOT java` | Exclude term | +| Prefix | `deploy*` | Prefix match | + +### Filtered Search + +```python +# Search only CLI sessions +results = db.search_messages("error", source_filter=["cli"]) + +# Exclude gateway sessions +results = db.search_messages("bug", exclude_sources=["telegram", "discord"]) + +# Search only user messages +results = db.search_messages("help", role_filter=["user"]) +``` + +### Search Results Format + +Each result includes: +- `id`, `session_id`, `role`, `timestamp` +- `snippet` — FTS5-generated snippet with `>>>match<<<` markers +- `context` — 1 message before and after the match (content truncated to 200 chars) +- `source`, `model`, `session_started` — from the parent session + +The `_sanitize_fts5_query()` method handles edge cases: +- Strips unmatched quotes and special characters +- Wraps hyphenated terms in quotes (`chat-send` → `"chat-send"`) +- Removes dangling boolean operators (`hello AND` → `hello`) + + +## Session Lineage + +Sessions can form chains via `parent_session_id`. This happens when context +compression triggers a session split in the gateway. + +### Query: Find Session Lineage + +```sql +-- Find all ancestors of a session +WITH RECURSIVE lineage AS ( + SELECT * FROM sessions WHERE id = ? + UNION ALL + SELECT s.* FROM sessions s + JOIN lineage l ON s.id = l.parent_session_id +) +SELECT id, title, started_at, parent_session_id FROM lineage; + +-- Find all descendants of a session +WITH RECURSIVE descendants AS ( + SELECT * FROM sessions WHERE id = ? 
+ UNION ALL + SELECT s.* FROM sessions s + JOIN descendants d ON s.parent_session_id = d.id +) +SELECT id, title, started_at FROM descendants; +``` + +### Query: Recent Sessions with Preview + +```sql +SELECT s.*, + COALESCE( + (SELECT SUBSTR(m.content, 1, 63) + FROM messages m + WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL + ORDER BY m.timestamp, m.id LIMIT 1), + '' + ) AS preview, + COALESCE( + (SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id), + s.started_at + ) AS last_active +FROM sessions s +ORDER BY s.started_at DESC +LIMIT 20; +``` + +### Query: Token Usage Statistics + +```sql +-- Total tokens by model +SELECT model, + COUNT(*) as session_count, + SUM(input_tokens) as total_input, + SUM(output_tokens) as total_output, + SUM(estimated_cost_usd) as total_cost +FROM sessions +WHERE model IS NOT NULL +GROUP BY model +ORDER BY total_cost DESC; + +-- Sessions with highest token usage +SELECT id, title, model, input_tokens + output_tokens AS total_tokens, + estimated_cost_usd +FROM sessions +ORDER BY total_tokens DESC +LIMIT 10; +``` + + +## Export and Cleanup + +```python +# Export a single session with messages +data = db.export_session("sess_abc123") + +# Export all sessions (with messages) as list of dicts +all_data = db.export_all(source="cli") + +# Delete old sessions (only ended sessions) +deleted_count = db.prune_sessions(older_than_days=90) +deleted_count = db.prune_sessions(older_than_days=30, source="telegram") + +# Clear messages but keep the session record +db.clear_messages("sess_abc123") + +# Delete session and all messages +db.delete_session("sess_abc123") +``` + + +## Database Location + +Default path: `~/.hermes/state.db` + +This is derived from `hermes_constants.get_hermes_home()` which resolves to +`~/.hermes/` by default, or the value of `HERMES_HOME` environment variable. 
+ +The database file, WAL file (`state.db-wal`), and shared-memory file +(`state.db-shm`) are all created in the same directory. diff --git a/website/docs/developer-guide/tools-runtime.md b/website/docs/developer-guide/tools-runtime.md index 4cb4e0d1e..f6fbc86de 100644 --- a/website/docs/developer-guide/tools-runtime.md +++ b/website/docs/developer-guide/tools-runtime.md @@ -22,6 +22,89 @@ Each tool module calls `registry.register(...)` at import time. `model_tools.py` is responsible for importing/discovering tool modules and building the schema list used by the model. +### How `registry.register()` works + +Every tool file in `tools/` calls `registry.register()` at module level to declare itself. The function signature is: + +```python +registry.register( + name="terminal", # Unique tool name (used in API schemas) + toolset="terminal", # Toolset this tool belongs to + schema={...}, # OpenAI function-calling schema (description, parameters) + handler=handle_terminal, # The function that executes when the tool is called + check_fn=check_terminal, # Optional: returns True/False for availability + requires_env=["SOME_VAR"], # Optional: env vars needed (for UI display) + is_async=False, # Whether the handler is an async coroutine + description="Run commands", # Human-readable description + emoji="💻", # Emoji for spinner/progress display +) +``` + +Each call creates a `ToolEntry` stored in the singleton `ToolRegistry._tools` dict keyed by tool name. If a name collision occurs across toolsets, a warning is logged and the later registration wins. 
+ +### Discovery: `_discover_tools()` + +When `model_tools.py` is imported, it calls `_discover_tools()` which imports every tool module in order: + +```python +_modules = [ + "tools.web_tools", + "tools.terminal_tool", + "tools.file_tools", + "tools.vision_tools", + "tools.mixture_of_agents_tool", + "tools.image_generation_tool", + "tools.skills_tool", + "tools.browser_tool", + "tools.cronjob_tools", + "tools.rl_training_tool", + "tools.tts_tool", + "tools.todo_tool", + "tools.memory_tool", + "tools.session_search_tool", + "tools.clarify_tool", + "tools.code_execution_tool", + "tools.delegate_tool", + "tools.process_registry", + "tools.send_message_tool", + "tools.honcho_tools", + "tools.homeassistant_tool", +] +``` + +Each import triggers the module's `registry.register()` calls. Errors in optional tools (e.g., missing `fal_client` for image generation) are caught and logged — they don't prevent other tools from loading. + +After core tool discovery, MCP tools and plugin tools are also discovered: + +1. **MCP tools** — `tools.mcp_tool.discover_mcp_tools()` reads MCP server config and registers tools from external servers. +2. **Plugin tools** — `hermes_cli.plugins.discover_plugins()` loads user/project/pip plugins that may register additional tools. + +## Tool availability checking (`check_fn`) + +Each tool can optionally provide a `check_fn` — a callable that returns `True` when the tool is available and `False` otherwise. 
Typical checks include: + +- **API key present** — e.g., `lambda: bool(os.environ.get("SERP_API_KEY"))` for web search +- **Service running** — e.g., checking if the Honcho server is configured +- **Binary installed** — e.g., verifying `playwright` is available for browser tools + +When `registry.get_definitions()` builds the schema list for the model, it runs each tool's `check_fn()`: + +```python +# Simplified from registry.py +if entry.check_fn: + try: + available = bool(entry.check_fn()) + except Exception: + available = False # Exceptions = unavailable + if not available: + continue # Skip this tool entirely +``` + +Key behaviors: +- Check results are **cached per-call** — if multiple tools share the same `check_fn`, it only runs once. +- Exceptions in `check_fn()` are treated as "unavailable" (fail-safe). +- The `is_toolset_available()` method checks whether a toolset's `check_fn` passes, used for UI display and toolset resolution. + ## Toolset resolution Toolsets are named bundles of tools. Hermes resolves them through: @@ -31,10 +114,108 @@ Toolsets are named bundles of tools. Hermes resolves them through: - dynamic MCP toolsets - curated special-purpose sets like `hermes-acp` +### How `get_tool_definitions()` filters tools + +The main entry point is `model_tools.get_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode)`: + +1. **If `enabled_toolsets` is provided** — only tools from those toolsets are included. Each toolset name is resolved via `resolve_toolset()` which expands composite toolsets into individual tool names. + +2. **If `disabled_toolsets` is provided** — start with ALL toolsets, then subtract the disabled ones. + +3. **If neither** — include all known toolsets. + +4. **Registry filtering** — the resolved tool name set is passed to `registry.get_definitions()`, which applies `check_fn` filtering and returns OpenAI-format schemas. + +5. 
**Dynamic schema patching** — after filtering, `execute_code` and `browser_navigate` schemas are dynamically adjusted to only reference tools that actually passed filtering (prevents model hallucination of unavailable tools). + +### Legacy toolset names + +Old toolset names with `_tools` suffixes (e.g., `web_tools`, `terminal_tools`) are mapped to their modern tool names via `_LEGACY_TOOLSET_MAP` for backward compatibility. + ## Dispatch At runtime, tools are dispatched through the central registry, with agent-loop exceptions for some agent-level tools such as memory/todo/session-search handling. +### Dispatch flow: model tool_call → handler execution + +When the model returns a `tool_call`, the flow is: + +``` +Model response with tool_call + ↓ +run_agent.py agent loop + ↓ +model_tools.handle_function_call(name, args, task_id, user_task) + ↓ +[Agent-loop tools?] → handled directly by agent loop (todo, memory, session_search, delegate_task) + ↓ +[Plugin pre-hook] → invoke_hook("pre_tool_call", ...) + ↓ +registry.dispatch(name, args, **kwargs) + ↓ +Look up ToolEntry by name + ↓ +[Async handler?] → bridge via _run_async() +[Sync handler?] → call directly + ↓ +Return result string (or JSON error) + ↓ +[Plugin post-hook] → invoke_hook("post_tool_call", ...) +``` + +### Error wrapping + +All tool execution is wrapped in error handling at two levels: + +1. **`registry.dispatch()`** — catches any exception from the handler and returns `{"error": "Tool execution failed: ExceptionType: message"}` as JSON. + +2. **`handle_function_call()`** — wraps the entire dispatch in a secondary try/except that returns `{"error": "Error executing tool_name: message"}`. + +This ensures the model always receives a well-formed JSON string, never an unhandled exception. 
+ +### Agent-loop tools + +Four tools are intercepted before registry dispatch because they need agent-level state (TodoStore, MemoryStore, etc.): + +- `todo` — planning/task tracking +- `memory` — persistent memory writes +- `session_search` — cross-session recall +- `delegate_task` — spawns subagent sessions + +These tools' schemas are still registered in the registry (for `get_tool_definitions`), but their handlers return a stub error if dispatch somehow reaches them directly. + +### Async bridging + +When a tool handler is async, `_run_async()` bridges it to the sync dispatch path: + +- **CLI path (no running loop)** — uses a persistent event loop to keep cached async clients alive +- **Gateway path (running loop)** — spins up a disposable thread with `asyncio.run()` +- **Worker threads (parallel tools)** — uses per-thread persistent loops stored in thread-local storage + +## The DANGEROUS_PATTERNS approval flow + +The terminal tool integrates a dangerous-command approval system defined in `tools/approval.py`: + +1. **Pattern detection** — `DANGEROUS_PATTERNS` is a list of `(regex, description)` tuples covering destructive operations: + - Recursive deletes (`rm -rf`) + - Filesystem formatting (`mkfs`, `dd`) + - SQL destructive operations (`DROP TABLE`, `DELETE FROM` without `WHERE`) + - System config overwrites (`> /etc/`) + - Service manipulation (`systemctl stop`) + - Remote code execution (`curl | sh`) + - Fork bombs, process kills, etc. + +2. **Detection** — before executing any terminal command, `detect_dangerous_command(command)` checks against all patterns. + +3. 
**Approval prompt** — if a match is found: + - **CLI mode** — an interactive prompt asks the user to approve, deny, or allow permanently + - **Gateway mode** — an async approval callback sends the request to the messaging platform + - **Smart approval** — optionally, an auxiliary LLM can auto-approve low-risk commands that match patterns (e.g., `rm -rf node_modules/` is safe but matches "recursive delete") + +4. **Session state** — approvals are tracked per-session. Once you approve "recursive delete" for a session, subsequent `rm -rf` commands don't re-prompt. + +5. **Permanent allowlist** — the "allow permanently" option writes the pattern to `config.yaml`'s `command_allowlist`, persisting across sessions. + ## Terminal/runtime environments The terminal system supports multiple backends: diff --git a/website/docs/developer-guide/trajectory-format.md b/website/docs/developer-guide/trajectory-format.md index 0232846ca..f36244ed2 100644 --- a/website/docs/developer-guide/trajectory-format.md +++ b/website/docs/developer-guide/trajectory-format.md @@ -1,56 +1,233 @@ ---- -sidebar_position: 10 -title: "Trajectories & Training Format" -description: "How Hermes saves trajectories, normalizes tool calls, and produces training-friendly outputs" ---- +# Trajectory Format -# Trajectories & Training Format +Hermes Agent saves conversation trajectories in ShareGPT-compatible JSONL format +for use as training data, debugging artifacts, and reinforcement learning datasets. -Hermes can save conversation trajectories for training, evaluation, and batch data generation workflows. 
+Source files: `agent/trajectory.py`, `run_agent.py` (lines 1788-1975), `batch_runner.py` -Primary files: -- `agent/trajectory.py` -- `run_agent.py` -- `batch_runner.py` -- `trajectory_compressor.py` +## File Naming Convention -## What trajectories are for +Trajectories are written to files in the current working directory: -Trajectory outputs are used for: +| File | When | +|------|------| +| `trajectory_samples.jsonl` | Conversations that completed successfully (`completed=True`) | +| `failed_trajectories.jsonl` | Conversations that failed or were interrupted (`completed=False`) | -- SFT data generation -- debugging agent behavior -- benchmark/evaluation artifact capture -- post-processing and compression pipelines +The batch runner (`batch_runner.py`) writes to a custom output file per batch +(e.g., `batch_001_output.jsonl`) with additional metadata fields. -## Normalization strategy +You can override the filename via the `filename` parameter in `save_trajectory()`. -Hermes converts live conversation structure into a training-friendly format. -Important behaviors include: +## JSONL Entry Format -- representing reasoning in explicit markup -- converting tool calls into structured XML-like regions for dataset compatibility -- grouping tool outputs appropriately -- separating successful and failed trajectories +Each line in the file is a self-contained JSON object. There are two variants: -## Persistence boundaries +### CLI/Interactive Format (from `_save_trajectory`) -Trajectory files do **not** blindly mirror all runtime prompt state. +```json +{ + "conversations": [ ... ], + "timestamp": "2026-03-30T14:22:31.456789", + "model": "anthropic/claude-sonnet-4.6", + "completed": true +} +``` -Some prompt-time-only layers are intentionally excluded from persisted trajectory content so datasets are cleaner and less environment-specific. +### Batch Runner Format (from `batch_runner.py`) -## Batch runner +```json +{ + "prompt_index": 42, + "conversations": [ ... 
], + "metadata": { "prompt_source": "gsm8k", "difficulty": "hard" }, + "completed": true, + "partial": false, + "api_calls": 7, + "toolsets_used": ["code_tools", "file_tools"], + "tool_stats": { + "terminal": {"count": 3, "success": 3, "failure": 0}, + "read_file": {"count": 2, "success": 2, "failure": 0}, + "write_file": {"count": 0, "success": 0, "failure": 0} + }, + "tool_error_counts": { + "terminal": 0, + "read_file": 0, + "write_file": 0 + } +} +``` -`batch_runner.py` emits richer metadata than single-session trajectory saving, including: +The `tool_stats` and `tool_error_counts` dictionaries are normalized to include +ALL possible tools (from `model_tools.TOOL_TO_TOOLSET_MAP`) with zero defaults, +ensuring consistent schema across entries for HuggingFace dataset loading. -- model/provider metadata -- toolset info -- partial/failure markers -- tool statistics -## Related docs +## Conversations Array (ShareGPT Format) -- [Environments, Benchmarks & Data Generation](./environments.md) -- [Agent Loop Internals](./agent-loop.md) +The `conversations` array uses ShareGPT role conventions: + +| API Role | ShareGPT `from` | +|----------|-----------------| +| system | `"system"` | +| user | `"human"` | +| assistant | `"gpt"` | +| tool | `"tool"` | + +### Complete Example + +```json +{ + "conversations": [ + { + "from": "system", + "value": "You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. If available tools are not relevant in assisting with user query, just respond in natural conversational language. Don't make assumptions about what values to plug into functions. After calling & executing the functions, you will be provided with function results within XML tags. 
Here are the available tools:\n<tools>\n[{\"name\": \"terminal\", \"description\": \"Execute shell commands\", \"parameters\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}}, \"required\": null}]\n</tools>\nFor each function call return a JSON object, with the following pydantic model json schema for each:\n{'title': 'FunctionCall', 'type': 'object', 'properties': {'name': {'title': 'Name', 'type': 'string'}, 'arguments': {'title': 'Arguments', 'type': 'object'}}, 'required': ['name', 'arguments']}\nEach function call should be enclosed within <tool_call></tool_call> XML tags.\nExample:\n<tool_call>\n{'name': <function-name>,'arguments': <args-dict>}\n</tool_call>"
+    },
+    {
+      "from": "human",
+      "value": "What Python version is installed?"
+    },
+    {
+      "from": "gpt",
+      "value": "<think>\nThe user wants to know the Python version. I should run python3 --version.\n</think>\n\n<tool_call>\n{\"name\": \"terminal\", \"arguments\": {\"command\": \"python3 --version\"}}\n</tool_call>"
+    },
+    {
+      "from": "tool",
+      "value": "<tool_response>\n{\"tool_call_id\": \"call_abc123\", \"name\": \"terminal\", \"content\": \"Python 3.11.6\"}\n</tool_response>"
+    },
+    {
+      "from": "gpt",
+      "value": "<think>\nGot the version. I can now answer the user.\n</think>\n\nPython 3.11.6 is installed on this system."
+    }
+  ],
+  "timestamp": "2026-03-30T14:22:31.456789",
+  "model": "anthropic/claude-sonnet-4.6",
+  "completed": true
+}
+```
+
+
+## Normalization Rules
+
+### Reasoning Content Markup
+
+The trajectory converter normalizes ALL reasoning into `<think>` tags, regardless
+of how the model originally produced it:
+
+1. **Native thinking tokens** (`msg["reasoning"]` field from providers like
+   Anthropic, OpenAI o-series): Wrapped as `<think>\n{reasoning}\n</think>\n\n`
+   and prepended before the content.
+
+2. **REASONING_SCRATCHPAD XML** (when native thinking is disabled and the model
+   reasons via system-prompt-instructed XML): `<REASONING_SCRATCHPAD>` tags are
+   converted to `<think>` via `convert_scratchpad_to_think()`.
+
+3. **Empty think blocks**: Every `gpt` turn is guaranteed to have a `<think>`
+   block. 
If no reasoning was produced, an empty block is inserted:
+   `<think>\n</think>` — this ensures consistent format for training data.
+
+### Tool Call Normalization
+
+Tool calls from the API format (with `tool_call_id`, function name, arguments as
+JSON string) are converted to XML-wrapped JSON:
+
+```
+<tool_call>
+{"name": "terminal", "arguments": {"command": "ls -la"}}
+</tool_call>
+```
+
+- Arguments are parsed from JSON strings back to objects (not double-encoded)
+- If JSON parsing fails (shouldn't happen — validated during conversation),
+  an empty `{}` is used with a warning logged
+- Multiple tool calls in one assistant turn produce multiple `<tool_call>` blocks
+  in a single `gpt` message
+
+### Tool Response Normalization
+
+All tool results following an assistant message are grouped into a single `tool`
+turn with XML-wrapped JSON responses:
+
+```
+<tool_response>
+{"tool_call_id": "call_abc123", "name": "terminal", "content": "output here"}
+</tool_response>
+```
+
+- If tool content looks like JSON (starts with `{` or `[`), it's parsed so the
+  content field contains a JSON object/array rather than a string
+- Multiple tool results are joined with newlines in one message
+- The tool name is matched by position against the parent assistant's `tool_calls`
+  array
+
+### System Message
+
+The system message is generated at save time (not taken from the conversation).
+It follows the Hermes function-calling prompt template with:
+
+- Preamble explaining the function-calling protocol
+- `<tools>` XML block containing the JSON tool definitions
+- Schema reference for `FunctionCall` objects
+- `<tool_call>` example
+
+Tool definitions include `name`, `description`, `parameters`, and `required`
+(set to `null` to match the canonical format). 
+ + +## Loading Trajectories + +Trajectories are standard JSONL — load with any JSON-lines reader: + +```python +import json + +def load_trajectories(path: str): + """Load trajectory entries from a JSONL file.""" + entries = [] + with open(path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if line: + entries.append(json.loads(line)) + return entries + +# Filter to successful completions only +successful = [e for e in load_trajectories("trajectory_samples.jsonl") + if e.get("completed")] + +# Extract just the conversations for training +training_data = [e["conversations"] for e in successful] +``` + +### Loading for HuggingFace Datasets + +```python +from datasets import load_dataset + +ds = load_dataset("json", data_files="trajectory_samples.jsonl") +``` + +The normalized `tool_stats` schema ensures all entries have the same columns, +preventing Arrow schema mismatch errors during dataset loading. + + +## Controlling Trajectory Saving + +In the CLI, trajectory saving is controlled by: + +```yaml +# config.yaml +agent: + save_trajectories: true # default: false +``` + +Or via the `--save-trajectories` flag. When the agent initializes with +`save_trajectories=True`, the `_save_trajectory()` method is called at the end +of each conversation turn. + +The batch runner always saves trajectories (that's its primary purpose). + +Samples with zero reasoning across all turns are automatically discarded by the +batch runner to avoid polluting training data with non-reasoning examples. diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md index a44c7706a..04abcc40e 100644 --- a/website/docs/getting-started/updating.md +++ b/website/docs/getting-started/updating.md @@ -20,6 +20,43 @@ This pulls the latest code, updates dependencies, and prompts you to configure a `hermes update` automatically detects new configuration options and prompts you to add them. 
If you skipped that prompt, you can manually run `hermes config check` to see missing options, then `hermes config migrate` to interactively add them. ::: +### What happens during an update + +When you run `hermes update`, the following steps occur: + +1. **Git pull** — pulls the latest code from the `main` branch and updates submodules +2. **Dependency install** — runs `uv pip install -e ".[all]"` to pick up new or changed dependencies +3. **Config migration** — detects new config options added since your version and prompts you to set them +4. **Gateway auto-restart** — if the gateway service is running (systemd on Linux, launchd on macOS), it is **automatically restarted** after the update completes so the new code takes effect immediately + +Expected output looks like: + +``` +$ hermes update +Updating Hermes Agent... +📥 Pulling latest code... +Already up to date. (or: Updating abc1234..def5678) +📦 Updating dependencies... +✅ Dependencies updated +🔍 Checking for new config options... +✅ Config is up to date (or: Found 2 new options — running migration...) +🔄 Restarting gateway service... +✅ Gateway restarted +✅ Hermes Agent updated successfully! +``` + +### Checking your current version + +```bash +hermes version +``` + +Compare against the latest release at the [GitHub releases page](https://github.com/NousResearch/hermes-agent/releases) or check for available updates: + +```bash +hermes update --check +``` + ### Updating from Messaging Platforms You can also update directly from Telegram, Discord, Slack, or WhatsApp by sending: @@ -28,7 +65,7 @@ You can also update directly from Telegram, Discord, Slack, or WhatsApp by sendi /update ``` -This pulls the latest code, updates dependencies, and restarts the gateway. +This pulls the latest code, updates dependencies, and restarts the gateway. The bot will briefly go offline during the restart (typically 5–15 seconds) and then resume. 
### Manual Update

@@ -51,6 +88,57 @@ hermes config check
 hermes config migrate  # Interactively add any missing options
 ```
 
+### Rollback instructions
+
+If an update introduces a problem, you can roll back to a previous version:
+
+```bash
+cd /path/to/hermes-agent
+
+# List recent versions
+git log --oneline -10
+
+# Roll back to a specific commit
+git checkout <commit-hash>
+git submodule update --init --recursive
+uv pip install -e ".[all]"
+
+# Restart the gateway if running
+hermes gateway restart
+```
+
+To roll back to a specific release tag:
+
+```bash
+git checkout v0.6.0
+git submodule update --init --recursive
+uv pip install -e ".[all]"
+```
+
+:::warning
+Rolling back may cause config incompatibilities if new options were added. Run `hermes config check` after rolling back and remove any unrecognized options from `config.yaml` if you encounter errors.
+:::
+
+### Note for Nix users
+
+If you installed via Nix flake, updates are managed through the Nix package manager:
+
+```bash
+# Update the flake input
+nix flake update hermes-agent
+
+# Or rebuild with the latest
+nix profile upgrade hermes-agent
+```
+
+Nix installations are immutable — rollback is handled by Nix's generation system:
+
+```bash
+nix profile rollback
+```
+
+See [Nix Setup](./nix-setup.md) for more details.
+
 ---
 
 ## Uninstalling
diff --git a/website/docs/integrations/index.md b/website/docs/integrations/index.md
index 829c1c67d..cbd771072 100644
--- a/website/docs/integrations/index.md
+++ b/website/docs/integrations/index.md
@@ -8,18 +8,75 @@ sidebar_position: 0
 
 Hermes Agent connects to external systems for AI inference, tool servers, IDE workflows, programmatic access, and more. These integrations extend what Hermes can do and where it can run.
 
-## Available Integrations
+## AI Providers & Routing
 
-- **[AI Providers](/docs/user-guide/features/provider-routing)** — Set up and configure inference providers. 
Hermes works with OpenRouter, Anthropic, OpenAI, Google, and any OpenAI-compatible endpoint. Use `hermes model` to configure interactively. +Hermes supports multiple AI inference providers out of the box. Use `hermes model` to configure interactively, or set them in `config.yaml`. -- **[MCP Servers](/docs/user-guide/features/mcp)** — Connect Hermes to external tool servers via Model Context Protocol. Access tools from GitHub, databases, file systems, browser stacks, internal APIs, and more without writing native Hermes tools. +- **[AI Providers](/docs/user-guide/features/provider-routing)** — OpenRouter, Anthropic, OpenAI, Google, and any OpenAI-compatible endpoint. Hermes auto-detects capabilities like vision, streaming, and tool use per provider. +- **[Provider Routing](/docs/user-guide/features/provider-routing)** — Fine-grained control over which underlying providers handle your OpenRouter requests. Optimize for cost, speed, or quality with sorting, whitelists, blacklists, and explicit priority ordering. +- **[Fallback Providers](/docs/user-guide/features/fallback-providers)** — Automatic failover to backup LLM providers when your primary model encounters errors. Includes primary model fallback and independent auxiliary task fallback for vision, compression, and web extraction. + +## Tool Servers (MCP) + +- **[MCP Servers](/docs/user-guide/features/mcp)** — Connect Hermes to external tool servers via Model Context Protocol. Access tools from GitHub, databases, file systems, browser stacks, internal APIs, and more without writing native Hermes tools. Supports both stdio and SSE transports, per-server tool filtering, and capability-aware resource/prompt registration. 
+ +## Web Search Backends + +The `web_search`, `web_extract`, and `web_crawl` tools support four backend providers, configured via `config.yaml` or `hermes tools`: + +| Backend | Env Var | Search | Extract | Crawl | +|---------|---------|--------|---------|-------| +| **Firecrawl** (default) | `FIRECRAWL_API_KEY` | ✔ | ✔ | ✔ | +| **Parallel** | `PARALLEL_API_KEY` | ✔ | ✔ | — | +| **Tavily** | `TAVILY_API_KEY` | ✔ | ✔ | ✔ | +| **Exa** | `EXA_API_KEY` | ✔ | ✔ | — | + +Quick setup example: + +```yaml +web: + backend: firecrawl # firecrawl | parallel | tavily | exa +``` + +If `web.backend` is not set, the backend is auto-detected from whichever API key is available. Self-hosted Firecrawl is also supported via `FIRECRAWL_API_URL`. + +## Browser Automation + +Hermes includes full browser automation with multiple backend options for navigating websites, filling forms, and extracting information: + +- **Browserbase** — Managed cloud browsers with anti-bot tooling, CAPTCHA solving, and residential proxies +- **Browser Use** — Alternative cloud browser provider +- **Local Chrome via CDP** — Connect to your running Chrome instance using `/browser connect` +- **Local Chromium** — Headless local browser via the `agent-browser` CLI + +See [Browser Automation](/docs/user-guide/features/browser) for setup and usage. + +## Voice & TTS Providers + +Text-to-speech and speech-to-text across all messaging platforms: + +| Provider | Quality | Cost | API Key | +|----------|---------|------|---------| +| **Edge TTS** (default) | Good | Free | None needed | +| **ElevenLabs** | Excellent | Paid | `ELEVENLABS_API_KEY` | +| **OpenAI TTS** | Good | Paid | `VOICE_TOOLS_OPENAI_KEY` | +| **NeuTTS** | Good | Free | None needed | + +Speech-to-text uses Whisper for voice message transcription on Telegram, Discord, and WhatsApp. See [Voice & TTS](/docs/user-guide/features/tts) and [Voice Mode](/docs/user-guide/features/voice-mode) for details. 
+ +## IDE & Editor Integration - **[IDE Integration (ACP)](/docs/user-guide/features/acp)** — Use Hermes Agent inside ACP-compatible editors such as VS Code, Zed, and JetBrains. Hermes runs as an ACP server, rendering chat messages, tool activity, file diffs, and terminal commands inside your editor. +## Programmatic Access + - **[API Server](/docs/user-guide/features/api-server)** — Expose Hermes as an OpenAI-compatible HTTP endpoint. Any frontend that speaks the OpenAI format — Open WebUI, LobeChat, LibreChat, NextChat, ChatBox — can connect and use Hermes as a backend with its full toolset. +## Memory & Personalization + - **[Honcho Memory](/docs/user-guide/features/honcho)** — AI-native persistent memory for cross-session user modeling and personalization. Honcho adds deep user modeling via dialectic reasoning on top of Hermes's built-in memory system. -- **[Provider Routing](/docs/user-guide/features/provider-routing)** — Fine-grained control over which underlying AI providers handle your OpenRouter requests. Optimize for cost, speed, or quality with sorting, whitelists, blacklists, and explicit priority ordering. +## Training & Evaluation -- **[Fallback Providers](/docs/user-guide/features/fallback-providers)** — Automatic failover to backup LLM providers when your primary model encounters errors. Includes primary model fallback and independent auxiliary task fallback for vision, compression, and web extraction. +- **[RL Training](/docs/user-guide/features/rl-training)** — Generate trajectory data from agent sessions for reinforcement learning and model fine-tuning. +- **[Batch Processing](/docs/user-guide/features/batch-processing)** — Run the agent across hundreds of prompts in parallel, generating structured ShareGPT-format trajectory data for training data generation or evaluation. 
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index d3c2ca23e..4900fc05b 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -860,12 +860,15 @@ When enabled, responses appear token-by-token inside a streaming box. Tool calls ```yaml streaming: enabled: true # Enable progressive message editing + transport: edit # "edit" (progressive message editing) or "off" edit_interval: 0.3 # Seconds between message edits buffer_threshold: 40 # Characters before forcing an edit flush cursor: " ▉" # Cursor shown during streaming ``` -When enabled, the bot sends a message on the first token, then progressively edits it as more tokens arrive. Platforms that don't support message editing (Signal, Email) gracefully skip streaming and deliver the final response normally. +When enabled, the bot sends a message on the first token, then progressively edits it as more tokens arrive. Platforms that don't support message editing (Signal, Email, Home Assistant) are auto-detected on the first attempt — streaming is gracefully disabled for that session with no flood of messages. + +**Overflow handling:** If the streamed text exceeds the platform's message length limit (~4096 chars), the current message is finalized and a new one starts automatically. :::note Streaming is disabled by default. Enable it in `~/.hermes/config.yaml` to try the streaming UX. @@ -929,23 +932,6 @@ Usage: type `/status`, `/disk`, `/update`, or `/gpu` in the CLI or any messaging - **Type** — only `exec` is supported (runs a shell command); other types show an error - **Works everywhere** — CLI, Telegram, Discord, Slack, WhatsApp, Signal, Email, Home Assistant -## Gateway Streaming - -Enable progressive token delivery on messaging platforms. When streaming is enabled, responses appear character-by-character in Telegram, Discord, and Slack via message editing, rather than waiting for the full response. 
- -```yaml -streaming: - enabled: false # Enable streaming token delivery (default: off) - transport: edit # "edit" (progressive message editing) or "off" - edit_interval: 0.3 # Min seconds between message edits - buffer_threshold: 40 # Characters accumulated before forcing an edit - cursor: " ▉" # Cursor character shown during streaming -``` - -**Platform support:** Telegram, Discord, and Slack support edit-based streaming. Platforms that don't support message editing (Signal, Email, Home Assistant) are auto-detected on the first attempt — streaming is gracefully disabled for that session with no flood of messages. - -**Overflow handling:** If the streamed text exceeds the platform's message length limit (~4096 chars), the current message is finalized and a new one starts automatically. - ## Human Delay Simulate human-like response pacing in messaging platforms: diff --git a/website/docs/user-guide/docker.md b/website/docs/user-guide/docker.md index 3fb33a93f..2940b8678 100644 --- a/website/docs/user-guide/docker.md +++ b/website/docs/user-guide/docker.md @@ -1,10 +1,17 @@ +--- +sidebar_position: 7 +title: "Docker" +description: "Running Hermes Agent in Docker and using Docker as a terminal backend" +--- + # Hermes Agent — Docker -Want to run Hermes Agent, but without installing packages on your host? This'll sort you out. +There are two distinct ways Docker intersects with Hermes Agent: -This will let you run the agent in a container, with the most relevant modes outlined below. +1. **Running Hermes IN Docker** — the agent itself runs inside a container (this page's primary focus) +2. **Docker as a terminal backend** — the agent runs on your host but executes commands inside a Docker sandbox (see [Configuration → terminal.backend](./configuration.md)) -The container stores all user data (config, API keys, sessions, skills, memories) in a single directory mounted from the host at `/opt/data`. 
The image itself is stateless and can be upgraded by pulling a new version without losing any configuration. +This page covers option 1. The container stores all user data (config, API keys, sessions, skills, memories) in a single directory mounted from the host at `/opt/data`. The image itself is stateless and can be upgraded by pulling a new version without losing any configuration. ## Quick start @@ -41,6 +48,110 @@ docker run -it --rm \ nousresearch/hermes-agent ``` +## Persistent volumes + +The `/opt/data` volume is the single source of truth for all Hermes state. It maps to your host's `~/.hermes/` directory and contains: + +| Path | Contents | +|------|----------| +| `.env` | API keys and secrets | +| `config.yaml` | All Hermes configuration | +| `SOUL.md` | Agent personality/identity | +| `sessions/` | Conversation history | +| `memories/` | Persistent memory store | +| `skills/` | Installed skills | +| `cron/` | Scheduled job definitions | +| `hooks/` | Event hooks | +| `logs/` | Runtime logs | +| `skins/` | Custom CLI skins | + +:::warning +Never run two Hermes containers against the same data directory simultaneously — session files and memory stores are not designed for concurrent access. +::: + +## Environment variable forwarding + +API keys are read from `/opt/data/.env` inside the container. You can also pass environment variables directly: + +```sh +docker run -it --rm \ + -v ~/.hermes:/opt/data \ + -e ANTHROPIC_API_KEY="sk-ant-..." \ + -e OPENAI_API_KEY="sk-..." \ + nousresearch/hermes-agent +``` + +Direct `-e` flags override values from `.env`. This is useful for CI/CD or secrets-manager integrations where you don't want keys on disk. 
+ +## Docker Compose example + +For persistent gateway deployment, a `docker-compose.yaml` is convenient: + +```yaml +version: "3.8" +services: + hermes: + image: nousresearch/hermes-agent:latest + container_name: hermes + restart: unless-stopped + command: gateway run + volumes: + - ~/.hermes:/opt/data + # Uncomment to forward specific env vars instead of using .env file: + # environment: + # - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + # - OPENAI_API_KEY=${OPENAI_API_KEY} + # - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN} + deploy: + resources: + limits: + memory: 4G + cpus: "2.0" +``` + +Start with `docker compose up -d` and view logs with `docker compose logs -f hermes`. + +## Resource limits + +The Hermes container needs moderate resources. Recommended minimums: + +| Resource | Minimum | Recommended | +|----------|---------|-------------| +| Memory | 1 GB | 2–4 GB | +| CPU | 1 core | 2 cores | +| Disk (data volume) | 500 MB | 2+ GB (grows with sessions/skills) | + +Browser automation (Playwright/Chromium) is the most memory-hungry feature. If you don't need browser tools, 1 GB is sufficient. With browser tools active, allocate at least 2 GB. + +Set limits in Docker: + +```sh +docker run -d \ + --name hermes \ + --restart unless-stopped \ + --memory=4g --cpus=2 \ + -v ~/.hermes:/opt/data \ + nousresearch/hermes-agent gateway run +``` + +## What the Dockerfile does + +The official image is based on `debian:13.4` and includes: + +- Python 3 with all Hermes dependencies (`pip install -e ".[all]"`) +- Node.js + npm (for browser automation and WhatsApp bridge) +- Playwright with Chromium (`npx playwright install --with-deps chromium`) +- ripgrep and ffmpeg as system utilities +- The WhatsApp bridge (`scripts/whatsapp-bridge/`) + +The entrypoint script (`docker/entrypoint.sh`) bootstraps the data volume on first run: +- Creates the directory structure (`sessions/`, `memories/`, `skills/`, etc.) 
+- Copies `.env.example` → `.env` if no `.env` exists +- Copies default `config.yaml` if missing +- Copies default `SOUL.md` if missing +- Syncs bundled skills using a manifest-based approach (preserves user edits) +- Then runs `hermes` with whatever arguments you pass + ## Upgrading Pull the latest image and recreate the container. Your data directory is untouched. @@ -52,7 +163,14 @@ docker run -d \ --name hermes \ --restart unless-stopped \ -v ~/.hermes:/opt/data \ - nousresearch/hermes-agent + nousresearch/hermes-agent gateway run +``` + +Or with Docker Compose: + +```sh +docker compose pull +docker compose up -d ``` ## Skills and credential files @@ -60,3 +178,47 @@ docker run -d \ When using Docker as the execution environment (not the methods above, but when the agent runs commands inside a Docker sandbox), Hermes automatically bind-mounts the skills directory (`~/.hermes/skills/`) and any credential files declared by skills into the container as read-only volumes. This means skill scripts, templates, and references are available inside the sandbox without manual configuration. The same syncing happens for SSH and Modal backends — skills and credential files are uploaded via rsync or the Modal mount API before each command. + +## Troubleshooting + +### Container exits immediately + +Check logs: `docker logs hermes`. Common causes: +- Missing or invalid `.env` file — run interactively first to complete setup +- Port conflicts if running with exposed ports + +### "Permission denied" errors + +The container runs as root by default. If your host `~/.hermes/` was created by a non-root user, permissions should work. If you get errors, ensure the data directory is writable: + +```sh +chmod -R 755 ~/.hermes +``` + +### Browser tools not working + +Playwright needs shared memory. 
Add `--shm-size=1g` to your Docker run command: + +```sh +docker run -d \ + --name hermes \ + --shm-size=1g \ + -v ~/.hermes:/opt/data \ + nousresearch/hermes-agent gateway run +``` + +### Gateway not reconnecting after network issues + +The `--restart unless-stopped` flag handles most transient failures. If the gateway is stuck, restart the container: + +```sh +docker restart hermes +``` + +### Checking container health + +```sh +docker logs --tail 50 hermes # Recent logs +docker exec hermes hermes version # Verify version +docker stats hermes # Resource usage +``` diff --git a/website/docs/user-guide/features/mcp.md b/website/docs/user-guide/features/mcp.md index b48f4f656..b136af15c 100644 --- a/website/docs/user-guide/features/mcp.md +++ b/website/docs/user-guide/features/mcp.md @@ -168,9 +168,7 @@ So a server that exposes callable tools but no resources/prompts will not get th ## Per-server filtering -This is the main feature added by the PR work. - -You can now control which tools each MCP server contributes to Hermes. +You can control which tools each MCP server contributes to Hermes, allowing fine-grained management of your tool namespace. ### Disable a server entirely diff --git a/website/docs/user-guide/features/overview.md b/website/docs/user-guide/features/overview.md index 984758f66..568797dfc 100644 --- a/website/docs/user-guide/features/overview.md +++ b/website/docs/user-guide/features/overview.md @@ -33,6 +33,15 @@ Hermes Agent includes a rich set of capabilities that extend far beyond basic ch - **[Image Generation](image-generation.md)** — Generate images from text prompts using FAL.ai's FLUX 2 Pro model with automatic 2x upscaling via the Clarity Upscaler. - **[Voice & TTS](tts.md)** — Text-to-speech output and voice message transcription across all messaging platforms, with four provider options: Edge TTS (free), ElevenLabs, OpenAI TTS, and NeuTTS. 
+## Integrations + +- **[Provider Routing](provider-routing.md)** — Fine-grained control over which AI providers handle your requests. Optimize for cost, speed, or quality with sorting, whitelists, blacklists, and priority ordering. +- **[Fallback Providers](fallback-providers.md)** — Automatic failover to backup LLM providers when your primary model encounters errors, including independent fallback for auxiliary tasks like vision and compression. +- **[API Server](api-server.md)** — Expose Hermes as an OpenAI-compatible HTTP endpoint. Connect any frontend that speaks the OpenAI format — Open WebUI, LobeChat, LibreChat, and more. +- **[IDE Integration (ACP)](acp.md)** — Use Hermes inside ACP-compatible editors such as VS Code, Zed, and JetBrains. Chat, tool activity, file diffs, and terminal commands render inside your editor. +- **[Honcho Memory](honcho.md)** — AI-native persistent memory for cross-session user modeling and personalization via dialectic reasoning. +- **[RL Training](rl-training.md)** — Generate trajectory data from agent sessions for reinforcement learning and model fine-tuning. + ## Customization - **[Personality & SOUL.md](personality.md)** — Fully customizable agent personality. `SOUL.md` is the primary identity file — the first thing in the system prompt — and you can swap in built-in or custom `/personality` presets per session. diff --git a/website/docs/user-guide/features/plugins.md b/website/docs/user-guide/features/plugins.md index e13f7aef4..1b10faff7 100644 --- a/website/docs/user-guide/features/plugins.md +++ b/website/docs/user-guide/features/plugins.md @@ -25,6 +25,56 @@ Drop a directory into `~/.hermes/plugins/` with a `plugin.yaml` and Python code: Start Hermes — your tools appear alongside built-in tools. The model can call them immediately. +### Minimal working example + +Here is a complete plugin that adds a `hello_world` tool and logs every tool call via a hook. 
+ +**`~/.hermes/plugins/hello-world/plugin.yaml`** + +```yaml +name: hello-world +version: "1.0" +description: A minimal example plugin +``` + +**`~/.hermes/plugins/hello-world/__init__.py`** + +```python +"""Minimal Hermes plugin — registers a tool and a hook.""" + + +def register(ctx): + # --- Tool: hello_world --- + schema = { + "name": "hello_world", + "description": "Returns a friendly greeting for the given name.", + "parameters": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name to greet", + } + }, + "required": ["name"], + }, + } + + def handle_hello(params): + name = params.get("name", "World") + return f"Hello, {name}! 👋 (from the hello-world plugin)" + + ctx.register_tool("hello_world", schema, handle_hello) + + # --- Hook: log every tool call --- + def on_tool_call(tool_name, params, result): + print(f"[hello-world] tool called: {tool_name}") + + ctx.register_hook("post_tool_call", on_tool_call) +``` + +Drop both files into `~/.hermes/plugins/hello-world/`, restart Hermes, and the model can immediately call `hello_world`. The hook prints a log line after every tool invocation. + Project-local plugins under `./.hermes/plugins/` are disabled by default. Enable them only for trusted repositories by setting `HERMES_ENABLE_PROJECT_PLUGINS=true` before starting Hermes. 
## What plugins can do diff --git a/website/docs/user-guide/features/skins.md b/website/docs/user-guide/features/skins.md index cb8b38c7f..5aec20cdf 100644 --- a/website/docs/user-guide/features/skins.md +++ b/website/docs/user-guide/features/skins.md @@ -30,28 +30,150 @@ display: ## Built-in skins -| Skin | Description | Agent branding | -|------|-------------|----------------| -| `default` | Classic Hermes — gold and kawaii | `Hermes Agent` | -| `ares` | War-god theme — crimson and bronze | `Ares Agent` | -| `mono` | Monochrome — clean grayscale | `Hermes Agent` | -| `slate` | Cool blue — developer-focused | `Hermes Agent` | -| `poseidon` | Ocean-god theme — deep blue and seafoam | `Poseidon Agent` | -| `sisyphus` | Sisyphean theme — austere grayscale with persistence | `Sisyphus Agent` | -| `charizard` | Volcanic theme — burnt orange and ember | `Charizard Agent` | +| Skin | Description | Agent branding | Visual character | +|------|-------------|----------------|------------------| +| `default` | Classic Hermes — gold and kawaii | `Hermes Agent` | Warm gold borders, cornsilk text, kawaii faces in spinners. The familiar caduceus banner. Clean and inviting. | +| `ares` | War-god theme — crimson and bronze | `Ares Agent` | Deep crimson borders with bronze accents. Aggressive spinner verbs ("forging", "marching", "tempering steel"). Custom sword-and-shield ASCII art banner. | +| `mono` | Monochrome — clean grayscale | `Hermes Agent` | All grays — no color. Borders are `#555555`, text is `#c9d1d9`. Ideal for minimal terminal setups or screen recordings. | +| `slate` | Cool blue — developer-focused | `Hermes Agent` | Royal blue borders (`#4169e1`), soft blue text. Calm and professional. No custom spinner — uses default faces. | +| `poseidon` | Ocean-god theme — deep blue and seafoam | `Poseidon Agent` | Deep blue to seafoam gradient. Ocean-themed spinners ("charting currents", "sounding the depth"). Trident ASCII art banner. 
| +| `sisyphus` | Sisyphean theme — austere grayscale with persistence | `Sisyphus Agent` | Light grays with stark contrast. Boulder-themed spinners ("pushing uphill", "resetting the boulder", "enduring the loop"). Boulder-and-hill ASCII art banner. | +| `charizard` | Volcanic theme — burnt orange and ember | `Charizard Agent` | Warm burnt orange to ember gradient. Fire-themed spinners ("banking into the draft", "measuring burn"). Dragon-silhouette ASCII art banner. | -## What a skin can customize +## Complete list of configurable keys -| Area | Keys | -|------|------| -| Banner + response colors | `colors.banner_*`, `colors.response_border` | -| Spinner animation | `spinner.waiting_faces`, `spinner.thinking_faces`, `spinner.thinking_verbs`, `spinner.wings` | -| Branding text | `branding.agent_name`, `branding.welcome`, `branding.response_label`, `branding.prompt_symbol` | -| Tool activity prefix | `tool_prefix` | +### Colors (`colors:`) + +Controls all color values throughout the CLI. Values are hex color strings. + +| Key | Description | Default (`default` skin) | +|-----|-------------|--------------------------| +| `banner_border` | Panel border around the startup banner | `#CD7F32` (bronze) | +| `banner_title` | Title text color in the banner | `#FFD700` (gold) | +| `banner_accent` | Section headers in the banner (Available Tools, etc.) 
| `#FFBF00` (amber) | +| `banner_dim` | Muted text in the banner (separators, secondary labels) | `#B8860B` (dark goldenrod) | +| `banner_text` | Body text in the banner (tool names, skill names) | `#FFF8DC` (cornsilk) | +| `ui_accent` | General UI accent color (highlights, active elements) | `#FFBF00` | +| `ui_label` | UI labels and tags | `#4dd0e1` (teal) | +| `ui_ok` | Success indicators (checkmarks, completion) | `#4caf50` (green) | +| `ui_error` | Error indicators (failures, blocked) | `#ef5350` (red) | +| `ui_warn` | Warning indicators (caution, approval prompts) | `#ffa726` (orange) | +| `prompt` | Interactive prompt text color | `#FFF8DC` | +| `input_rule` | Horizontal rule above the input area | `#CD7F32` | +| `response_border` | Border around the agent's response box (ANSI escape) | `#FFD700` | +| `session_label` | Session label color | `#DAA520` | +| `session_border` | Session ID dim border color | `#8B8682` | + +### Spinner (`spinner:`) + +Controls the animated spinner shown while waiting for API responses. + +| Key | Type | Description | Example | +|-----|------|-------------|---------| +| `waiting_faces` | list of strings | Faces cycled while waiting for API response | `["(⚔)", "(⛨)", "(▲)"]` | +| `thinking_faces` | list of strings | Faces cycled during model reasoning | `["(⚔)", "(⌁)", "(<>)"]` | +| `thinking_verbs` | list of strings | Verbs shown in spinner messages | `["forging", "plotting", "hammering plans"]` | +| `wings` | list of [left, right] pairs | Decorative brackets around the spinner | `[["⟪⚔", "⚔⟫"], ["⟪▲", "▲⟫"]]` | + +When spinner values are empty (like in `default` and `mono`), hardcoded defaults from `display.py` are used. + +### Branding (`branding:`) + +Text strings used throughout the CLI interface. + +| Key | Description | Default | +|-----|-------------|---------| +| `agent_name` | Name shown in banner title and status display | `Hermes Agent` | +| `welcome` | Welcome message shown at CLI startup | `Welcome to Hermes Agent! 
Type your message or /help for commands.` | +| `goodbye` | Message shown on exit | `Goodbye! ⚕` | +| `response_label` | Label on the response box header | ` ⚕ Hermes ` | +| `prompt_symbol` | Symbol before the user input prompt | `❯ ` | +| `help_header` | Header text for the `/help` command output | `(^_^)? Available Commands` | + +### Other top-level keys + +| Key | Type | Description | Default | +|-----|------|-------------|---------| +| `tool_prefix` | string | Character prefixed to tool output lines in the CLI | `┊` | +| `tool_emojis` | dict | Per-tool emoji overrides for spinners and progress (`{tool_name: emoji}`) | `{}` | +| `banner_logo` | string | Rich-markup ASCII art logo (replaces the default HERMES_AGENT banner) | `""` | +| `banner_hero` | string | Rich-markup hero art (replaces the default caduceus art) | `""` | ## Custom skins -Create YAML files under `~/.hermes/skins/`. User skins inherit missing values from the built-in `default` skin. +Create YAML files under `~/.hermes/skins/`. User skins inherit missing values from the built-in `default` skin, so you only need to specify the keys you want to change. + +### Full custom skin YAML template + +```yaml +# ~/.hermes/skins/mytheme.yaml +# Complete skin template — all keys shown. Delete any you don't need; +# missing values automatically inherit from the 'default' skin. 
+ +name: mytheme +description: My custom theme + +colors: + banner_border: "#CD7F32" + banner_title: "#FFD700" + banner_accent: "#FFBF00" + banner_dim: "#B8860B" + banner_text: "#FFF8DC" + ui_accent: "#FFBF00" + ui_label: "#4dd0e1" + ui_ok: "#4caf50" + ui_error: "#ef5350" + ui_warn: "#ffa726" + prompt: "#FFF8DC" + input_rule: "#CD7F32" + response_border: "#FFD700" + session_label: "#DAA520" + session_border: "#8B8682" + +spinner: + waiting_faces: + - "(⚔)" + - "(⛨)" + - "(▲)" + thinking_faces: + - "(⚔)" + - "(⌁)" + - "(<>)" + thinking_verbs: + - "processing" + - "analyzing" + - "computing" + - "evaluating" + wings: + - ["⟪⚡", "⚡⟫"] + - ["⟪●", "●⟫"] + +branding: + agent_name: "My Agent" + welcome: "Welcome to My Agent! Type your message or /help for commands." + goodbye: "See you later! ⚡" + response_label: " ⚡ My Agent " + prompt_symbol: "⚡ ❯ " + help_header: "(⚡) Available Commands" + +tool_prefix: "┊" + +# Per-tool emoji overrides (optional) +tool_emojis: + terminal: "⚔" + web_search: "🔮" + read_file: "📄" + +# Custom ASCII art banners (optional, Rich markup supported) +# banner_logo: | +# [bold #FFD700] MY AGENT [/] +# banner_hero: | +# [#FFD700] Custom art here [/] +``` + +### Minimal custom skin example + +Since everything inherits from `default`, a minimal skin only needs to change what's different: ```yaml name: cyberpunk @@ -78,4 +200,7 @@ tool_prefix: "▏" - Built-in skins load from `hermes_cli/skin_engine.py`. - Unknown skins automatically fall back to `default`. -- `/skin` updates the active CLI theme immediately for the current session. \ No newline at end of file +- `/skin` updates the active CLI theme immediately for the current session. +- User skins in `~/.hermes/skins/` take precedence over built-in skins with the same name. +- Skin changes via `/skin` are session-only. To make a skin your permanent default, set it in `config.yaml`. 
+- The `banner_logo` and `banner_hero` fields support Rich console markup (e.g., `[bold #FF0000]text[/]`) for colored ASCII art. -- 2.43.0 From fb4b87f4af7783759e600d84b0b1fb2dff966ffb Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:33:21 -0700 Subject: [PATCH 057/385] chore: add claude-sonnet-4.6 to OpenRouter and Nous model lists (#4157) --- hermes_cli/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index ef2b3deb4..ed36823e4 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -27,6 +27,7 @@ GITHUB_MODELS_CATALOG_URL = COPILOT_MODELS_URL # (model_id, display description shown in menus) OPENROUTER_MODELS: list[tuple[str, str]] = [ ("anthropic/claude-opus-4.6", "recommended"), + ("anthropic/claude-sonnet-4.6", ""), ("anthropic/claude-sonnet-4.5", ""), ("anthropic/claude-haiku-4.5", ""), ("openai/gpt-5.4", ""), @@ -56,6 +57,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ _PROVIDER_MODELS: dict[str, list[str]] = { "nous": [ "anthropic/claude-opus-4.6", + "anthropic/claude-sonnet-4.6", "anthropic/claude-sonnet-4.5", "anthropic/claude-haiku-4.5", "openai/gpt-5.4", -- 2.43.0 From d30ea65c9bc65b8845f19c05e85e66ad10d3d7ec Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:36:56 -0700 Subject: [PATCH 058/385] fix: URL-based auth for third-party Anthropic endpoints + CI test fixes (#4148) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(tests): mock sys.stdin.isatty for cmd_model TTY guard * fix(tests): update camofox snapshot format + trajectory compressor mock path - test_browser_camofox: mock response now uses snapshot format (accessibility tree) - test_trajectory_compressor: mock _get_async_client instead of setting async_client directly * fix: URL-based auth detection for third-party Anthropic endpoints + test fixes Reverts the 
key-prefix approach from #4093 which broke JWT and managed key OAuth detection. Instead, detects third-party endpoints by URL: if base_url is set and isn't anthropic.com, it's a proxy (Azure AI Foundry, AWS Bedrock, etc.) that uses x-api-key regardless of key format. Auth decision chain is now: 1. _requires_bearer_auth(url) → MiniMax → Bearer 2. _is_third_party_anthropic_endpoint(url) → Azure/Bedrock → x-api-key 3. _is_oauth_token(key) → OAuth on direct Anthropic → Bearer 4. else → x-api-key Also includes test fixes from PR #4051 by @erosika: - Mock sys.stdin.isatty for cmd_model TTY guard - Update camofox snapshot format mock - Fix trajectory compressor async client mock path --------- Co-authored-by: Erosika --- agent/anthropic_adapter.py | 29 ++++++++++++++++++++++----- tests/agent/test_auxiliary_client.py | 4 ++-- tests/test_cli_provider_resolution.py | 1 + tests/test_trajectory_compressor.py | 5 +++-- tests/tools/test_browser_camofox.py | 7 ++++++- 5 files changed, 36 insertions(+), 10 deletions(-) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 879d1b34b..76bc8ff2e 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -152,20 +152,31 @@ def _is_oauth_token(key: str) -> bool: Regular API keys start with 'sk-ant-api'. Everything else (setup-tokens starting with 'sk-ant-oat', managed keys, JWTs, etc.) needs Bearer auth. - Azure AI Foundry keys (non sk-ant- prefixed) should use x-api-key, not Bearer. 
""" if not key: return False # Regular Console API keys use x-api-key header if key.startswith("sk-ant-api"): return False - # Azure AI Foundry keys don't start with sk-ant- at all — treat as regular API key - if not key.startswith("sk-ant-"): - return False - # Everything else (setup-tokens sk-ant-oat, managed keys, JWTs) uses Bearer auth + # Everything else (setup-tokens, managed keys, JWTs) uses Bearer auth return True +def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool: + """Return True for non-Anthropic endpoints using the Anthropic Messages API. + + Third-party proxies (Azure AI Foundry, AWS Bedrock, self-hosted) authenticate + with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth + detection should be skipped for these endpoints. + """ + if not base_url: + return False # No base_url = direct Anthropic API + normalized = base_url.rstrip("/").lower() + if "anthropic.com" in normalized: + return False # Direct Anthropic API — OAuth applies + return True # Any other endpoint is a third-party proxy + + def _requires_bearer_auth(base_url: str | None) -> bool: """Return True for Anthropic-compatible providers that require Bearer auth. @@ -209,6 +220,14 @@ def build_anthropic_client(api_key: str, base_url: str = None): kwargs["auth_token"] = api_key if _COMMON_BETAS: kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)} + elif _is_third_party_anthropic_endpoint(base_url): + # Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their + # own API keys with x-api-key auth. Skip OAuth detection — their keys + # don't follow Anthropic's sk-ant-* prefix convention and would be + # misclassified as OAuth tokens. + kwargs["api_key"] = api_key + if _COMMON_BETAS: + kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)} elif _is_oauth_token(api_key): # OAuth access token / setup-token → Bearer auth + Claude Code identity. 
# Anthropic routes OAuth requests based on user-agent and headers; diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 28ef57289..35dcee7ad 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -310,7 +310,7 @@ class TestExpiredCodexFallback: def test_hermes_oauth_file_sets_oauth_flag(self, monkeypatch): """OAuth-style tokens should get is_oauth=True (token is not sk-ant-api-*).""" # Mock resolve_anthropic_token to return an OAuth-style token - with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat01-hermes-oauth-test"), \ + with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="hermes-oauth-jwt-token"), \ patch("agent.anthropic_adapter.build_anthropic_client") as mock_build: mock_build.return_value = MagicMock() from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient @@ -364,7 +364,7 @@ class TestExpiredCodexFallback: def test_claude_code_oauth_env_sets_flag(self, monkeypatch): """CLAUDE_CODE_OAUTH_TOKEN env var should get is_oauth=True.""" - monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "sk-ant-oat01-cc-oauth-test") + monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "cc-oauth-token-test") monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False) with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build: mock_build.return_value = MagicMock() diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 667cd33a6..b9960f08c 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -424,6 +424,7 @@ def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys): monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider) monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: len(choices) - 1) + monkeypatch.setattr("sys.stdin", type("FakeTTY", (), {"isatty": lambda self: 
True})()) hermes_main.cmd_model(SimpleNamespace()) output = capsys.readouterr().out diff --git a/tests/test_trajectory_compressor.py b/tests/test_trajectory_compressor.py index c95a3af94..72708b8d9 100644 --- a/tests/test_trajectory_compressor.py +++ b/tests/test_trajectory_compressor.py @@ -405,12 +405,13 @@ class TestGenerateSummary: @pytest.mark.asyncio async def test_generate_summary_async_handles_none_content(self): tc = _make_compressor() - tc.async_client = MagicMock() - tc.async_client.chat.completions.create = AsyncMock( + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock( return_value=SimpleNamespace( choices=[SimpleNamespace(message=SimpleNamespace(content=None))] ) ) + tc._get_async_client = MagicMock(return_value=mock_client) metrics = TrajectoryMetrics() summary = await tc._generate_summary_async("Turn content", metrics) diff --git a/tests/tools/test_browser_camofox.py b/tests/tools/test_browser_camofox.py index a59862b9b..f9ff0e7c7 100644 --- a/tests/tools/test_browser_camofox.py +++ b/tests/tools/test_browser_camofox.py @@ -235,8 +235,13 @@ class TestCamofoxGetImages: mock_post.return_value = _mock_response(json_data={"tabId": "tab10", "url": "https://x.com"}) camofox_navigate("https://x.com", task_id="t10") + # camofox_get_images parses images from the accessibility tree snapshot + snapshot_text = ( + '- img "Logo"\n' + ' /url: https://x.com/img.png\n' + ) mock_get.return_value = _mock_response(json_data={ - "images": [{"src": "https://x.com/img.png", "alt": "Logo"}], + "snapshot": snapshot_text, }) result = json.loads(camofox_get_images(task_id="t10")) assert result["success"] is True -- 2.43.0 From 3a68ec31724b94e47c95375337b6177c67fe8b9c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:37:08 -0700 Subject: [PATCH 059/385] feat: add Fireworks context length detection support (#4158) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit - Add api.fireworks.ai to _URL_TO_PROVIDER for automatic provider detection - Add fireworks to PROVIDER_TO_MODELS_DEV mapped to 'fireworks-ai' (the correct models.dev provider key — original PR used 'fireworks' which would silently fail the lookup) Cherry-picked from PR #3989 with models.dev key fix. Co-authored-by: sroecker --- agent/model_metadata.py | 1 + agent/models_dev.py | 1 + 2 files changed, 2 insertions(+) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 0c121e6f6..7486afb04 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -176,6 +176,7 @@ _URL_TO_PROVIDER: Dict[str, str] = { "api.deepseek.com": "deepseek", "api.githubcopilot.com": "copilot", "models.github.ai": "copilot", + "api.fireworks.ai": "fireworks", } diff --git a/agent/models_dev.py b/agent/models_dev.py index 283e8018f..b4b699558 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -43,6 +43,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = { "opencode-zen": "opencode", "opencode-go": "opencode-go", "kilocode": "kilo", + "fireworks": "fireworks-ai", } -- 2.43.0 From c1ef9b225005dbcd589bc4f819160820a00b4393 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:37:17 -0700 Subject: [PATCH 060/385] fix(cli): ensure on_session_end hook fires on interrupted exits (#4159) - Add SIGTERM/SIGHUP signal handlers for graceful shutdown - Add BrokenPipeError to exit exception handling (SSH disconnects) - Fire on_session_end plugin hook in finally block, guarded by _agent_running to avoid double-firing on normal exits (the hook already fires per-turn from run_conversation) Co-authored-by: kelsia14 --- cli.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/cli.py b/cli.py index 1df9ed2ce..9f3776ab8 100644 --- a/cli.py +++ b/cli.py @@ -7447,6 +7447,20 @@ class HermesCLI: # Register atexit cleanup so resources are freed even on 
unexpected exit atexit.register(_run_cleanup) + # Register signal handlers for graceful shutdown on SSH disconnect / SIGTERM + def _signal_handler(signum, frame): + """Handle SIGHUP/SIGTERM by triggering graceful cleanup.""" + logger.debug("Received signal %s, triggering graceful shutdown", signum) + raise KeyboardInterrupt() + + try: + import signal as _signal + _signal.signal(_signal.SIGTERM, _signal_handler) + if hasattr(_signal, 'SIGHUP'): + _signal.signal(_signal.SIGHUP, _signal_handler) + except Exception: + pass # Signal handlers may fail in restricted environments + # Install a custom asyncio exception handler that suppresses the # "Event loop is closed" RuntimeError from httpx transport cleanup. # This is defense-in-depth — the primary fix is neuter_async_httpx_del @@ -7470,7 +7484,7 @@ class HermesCLI: except Exception: pass app.run() - except (EOFError, KeyboardInterrupt): + except (EOFError, KeyboardInterrupt, BrokenPipeError): pass finally: self._should_exit = True @@ -7509,6 +7523,23 @@ class HermesCLI: self._session_db.end_session(self.agent.session_id, "cli_close") except (Exception, KeyboardInterrupt) as e: logger.debug("Could not close session in DB: %s", e) + # Plugin hook: on_session_end — safety net for interrupted exits. + # run_conversation() already fires this per-turn on normal completion, + # so only fire here if the agent was mid-turn (_agent_running) when + # the exit occurred, meaning run_conversation's hook didn't fire. 
+ if self.agent and getattr(self, '_agent_running', False): + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( + "on_session_end", + session_id=self.agent.session_id, + completed=False, + interrupted=True, + model=getattr(self.agent, 'model', None), + platform=getattr(self.agent, 'platform', None) or "cli", + ) + except Exception: + pass _run_cleanup() self._print_exit_summary() -- 2.43.0 From f8e1ee10aa4f521fbcfd9193100620e8d4a63359 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:40:13 -0700 Subject: [PATCH 061/385] Fix profile list model display (#4160) Co-authored-by: txhno --- hermes_cli/profiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py index 7ef39d105..30da7eb1a 100644 --- a/hermes_cli/profiles.py +++ b/hermes_cli/profiles.py @@ -241,7 +241,7 @@ def _read_config_model(profile_dir: Path) -> tuple: if isinstance(model_cfg, str): return model_cfg, None if isinstance(model_cfg, dict): - return model_cfg.get("model"), model_cfg.get("provider") + return model_cfg.get("default") or model_cfg.get("model"), model_cfg.get("provider") return None, None except Exception: return None, None -- 2.43.0 From 1bd206ea5d03b1c9af19b39a3fde007f2429a06b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 21:10:05 -0700 Subject: [PATCH 062/385] feat: add /btw command for ephemeral side questions (#4161) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds /btw — ask a quick follow-up using the current session context without interrupting the main conversation. 
- Snapshots conversation history, answers with a no-tools agent - Response is not persisted to session history or DB - Runs in a background thread (CLI) / async task (gateway) - Per-session guard prevents concurrent /btw in gateway Implementation: - model_tools.py: enabled_toolsets=[] now correctly means "no tools" (was falsy, fell through to default "all tools") - run_agent.py: persist_session=False gates _persist_session() - cli.py: _handle_btw_command (background thread, Rich panel output) - gateway/run.py: _handle_btw_command + _run_btw_task (async task) - hermes_cli/commands.py: CommandDef for "btw" Inspired by PR #3504 by areu01or00, reimplemented cleanly on current main with the enabled_toolsets=[] fix and without the __btw_no_tools__ hack. --- cli.py | 117 +++++++++++++++++++++++++++++ gateway/run.py | 164 +++++++++++++++++++++++++++++++++++++++++ hermes_cli/commands.py | 2 + model_tools.py | 2 +- run_agent.py | 5 ++ 5 files changed, 289 insertions(+), 1 deletion(-) diff --git a/cli.py b/cli.py index 9f3776ab8..c2d118193 100644 --- a/cli.py +++ b/cli.py @@ -3904,6 +3904,8 @@ class HermesCLI: self._handle_stop_command() elif canonical == "background": self._handle_background_command(cmd_original) + elif canonical == "btw": + self._handle_btw_command(cmd_original) elif canonical == "queue": # Extract prompt after "/queue " or "/q " parts = cmd_original.split(None, 1) @@ -4190,6 +4192,121 @@ class HermesCLI: self._background_tasks[task_id] = thread thread.start() + def _handle_btw_command(self, cmd: str): + """Handle /btw — ephemeral side question using session context. + + Snapshots the current conversation history, spawns a no-tools agent in + a background thread, and prints the answer without persisting anything + to the main session. 
+ """ + parts = cmd.strip().split(maxsplit=1) + if len(parts) < 2 or not parts[1].strip(): + _cprint(" Usage: /btw ") + _cprint(" Example: /btw what module owns session title sanitization?") + _cprint(" Answers using session context. No tools, not persisted.") + return + + question = parts[1].strip() + task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}" + + if not self._ensure_runtime_credentials(): + _cprint(" (>_<) Cannot start /btw: no valid credentials.") + return + + turn_route = self._resolve_turn_agent_config(question) + history_snapshot = list(self.conversation_history) + + preview = question[:60] + ("..." if len(question) > 60 else "") + _cprint(f' 💬 /btw: "{preview}"') + + def run_btw(): + try: + btw_agent = AIAgent( + model=turn_route["model"], + api_key=turn_route["runtime"].get("api_key"), + base_url=turn_route["runtime"].get("base_url"), + provider=turn_route["runtime"].get("provider"), + api_mode=turn_route["runtime"].get("api_mode"), + acp_command=turn_route["runtime"].get("command"), + acp_args=turn_route["runtime"].get("args"), + max_iterations=8, + enabled_toolsets=[], + quiet_mode=True, + verbose_logging=False, + session_id=task_id, + platform="cli", + reasoning_config=self.reasoning_config, + providers_allowed=self._providers_only, + providers_ignored=self._providers_ignore, + providers_order=self._providers_order, + provider_sort=self._provider_sort, + provider_require_parameters=self._provider_require_params, + provider_data_collection=self._provider_data_collection, + fallback_model=self._fallback_model, + session_db=None, + skip_memory=True, + skip_context_files=True, + persist_session=False, + ) + + btw_prompt = ( + "[Ephemeral /btw side question. Answer using the conversation " + "context. No tools available. 
Be direct and concise.]\n\n" + + question + ) + result = btw_agent.run_conversation( + user_message=btw_prompt, + conversation_history=history_snapshot, + task_id=task_id, + sync_honcho=False, + ) + + response = (result.get("final_response") or "") if result else "" + if not response and result and result.get("error"): + response = f"Error: {result['error']}" + + # TUI refresh before printing + if self._app: + self._app.invalidate() + time.sleep(0.05) + print() + + if response: + try: + from hermes_cli.skin_engine import get_active_skin + _skin = get_active_skin() + _resp_color = _skin.get_color("response_border", "#4F6D4A") + except Exception: + _resp_color = "#4F6D4A" + + ChatConsole().print(Panel( + _rich_text_from_ansi(response), + title=f"[{_resp_color} bold]⚕ /btw[/]", + title_align="left", + border_style=_resp_color, + box=rich_box.HORIZONTALS, + padding=(1, 2), + )) + else: + _cprint(" 💬 /btw: (no response)") + + if self.bell_on_complete: + sys.stdout.write("\a") + sys.stdout.flush() + + except Exception as e: + if self._app: + self._app.invalidate() + time.sleep(0.05) + print() + _cprint(f" ❌ /btw failed: {e}") + finally: + if self._app: + self._invalidate(min_interval=0) + + thread = threading.Thread(target=run_btw, daemon=True, name=f"btw-{task_id}") + thread.start() + @staticmethod def _try_launch_chrome_debug(port: int, system: str) -> bool: """Try to launch Chrome/Chromium with remote debugging enabled. 
diff --git a/gateway/run.py b/gateway/run.py index 3e6f39be3..9cc42b794 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1962,6 +1962,9 @@ class GatewayRunner: if canonical == "background": return await self._handle_background_command(event) + if canonical == "btw": + return await self._handle_btw_command(event) + if canonical == "voice": return await self._handle_voice_command(event) @@ -4038,6 +4041,167 @@ class GatewayRunner: except Exception: pass + async def _handle_btw_command(self, event: MessageEvent) -> str: + """Handle /btw — ephemeral side question in the same chat.""" + question = event.get_command_args().strip() + if not question: + return ( + "Usage: /btw \n" + "Example: /btw what module owns session title sanitization?\n\n" + "Answers using session context. No tools, not persisted." + ) + + source = event.source + session_key = self._session_key_for_source(source) + + # Guard: one /btw at a time per session + existing = getattr(self, "_active_btw_tasks", {}).get(session_key) + if existing and not existing.done(): + return "A /btw is already running for this chat. Wait for it to finish." + + if not hasattr(self, "_active_btw_tasks"): + self._active_btw_tasks: dict = {} + + import uuid as _uuid + task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{_uuid.uuid4().hex[:6]}" + _task = asyncio.create_task(self._run_btw_task(question, source, session_key, task_id)) + self._background_tasks.add(_task) + self._active_btw_tasks[session_key] = _task + + def _cleanup(task): + self._background_tasks.discard(task) + if self._active_btw_tasks.get(session_key) is task: + self._active_btw_tasks.pop(session_key, None) + + _task.add_done_callback(_cleanup) + + preview = question[:60] + ("..." if len(question) > 60 else "") + return f'💬 /btw: "{preview}"\nReply will appear here shortly.' 
+ + async def _run_btw_task( + self, question: str, source, session_key: str, task_id: str, + ) -> None: + """Execute an ephemeral /btw side question and deliver the answer.""" + from run_agent import AIAgent + + adapter = self.adapters.get(source.platform) + if not adapter: + logger.warning("No adapter for platform %s in /btw task %s", source.platform, task_id) + return + + _thread_meta = {"thread_id": source.thread_id} if source.thread_id else None + + try: + runtime_kwargs = _resolve_runtime_agent_kwargs() + if not runtime_kwargs.get("api_key"): + await adapter.send( + source.chat_id, + "❌ /btw failed: no provider credentials configured.", + metadata=_thread_meta, + ) + return + + user_config = _load_gateway_config() + model = _resolve_gateway_model(user_config) + platform_key = _platform_config_key(source.platform) + reasoning_config = self._load_reasoning_config() + turn_route = self._resolve_turn_agent_config(question, model, runtime_kwargs) + pr = self._provider_routing + + # Snapshot history from running agent or stored transcript + running_agent = self._running_agents.get(session_key) + if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: + history_snapshot = list(getattr(running_agent, "_session_messages", []) or []) + else: + session_entry = self.session_store.get_or_create_session(source) + history_snapshot = self.session_store.load_transcript(session_entry.session_id) + + btw_prompt = ( + "[Ephemeral /btw side question. Answer using the conversation " + "context. No tools available. 
Be direct and concise.]\n\n" + + question + ) + + def run_sync(): + agent = AIAgent( + model=turn_route["model"], + **turn_route["runtime"], + max_iterations=8, + quiet_mode=True, + verbose_logging=False, + enabled_toolsets=[], + reasoning_config=reasoning_config, + providers_allowed=pr.get("only"), + providers_ignored=pr.get("ignore"), + providers_order=pr.get("order"), + provider_sort=pr.get("sort"), + provider_require_parameters=pr.get("require_parameters", False), + provider_data_collection=pr.get("data_collection"), + session_id=task_id, + platform=platform_key, + session_db=None, + fallback_model=self._fallback_model, + skip_memory=True, + skip_context_files=True, + persist_session=False, + ) + return agent.run_conversation( + user_message=btw_prompt, + conversation_history=history_snapshot, + task_id=task_id, + sync_honcho=False, + ) + + loop = asyncio.get_event_loop() + result = await loop.run_in_executor(None, run_sync) + + response = (result.get("final_response") or "") if result else "" + if not response and result and result.get("error"): + response = f"Error: {result['error']}" + if not response: + response = "(No response generated)" + + media_files, response = adapter.extract_media(response) + images, text_content = adapter.extract_images(response) + preview = question[:60] + ("..." 
if len(question) > 60 else "") + header = f'💬 /btw: "{preview}"\n\n' + + if text_content: + await adapter.send( + chat_id=source.chat_id, + content=header + text_content, + metadata=_thread_meta, + ) + elif not images and not media_files: + await adapter.send( + chat_id=source.chat_id, + content=header + "(No response generated)", + metadata=_thread_meta, + ) + + for image_url, alt_text in (images or []): + try: + await adapter.send_image(chat_id=source.chat_id, image_url=image_url, caption=alt_text) + except Exception: + pass + + for media_path in (media_files or []): + try: + await adapter.send_file(chat_id=source.chat_id, file_path=media_path) + except Exception: + pass + + except Exception as e: + logger.exception("/btw task %s failed", task_id) + try: + await adapter.send( + chat_id=source.chat_id, + content=f"❌ /btw failed: {e}", + metadata=_thread_meta, + ) + except Exception: + pass + async def _handle_reasoning_command(self, event: MessageEvent) -> str: """Handle /reasoning command — manage reasoning effort and display toggle. 
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index d9de67175..a167c4ac5 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -67,6 +67,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ gateway_only=True), CommandDef("background", "Run a prompt in the background", "Session", aliases=("bg",), args_hint=""), + CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session", + args_hint=""), CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session", aliases=("q",), args_hint=""), CommandDef("status", "Show session info", "Session", diff --git a/model_tools.py b/model_tools.py index c651d93ed..15b8852bc 100644 --- a/model_tools.py +++ b/model_tools.py @@ -252,7 +252,7 @@ def get_tool_definitions( # Determine which tool names the caller wants tools_to_include: set = set() - if enabled_toolsets: + if enabled_toolsets is not None: for toolset_name in enabled_toolsets: if validate_toolset(toolset_name): resolved = resolve_toolset(toolset_name) diff --git a/run_agent.py b/run_agent.py index 326f35654..6e8b23f24 100644 --- a/run_agent.py +++ b/run_agent.py @@ -508,6 +508,7 @@ class AIAgent: checkpoints_enabled: bool = False, checkpoint_max_snapshots: int = 50, pass_session_id: bool = False, + persist_session: bool = True, ): """ Initialize the AI Agent. @@ -573,6 +574,7 @@ class AIAgent: self.background_review_callback = None # Optional sync callback for gateway delivery self.skip_context_files = skip_context_files self.pass_session_id = pass_session_id + self.persist_session = persist_session self.log_prefix_chars = log_prefix_chars self.log_prefix = f"{log_prefix} " if log_prefix else "" # Store effective base URL for feature detection (prompt caching, reasoning, etc.) @@ -1700,7 +1702,10 @@ class AIAgent: """Save session state to both JSON log and SQLite on any exit path. Ensures conversations are never lost, even on errors or early returns. 
+ Skipped when ``persist_session=False`` (ephemeral helper flows). """ + if not self.persist_session: + return self._apply_persist_user_message_override(messages) self._session_messages = messages self._save_session_log(messages) -- 2.43.0 From 4d7e3c715703900e3bb47449e47fd175fa8adf9f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 21:17:09 -0700 Subject: [PATCH 063/385] fix(tests): provide model name in Codex 401 refresh tests for CI (#4166) CI has no config.yaml, so cron/gateway resolve an empty model name. The Codex Responses validator rejects empty models before the mock API call is reached. Provide explicit model in job dict and env var. --- tests/test_codex_execution_paths.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_codex_execution_paths.py b/tests/test_codex_execution_paths.py index 2a6044294..de33a0b91 100644 --- a/tests/test_codex_execution_paths.py +++ b/tests/test_codex_execution_paths.py @@ -112,7 +112,7 @@ def test_cron_run_job_codex_path_handles_internal_401_refresh(monkeypatch): _Codex401ThenSuccessAgent.last_init = {} success, output, final_response, error = cron_scheduler.run_job( - {"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping"} + {"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping", "model": "gpt-5.3-codex"} ) assert success is True @@ -139,6 +139,7 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch): }, ) monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false") + monkeypatch.setenv("HERMES_MODEL", "gpt-5.3-codex") _Codex401ThenSuccessAgent.refresh_attempts = 0 _Codex401ThenSuccessAgent.last_init = {} -- 2.43.0 From f890a94c1288b3324beb491aa9ed66276cad09aa Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 22:02:53 -0700 Subject: [PATCH 064/385] refactor: make config.yaml the single source of truth for endpoint URLs (#4165) MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OPENAI_BASE_URL was written to .env AND config.yaml, creating a dual-source confusion. Users (especially Docker) would see the URL in .env and assume that's where all config lives, then wonder why LLM_MODEL in .env didn't work. Changes: - Remove all 27 save_env_value("OPENAI_BASE_URL", ...) calls across main.py, setup.py, and tools_config.py - Remove OPENAI_BASE_URL env var reading from runtime_provider.py, cli.py, models.py, and gateway/run.py - Remove LLM_MODEL/HERMES_MODEL env var reading from gateway/run.py and auxiliary_client.py — config.yaml model.default is authoritative - Vision base URL now saved to config.yaml auxiliary.vision.base_url (both setup wizard and tools_config paths) - Tests updated to set config values instead of env vars Convention enforced: .env is for SECRETS only (API keys). All other configuration (model names, base URLs, provider selection) lives exclusively in config.yaml. --- agent/auxiliary_client.py | 12 ++-- cli.py | 8 +-- gateway/run.py | 19 +++--- hermes_cli/main.py | 43 ------------- hermes_cli/models.py | 2 +- hermes_cli/runtime_provider.py | 14 ++--- hermes_cli/setup.py | 61 +------------------ hermes_cli/tools_config.py | 7 ++- tests/agent/test_auxiliary_client.py | 44 ++++++++++--- tests/hermes_cli/test_setup_model_provider.py | 15 +++-- tests/test_cli_provider_resolution.py | 4 +- 11 files changed, 77 insertions(+), 152 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 0de263c41..4126994bb 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -7,7 +7,7 @@ the best available backend without duplicating fallback logic. Resolution order for text tasks (auto mode): 1. OpenRouter (OPENROUTER_API_KEY) 2. Nous Portal (~/.hermes/auth.json active provider) - 3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) + 3. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY) 4. 
Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex, wrapped to look like a chat.completions client) 5. Native Anthropic @@ -584,15 +584,11 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]: def _read_main_model() -> str: - """Read the user's configured main model from config/env. + """Read the user's configured main model from config.yaml. - Falls back through HERMES_MODEL → LLM_MODEL → config.yaml model.default - so the auxiliary client can use the same model as the main agent when no - dedicated auxiliary model is available. + config.yaml model.default is the single source of truth for the active + model. Environment variables are no longer consulted. """ - from_env = os.getenv("OPENAI_MODEL") or os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") - if from_env: - return from_env.strip() try: from hermes_cli.config import load_config cfg = load_config() diff --git a/cli.py b/cli.py index c2d118193..cf2a5f8c8 100644 --- a/cli.py +++ b/cli.py @@ -1124,9 +1124,9 @@ class HermesCLI: self.acp_args: list[str] = [] self.base_url = ( base_url - or os.getenv("OPENAI_BASE_URL") - or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"]) - ) + or CLI_CONFIG["model"].get("base_url", "") + or os.getenv("OPENROUTER_BASE_URL", "") + ) or None # Match key to resolved base_url: OpenRouter URL → prefer OPENROUTER_API_KEY, # custom endpoint → prefer OPENAI_API_KEY (issue #560). # Note: _ensure_runtime_credentials() re-resolves this before first use. 
@@ -3239,7 +3239,7 @@ class HermesCLI: print(f" {mid}{current_marker}") elif p["id"] == "custom": from hermes_cli.models import _get_custom_base_url - custom_url = _get_custom_base_url() or os.getenv("OPENAI_BASE_URL", "") + custom_url = _get_custom_base_url() if custom_url: print(f" endpoint: {custom_url}") if is_active: diff --git a/gateway/run.py b/gateway/run.py index 9cc42b794..48f5182cb 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -364,20 +364,19 @@ def _load_gateway_config() -> dict: def _resolve_gateway_model(config: dict | None = None) -> str: - """Read model from env/config — mirrors the resolution in _run_agent_sync. + """Read model from config.yaml — single source of truth. Without this, temporary AIAgent instances (memory flush, /compress) fall back to the hardcoded default which fails when the active provider is openai-codex. """ - model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "" cfg = config if config is not None else _load_gateway_config() model_cfg = cfg.get("model", {}) if isinstance(model_cfg, str): - model = model_cfg + return model_cfg elif isinstance(model_cfg, dict): - model = model_cfg.get("default") or model_cfg.get("model") or model - return model + return model_cfg.get("default") or model_cfg.get("model") or "" + return "" def _resolve_hermes_bin() -> Optional[list[str]]: @@ -2762,7 +2761,7 @@ class GatewayRunner: { "role": "session_meta", "tools": tool_defs or [], - "model": os.getenv("HERMES_MODEL", ""), + "model": _resolve_gateway_model(), "platform": source.platform.value if source.platform else "", "timestamp": ts, } @@ -3227,9 +3226,11 @@ class GatewayRunner: except Exception: current_provider = "openrouter" - # Detect custom endpoint - if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip(): - current_provider = "custom" + # Detect custom endpoint from config base_url + if current_provider == "openrouter": + _cfg_base = model_cfg.get("base_url", "") if isinstance(model_cfg, dict) 
else "" + if _cfg_base and "openrouter.ai" not in _cfg_base: + current_provider = "custom" current_label = _PROVIDER_LABELS.get(current_provider, current_provider) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 9dca21056..3bd6afa54 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1050,10 +1050,6 @@ def _model_flow_openrouter(config, current_model=""): selected = _prompt_model_selection(openrouter_models, current_model=current_model) if selected: - # Clear any custom endpoint and set provider to openrouter - if get_env_value("OPENAI_BASE_URL"): - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _save_model_choice(selected) # Update config provider and deactivate any OAuth provider @@ -1143,10 +1139,6 @@ def _model_flow_nous(config, current_model=""): # Reactivate Nous as the provider and update config inference_url = creds.get("base_url", "") _update_config_for_provider("nous", inference_url) - # Clear any custom endpoint that might conflict - if get_env_value("OPENAI_BASE_URL"): - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") print(f"Default model set to: {selected} (via Nous Portal)") else: print("No change.") @@ -1191,10 +1183,6 @@ def _model_flow_openai_codex(config, current_model=""): if selected: _save_model_choice(selected) _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) - # Clear custom endpoint env vars that would otherwise override Codex. 
- if get_env_value("OPENAI_BASE_URL"): - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") print(f"Default model set to: {selected} (via OpenAI Codex)") else: print("No change.") @@ -1275,11 +1263,6 @@ def _model_flow_custom(config): if probe.get("suggested_base_url"): print(f" If this server expects /v1, try base URL: {probe['suggested_base_url']}") - if base_url: - save_env_value("OPENAI_BASE_URL", effective_url) - if api_key: - save_env_value("OPENAI_API_KEY", api_key) - if model_name: _save_model_choice(model_name) @@ -1439,9 +1422,6 @@ def _model_flow_named_custom(config, provider_info): # If a model is saved, just activate immediately — no probing needed if saved_model: - save_env_value("OPENAI_BASE_URL", base_url) - if api_key: - save_env_value("OPENAI_API_KEY", api_key) _save_model_choice(saved_model) cfg = load_config() @@ -1513,9 +1493,6 @@ def _model_flow_named_custom(config, provider_info): return # Activate and save the model to the custom_providers entry - save_env_value("OPENAI_BASE_URL", base_url) - if api_key: - save_env_value("OPENAI_API_KEY", api_key) _save_model_choice(model_name) cfg = load_config() @@ -1829,11 +1806,6 @@ def _model_flow_copilot(config, current_model=""): catalog=catalog, api_key=api_key, ) or selected - # Clear stale custom-endpoint overrides so the Copilot provider wins cleanly. 
- if get_env_value("OPENAI_BASE_URL"): - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") - initial_cfg = load_config() current_effort = _current_reasoning_effort(initial_cfg) reasoning_efforts = github_model_reasoning_efforts( @@ -2058,11 +2030,6 @@ def _model_flow_kimi(config, current_model=""): selected = None if selected: - # Clear custom endpoint if set (avoid confusion) - if get_env_value("OPENAI_BASE_URL"): - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") - _save_model_choice(selected) # Update config with provider and base URL @@ -2165,11 +2132,6 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""): selected = None if selected: - # Clear custom endpoint if set (avoid confusion) - if get_env_value("OPENAI_BASE_URL"): - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") - _save_model_choice(selected) # Update config with provider and base URL @@ -2381,11 +2343,6 @@ def _model_flow_anthropic(config, current_model=""): selected = None if selected: - # Clear custom endpoint if set - if get_env_value("OPENAI_BASE_URL"): - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") - _save_model_choice(selected) # Update config with provider — clear base_url since diff --git a/hermes_cli/models.py b/hermes_cli/models.py index ed36823e4..5e1077837 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -349,7 +349,7 @@ def list_available_providers() -> list[dict[str, str]]: try: from hermes_cli.auth import get_auth_status, has_usable_secret if pid == "custom": - custom_base_url = _get_custom_base_url() or os.getenv("OPENAI_BASE_URL", "") + custom_base_url = _get_custom_base_url() or "" has_creds = bool(custom_base_url.strip()) elif pid == "openrouter": has_creds = has_usable_secret(os.getenv("OPENROUTER_API_KEY", "")) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 0c82805d5..644331baa 100644 --- 
a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -229,28 +229,22 @@ def _resolve_openrouter_runtime( requested_norm = (requested_provider or "").strip().lower() cfg_provider = cfg_provider.strip().lower() - env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip() env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip() + # Use config base_url when available and the provider context matches. + # OPENAI_BASE_URL env var is no longer consulted — config.yaml is + # the single source of truth for endpoint URLs. use_config_base_url = False if cfg_base_url.strip() and not explicit_base_url: if requested_norm == "auto": - if (not cfg_provider or cfg_provider == "auto") and not env_openai_base_url: + if not cfg_provider or cfg_provider == "auto": use_config_base_url = True elif requested_norm == "custom" and cfg_provider == "custom": - # provider: custom — use base_url from config (Fixes #1760). use_config_base_url = True - # When the user explicitly requested the openrouter provider, skip - # OPENAI_BASE_URL — it typically points to a custom / non-OpenRouter - # endpoint and would prevent switching back to OpenRouter (#874). - skip_openai_base = requested_norm == "openrouter" - - # For custom, prefer config base_url over env so config.yaml is honored (#1760). 
base_url = ( (explicit_base_url or "").strip() or (cfg_base_url.strip() if use_config_base_url else "") - or ("" if skip_openai_base else env_openai_base_url) or env_openrouter_base_url or OPENROUTER_BASE_URL ).rstrip("/") diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 503c2bcde..648876d92 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -941,10 +941,6 @@ def setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without an API key") - # Clear any custom endpoint if switching to OpenRouter - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") # Update config.yaml and deactivate any OAuth provider so the # resolver doesn't keep returning the old provider (e.g. Codex). @@ -1032,10 +1028,6 @@ def setup_model_provider(config: dict): mock_args = argparse.Namespace() _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"]) - # Clear custom endpoint vars that would override provider routing. 
- if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL) except SystemExit: @@ -1118,10 +1110,6 @@ def setup_model_provider(config: dict): " If you get billing errors, check your plan at https://open.bigmodel.cn/" ) - # Clear custom endpoint vars if switching - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _set_model_provider(config, "zai", zai_base_url) selected_base_url = zai_base_url @@ -1151,10 +1139,6 @@ def setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without an API key") - # Clear custom endpoint vars if switching - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _set_model_provider(config, "kimi-coding", pconfig.inference_base_url) selected_base_url = pconfig.inference_base_url @@ -1184,10 +1168,6 @@ def setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without an API key") - # Clear custom endpoint vars if switching - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _set_model_provider(config, "minimax", pconfig.inference_base_url) selected_base_url = pconfig.inference_base_url @@ -1217,10 +1197,6 @@ def setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without an API key") - # Clear custom endpoint vars if switching - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _set_model_provider(config, "minimax-cn", pconfig.inference_base_url) selected_base_url = pconfig.inference_base_url @@ -1250,10 +1226,6 @@ def setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without an API key") - # Clear custom endpoint vars if switching - if 
existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _set_model_provider(config, "kilocode", pconfig.inference_base_url) selected_base_url = pconfig.inference_base_url @@ -1352,10 +1324,6 @@ def setup_model_provider(config: dict): else: print_warning("Skipped — agent won't work without credentials") - # Clear custom endpoint vars if switching - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") # Don't save base_url for Anthropic — resolve_runtime_provider() # always hardcodes it. Stale base_urls contaminate other providers. _set_model_provider(config, "anthropic") @@ -1386,10 +1354,6 @@ def setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without an API key") - # Clear custom endpoint vars if switching - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _update_config_for_provider("ai-gateway", pconfig.inference_base_url, default_model="anthropic/claude-opus-4.6") _set_model_provider(config, "ai-gateway", pconfig.inference_base_url) @@ -1418,10 +1382,6 @@ def setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without an API key") - # Clear custom endpoint vars if switching - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _update_config_for_provider("alibaba", pconfig.inference_base_url, default_model="qwen3.5-plus") _set_model_provider(config, "alibaba", pconfig.inference_base_url) @@ -1451,10 +1411,6 @@ def setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without an API key") - # Clear custom endpoint vars if switching - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _set_model_provider(config, "opencode-zen", pconfig.inference_base_url) selected_base_url = pconfig.inference_base_url @@ -1484,10 +1440,6 @@ def 
setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without an API key") - # Clear custom endpoint vars if switching - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _set_model_provider(config, "opencode-go", pconfig.inference_base_url) selected_base_url = pconfig.inference_base_url @@ -1518,9 +1470,6 @@ def setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without a GitHub token or gh auth login") - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _set_model_provider(config, "copilot", pconfig.inference_base_url) selected_base_url = pconfig.inference_base_url @@ -1534,9 +1483,6 @@ def setup_model_provider(config: dict): print_info(f"Base marker: {pconfig.inference_base_url}") print() - if existing_custom: - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _set_model_provider(config, "copilot-acp", pconfig.inference_base_url) selected_base_url = pconfig.inference_base_url @@ -1553,9 +1499,6 @@ def setup_model_provider(config: dict): api_key = prompt(" HF Token", password=True) if api_key: save_env_value("HF_TOKEN", api_key) - # Clear OpenRouter env vars to prevent routing confusion - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") _set_model_provider(config, "huggingface", pconfig.inference_base_url) selected_base_url = pconfig.inference_base_url @@ -1632,7 +1575,9 @@ def setup_model_provider(config: dict): _oai_key = prompt(_api_key_label, password=True).strip() if _oai_key: save_env_value("OPENAI_API_KEY", _oai_key) - save_env_value("OPENAI_BASE_URL", _base_url) + # Save vision base URL to config (not .env — only secrets go there) + _vaux = config.setdefault("auxiliary", {}).setdefault("vision", {}) + _vaux["base_url"] = _base_url if "api.openai.com" in _base_url.lower(): _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", 
"gpt-4.1-mini", "gpt-4.1-nano"] _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"] diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 337b67fe8..8b443d5dc 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -983,8 +983,13 @@ def _configure_simple_requirements(ts_key: str): key_label = " OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else " API key" api_key = _prompt(key_label, password=True) if api_key and api_key.strip(): - save_env_value("OPENAI_BASE_URL", base_url) save_env_value("OPENAI_API_KEY", api_key.strip()) + # Save vision base URL to config (not .env — only secrets go there) + from hermes_cli.config import load_config, save_config + _cfg = load_config() + _aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {}) + _aux["base_url"] = base_url + save_config(_cfg) if "api.openai.com" in base_url.lower(): save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini") _print_success(" Saved") diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 35dcee7ad..a8197e574 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -465,9 +465,16 @@ class TestGetTextAuxiliaryClient: assert model == "google/gemini-3-flash-preview" def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir): - monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1") + config = { + "model": { + "provider": "custom", + "base_url": "http://localhost:1234/v1", + "default": "my-local-model", + } + } monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key") - monkeypatch.setenv("OPENAI_MODEL", "my-local-model") + monkeypatch.setattr("hermes_cli.config.load_config", lambda: config) + monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config) # Override the autouse monkeypatch for codex monkeypatch.setattr( "agent.auxiliary_client._read_codex_access_token", @@ -726,10 +733,17 @@ class TestVisionClientFallback: 
def test_vision_forced_main_uses_custom_endpoint(self, monkeypatch): """When explicitly forced to 'main', vision CAN use custom endpoint.""" + config = { + "model": { + "provider": "custom", + "base_url": "http://localhost:1234/v1", + "default": "my-local-model", + } + } monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main") - monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1") monkeypatch.setenv("OPENAI_API_KEY", "local-key") - monkeypatch.setenv("OPENAI_MODEL", "my-local-model") + monkeypatch.setattr("hermes_cli.config.load_config", lambda: config) + monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config) with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ patch("agent.auxiliary_client.OpenAI") as mock_openai: client, model = get_vision_auxiliary_client() @@ -827,9 +841,16 @@ class TestResolveForcedProvider: assert model is None def test_forced_main_uses_custom(self, monkeypatch): - monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1") + config = { + "model": { + "provider": "custom", + "base_url": "http://local:8080/v1", + "default": "my-local-model", + } + } monkeypatch.setenv("OPENAI_API_KEY", "local-key") - monkeypatch.setenv("OPENAI_MODEL", "my-local-model") + monkeypatch.setattr("hermes_cli.config.load_config", lambda: config) + monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config) with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ patch("agent.auxiliary_client.OpenAI") as mock_openai: client, model = _resolve_forced_provider("main") @@ -858,10 +879,17 @@ class TestResolveForcedProvider: def test_forced_main_skips_openrouter_nous(self, monkeypatch): """Even if OpenRouter key is set, 'main' skips it.""" + config = { + "model": { + "provider": "custom", + "base_url": "http://local:8080/v1", + "default": "my-local-model", + } + } monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") - monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1") 
monkeypatch.setenv("OPENAI_API_KEY", "local-key") - monkeypatch.setenv("OPENAI_MODEL", "my-local-model") + monkeypatch.setattr("hermes_cli.config.load_config", lambda: config) + monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config) with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ patch("agent.auxiliary_client.OpenAI") as mock_openai: client, model = _resolve_forced_provider("main") diff --git a/tests/hermes_cli/test_setup_model_provider.py b/tests/hermes_cli/test_setup_model_provider.py index 0acbfea51..76ba94374 100644 --- a/tests/hermes_cli/test_setup_model_provider.py +++ b/tests/hermes_cli/test_setup_model_provider.py @@ -129,16 +129,13 @@ def test_setup_custom_endpoint_saves_working_v1_base_url(tmp_path, monkeypatch): env = _read_env(tmp_path) - # _model_flow_custom saves env vars and config to disk - assert env.get("OPENAI_BASE_URL") == "http://localhost:8000/v1" - assert env.get("OPENAI_API_KEY") == "local-key" - - # The model config is saved as a dict by _model_flow_custom + # _model_flow_custom saves config to disk (base_url in config, not .env) reloaded = load_config() model_cfg = reloaded.get("model", {}) if isinstance(model_cfg, dict): assert model_cfg.get("provider") == "custom" assert model_cfg.get("default") == "llm" + assert model_cfg.get("base_url") == "http://localhost:8000/v1" def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tmp_path, monkeypatch): @@ -232,8 +229,11 @@ def test_setup_keep_current_anthropic_can_configure_openai_vision_default(tmp_pa env = _read_env(tmp_path) assert env.get("OPENAI_API_KEY") == "sk-openai" - assert env.get("OPENAI_BASE_URL") == "https://api.openai.com/v1" assert env.get("AUXILIARY_VISION_MODEL") == "gpt-4o-mini" + # Vision base URL saved to config.yaml, not .env + reloaded = load_config() + vision_cfg = reloaded.get("auxiliary", {}).get("vision", {}) + assert vision_cfg.get("base_url") == "https://api.openai.com/v1" def 
test_setup_copilot_uses_gh_auth_and_saves_provider(tmp_path, monkeypatch): @@ -433,8 +433,7 @@ def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config( env = _read_env(tmp_path) reloaded = load_config() - assert env.get("OPENAI_BASE_URL") == "" - assert env.get("OPENAI_API_KEY") == "" + # OPENAI_BASE_URL is no longer written/cleared in .env — config is authoritative assert reloaded["model"]["provider"] == "openai-codex" assert reloaded["model"]["default"] == "openai/gpt-5.3-codex" assert reloaded["model"]["base_url"] == "https://chatgpt.com/backend-api/codex" diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index b9960f08c..943a45a55 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -467,6 +467,6 @@ def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys): output = capsys.readouterr().out assert "Saving the working base URL instead" in output - assert saved_env["OPENAI_BASE_URL"] == "http://localhost:8000/v1" - assert saved_env["OPENAI_API_KEY"] == "local-key" + # OPENAI_BASE_URL is no longer saved to .env — config.yaml is authoritative + assert "OPENAI_BASE_URL" not in saved_env assert saved_env["MODEL"] == "llm" \ No newline at end of file -- 2.43.0 From 89d8127772b7e0710159a876e741ae7bfe502a46 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 30 Mar 2026 23:17:26 -0700 Subject: [PATCH 065/385] fix: setup wizard overwrites custom endpoint config (#4172) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _model_flow_custom() saved model.provider and model.base_url to disk via its own load_config/save_config cycle, but never updated the setup wizard's in-memory config dict. The wizard's final save_config(config) then overwrote the custom settings with the stale default string model value. 
Fix: after saving to disk, also mutate the caller's config dict so the wizard's final save preserves model.provider='custom' and the base_url. Both the model_name and no-model_name branches are covered. Added regression tests that simulate the full wizard flow including the final save_config(config) call — the step that was previously untested. --- hermes_cli/main.py | 15 ++++ tests/hermes_cli/test_setup.py | 122 +++++++++++++++++++++++++++++++-- 2 files changed, 133 insertions(+), 4 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 3bd6afa54..aad6e7f14 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1278,10 +1278,25 @@ def _model_flow_custom(config): save_config(cfg) deactivate_provider() + # Sync the caller's config dict so the setup wizard's final + # save_config(config) preserves our model settings. Without + # this, the wizard overwrites model.provider/base_url with + # the stale values from its own config dict (#4172). + config["model"] = dict(model) + print(f"Default model set to: {model_name} (via {effective_url})") else: if base_url or api_key: deactivate_provider() + # Even without a model name, persist the custom endpoint on the + # caller's config dict so the setup wizard doesn't lose it. + _caller_model = config.get("model") + if not isinstance(_caller_model, dict): + _caller_model = {"default": _caller_model} if _caller_model else {} + _caller_model["provider"] = "custom" + _caller_model["base_url"] = effective_url + _caller_model.pop("api_mode", None) + config["model"] = _caller_model print("Endpoint saved. 
Use `/model` in chat or `hermes model` to set a model.") # Auto-save to custom_providers so it appears in the menu next time diff --git a/tests/hermes_cli/test_setup.py b/tests/hermes_cli/test_setup.py index a4c85ba2b..c5a19f06f 100644 --- a/tests/hermes_cli/test_setup.py +++ b/tests/hermes_cli/test_setup.py @@ -118,11 +118,125 @@ def test_custom_setup_clears_active_oauth_provider(tmp_path, monkeypatch): # Core assertion: switching to custom endpoint clears OAuth provider assert get_active_provider() is None - # _model_flow_custom writes config via its own load/save cycle + # Simulate what the real setup wizard does: save_config(config) AFTER + # setup_model_provider returns. This is the step that previously + # overwrote model.provider/base_url (#4172). + save_config(config) + reloaded = load_config() - if isinstance(reloaded.get("model"), dict): - assert reloaded["model"].get("provider") == "custom" - assert reloaded["model"].get("default") == "custom/model" + assert isinstance(reloaded.get("model"), dict), ( + "model should be a dict after custom setup, not " + + repr(type(reloaded.get("model"))) + ) + assert reloaded["model"].get("provider") == "custom" + assert reloaded["model"].get("default") == "custom/model" + assert "custom.example" in reloaded["model"].get("base_url", "") + + +def test_custom_setup_preserves_provider_after_wizard_save_config( + tmp_path, monkeypatch +): + """Regression test for #4172: the setup wizard's final save_config(config) + must not overwrite model.provider/base_url that _model_flow_custom set. + + Simulates the full flow: + 1. load config (fresh install — model is a string) + 2. setup_model_provider picks custom + 3. wizard calls save_config(config) afterward + 4. 
verify resolve_requested_provider returns "custom" + """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + + config = load_config() + # Sanity: fresh install has model as a string + assert isinstance(config.get("model"), str) or config.get("model") is None + + def fake_prompt_choice(question, choices, default=0): + if question == "Select your inference provider:": + return 3 # Custom endpoint + tts_idx = _maybe_keep_current_tts(question, choices) + if tts_idx is not None: + return tts_idx + raise AssertionError(f"Unexpected prompt_choice call: {question}") + + monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) + + input_values = iter([ + "http://localhost:11434/v1", # Ollama URL + "", # no API key (local) + "qwen3.5:32b", # model name + "", # context length (auto-detect) + ]) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(input_values)) + monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *a, **kw: False) + monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) + monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *a, **kw: None) + monkeypatch.setattr( + "hermes_cli.models.probe_api_models", + lambda api_key, base_url: {"models": ["qwen3.5:32b"], "probed_url": base_url + "/models"}, + ) + + # Full wizard cycle + setup_model_provider(config) + save_config(config) # ← this is what the real wizard does + + # Verify config on disk + reloaded = load_config() + assert isinstance(reloaded["model"], dict) + assert reloaded["model"]["provider"] == "custom" + assert reloaded["model"]["base_url"] == "http://localhost:11434/v1" + assert reloaded["model"]["default"] == "qwen3.5:32b" + assert "api_mode" not in reloaded["model"] + + # Verify the runtime resolver sees "custom", not "auto" + from hermes_cli.runtime_provider import resolve_requested_provider + assert resolve_requested_provider() == "custom" + + +def 
test_custom_setup_no_model_name_still_preserves_endpoint( + tmp_path, monkeypatch +): + """When the user enters a URL and key but skips the model name, + model.provider and model.base_url must still survive the wizard's + final save_config(config).""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + + config = load_config() + + def fake_prompt_choice(question, choices, default=0): + if question == "Select your inference provider:": + return 3 + tts_idx = _maybe_keep_current_tts(question, choices) + if tts_idx is not None: + return tts_idx + raise AssertionError(f"Unexpected prompt_choice call: {question}") + + monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) + + input_values = iter([ + "http://192.168.1.50:8080/v1", # URL + "my-key", # API key + "", # no model name + "", # context length + ]) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(input_values)) + monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *a, **kw: False) + monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) + monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *a, **kw: None) + monkeypatch.setattr( + "hermes_cli.models.probe_api_models", + lambda api_key, base_url: {"models": None, "probed_url": base_url + "/models"}, + ) + + setup_model_provider(config) + save_config(config) + + reloaded = load_config() + assert isinstance(reloaded["model"], dict) + assert reloaded["model"]["provider"] == "custom" + assert reloaded["model"]["base_url"] == "http://192.168.1.50:8080/v1" def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, monkeypatch): -- 2.43.0 From 491e79bca9b02f48df72dcddc3f7cf7115fabdec Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 01:04:07 -0700 Subject: [PATCH 066/385] refactor: unify setup wizard provider selection with hermes model MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit setup_model_provider() had 800+ lines of duplicated provider handling that reimplemented the same credential prompting, OAuth flows, and model selection that hermes model already provides via the _model_flow_* functions. Every new provider had to be added in both places, and the two implementations diverged in config persistence (setup.py did raw YAML writes, _set_model_provider, and _update_config_for_provider depending on the provider — main.py used its own load/save cycle). This caused the #4172 bug: _model_flow_custom saved config to disk but the wizard's final save_config(config) overwrote it with stale values. Fix: extract the core of cmd_model() into select_provider_and_model() and have setup_model_provider() call it. After the call, re-sync the wizard's config dict from disk. Deletes ~800 lines of duplicated provider handling from setup.py. Also fixes cmd_model() double-AuthError crash on fresh installs with no API keys configured. --- hermes_cli/main.py | 16 +- hermes_cli/setup.py | 885 +----------------- tests/hermes_cli/test_setup.py | 344 +++---- tests/hermes_cli/test_setup_model_provider.py | 456 ++------- 4 files changed, 283 insertions(+), 1418 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index aad6e7f14..a12879a8b 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -829,6 +829,17 @@ def cmd_setup(args): def cmd_model(args): """Select default model — starts with provider selection, then model picker.""" _require_tty("model") + select_provider_and_model() + + +def select_provider_and_model(): + """Core provider selection + model picking logic. + + Shared by ``cmd_model`` (``hermes model``) and the setup wizard + (``setup_model_provider`` in setup.py). Handles the full flow: + provider picker, credential prompting, model selection, and config + persistence. 
+ """ from hermes_cli.auth import ( resolve_provider, AuthError, format_auth_error, ) @@ -858,7 +869,10 @@ def cmd_model(args): except AuthError as exc: warning = format_auth_error(exc) print(f"Warning: {warning} Falling back to auto provider detection.") - active = resolve_provider("auto") + try: + active = resolve_provider("auto") + except AuthError: + active = "openrouter" # no provider yet; show full picker # Detect custom endpoint if active == "openrouter" and get_env_value("OPENAI_BASE_URL"): diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 648876d92..50368915c 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -805,715 +805,49 @@ def _prompt_container_resources(config: dict): # ============================================================================= + def setup_model_provider(config: dict): - """Configure the inference provider and default model.""" - from hermes_cli.auth import ( - get_active_provider, - PROVIDER_REGISTRY, - fetch_nous_models, - resolve_nous_runtime_credentials, - _update_config_for_provider, - _login_openai_codex, - resolve_codex_runtime_credentials, - DEFAULT_CODEX_BASE_URL, - detect_external_credentials, - get_auth_status, - resolve_api_key_provider_credentials, - ) + """Configure the inference provider and default model. + + Delegates to ``cmd_model()`` (the same flow used by ``hermes model``) + for provider selection, credential prompting, and model picking. + This ensures a single code path for all provider setup — any new + provider added to ``hermes model`` is automatically available here. 
+ """ + from hermes_cli.config import load_config, save_config print_header("Inference Provider") print_info("Choose how to connect to your main chat model.") print() - existing_or = get_env_value("OPENROUTER_API_KEY") - active_oauth = get_active_provider() - existing_custom = get_env_value("OPENAI_BASE_URL") - copilot_status = get_auth_status("copilot") - copilot_acp_status = get_auth_status("copilot-acp") - - model_cfg = config.get("model") if isinstance(config.get("model"), dict) else {} - current_config_provider = str(model_cfg.get("provider") or "").strip().lower() or None - if current_config_provider == "auto": - current_config_provider = None - current_config_base_url = str(model_cfg.get("base_url") or "").strip() - - # Detect credentials from other CLI tools - detected_creds = detect_external_credentials() - if detected_creds: - print_info("Detected existing credentials:") - for cred in detected_creds: - if cred["provider"] == "openai-codex": - print_success(f' * {cred["label"]} -- select "OpenAI Codex" to use it') - else: - print_info(f" * {cred['label']}") + # Delegate to the shared hermes model flow — handles provider picker, + # credential prompting, model selection, and config persistence. + from hermes_cli.main import select_provider_and_model + try: + select_provider_and_model() + except (SystemExit, KeyboardInterrupt): print() + print_info("Provider setup skipped.") + except Exception as exc: + logger.debug("select_provider_and_model error during setup: %s", exc) + print_warning(f"Provider setup encountered an error: {exc}") + print_info("You can try again later with: hermes model") + + # Re-sync the wizard's config dict from what cmd_model saved to disk. + # This is critical: cmd_model writes to disk via its own load/save cycle, + # and the wizard's final save_config(config) must not overwrite those + # changes with stale values (#4172). 
+ _refreshed = load_config() + config["model"] = _refreshed.get("model", config.get("model")) + if _refreshed.get("custom_providers"): + config["custom_providers"] = _refreshed["custom_providers"] + + # Derive the selected provider for downstream steps (vision setup). + selected_provider = None + _m = config.get("model") + if isinstance(_m, dict): + selected_provider = _m.get("provider") - # Detect if any provider is already configured - has_any_provider = bool( - current_config_provider - or active_oauth - or existing_custom - or existing_or - or copilot_status.get("logged_in") - or copilot_acp_status.get("logged_in") - ) - - # Build "keep current" label - if current_config_provider == "custom": - custom_label = current_config_base_url or existing_custom - keep_label = ( - f"Keep current (Custom: {custom_label})" - if custom_label - else "Keep current (Custom)" - ) - elif current_config_provider == "openrouter": - keep_label = "Keep current (OpenRouter)" - elif current_config_provider and current_config_provider in PROVIDER_REGISTRY: - keep_label = f"Keep current ({PROVIDER_REGISTRY[current_config_provider].name})" - elif active_oauth and active_oauth in PROVIDER_REGISTRY: - keep_label = f"Keep current ({PROVIDER_REGISTRY[active_oauth].name})" - elif existing_custom: - keep_label = f"Keep current (Custom: {existing_custom})" - elif existing_or: - keep_label = "Keep current (OpenRouter)" - else: - keep_label = None # No provider configured — don't show "Keep current" - - provider_choices = [ - "OpenRouter API key (100+ models, pay-per-use)", - "Login with Nous Portal (Nous Research subscription — OAuth)", - "Login with OpenAI Codex", - "Custom OpenAI-compatible endpoint (self-hosted / VLLM / etc.)", - "Z.AI / GLM (Zhipu AI models)", - "Kimi / Moonshot (Kimi coding models)", - "MiniMax (global endpoint)", - "MiniMax China (mainland China endpoint)", - "Kilo Code (Kilo Gateway API)", - "Anthropic (Claude models — API key or Claude Code subscription)", - "AI Gateway 
(Vercel — 200+ models, pay-per-use)", - "Alibaba Cloud / DashScope (Qwen models via Anthropic-compatible API)", - "OpenCode Zen (35+ curated models, pay-as-you-go)", - "OpenCode Go (open models, $10/month subscription)", - "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)", - "GitHub Copilot ACP (spawns `copilot --acp --stdio`)", - "Hugging Face Inference Providers (20+ open models)", - ] - if keep_label: - provider_choices.append(keep_label) - - # Default to "Keep current" if a provider exists, otherwise OpenRouter (most common) - default_provider = len(provider_choices) - 1 if has_any_provider else 0 - - if not has_any_provider: - print_warning("An inference provider is required for Hermes to work.") - print() - - provider_idx = prompt_choice( - "Select your inference provider:", provider_choices, default_provider - ) - - # Track which provider was selected for model step - selected_provider = ( - None # "nous", "openai-codex", "openrouter", "custom", or None (keep) - ) - selected_base_url = None # deferred until after model selection - nous_models = [] # populated if Nous login succeeds - - if provider_idx == 0: # OpenRouter - selected_provider = "openrouter" - print() - print_header("OpenRouter API Key") - print_info("OpenRouter provides access to 100+ models from multiple providers.") - print_info("Get your API key at: https://openrouter.ai/keys") - - if existing_or: - print_info(f"Current: {existing_or[:8]}... 
(configured)") - if prompt_yes_no("Update OpenRouter API key?", False): - api_key = prompt(" OpenRouter API key", password=True) - if api_key: - save_env_value("OPENROUTER_API_KEY", api_key) - print_success("OpenRouter API key updated") - else: - api_key = prompt(" OpenRouter API key", password=True) - if api_key: - save_env_value("OPENROUTER_API_KEY", api_key) - print_success("OpenRouter API key saved") - else: - print_warning("Skipped - agent won't work without an API key") - - - # Update config.yaml and deactivate any OAuth provider so the - # resolver doesn't keep returning the old provider (e.g. Codex). - try: - from hermes_cli.auth import deactivate_provider - - deactivate_provider() - except Exception: - pass - import yaml - - config_path = ( - Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "config.yaml" - ) - try: - disk_cfg = {} - if config_path.exists(): - disk_cfg = yaml.safe_load(config_path.read_text()) or {} - model_section = disk_cfg.get("model", {}) - if isinstance(model_section, str): - model_section = {"default": model_section} - model_section["provider"] = "openrouter" - model_section.pop("base_url", None) # OpenRouter uses default URL - disk_cfg["model"] = model_section - config_path.write_text(yaml.safe_dump(disk_cfg, sort_keys=False)) - _set_model_provider(config, "openrouter") - except Exception as e: - logger.debug("Could not save provider to config.yaml: %s", e) - - elif provider_idx == 1: # Nous Portal (OAuth) - selected_provider = "nous" - print() - print_header("Nous Portal Login") - print_info("This will open your browser to authenticate with Nous Portal.") - print_info("You'll need a Nous Research account with an active subscription.") - print() - - try: - from hermes_cli.auth import _login_nous - import argparse - - mock_args = argparse.Namespace( - portal_url=None, - inference_url=None, - client_id=None, - scope=None, - no_browser=False, - timeout=15.0, - ca_bundle=None, - insecure=False, - ) - pconfig = 
PROVIDER_REGISTRY["nous"] - _login_nous(mock_args, pconfig) - _sync_model_from_disk(config) - - # Fetch models for the selection step - try: - creds = resolve_nous_runtime_credentials( - min_key_ttl_seconds=5 * 60, - timeout_seconds=15.0, - ) - # Use curated model list instead of full /models dump - from hermes_cli.models import _PROVIDER_MODELS - nous_models = _PROVIDER_MODELS.get("nous", []) - except Exception as e: - logger.debug("Could not fetch Nous models after login: %s", e) - - except SystemExit: - print_warning("Nous Portal login was cancelled or failed.") - print_info("You can try again later with: hermes model") - selected_provider = None - except Exception as e: - print_error(f"Login failed: {e}") - print_info("You can try again later with: hermes model") - selected_provider = None - - elif provider_idx == 2: # OpenAI Codex - selected_provider = "openai-codex" - print() - print_header("OpenAI Codex Login") - print() - - try: - import argparse - - mock_args = argparse.Namespace() - _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"]) - _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) - _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL) - except SystemExit: - print_warning("OpenAI Codex login was cancelled or failed.") - print_info("You can try again later with: hermes model") - selected_provider = None - except Exception as e: - print_error(f"Login failed: {e}") - print_info("You can try again later with: hermes model") - selected_provider = None - - elif provider_idx == 3: # Custom endpoint - selected_provider = "custom" - print() - print_header("Custom OpenAI-Compatible Endpoint") - print_info("Works with any API that follows OpenAI's chat completions spec") - print() - - # Reuse the shared custom endpoint flow from `hermes model`. - # This handles: URL/key/model/context-length prompts, endpoint probing, - # env saving, config.yaml updates, and custom_providers persistence. 
- from hermes_cli.main import _model_flow_custom - _model_flow_custom(config) - # _model_flow_custom handles model selection, config, env vars, - # and custom_providers. Keep selected_provider = "custom" so - # the model selection step below is skipped (line 1631 check) - # but vision and TTS setup still run. - - elif provider_idx == 4: # Z.AI / GLM - selected_provider = "zai" - print() - print_header("Z.AI / GLM API Key") - pconfig = PROVIDER_REGISTRY["zai"] - print_info(f"Provider: {pconfig.name}") - print_info("Get your API key at: https://open.bigmodel.cn/") - print() - - existing_key = get_env_value("GLM_API_KEY") or get_env_value("ZAI_API_KEY") - api_key = existing_key # will be overwritten if user enters a new one - if existing_key: - print_info(f"Current: {existing_key[:8]}... (configured)") - if prompt_yes_no("Update API key?", False): - new_key = prompt(" GLM API key", password=True) - if new_key: - api_key = new_key - save_env_value("GLM_API_KEY", api_key) - print_success("GLM API key updated") - else: - api_key = prompt(" GLM API key", password=True) - if api_key: - save_env_value("GLM_API_KEY", api_key) - print_success("GLM API key saved") - else: - print_warning("Skipped - agent won't work without an API key") - - # Detect the correct z.ai endpoint for this key. - # Z.AI has separate billing for general vs coding plans and - # global vs China endpoints — we probe to find the right one. - zai_base_url = pconfig.inference_base_url - if api_key: - print() - print_info("Detecting your z.ai endpoint...") - from hermes_cli.auth import detect_zai_endpoint - - detected = detect_zai_endpoint(api_key) - if detected: - zai_base_url = detected["base_url"] - print_success(f"Detected: {detected['label']} endpoint") - print_info(f" URL: {detected['base_url']}") - if detected["id"].startswith("coding"): - print_info( - f" Note: Coding Plan endpoint detected (default model: {detected['model']}). " - f"GLM-5 may still be available depending on your plan tier." 
- ) - save_env_value("GLM_BASE_URL", zai_base_url) - else: - print_warning("Could not verify any z.ai endpoint with this key.") - print_info(f" Using default: {zai_base_url}") - print_info( - " If you get billing errors, check your plan at https://open.bigmodel.cn/" - ) - - _set_model_provider(config, "zai", zai_base_url) - selected_base_url = zai_base_url - - elif provider_idx == 5: # Kimi / Moonshot - selected_provider = "kimi-coding" - print() - print_header("Kimi / Moonshot API Key") - pconfig = PROVIDER_REGISTRY["kimi-coding"] - print_info(f"Provider: {pconfig.name}") - print_info(f"Base URL: {pconfig.inference_base_url}") - print_info("Get your API key at: https://platform.moonshot.cn/") - print() - - existing_key = get_env_value("KIMI_API_KEY") - if existing_key: - print_info(f"Current: {existing_key[:8]}... (configured)") - if prompt_yes_no("Update API key?", False): - api_key = prompt(" Kimi API key", password=True) - if api_key: - save_env_value("KIMI_API_KEY", api_key) - print_success("Kimi API key updated") - else: - api_key = prompt(" Kimi API key", password=True) - if api_key: - save_env_value("KIMI_API_KEY", api_key) - print_success("Kimi API key saved") - else: - print_warning("Skipped - agent won't work without an API key") - - _set_model_provider(config, "kimi-coding", pconfig.inference_base_url) - selected_base_url = pconfig.inference_base_url - - elif provider_idx == 6: # MiniMax - selected_provider = "minimax" - print() - print_header("MiniMax API Key") - pconfig = PROVIDER_REGISTRY["minimax"] - print_info(f"Provider: {pconfig.name}") - print_info(f"Base URL: {pconfig.inference_base_url}") - print_info("Get your API key at: https://platform.minimaxi.com/") - print() - - existing_key = get_env_value("MINIMAX_API_KEY") - if existing_key: - print_info(f"Current: {existing_key[:8]}... 
(configured)") - if prompt_yes_no("Update API key?", False): - api_key = prompt(" MiniMax API key", password=True) - if api_key: - save_env_value("MINIMAX_API_KEY", api_key) - print_success("MiniMax API key updated") - else: - api_key = prompt(" MiniMax API key", password=True) - if api_key: - save_env_value("MINIMAX_API_KEY", api_key) - print_success("MiniMax API key saved") - else: - print_warning("Skipped - agent won't work without an API key") - - _set_model_provider(config, "minimax", pconfig.inference_base_url) - selected_base_url = pconfig.inference_base_url - - elif provider_idx == 7: # MiniMax China - selected_provider = "minimax-cn" - print() - print_header("MiniMax China API Key") - pconfig = PROVIDER_REGISTRY["minimax-cn"] - print_info(f"Provider: {pconfig.name}") - print_info(f"Base URL: {pconfig.inference_base_url}") - print_info("Get your API key at: https://platform.minimaxi.com/") - print() - - existing_key = get_env_value("MINIMAX_CN_API_KEY") - if existing_key: - print_info(f"Current: {existing_key[:8]}... 
(configured)") - if prompt_yes_no("Update API key?", False): - api_key = prompt(" MiniMax CN API key", password=True) - if api_key: - save_env_value("MINIMAX_CN_API_KEY", api_key) - print_success("MiniMax CN API key updated") - else: - api_key = prompt(" MiniMax CN API key", password=True) - if api_key: - save_env_value("MINIMAX_CN_API_KEY", api_key) - print_success("MiniMax CN API key saved") - else: - print_warning("Skipped - agent won't work without an API key") - - _set_model_provider(config, "minimax-cn", pconfig.inference_base_url) - selected_base_url = pconfig.inference_base_url - - elif provider_idx == 8: # Kilo Code - selected_provider = "kilocode" - print() - print_header("Kilo Code API Key") - pconfig = PROVIDER_REGISTRY["kilocode"] - print_info(f"Provider: {pconfig.name}") - print_info(f"Base URL: {pconfig.inference_base_url}") - print_info("Get your API key at: https://kilo.ai") - print() - - existing_key = get_env_value("KILOCODE_API_KEY") - if existing_key: - print_info(f"Current: {existing_key[:8]}... 
(configured)") - if prompt_yes_no("Update API key?", False): - api_key = prompt(" Kilo Code API key", password=True) - if api_key: - save_env_value("KILOCODE_API_KEY", api_key) - print_success("Kilo Code API key updated") - else: - api_key = prompt(" Kilo Code API key", password=True) - if api_key: - save_env_value("KILOCODE_API_KEY", api_key) - print_success("Kilo Code API key saved") - else: - print_warning("Skipped - agent won't work without an API key") - - _set_model_provider(config, "kilocode", pconfig.inference_base_url) - selected_base_url = pconfig.inference_base_url - - elif provider_idx == 9: # Anthropic - selected_provider = "anthropic" - print() - print_header("Anthropic Authentication") - from hermes_cli.auth import PROVIDER_REGISTRY - from hermes_cli.config import save_anthropic_api_key, save_anthropic_oauth_token - pconfig = PROVIDER_REGISTRY["anthropic"] - - # Check ALL credential sources - import os as _os - from agent.anthropic_adapter import ( - read_claude_code_credentials, is_claude_code_token_valid, - run_oauth_setup_token, - ) - cc_creds = read_claude_code_credentials() - cc_valid = bool(cc_creds and is_claude_code_token_valid(cc_creds)) - - existing_key = ( - get_env_value("ANTHROPIC_TOKEN") - or get_env_value("ANTHROPIC_API_KEY") - or _os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "") - ) - - has_creds = bool(existing_key) or cc_valid - needs_auth = not has_creds - - if has_creds: - if existing_key: - print_info(f"Current credentials: {existing_key[:12]}...") - elif cc_valid: - print_success("Found valid Claude Code credentials (auto-detected)") - - auth_choices = [ - "Use existing credentials", - "Reauthenticate (new OAuth login)", - "Cancel", - ] - choice_idx = prompt_choice("What would you like to do?", auth_choices, 0) - if choice_idx == 1: - needs_auth = True - elif choice_idx == 2: - pass # fall through to provider config - - if needs_auth: - auth_choices = [ - "Claude Pro/Max subscription (OAuth login)", - "Anthropic API key 
(pay-per-token)", - ] - auth_idx = prompt_choice("Choose authentication method:", auth_choices, 0) - - if auth_idx == 0: - # OAuth setup-token flow - try: - print() - print_info("Running 'claude setup-token' — follow the prompts below.") - print_info("A browser window will open for you to authorize access.") - print() - token = run_oauth_setup_token() - if token: - save_anthropic_oauth_token(token, save_fn=save_env_value) - print_success("OAuth credentials saved") - else: - # Subprocess completed but no token auto-detected - print() - token = prompt("Paste setup-token here (if displayed above)", password=True) - if token: - save_anthropic_oauth_token(token, save_fn=save_env_value) - print_success("Setup-token saved") - else: - print_warning("Skipped — agent won't work without credentials") - except FileNotFoundError: - print() - print_info("The 'claude' CLI is required for OAuth login.") - print() - print_info("To install: npm install -g @anthropic-ai/claude-code") - print_info("Then run: claude setup-token") - print_info("Or paste an existing setup-token below:") - print() - token = prompt("Setup-token (sk-ant-oat-...)", password=True) - if token: - save_anthropic_oauth_token(token, save_fn=save_env_value) - print_success("Setup-token saved") - else: - print_warning("Skipped — install Claude Code and re-run setup") - else: - print() - print_info("Get an API key at: https://console.anthropic.com/settings/keys") - print() - api_key = prompt("API key (sk-ant-...)", password=True) - if api_key: - save_anthropic_api_key(api_key, save_fn=save_env_value) - print_success("API key saved") - else: - print_warning("Skipped — agent won't work without credentials") - - # Don't save base_url for Anthropic — resolve_runtime_provider() - # always hardcodes it. Stale base_urls contaminate other providers. 
- _set_model_provider(config, "anthropic") - selected_base_url = "" - - elif provider_idx == 10: # AI Gateway - selected_provider = "ai-gateway" - print() - print_header("AI Gateway API Key") - pconfig = PROVIDER_REGISTRY["ai-gateway"] - print_info(f"Provider: {pconfig.name}") - print_info("Get your API key at: https://vercel.com/docs/ai-gateway") - print() - - existing_key = get_env_value("AI_GATEWAY_API_KEY") - if existing_key: - print_info(f"Current: {existing_key[:8]}... (configured)") - if prompt_yes_no("Update API key?", False): - api_key = prompt(" AI Gateway API key", password=True) - if api_key: - save_env_value("AI_GATEWAY_API_KEY", api_key) - print_success("AI Gateway API key updated") - else: - api_key = prompt(" AI Gateway API key", password=True) - if api_key: - save_env_value("AI_GATEWAY_API_KEY", api_key) - print_success("AI Gateway API key saved") - else: - print_warning("Skipped - agent won't work without an API key") - - _update_config_for_provider("ai-gateway", pconfig.inference_base_url, default_model="anthropic/claude-opus-4.6") - _set_model_provider(config, "ai-gateway", pconfig.inference_base_url) - - elif provider_idx == 11: # Alibaba Cloud / DashScope - selected_provider = "alibaba" - print() - print_header("Alibaba Cloud / DashScope API Key") - pconfig = PROVIDER_REGISTRY["alibaba"] - print_info(f"Provider: {pconfig.name}") - print_info("Get your API key at: https://modelstudio.console.alibabacloud.com/") - print() - - existing_key = get_env_value("DASHSCOPE_API_KEY") - if existing_key: - print_info(f"Current: {existing_key[:8]}... 
(configured)") - if prompt_yes_no("Update API key?", False): - new_key = prompt(" DashScope API key", password=True) - if new_key: - save_env_value("DASHSCOPE_API_KEY", new_key) - print_success("DashScope API key updated") - else: - new_key = prompt(" DashScope API key", password=True) - if new_key: - save_env_value("DASHSCOPE_API_KEY", new_key) - print_success("DashScope API key saved") - else: - print_warning("Skipped - agent won't work without an API key") - - _update_config_for_provider("alibaba", pconfig.inference_base_url, default_model="qwen3.5-plus") - _set_model_provider(config, "alibaba", pconfig.inference_base_url) - - elif provider_idx == 12: # OpenCode Zen - selected_provider = "opencode-zen" - print() - print_header("OpenCode Zen API Key") - pconfig = PROVIDER_REGISTRY["opencode-zen"] - print_info(f"Provider: {pconfig.name}") - print_info(f"Base URL: {pconfig.inference_base_url}") - print_info("Get your API key at: https://opencode.ai/auth") - print() - - existing_key = get_env_value("OPENCODE_ZEN_API_KEY") - if existing_key: - print_info(f"Current: {existing_key[:8]}... 
(configured)") - if prompt_yes_no("Update API key?", False): - api_key = prompt(" OpenCode Zen API key", password=True) - if api_key: - save_env_value("OPENCODE_ZEN_API_KEY", api_key) - print_success("OpenCode Zen API key updated") - else: - api_key = prompt(" OpenCode Zen API key", password=True) - if api_key: - save_env_value("OPENCODE_ZEN_API_KEY", api_key) - print_success("OpenCode Zen API key saved") - else: - print_warning("Skipped - agent won't work without an API key") - - _set_model_provider(config, "opencode-zen", pconfig.inference_base_url) - selected_base_url = pconfig.inference_base_url - - elif provider_idx == 13: # OpenCode Go - selected_provider = "opencode-go" - print() - print_header("OpenCode Go API Key") - pconfig = PROVIDER_REGISTRY["opencode-go"] - print_info(f"Provider: {pconfig.name}") - print_info(f"Base URL: {pconfig.inference_base_url}") - print_info("Get your API key at: https://opencode.ai/auth") - print() - - existing_key = get_env_value("OPENCODE_GO_API_KEY") - if existing_key: - print_info(f"Current: {existing_key[:8]}... 
(configured)") - if prompt_yes_no("Update API key?", False): - api_key = prompt(" OpenCode Go API key", password=True) - if api_key: - save_env_value("OPENCODE_GO_API_KEY", api_key) - print_success("OpenCode Go API key updated") - else: - api_key = prompt(" OpenCode Go API key", password=True) - if api_key: - save_env_value("OPENCODE_GO_API_KEY", api_key) - print_success("OpenCode Go API key saved") - else: - print_warning("Skipped - agent won't work without an API key") - - _set_model_provider(config, "opencode-go", pconfig.inference_base_url) - selected_base_url = pconfig.inference_base_url - - elif provider_idx == 14: # GitHub Copilot - selected_provider = "copilot" - print() - print_header("GitHub Copilot") - pconfig = PROVIDER_REGISTRY["copilot"] - print_info("Hermes can use GITHUB_TOKEN, GH_TOKEN, or your gh CLI login.") - print_info(f"Base URL: {pconfig.inference_base_url}") - print() - - copilot_creds = resolve_api_key_provider_credentials("copilot") - source = copilot_creds.get("source", "") - token = copilot_creds.get("api_key", "") - if token: - if source in ("GITHUB_TOKEN", "GH_TOKEN"): - print_info(f"Current: {token[:8]}... 
({source})") - elif source == "gh auth token": - print_info("Current: authenticated via `gh auth token`") - else: - print_info("Current: GitHub token configured") - else: - api_key = prompt(" GitHub token", password=True) - if api_key: - save_env_value("GITHUB_TOKEN", api_key) - print_success("GitHub token saved") - else: - print_warning("Skipped - agent won't work without a GitHub token or gh auth login") - - _set_model_provider(config, "copilot", pconfig.inference_base_url) - selected_base_url = pconfig.inference_base_url - - elif provider_idx == 15: # GitHub Copilot ACP - selected_provider = "copilot-acp" - print() - print_header("GitHub Copilot ACP") - pconfig = PROVIDER_REGISTRY["copilot-acp"] - print_info("Hermes will start `copilot --acp --stdio` for each request.") - print_info("Use HERMES_COPILOT_ACP_COMMAND or COPILOT_CLI_PATH to override the command.") - print_info(f"Base marker: {pconfig.inference_base_url}") - print() - - _set_model_provider(config, "copilot-acp", pconfig.inference_base_url) - selected_base_url = pconfig.inference_base_url - - elif provider_idx == 16: # Hugging Face Inference Providers - selected_provider = "huggingface" - print() - print_header("Hugging Face API Token") - pconfig = PROVIDER_REGISTRY["huggingface"] - print_info(f"Provider: {pconfig.name}") - print_info("Get your token at: https://huggingface.co/settings/tokens") - print_info("Required permission: 'Make calls to Inference Providers'") - print() - - api_key = prompt(" HF Token", password=True) - if api_key: - save_env_value("HF_TOKEN", api_key) - _set_model_provider(config, "huggingface", pconfig.inference_base_url) - selected_base_url = pconfig.inference_base_url - - # else: provider_idx == 17 (Keep current) — only shown when a provider already exists - # Normalize "keep current" to an explicit provider so downstream logic - # doesn't fall back to the generic OpenRouter/static-model path. 
- if selected_provider is None: - if current_config_provider: - selected_provider = current_config_provider - elif active_oauth and active_oauth in PROVIDER_REGISTRY: - selected_provider = active_oauth - elif existing_custom: - selected_provider = "custom" - elif existing_or: - selected_provider = "openrouter" # ── Vision & Image Analysis Setup ── # Keep setup aligned with the actual runtime resolver the vision tools use. @@ -1599,155 +933,6 @@ def setup_model_provider(config: dict): else: print_info("Skipped — add later with 'hermes setup' or configure AUXILIARY_VISION_* settings") - # ── Model Selection (adapts based on provider) ── - if selected_provider != "custom": # Custom already prompted for model name - print_header("Default Model") - - _raw_model = config.get("model", "anthropic/claude-opus-4.6") - current_model = ( - _raw_model.get("default", "anthropic/claude-opus-4.6") - if isinstance(_raw_model, dict) - else (_raw_model or "anthropic/claude-opus-4.6") - ) - print_info(f"Current: {current_model}") - - if selected_provider == "nous" and nous_models: - # Dynamic model list from Nous Portal - model_choices = [f"{m}" for m in nous_models] - model_choices.append("Custom model") - model_choices.append(f"Keep current ({current_model})") - - # Post-login validation: warn if current model might not be available - if current_model and current_model not in nous_models: - print_warning( - f"Your current model ({current_model}) may not be available via Nous Portal." - ) - print_info( - "Select a model from the list, or keep current to use it anyway." 
- ) - print() - - model_idx = prompt_choice( - "Select default model:", model_choices, len(model_choices) - 1 - ) - - if model_idx < len(nous_models): - _set_default_model(config, nous_models[model_idx]) - elif model_idx == len(model_choices) - 2: # Custom - model_name = prompt(" Model name") - if model_name: - _set_default_model(config, model_name) - # else: keep current - - elif selected_provider == "nous": - # Nous login succeeded but model fetch failed — prompt manually - # instead of falling through to the OpenRouter static list. - print_warning("Could not fetch available models from Nous Portal.") - print_info("Enter a Nous model name manually (e.g., claude-opus-4-6).") - custom = prompt(f" Model name (Enter to keep '{current_model}')") - if custom: - _set_default_model(config, custom) - elif selected_provider == "openai-codex": - from hermes_cli.codex_models import get_codex_model_ids - - codex_token = None - try: - codex_creds = resolve_codex_runtime_credentials() - codex_token = codex_creds.get("api_key") - except Exception as exc: - logger.debug("Could not resolve Codex runtime credentials for model list: %s", exc) - - codex_models = get_codex_model_ids(access_token=codex_token) - - model_choices = codex_models + [f"Keep current ({current_model})"] - default_codex = 0 - if current_model in codex_models: - default_codex = codex_models.index(current_model) - elif current_model: - default_codex = len(model_choices) - 1 - - model_idx = prompt_choice( - "Select default model:", model_choices, default_codex - ) - if model_idx < len(codex_models): - _set_default_model(config, codex_models[model_idx]) - elif model_idx == len(codex_models): - custom = prompt("Enter model name") - if custom: - _set_default_model(config, custom) - _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) - _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL) - elif selected_provider == "copilot-acp": - _setup_provider_model_selection( - config, 
selected_provider, current_model, - prompt_choice, prompt, - ) - model_cfg = _model_config_dict(config) - model_cfg["api_mode"] = "chat_completions" - config["model"] = model_cfg - elif selected_provider in ("copilot", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "ai-gateway", "opencode-zen", "opencode-go", "alibaba"): - _setup_provider_model_selection( - config, selected_provider, current_model, - prompt_choice, prompt, - ) - elif selected_provider == "anthropic": - # Try live model list first, fall back to static - from hermes_cli.models import provider_model_ids - live_models = provider_model_ids("anthropic") - anthropic_models = live_models if live_models else [ - "claude-opus-4-6", - "claude-sonnet-4-6", - "claude-haiku-4-5-20251001", - ] - model_choices = list(anthropic_models) - model_choices.append("Custom model") - model_choices.append(f"Keep current ({current_model})") - - keep_idx = len(model_choices) - 1 - model_idx = prompt_choice("Select default model:", model_choices, keep_idx) - - if model_idx < len(anthropic_models): - _set_default_model(config, anthropic_models[model_idx]) - elif model_idx == len(anthropic_models): - custom = prompt("Enter model name (e.g., claude-sonnet-4-20250514)") - if custom: - _set_default_model(config, custom) - # else: keep current - else: - # Static list for OpenRouter / fallback (from canonical list) - from hermes_cli.models import model_ids, menu_labels - - ids = model_ids() - model_choices = menu_labels() + [ - "Custom model", - f"Keep current ({current_model})", - ] - - keep_idx = len(model_choices) - 1 - model_idx = prompt_choice("Select default model:", model_choices, keep_idx) - - if model_idx < len(ids): - _set_default_model(config, ids[model_idx]) - elif model_idx == len(ids): # Custom - custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)") - if custom: - _set_default_model(config, custom) - # else: Keep current - - _final_model = config.get("model", "") - if _final_model: - _display 
= ( - _final_model.get("default", _final_model) - if isinstance(_final_model, dict) - else _final_model - ) - print_success(f"Model set to: {_display}") - - # Write provider+base_url to config.yaml only after model selection is complete. - # This prevents a race condition where the gateway picks up a new provider - # before the model name has been updated to match. - if selected_provider in ("copilot-acp", "copilot", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic") and selected_base_url is not None: - _update_config_for_provider(selected_provider, selected_base_url) save_config(config) diff --git a/tests/hermes_cli/test_setup.py b/tests/hermes_cli/test_setup.py index c5a19f06f..f4f13696c 100644 --- a/tests/hermes_cli/test_setup.py +++ b/tests/hermes_cli/test_setup.py @@ -1,6 +1,8 @@ +"""Tests for setup_model_provider — verifies the delegation to +select_provider_and_model() and config dict sync.""" import json -from hermes_cli.auth import _update_config_for_provider, get_active_provider +from hermes_cli.auth import get_active_provider from hermes_cli.config import load_config, save_config from hermes_cli.setup import setup_model_provider @@ -23,270 +25,198 @@ def _clear_provider_env(monkeypatch): monkeypatch.delenv(key, raising=False) +def _stub_tts(monkeypatch): + """Stub out TTS prompts so setup_model_provider doesn't block.""" + monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda q, c, d=0: ( + _maybe_keep_current_tts(q, c) if _maybe_keep_current_tts(q, c) is not None + else d + )) + monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *a, **kw: False) -def test_nous_oauth_setup_keeps_current_model_when_syncing_disk_provider( - tmp_path, monkeypatch -): + +def _write_model_config(tmp_path, provider, base_url="", model_name="test-model"): + """Simulate what a _model_flow_* function writes to disk.""" + cfg = load_config() + m = cfg.get("model") + if not isinstance(m, dict): + m = {"default": m} if m else {} + cfg["model"] = 
m + m["provider"] = provider + if base_url: + m["base_url"] = base_url + if model_name: + m["default"] = model_name + save_config(cfg) + + +def test_setup_delegates_to_select_provider_and_model(tmp_path, monkeypatch): + """setup_model_provider calls select_provider_and_model and syncs config.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) _clear_provider_env(monkeypatch) + _stub_tts(monkeypatch) config = load_config() - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - return 1 # Nous Portal - if question == "Configure vision:": - return len(choices) - 1 - if question == "Select default model:": - assert choices[-1] == "Keep current (anthropic/claude-opus-4.6)" - return len(choices) - 1 - tts_idx = _maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return tts_idx - raise AssertionError(f"Unexpected prompt_choice call: {question}") + def fake_select(): + _write_model_config(tmp_path, "custom", "http://localhost:11434/v1", "qwen3.5:32b") - monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "") - monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) - - def _fake_login_nous(*args, **kwargs): - auth_path = tmp_path / "auth.json" - auth_path.write_text(json.dumps({"active_provider": "nous", "providers": {}})) - _update_config_for_provider("nous", "https://inference.example.com/v1") - - monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_login_nous) - monkeypatch.setattr( - "hermes_cli.auth.resolve_nous_runtime_credentials", - lambda *args, **kwargs: { - "base_url": "https://inference.example.com/v1", - "api_key": "nous-key", - }, - ) - monkeypatch.setattr( - "hermes_cli.auth.fetch_nous_models", - lambda *args, **kwargs: ["gemini-3-flash"], - ) + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) setup_model_provider(config) 
save_config(config) reloaded = load_config() + assert isinstance(reloaded["model"], dict) + assert reloaded["model"]["provider"] == "custom" + assert reloaded["model"]["base_url"] == "http://localhost:11434/v1" + assert reloaded["model"]["default"] == "qwen3.5:32b" + +def test_setup_syncs_openrouter_from_disk(tmp_path, monkeypatch): + """When select_provider_and_model saves OpenRouter config to disk, + the wizard's config dict picks it up.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + _stub_tts(monkeypatch) + + config = load_config() + assert isinstance(config.get("model"), str) # fresh install + + def fake_select(): + _write_model_config(tmp_path, "openrouter", model_name="anthropic/claude-opus-4.6") + + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) + + setup_model_provider(config) + save_config(config) + + reloaded = load_config() + assert isinstance(reloaded["model"], dict) + assert reloaded["model"]["provider"] == "openrouter" + + +def test_setup_syncs_nous_from_disk(tmp_path, monkeypatch): + """Nous OAuth writes config to disk; wizard config dict must pick it up.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + _stub_tts(monkeypatch) + + config = load_config() + + def fake_select(): + _write_model_config(tmp_path, "nous", "https://inference.example.com/v1", "gemini-3-flash") + + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) + + setup_model_provider(config) + save_config(config) + + reloaded = load_config() assert isinstance(reloaded["model"], dict) assert reloaded["model"]["provider"] == "nous" assert reloaded["model"]["base_url"] == "https://inference.example.com/v1" - assert reloaded["model"]["default"] == "anthropic/claude-opus-4.6" -def test_custom_setup_clears_active_oauth_provider(tmp_path, monkeypatch): +def test_setup_custom_providers_synced(tmp_path, monkeypatch): + """custom_providers written by 
select_provider_and_model must survive.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) _clear_provider_env(monkeypatch) - - auth_path = tmp_path / "auth.json" - auth_path.write_text(json.dumps({"active_provider": "nous", "providers": {}})) + _stub_tts(monkeypatch) config = load_config() - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - return 3 - tts_idx = _maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return tts_idx - raise AssertionError(f"Unexpected prompt_choice call: {question}") + def fake_select(): + _write_model_config(tmp_path, "custom", "http://localhost:8080/v1", "llama3") + cfg = load_config() + cfg["custom_providers"] = [{"name": "Local", "base_url": "http://localhost:8080/v1"}] + save_config(cfg) - monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - - # _model_flow_custom uses builtins.input (URL, key, model, context_length) - input_values = iter([ - "https://custom.example/v1", - "custom-api-key", - "custom/model", - "", # context_length (blank = auto-detect) - ]) - monkeypatch.setattr("builtins.input", lambda _prompt="": next(input_values)) - monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False) - monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) - monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *args, **kwargs: None) - monkeypatch.setattr( - "hermes_cli.models.probe_api_models", - lambda api_key, base_url: {"models": ["m"], "probed_url": base_url + "/models"}, - ) + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) setup_model_provider(config) - - # Core assertion: switching to custom endpoint clears OAuth provider - assert get_active_provider() is None - - # Simulate what the real setup wizard does: save_config(config) AFTER - # setup_model_provider returns. 
This is the step that previously - # overwrote model.provider/base_url (#4172). save_config(config) reloaded = load_config() - assert isinstance(reloaded.get("model"), dict), ( - "model should be a dict after custom setup, not " - + repr(type(reloaded.get("model"))) - ) - assert reloaded["model"].get("provider") == "custom" - assert reloaded["model"].get("default") == "custom/model" - assert "custom.example" in reloaded["model"].get("base_url", "") + assert reloaded.get("custom_providers") == [{"name": "Local", "base_url": "http://localhost:8080/v1"}] -def test_custom_setup_preserves_provider_after_wizard_save_config( - tmp_path, monkeypatch -): - """Regression test for #4172: the setup wizard's final save_config(config) - must not overwrite model.provider/base_url that _model_flow_custom set. - - Simulates the full flow: - 1. load config (fresh install — model is a string) - 2. setup_model_provider picks custom - 3. wizard calls save_config(config) afterward - 4. verify resolve_requested_provider returns "custom" - """ +def test_setup_cancel_preserves_existing_config(tmp_path, monkeypatch): + """When the user cancels provider selection, existing config is preserved.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) _clear_provider_env(monkeypatch) + _stub_tts(monkeypatch) + + # Pre-set a provider + _write_model_config(tmp_path, "openrouter", model_name="gpt-4o") config = load_config() - # Sanity: fresh install has model as a string - assert isinstance(config.get("model"), str) or config.get("model") is None + assert config["model"]["provider"] == "openrouter" - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - return 3 # Custom endpoint - tts_idx = _maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return tts_idx - raise AssertionError(f"Unexpected prompt_choice call: {question}") + def fake_select(): + pass # user cancelled — nothing written to disk - 
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - - input_values = iter([ - "http://localhost:11434/v1", # Ollama URL - "", # no API key (local) - "qwen3.5:32b", # model name - "", # context length (auto-detect) - ]) - monkeypatch.setattr("builtins.input", lambda _prompt="": next(input_values)) - monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *a, **kw: False) - monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) - monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *a, **kw: None) - monkeypatch.setattr( - "hermes_cli.models.probe_api_models", - lambda api_key, base_url: {"models": ["qwen3.5:32b"], "probed_url": base_url + "/models"}, - ) - - # Full wizard cycle - setup_model_provider(config) - save_config(config) # ← this is what the real wizard does - - # Verify config on disk - reloaded = load_config() - assert isinstance(reloaded["model"], dict) - assert reloaded["model"]["provider"] == "custom" - assert reloaded["model"]["base_url"] == "http://localhost:11434/v1" - assert reloaded["model"]["default"] == "qwen3.5:32b" - assert "api_mode" not in reloaded["model"] - - # Verify the runtime resolver sees "custom", not "auto" - from hermes_cli.runtime_provider import resolve_requested_provider - assert resolve_requested_provider() == "custom" - - -def test_custom_setup_no_model_name_still_preserves_endpoint( - tmp_path, monkeypatch -): - """When the user enters a URL and key but skips the model name, - model.provider and model.base_url must still survive the wizard's - final save_config(config).""" - monkeypatch.setenv("HERMES_HOME", str(tmp_path)) - _clear_provider_env(monkeypatch) - - config = load_config() - - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - return 3 - tts_idx = _maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return tts_idx - raise AssertionError(f"Unexpected prompt_choice call: 
{question}") - - monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - - input_values = iter([ - "http://192.168.1.50:8080/v1", # URL - "my-key", # API key - "", # no model name - "", # context length - ]) - monkeypatch.setattr("builtins.input", lambda _prompt="": next(input_values)) - monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *a, **kw: False) - monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) - monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *a, **kw: None) - monkeypatch.setattr( - "hermes_cli.models.probe_api_models", - lambda api_key, base_url: {"models": None, "probed_url": base_url + "/models"}, - ) + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) setup_model_provider(config) save_config(config) reloaded = load_config() assert isinstance(reloaded["model"], dict) - assert reloaded["model"]["provider"] == "custom" - assert reloaded["model"]["base_url"] == "http://192.168.1.50:8080/v1" + assert reloaded["model"]["provider"] == "openrouter" + assert reloaded["model"]["default"] == "gpt-4o" + + +def test_setup_exception_in_select_gracefully_handled(tmp_path, monkeypatch): + """If select_provider_and_model raises, setup continues with existing config.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + _stub_tts(monkeypatch) + + config = load_config() + + def fake_select(): + raise RuntimeError("something broke") + + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) + + # Should not raise + setup_model_provider(config) + + +def test_setup_keyboard_interrupt_gracefully_handled(tmp_path, monkeypatch): + """KeyboardInterrupt during provider selection is handled.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + _stub_tts(monkeypatch) + + config = load_config() + + def fake_select(): + raise KeyboardInterrupt() + + 
monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) + + setup_model_provider(config) def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, monkeypatch): + """Codex model list fetching uses the runtime access token.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key") _clear_provider_env(monkeypatch) monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key") config = load_config() + _stub_tts(monkeypatch) - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - return 2 # OpenAI Codex - if question == "Select default model:": - return 0 - tts_idx = _maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return tts_idx - raise AssertionError(f"Unexpected prompt_choice call: {question}") + def fake_select(): + _write_model_config(tmp_path, "openai-codex", "https://api.openai.com/v1", "gpt-4o") - monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "") - monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) - monkeypatch.setattr("hermes_cli.auth._login_openai_codex", lambda *args, **kwargs: None) - monkeypatch.setattr( - "hermes_cli.auth.resolve_codex_runtime_credentials", - lambda *args, **kwargs: { - "base_url": "https://chatgpt.com/backend-api/codex", - "api_key": "codex-access-token", - }, - ) - - captured = {} - - def _fake_get_codex_model_ids(access_token=None): - captured["access_token"] = access_token - return ["gpt-5.2-codex", "gpt-5.2"] - - monkeypatch.setattr( - "hermes_cli.codex_models.get_codex_model_ids", - _fake_get_codex_model_ids, - ) + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) setup_model_provider(config) save_config(config) reloaded = load_config() - - assert captured["access_token"] == "codex-access-token" assert 
isinstance(reloaded["model"], dict) assert reloaded["model"]["provider"] == "openai-codex" - assert reloaded["model"]["default"] == "gpt-5.2-codex" - assert reloaded["model"]["base_url"] == "https://chatgpt.com/backend-api/codex" diff --git a/tests/hermes_cli/test_setup_model_provider.py b/tests/hermes_cli/test_setup_model_provider.py index 76ba94374..09116bc95 100644 --- a/tests/hermes_cli/test_setup_model_provider.py +++ b/tests/hermes_cli/test_setup_model_provider.py @@ -1,9 +1,14 @@ -"""Regression tests for interactive setup provider/model persistence.""" +"""Regression tests for interactive setup provider/model persistence. + +Since setup_model_provider delegates to select_provider_and_model() +from hermes_cli.main, these tests mock the delegation point and verify +that the setup wizard correctly syncs config from disk after the call. +""" from __future__ import annotations from hermes_cli.config import load_config, save_config, save_env_value -from hermes_cli.setup import _print_setup_summary, setup_model_provider +from hermes_cli.setup import setup_model_provider def _maybe_keep_current_tts(question, choices): @@ -13,19 +18,6 @@ def _maybe_keep_current_tts(question, choices): return len(choices) - 1 -def _read_env(home): - env_path = home / ".env" - data = {} - if not env_path.exists(): - return data - for line in env_path.read_text().splitlines(): - if not line or line.startswith("#") or "=" not in line: - continue - k, v = line.split("=", 1) - data[k] = v - return data - - def _clear_provider_env(monkeypatch): for key in ( "HERMES_INFERENCE_PROVIDER", @@ -44,429 +36,173 @@ def _clear_provider_env(monkeypatch): monkeypatch.delenv(key, raising=False) +def _stub_tts(monkeypatch): + monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda q, c, d=0: ( + _maybe_keep_current_tts(q, c) if _maybe_keep_current_tts(q, c) is not None + else d + )) + monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *a, **kw: False) + + +def 
_write_model_config(provider, base_url="", model_name="test-model"): + """Simulate what a _model_flow_* function writes to disk.""" + cfg = load_config() + m = cfg.get("model") + if not isinstance(m, dict): + m = {"default": m} if m else {} + cfg["model"] = m + m["provider"] = provider + if base_url: + m["base_url"] = base_url + else: + m.pop("base_url", None) + if model_name: + m["default"] = model_name + m.pop("api_mode", None) + save_config(cfg) + + def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, monkeypatch): """Keep-current custom should not fall through to the generic model menu.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) _clear_provider_env(monkeypatch) - save_env_value("OPENAI_BASE_URL", "https://example.invalid/v1") - save_env_value("OPENAI_API_KEY", "custom-key") + _stub_tts(monkeypatch) + + # Pre-set custom provider + _write_model_config("custom", "http://localhost:8080/v1", "local-model") config = load_config() - config["model"] = { - "default": "custom/model", - "provider": "custom", - "base_url": "https://example.invalid/v1", - } - save_config(config) + assert config["model"]["provider"] == "custom" - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - assert choices[-1] == "Keep current (Custom: https://example.invalid/v1)" - return len(choices) - 1 - tts_idx = _maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return tts_idx - raise AssertionError("Model menu should not appear for keep-current custom") + def fake_select(): + pass # user chose "cancel" or "keep current" - monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "") - monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False) - monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None) - 
monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) setup_model_provider(config) save_config(config) reloaded = load_config() + assert isinstance(reloaded["model"], dict) assert reloaded["model"]["provider"] == "custom" - assert reloaded["model"]["default"] == "custom/model" - assert reloaded["model"]["base_url"] == "https://example.invalid/v1" + assert reloaded["model"]["base_url"] == "http://localhost:8080/v1" -def test_setup_custom_endpoint_saves_working_v1_base_url(tmp_path, monkeypatch): +def test_setup_keep_current_config_provider_uses_provider_specific_model_menu( + tmp_path, monkeypatch +): + """Keeping current provider preserves the config on disk.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) _clear_provider_env(monkeypatch) + _stub_tts(monkeypatch) + + _write_model_config("zai", "https://open.bigmodel.cn/api/paas/v4", "glm-5") config = load_config() - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - return 3 # Custom endpoint - if question == "Configure vision:": - return len(choices) - 1 # Skip - tts_idx = _maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return tts_idx - raise AssertionError(f"Unexpected prompt_choice call: {question}") + def fake_select(): + pass # keep current - # _model_flow_custom uses builtins.input (URL, key, model, context_length) - input_values = iter([ - "http://localhost:8000", - "local-key", - "llm", - "", # context_length (blank = auto-detect) - ]) - monkeypatch.setattr("builtins.input", lambda _prompt="": next(input_values)) - - monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False) - monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None) - monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", 
lambda: []) - monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: []) - monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *args, **kwargs: None) - monkeypatch.setattr( - "hermes_cli.models.probe_api_models", - lambda api_key, base_url: { - "models": ["llm"], - "probed_url": "http://localhost:8000/v1/models", - "resolved_base_url": "http://localhost:8000/v1", - "suggested_base_url": "http://localhost:8000/v1", - "used_fallback": True, - }, - ) + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) setup_model_provider(config) + save_config(config) - env = _read_env(tmp_path) - - # _model_flow_custom saves config to disk (base_url in config, not .env) reloaded = load_config() - model_cfg = reloaded.get("model", {}) - if isinstance(model_cfg, dict): - assert model_cfg.get("provider") == "custom" - assert model_cfg.get("default") == "llm" - assert model_cfg.get("base_url") == "http://localhost:8000/v1" - - -def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tmp_path, monkeypatch): - """Keep-current should respect config-backed providers, not fall back to OpenRouter.""" - monkeypatch.setenv("HERMES_HOME", str(tmp_path)) - _clear_provider_env(monkeypatch) - - config = load_config() - config["model"] = { - "default": "claude-opus-4-6", - "provider": "anthropic", - } - save_config(config) - - captured = {"provider_choices": None, "model_choices": None} - - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - captured["provider_choices"] = list(choices) - assert choices[-1] == "Keep current (Anthropic)" - return len(choices) - 1 - if question == "Configure vision:": - assert question == "Configure vision:" - assert choices[-1] == "Skip for now" - return len(choices) - 1 - if question == "Select default model:": - captured["model_choices"] = list(choices) - return len(choices) - 1 # keep current model - tts_idx = 
_maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return tts_idx - raise AssertionError(f"Unexpected prompt_choice call: {question}") - - monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "") - monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False) - monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None) - monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) - monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: []) - monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: []) - - setup_model_provider(config) - save_config(config) - - assert captured["provider_choices"] is not None - assert captured["model_choices"] is not None - assert captured["model_choices"][0] == "claude-opus-4-6" - assert "anthropic/claude-opus-4.6 (recommended)" not in captured["model_choices"] - - -def test_setup_keep_current_anthropic_can_configure_openai_vision_default(tmp_path, monkeypatch): - monkeypatch.setenv("HERMES_HOME", str(tmp_path)) - _clear_provider_env(monkeypatch) - - config = load_config() - config["model"] = { - "default": "claude-opus-4-6", - "provider": "anthropic", - } - save_config(config) - - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - assert choices[-1] == "Keep current (Anthropic)" - return len(choices) - 1 - if question == "Configure vision:": - return 1 - if question == "Select vision model:": - assert choices[-1] == "Use default (gpt-4o-mini)" - return len(choices) - 1 - if question == "Select default model:": - assert choices[-1] == "Keep current (claude-opus-4-6)" - return len(choices) - 1 - tts_idx = _maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return tts_idx - raise AssertionError(f"Unexpected prompt_choice call: {question}") - - 
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - monkeypatch.setattr( - "hermes_cli.setup.prompt", - lambda message, *args, **kwargs: "sk-openai" if "OpenAI API key" in message else "", - ) - monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False) - monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None) - monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) - monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: []) - monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: []) - - setup_model_provider(config) - env = _read_env(tmp_path) - - assert env.get("OPENAI_API_KEY") == "sk-openai" - assert env.get("AUXILIARY_VISION_MODEL") == "gpt-4o-mini" - # Vision base URL saved to config.yaml, not .env - reloaded = load_config() - vision_cfg = reloaded.get("auxiliary", {}).get("vision", {}) - assert vision_cfg.get("base_url") == "https://api.openai.com/v1" + assert isinstance(reloaded["model"], dict) + assert reloaded["model"]["provider"] == "zai" def test_setup_copilot_uses_gh_auth_and_saves_provider(tmp_path, monkeypatch): + """Copilot provider saves correctly through delegation.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) _clear_provider_env(monkeypatch) + _stub_tts(monkeypatch) config = load_config() - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - assert choices[14] == "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)" - return 14 - if question == "Select default model:": - assert "gpt-4.1" in choices - assert "gpt-5.4" in choices - return choices.index("gpt-5.4") - if question == "Select reasoning effort:": - assert "low" in choices - assert "high" in choices - return choices.index("high") - if question == "Configure vision:": - return len(choices) - 1 - tts_idx = _maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return 
tts_idx - raise AssertionError(f"Unexpected prompt_choice call: {question}") + def fake_select(): + _write_model_config("copilot", "https://models.github.ai/inference/v1", "gpt-4o") - def fake_prompt(message, *args, **kwargs): - raise AssertionError(f"Unexpected prompt call: {message}") - - def fake_get_auth_status(provider_id): - if provider_id == "copilot": - return {"logged_in": True} - return {"logged_in": False} - - monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - monkeypatch.setattr("hermes_cli.setup.prompt", fake_prompt) - monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False) - monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None) - monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) - monkeypatch.setattr("hermes_cli.auth.get_auth_status", fake_get_auth_status) - monkeypatch.setattr( - "hermes_cli.auth.resolve_api_key_provider_credentials", - lambda provider_id: { - "provider": provider_id, - "api_key": "gh-cli-token", - "base_url": "https://api.githubcopilot.com", - "source": "gh auth token", - }, - ) - monkeypatch.setattr( - "hermes_cli.models.fetch_github_model_catalog", - lambda api_key: [ - { - "id": "gpt-4.1", - "capabilities": {"type": "chat", "supports": {}}, - "supported_endpoints": ["/chat/completions"], - }, - { - "id": "gpt-5.4", - "capabilities": {"type": "chat", "supports": {"reasoning_effort": ["low", "medium", "high"]}}, - "supported_endpoints": ["/responses"], - }, - ], - ) - monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: []) + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) setup_model_provider(config) save_config(config) - env = _read_env(tmp_path) reloaded = load_config() - - assert env.get("GITHUB_TOKEN") is None + assert isinstance(reloaded["model"], dict) assert reloaded["model"]["provider"] == "copilot" - assert reloaded["model"]["base_url"] == 
"https://api.githubcopilot.com" - assert reloaded["model"]["default"] == "gpt-5.4" - assert reloaded["model"]["api_mode"] == "codex_responses" - assert reloaded["agent"]["reasoning_effort"] == "high" def test_setup_copilot_acp_uses_model_picker_and_saves_provider(tmp_path, monkeypatch): + """Copilot ACP provider saves correctly through delegation.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) _clear_provider_env(monkeypatch) + _stub_tts(monkeypatch) config = load_config() - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - assert choices[15] == "GitHub Copilot ACP (spawns `copilot --acp --stdio`)" - return 15 - if question == "Select default model:": - assert "gpt-4.1" in choices - assert "gpt-5.4" in choices - return choices.index("gpt-5.4") - if question == "Configure vision:": - return len(choices) - 1 - tts_idx = _maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return tts_idx - raise AssertionError(f"Unexpected prompt_choice call: {question}") + def fake_select(): + _write_model_config("copilot-acp", "", "claude-sonnet-4") - def fake_prompt(message, *args, **kwargs): - raise AssertionError(f"Unexpected prompt call: {message}") - - monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - monkeypatch.setattr("hermes_cli.setup.prompt", fake_prompt) - monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False) - monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None) - monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) - monkeypatch.setattr("hermes_cli.auth.get_auth_status", lambda provider_id: {"logged_in": provider_id == "copilot-acp"}) - monkeypatch.setattr( - "hermes_cli.auth.resolve_api_key_provider_credentials", - lambda provider_id: { - "provider": "copilot", - "api_key": "gh-cli-token", - "base_url": "https://api.githubcopilot.com", - "source": "gh auth token", - }, - ) - 
monkeypatch.setattr( - "hermes_cli.models.fetch_github_model_catalog", - lambda api_key: [ - { - "id": "gpt-4.1", - "capabilities": {"type": "chat", "supports": {}}, - "supported_endpoints": ["/chat/completions"], - }, - { - "id": "gpt-5.4", - "capabilities": {"type": "chat", "supports": {"reasoning_effort": ["low", "medium", "high"]}}, - "supported_endpoints": ["/responses"], - }, - ], - ) - monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: []) + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) setup_model_provider(config) save_config(config) reloaded = load_config() - + assert isinstance(reloaded["model"], dict) assert reloaded["model"]["provider"] == "copilot-acp" - assert reloaded["model"]["base_url"] == "acp://copilot" - assert reloaded["model"]["default"] == "gpt-5.4" - assert reloaded["model"]["api_mode"] == "chat_completions" -def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(tmp_path, monkeypatch): - """Switching from custom to Codex should clear custom endpoint overrides.""" +def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config( + tmp_path, monkeypatch +): + """Switching from custom to codex updates config correctly.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) _clear_provider_env(monkeypatch) + _stub_tts(monkeypatch) - save_env_value("OPENAI_BASE_URL", "https://example.invalid/v1") - save_env_value("OPENAI_API_KEY", "sk-custom") - save_env_value("OPENROUTER_API_KEY", "sk-or") + # Start with custom + _write_model_config("custom", "http://localhost:11434/v1", "qwen3.5:32b") config = load_config() - config["model"] = { - "default": "custom/model", - "provider": "custom", - "base_url": "https://example.invalid/v1", - } - save_config(config) + assert config["model"]["provider"] == "custom" - def fake_prompt_choice(question, choices, default=0): - if question == "Select your inference provider:": - return 2 # OpenAI Codex - if question 
== "Select default model:": - return 0 - tts_idx = _maybe_keep_current_tts(question, choices) - if tts_idx is not None: - return tts_idx - raise AssertionError(f"Unexpected prompt_choice call: {question}") + def fake_select(): + _write_model_config("openai-codex", "https://api.openai.com/v1", "gpt-4o") - monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) - monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "") - monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False) - monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None) - monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) - monkeypatch.setattr("hermes_cli.auth._login_openai_codex", lambda *args, **kwargs: None) - monkeypatch.setattr( - "hermes_cli.auth.resolve_codex_runtime_credentials", - lambda *args, **kwargs: { - "base_url": "https://chatgpt.com/backend-api/codex", - "api_key": "codex-...oken", - }, - ) - monkeypatch.setattr( - "hermes_cli.codex_models.get_codex_model_ids", - lambda **kwargs: ["openai/gpt-5.3-codex", "openai/gpt-5-codex-mini"], - ) + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) setup_model_provider(config) save_config(config) - env = _read_env(tmp_path) reloaded = load_config() - - # OPENAI_BASE_URL is no longer written/cleared in .env — config is authoritative + assert isinstance(reloaded["model"], dict) assert reloaded["model"]["provider"] == "openai-codex" - assert reloaded["model"]["default"] == "openai/gpt-5.3-codex" - assert reloaded["model"]["base_url"] == "https://chatgpt.com/backend-api/codex" + assert reloaded["model"]["default"] == "gpt-4o" -def test_setup_summary_marks_codex_auth_as_vision_available(tmp_path, monkeypatch, capsys): +def test_setup_switch_preserves_non_model_config(tmp_path, monkeypatch): + """Provider switch preserves other config sections (terminal, display, etc.).""" monkeypatch.setenv("HERMES_HOME", 
str(tmp_path)) _clear_provider_env(monkeypatch) + _stub_tts(monkeypatch) - (tmp_path / "auth.json").write_text( - '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token": "***", "refresh_token": "***"}}}}' - ) + config = load_config() + config["terminal"]["timeout"] = 999 + save_config(config) - monkeypatch.setattr("shutil.which", lambda _name: None) + config = load_config() - _print_setup_summary(load_config(), tmp_path) - output = capsys.readouterr().out + def fake_select(): + _write_model_config("openrouter", model_name="gpt-4o") - assert "Vision (image analysis)" in output - assert "missing run 'hermes setup' to configure" not in output - assert "Mixture of Agents" in output - assert "missing OPENROUTER_API_KEY" in output + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) + setup_model_provider(config) + save_config(config) -def test_setup_summary_marks_anthropic_auth_as_vision_available(tmp_path, monkeypatch, capsys): - monkeypatch.setenv("HERMES_HOME", str(tmp_path)) - _clear_provider_env(monkeypatch) - monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key") - monkeypatch.setattr("shutil.which", lambda _name: None) - monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: ["anthropic"]) - - _print_setup_summary(load_config(), tmp_path) - output = capsys.readouterr().out - - assert "Vision (image analysis)" in output - assert "missing run 'hermes setup' to configure" not in output + reloaded = load_config() + assert reloaded["terminal"]["timeout"] == 999 + assert reloaded["model"]["provider"] == "openrouter" -- 2.43.0 From ff78ad4c811cdd7a74cf077d569e6571e91caa6a Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 01:24:48 -0700 Subject: [PATCH 067/385] feat: add discord.reactions config option to disable message reactions (#4199) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Adds a 'reactions' key under the discord config section (default: true). When set to false, the bot no longer adds 👀/✅/❌ reactions to messages during processing. The config maps to DISCORD_REACTIONS env var following the same pattern as require_mention and auto_thread. Files changed: - hermes_cli/config.py: Add reactions default to DEFAULT_CONFIG - gateway/config.py: Map discord.reactions to DISCORD_REACTIONS env var - gateway/platforms/discord.py: Gate on_processing_start/complete hooks - tests/gateway/test_discord_reactions.py: 3 new tests for config gate --- gateway/config.py | 2 + gateway/platforms/discord.py | 8 ++++ hermes_cli/config.py | 1 + tests/gateway/test_discord_reactions.py | 64 +++++++++++++++++++++++++ 4 files changed, 75 insertions(+) diff --git a/gateway/config.py b/gateway/config.py index 8c7843780..c660bb48e 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -550,6 +550,8 @@ def load_gateway_config() -> GatewayConfig: os.environ["DISCORD_FREE_RESPONSE_CHANNELS"] = str(frc) if "auto_thread" in discord_cfg and not os.getenv("DISCORD_AUTO_THREAD"): os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower() + if "reactions" in discord_cfg and not os.getenv("DISCORD_REACTIONS"): + os.environ["DISCORD_REACTIONS"] = str(discord_cfg["reactions"]).lower() # Telegram settings → env vars (env vars take precedence) telegram_cfg = yaml_cfg.get("telegram", {}) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 9e0c9c123..168919b09 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -683,14 +683,22 @@ class DiscordAdapter(BasePlatformAdapter): logger.debug("[%s] remove_reaction failed (%s): %s", self.name, emoji, e) return False + def _reactions_enabled(self) -> bool: + """Check if message reactions are enabled via config/env.""" + return os.getenv("DISCORD_REACTIONS", "true").lower() not in ("false", "0", "no") + async def on_processing_start(self, 
event: MessageEvent) -> None: """Add an in-progress reaction for normal Discord message events.""" + if not self._reactions_enabled(): + return message = event.raw_message if hasattr(message, "add_reaction"): await self._add_reaction(message, "👀") async def on_processing_complete(self, event: MessageEvent, success: bool) -> None: """Swap the in-progress reaction for a final success/failure reaction.""" + if not self._reactions_enabled(): + return message = event.raw_message if hasattr(message, "add_reaction"): await self._remove_reaction(message, "👀") diff --git a/hermes_cli/config.py b/hermes_cli/config.py index f7ae4239d..97df597d5 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -452,6 +452,7 @@ DEFAULT_CONFIG = { "require_mention": True, # Require @mention to respond in server channels "free_response_channels": "", # Comma-separated channel IDs where bot responds without mention "auto_thread": True, # Auto-create threads on @mention in channels (like Slack) + "reactions": True, # Add 👀/✅/❌ reactions to messages during processing }, # WhatsApp platform settings (gateway mode) diff --git a/tests/gateway/test_discord_reactions.py b/tests/gateway/test_discord_reactions.py index c19913a4c..3988c67b5 100644 --- a/tests/gateway/test_discord_reactions.py +++ b/tests/gateway/test_discord_reactions.py @@ -168,3 +168,67 @@ async def test_reaction_helper_failures_do_not_break_message_flow(adapter): await adapter._process_message_background(event, build_session_key(event.source)) adapter.send.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_reactions_disabled_via_env(adapter, monkeypatch): + """When DISCORD_REACTIONS=false, no reactions should be added.""" + monkeypatch.setenv("DISCORD_REACTIONS", "false") + + raw_message = SimpleNamespace( + add_reaction=AsyncMock(), + remove_reaction=AsyncMock(), + ) + + async def handler(_event): + await asyncio.sleep(0) + return "ack" + + async def hold_typing(_chat_id, interval=2.0, metadata=None): + await 
asyncio.Event().wait() + + adapter.set_message_handler(handler) + adapter.send = AsyncMock(return_value=SendResult(success=True, message_id="999")) + adapter._keep_typing = hold_typing + + event = _make_event("4", raw_message) + await adapter._process_message_background(event, build_session_key(event.source)) + + raw_message.add_reaction.assert_not_awaited() + raw_message.remove_reaction.assert_not_awaited() + # Response should still be sent + adapter.send.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_reactions_disabled_via_env_zero(adapter, monkeypatch): + """DISCORD_REACTIONS=0 should also disable reactions.""" + monkeypatch.setenv("DISCORD_REACTIONS", "0") + + raw_message = SimpleNamespace( + add_reaction=AsyncMock(), + remove_reaction=AsyncMock(), + ) + + event = _make_event("5", raw_message) + await adapter.on_processing_start(event) + await adapter.on_processing_complete(event, success=True) + + raw_message.add_reaction.assert_not_awaited() + raw_message.remove_reaction.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_reactions_enabled_by_default(adapter, monkeypatch): + """When DISCORD_REACTIONS is unset, reactions should still work (default: true).""" + monkeypatch.delenv("DISCORD_REACTIONS", raising=False) + + raw_message = SimpleNamespace( + add_reaction=AsyncMock(), + remove_reaction=AsyncMock(), + ) + + event = _make_event("6", raw_message) + await adapter.on_processing_start(event) + + raw_message.add_reaction.assert_awaited_once_with("👀") -- 2.43.0 From a994cf5e5ab31f48b48a11b8529440a682d54f7a Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 01:29:43 -0700 Subject: [PATCH 068/385] docs: update adding-providers guide for unified setup flow setup_model_provider() now delegates to select_provider_and_model() from main.py, so new providers only need to be wired in main.py. 
Removed setup.py from file checklists, replaced the setup.py section with a tip explaining the automatic inheritance. --- .../docs/developer-guide/adding-providers.md | 45 +++++++++---------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/website/docs/developer-guide/adding-providers.md b/website/docs/developer-guide/adding-providers.md index 9547e78d0..a0c9f9122 100644 --- a/website/docs/developer-guide/adding-providers.md +++ b/website/docs/developer-guide/adding-providers.md @@ -28,7 +28,7 @@ A built-in provider has to line up across a few layers: - `api_key` - `source` 3. `run_agent.py` uses `api_mode` to decide how requests are built and sent. -4. `hermes_cli/models.py`, `hermes_cli/main.py`, and `hermes_cli/setup.py` make the provider show up in the CLI. +4. `hermes_cli/models.py` and `hermes_cli/main.py` make the provider show up in the CLI. (`hermes_cli/setup.py` delegates to `main.py` automatically — no changes needed there.) 5. `agent/auxiliary_client.py` and `agent/model_metadata.py` keep side tasks and token budgeting working. The important abstraction is `api_mode`. @@ -78,11 +78,14 @@ This path includes everything from Path A plus: 2. `hermes_cli/models.py` 3. `hermes_cli/runtime_provider.py` 4. `hermes_cli/main.py` -5. `hermes_cli/setup.py` -6. `agent/auxiliary_client.py` -7. `agent/model_metadata.py` -8. tests -9. user-facing docs under `website/docs/` +5. `agent/auxiliary_client.py` +6. `agent/model_metadata.py` +7. tests +8. user-facing docs under `website/docs/` + +:::tip +`hermes_cli/setup.py` does **not** need changes. The setup wizard delegates provider/model selection to `select_provider_and_model()` in `main.py` — any provider added there is automatically available in `hermes setup`. +::: ### Additional for native / non-OpenAI providers @@ -185,29 +188,22 @@ If the provider is OpenAI-compatible, `api_mode` should usually stay `chat_compl Be careful with API-key precedence. 
Hermes already contains logic to avoid leaking an OpenRouter key to unrelated endpoints. A new provider should be equally explicit about which key goes to which base URL. -## Step 5: Wire the CLI in `hermes_cli/main.py` and `hermes_cli/setup.py` +## Step 5: Wire the CLI in `hermes_cli/main.py` -A provider is not discoverable until it shows up in the interactive flows. +A provider is not discoverable until it shows up in the interactive `hermes model` flow. -Update: +Update these in `hermes_cli/main.py`: -### `hermes_cli/main.py` - -- `provider_labels` -- provider dispatch inside the `model` command +- `provider_labels` dict +- `providers` list in `select_provider_and_model()` +- provider dispatch (`if selected_provider == ...`) - `--provider` argument choices - login/logout choices if the provider supports those flows - a `_model_flow_()` function, or reuse `_model_flow_api_key_provider()` if it fits -### `hermes_cli/setup.py` - -- `provider_choices` -- auth branch for the provider -- model-selection branch -- any provider-specific explanatory text -- any place where a provider should be excluded from OpenRouter-only prompts or routing settings - -If you only update one of these files, `hermes model` and `hermes setup` will drift. +:::tip +`hermes_cli/setup.py` does not need changes — it calls `select_provider_and_model()` from `main.py`, so your new provider appears in both `hermes model` and `hermes setup` automatically. +::: ## Step 6: Keep auxiliary calls working @@ -353,8 +349,7 @@ Use this if the provider is standard chat completions. 
- [ ] aliases added in `hermes_cli/auth.py` and `hermes_cli/models.py` - [ ] model catalog added in `hermes_cli/models.py` - [ ] runtime branch added in `hermes_cli/runtime_provider.py` -- [ ] CLI wiring added in `hermes_cli/main.py` -- [ ] setup wiring added in `hermes_cli/setup.py` +- [ ] CLI wiring added in `hermes_cli/main.py` (setup.py inherits automatically) - [ ] aux model added in `agent/auxiliary_client.py` - [ ] context lengths added in `agent/model_metadata.py` - [ ] runtime / CLI tests updated @@ -412,7 +407,7 @@ If you are hunting for all the places a provider touches, search these symbols: - `_PROVIDER_MODELS` - `resolve_runtime_provider` - `_model_flow_` -- `provider_choices` +- `select_provider_and_model` - `api_mode` - `_API_KEY_PROVIDER_AUX_MODELS` - `self.client.` -- 2.43.0 From 1bca6f393002da217a3e64a437a4fc5aac16dc9d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 01:36:15 -0700 Subject: [PATCH 069/385] fix: save API key to model config for custom endpoints (#4182) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Custom cloud endpoints (Together.ai, RunPod, Groq, etc.) lost their API key after #4165 removed OPENAI_API_KEY .env saves. The key was only saved to the custom_providers list which is unreachable at runtime for plain 'custom' provider resolution. Save model.api_key to config.yaml alongside model.provider and model.base_url in all three custom endpoint code paths: - _model_flow_custom (new endpoint with model name) - _model_flow_custom (new endpoint without model name) - _model_flow_named_custom (switching to a saved endpoint) The runtime resolver already reads model.api_key (runtime_provider.py line 224-228), so the key is picked up automatically. Each custom endpoint carries its own key in config — no shared OPENAI_API_KEY env var needed. 
--- hermes_cli/main.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index a12879a8b..f2845869a 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1288,6 +1288,8 @@ def _model_flow_custom(config): cfg["model"] = model model["provider"] = "custom" model["base_url"] = effective_url + if effective_key: + model["api_key"] = effective_key model.pop("api_mode", None) # let runtime auto-detect from URL save_config(cfg) deactivate_provider() @@ -1309,6 +1311,8 @@ def _model_flow_custom(config): _caller_model = {"default": _caller_model} if _caller_model else {} _caller_model["provider"] = "custom" _caller_model["base_url"] = effective_url + if effective_key: + _caller_model["api_key"] = effective_key _caller_model.pop("api_mode", None) config["model"] = _caller_model print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.") @@ -1460,6 +1464,8 @@ def _model_flow_named_custom(config, provider_info): cfg["model"] = model model["provider"] = "custom" model["base_url"] = base_url + if api_key: + model["api_key"] = api_key save_config(cfg) deactivate_provider() @@ -1531,6 +1537,8 @@ def _model_flow_named_custom(config, provider_info): cfg["model"] = model model["provider"] = "custom" model["base_url"] = base_url + if api_key: + model["api_key"] = api_key save_config(cfg) deactivate_provider() -- 2.43.0 From c53a296df1935639780ed1a34d54009c3a4e071d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 01:54:13 -0700 Subject: [PATCH 070/385] feat: add MiniMax M2.7 to hermes model picker and opencode-go (#4208) Add MiniMax-M2.7 and M2.7-highspeed to _PROVIDER_MODELS for minimax and minimax-cn providers in main.py so hermes model shows them. Update opencode-go bare ID from m2.5 to m2.7 in models.py. Salvaged from PR #4197 by octo-patch. 
--- hermes_cli/main.py | 4 ++++ hermes_cli/models.py | 2 +- tests/test_setup_model_selection.py | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index f2845869a..19a0ac49f 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1591,11 +1591,15 @@ _PROVIDER_MODELS = { "kimi-k2-0905-preview", ], "minimax": [ + "MiniMax-M2.7", + "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1", ], "minimax-cn": [ + "MiniMax-M2.7", + "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1", diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 5e1077837..c8bd106b6 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -191,7 +191,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "opencode-go": [ "glm-5", "kimi-k2.5", - "minimax-m2.5", + "minimax-m2.7", ], "ai-gateway": [ "anthropic/claude-opus-4.6", diff --git a/tests/test_setup_model_selection.py b/tests/test_setup_model_selection.py index 514a43045..3a02ebbf0 100644 --- a/tests/test_setup_model_selection.py +++ b/tests/test_setup_model_selection.py @@ -32,8 +32,8 @@ class TestSetupProviderModelSelection: @pytest.mark.parametrize("provider_id,expected_defaults", [ ("zai", ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]), ("kimi-coding", ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"]), - ("minimax", ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]), - ("minimax-cn", ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]), + ("minimax", ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]), + ("minimax-cn", ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]), ]) @patch("hermes_cli.models.fetch_api_models", return_value=[]) @patch("hermes_cli.config.get_env_value", return_value="fake-key") -- 2.43.0 From 086ec5590d6fe2917f5d7b410246524974799438 Mon Sep 17 00:00:00 2001 
From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 02:01:15 -0700 Subject: [PATCH 071/385] fix: gate Claude Code credentials behind explicit Hermes config in wizard trigger (#4210) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a user has Claude Code installed but never configured Hermes, the first-run guard found those external credentials and skipped the setup wizard. Users got silently routed to someone else's inference without being asked. Now _has_any_provider_configured() checks whether Hermes itself has been explicitly configured (model in config differs from hardcoded default) before counting Claude Code credentials. Fresh installs trigger the wizard regardless of what external tools are on the machine. Salvaged from PR #4194 by sudoingX — wizard trigger fix only. Model auto-detect change under separate review. Co-authored-by: Xpress AI (Dip KD) <200180104+sudoingX@users.noreply.github.com> --- hermes_cli/main.py | 37 +++++++++++++++++------- tests/test_api_key_providers.py | 51 +++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 10 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 19a0ac49f..a209ea11c 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -173,9 +173,25 @@ def _relative_time(ts) -> str: def _has_any_provider_configured() -> bool: """Check if at least one inference provider is usable.""" - from hermes_cli.config import get_env_path, get_hermes_home + from hermes_cli.config import get_env_path, get_hermes_home, load_config from hermes_cli.auth import get_auth_status + # Determine whether Hermes itself has been explicitly configured (model + # in config that isn't the hardcoded default). Used below to gate external + # tool credentials (Claude Code, Codex CLI) that shouldn't silently skip + # the setup wizard on a fresh install. 
+ from hermes_cli.config import DEFAULT_CONFIG + _DEFAULT_MODEL = DEFAULT_CONFIG.get("model", "") + cfg = load_config() + model_cfg = cfg.get("model") + if isinstance(model_cfg, dict): + _model_name = (model_cfg.get("default") or "").strip() + elif isinstance(model_cfg, str): + _model_name = model_cfg.strip() + else: + _model_name = "" + _has_hermes_config = _model_name and _model_name != _DEFAULT_MODEL + # Check env vars (may be set by .env or shell). # OPENAI_BASE_URL alone counts — local models (vLLM, llama.cpp, etc.) # often don't require an API key. @@ -231,15 +247,16 @@ def _has_any_provider_configured() -> bool: # Check for Claude Code OAuth credentials (~/.claude/.credentials.json) - # These are used by resolve_anthropic_token() at runtime but were missing - # from this startup gate check. - try: - from agent.anthropic_adapter import read_claude_code_credentials, is_claude_code_token_valid - creds = read_claude_code_credentials() - if creds and (is_claude_code_token_valid(creds) or creds.get("refreshToken")): - return True - except Exception: - pass + # Only count these if Hermes has been explicitly configured — Claude Code + # being installed doesn't mean the user wants Hermes to use their tokens. 
+ if _has_hermes_config: + try: + from agent.anthropic_adapter import read_claude_code_credentials, is_claude_code_token_valid + creds = read_claude_code_credentials() + if creds and (is_claude_code_token_valid(creds) or creds.get("refreshToken")): + return True + except Exception: + pass return False diff --git a/tests/test_api_key_providers.py b/tests/test_api_key_providers.py index 0c6337d3e..e250bbb25 100644 --- a/tests/test_api_key_providers.py +++ b/tests/test_api_key_providers.py @@ -622,6 +622,57 @@ class TestHasAnyProviderConfigured: from hermes_cli.main import _has_any_provider_configured assert _has_any_provider_configured() is True + def test_claude_code_creds_ignored_on_fresh_install(self, monkeypatch, tmp_path): + """Claude Code credentials should NOT skip the wizard when Hermes is unconfigured.""" + from hermes_cli import config as config_module + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env") + monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home) + # Clear all provider env vars so earlier checks don't short-circuit + for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"): + monkeypatch.delenv(var, raising=False) + # Simulate valid Claude Code credentials + monkeypatch.setattr( + "agent.anthropic_adapter.read_claude_code_credentials", + lambda: {"accessToken": "sk-ant-test", "refreshToken": "ref-tok"}, + ) + monkeypatch.setattr( + "agent.anthropic_adapter.is_claude_code_token_valid", + lambda creds: True, + ) + from hermes_cli.main import _has_any_provider_configured + assert _has_any_provider_configured() is False + + def test_claude_code_creds_counted_when_hermes_configured(self, monkeypatch, tmp_path): + """Claude Code credentials should count when Hermes has been explicitly configured.""" + import yaml + from hermes_cli import config as config_module + hermes_home = tmp_path / 
".hermes" + hermes_home.mkdir() + # Write a config with a non-default model to simulate explicit configuration + config_file = hermes_home / "config.yaml" + config_file.write_text(yaml.dump({"model": {"default": "my-local-model"}})) + monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env") + monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + # Clear all provider env vars + for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"): + monkeypatch.delenv(var, raising=False) + # Simulate valid Claude Code credentials + monkeypatch.setattr( + "agent.anthropic_adapter.read_claude_code_credentials", + lambda: {"accessToken": "sk-ant-test", "refreshToken": "ref-tok"}, + ) + monkeypatch.setattr( + "agent.anthropic_adapter.is_claude_code_token_valid", + lambda creds: True, + ) + from hermes_cli.main import _has_any_provider_configured + assert _has_any_provider_configured() is True + # ============================================================================= # Kimi Code auto-detection tests -- 2.43.0 From 50302ed70a5a6fc1caca15fc0795458572a11b97 Mon Sep 17 00:00:00 2001 From: Nils <107209841+nils010485@users.noreply.github.com> Date: Tue, 31 Mar 2026 11:11:55 +0200 Subject: [PATCH 072/385] fix(tools): make browser SSRF check configurable via browser.allow_private_urls (#4198) * fix(tools): skip SSRF check in local browser mode The SSRF protection added in #3041 blocks all private/internal addresses unconditionally in browser_navigate(). This prevents legitimate local development use cases (localhost testing, LAN device access) when using the local Chromium backend. The SSRF check is only meaningful for cloud browsers (Browserbase, BrowserUse) where the agent could reach internal resources on a remote machine. In local mode, the user already has full terminal and network access, so the check adds no security value. 
This change makes the SSRF check conditional on _get_cloud_provider(), keeping full protection in cloud mode while allowing private addresses in local mode. * fix(tools): make SSRF check configurable via browser.allow_private_urls Replace unconditional SSRF check with a configurable setting. Default (False) keeps existing security behavior. Setting to True allows navigating to private/internal IPs for local dev and LAN use cases. --------- Co-authored-by: Nils (Norya) --- hermes_cli/config.py | 1 + tests/tools/test_browser_ssrf_local.py | 163 +++++++++++++++++++++++++ tools/browser_tool.py | 35 +++++- 3 files changed, 196 insertions(+), 3 deletions(-) create mode 100644 tests/tools/test_browser_ssrf_local.py diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 97df597d5..9d7f545b2 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -245,6 +245,7 @@ DEFAULT_CONFIG = { "inactivity_timeout": 120, "command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.) "record_sessions": False, # Auto-record browser sessions as WebM videos + "allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.) }, # Filesystem checkpoints — automatic snapshots before destructive file ops. diff --git a/tests/tools/test_browser_ssrf_local.py b/tests/tools/test_browser_ssrf_local.py new file mode 100644 index 000000000..44d3b8ea1 --- /dev/null +++ b/tests/tools/test_browser_ssrf_local.py @@ -0,0 +1,163 @@ +"""Tests that browser_navigate SSRF checks respect the allow_private_urls setting. + +When ``browser.allow_private_urls`` is ``False`` (default), private/internal +addresses are blocked. When set to ``True``, they are allowed — useful for +local development, LAN access, and Hermes self-testing. 
+""" + +import json + +import pytest + +from tools import browser_tool + + +def _make_browser_result(url="https://example.com"): + """Return a mock successful browser command result.""" + return {"success": True, "data": {"title": "OK", "url": url}} + + +# --------------------------------------------------------------------------- +# Pre-navigation SSRF check +# --------------------------------------------------------------------------- + + +class TestPreNavigationSsrf: + PRIVATE_URL = "http://127.0.0.1:8080/dashboard" + + @pytest.fixture() + def _common_patches(self, monkeypatch): + """Shared patches for pre-navigation tests that pass the SSRF check.""" + monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: False) + monkeypatch.setattr(browser_tool, "check_website_access", lambda url: None) + monkeypatch.setattr( + browser_tool, + "_get_session_info", + lambda task_id: { + "session_name": f"s_{task_id}", + "bb_session_id": None, + "cdp_url": None, + "features": {"local": True}, + "_first_nav": False, + }, + ) + monkeypatch.setattr( + browser_tool, + "_run_browser_command", + lambda *a, **kw: _make_browser_result(), + ) + + def test_blocks_private_url_by_default(self, monkeypatch, _common_patches): + """SSRF protection is on when allow_private_urls is not set (False).""" + monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) + monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: False) + + result = json.loads(browser_tool.browser_navigate(self.PRIVATE_URL)) + + assert result["success"] is False + assert "private or internal address" in result["error"] + + def test_blocks_private_url_when_setting_false(self, monkeypatch, _common_patches): + """SSRF protection is on when allow_private_urls is explicitly False.""" + monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) + monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: False) + + result = json.loads(browser_tool.browser_navigate(self.PRIVATE_URL)) + + 
assert result["success"] is False + + def test_allows_private_url_when_setting_true(self, monkeypatch, _common_patches): + """Private URLs are allowed when allow_private_urls is True.""" + monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: True) + # _is_safe_url would block this, but the setting overrides it + monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: False) + + result = json.loads(browser_tool.browser_navigate(self.PRIVATE_URL)) + + assert result["success"] is True + + def test_allows_public_url_regardless_of_setting(self, monkeypatch, _common_patches): + """Public URLs always pass regardless of the allow_private_urls setting.""" + monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) + monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: True) + + result = json.loads(browser_tool.browser_navigate("https://example.com")) + + assert result["success"] is True + + +# --------------------------------------------------------------------------- +# Post-redirect SSRF check +# --------------------------------------------------------------------------- + + +class TestPostRedirectSsrf: + PUBLIC_URL = "https://example.com/redirect" + PRIVATE_FINAL_URL = "http://192.168.1.1/internal" + + @pytest.fixture() + def _common_patches(self, monkeypatch): + """Shared patches for redirect tests.""" + monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: False) + monkeypatch.setattr(browser_tool, "check_website_access", lambda url: None) + monkeypatch.setattr( + browser_tool, + "_get_session_info", + lambda task_id: { + "session_name": f"s_{task_id}", + "bb_session_id": None, + "cdp_url": None, + "features": {"local": True}, + "_first_nav": False, + }, + ) + + def test_blocks_redirect_to_private_by_default(self, monkeypatch, _common_patches): + """Redirects to private addresses are blocked when setting is False.""" + monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) + monkeypatch.setattr( + 
browser_tool, "_is_safe_url", lambda url: "192.168" not in url, + ) + monkeypatch.setattr( + browser_tool, + "_run_browser_command", + lambda *a, **kw: _make_browser_result(url=self.PRIVATE_FINAL_URL), + ) + + result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL)) + + assert result["success"] is False + assert "redirect landed on a private/internal address" in result["error"] + + def test_allows_redirect_to_private_when_setting_true(self, monkeypatch, _common_patches): + """Redirects to private addresses are allowed when setting is True.""" + monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: True) + monkeypatch.setattr( + browser_tool, "_is_safe_url", lambda url: "192.168" not in url, + ) + monkeypatch.setattr( + browser_tool, + "_run_browser_command", + lambda *a, **kw: _make_browser_result(url=self.PRIVATE_FINAL_URL), + ) + + result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL)) + + assert result["success"] is True + assert result["url"] == self.PRIVATE_FINAL_URL + + def test_allows_redirect_to_public_regardless_of_setting(self, monkeypatch, _common_patches): + """Redirects to public addresses always pass.""" + final = "https://example.com/final" + monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) + monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: True) + monkeypatch.setattr( + browser_tool, + "_run_browser_command", + lambda *a, **kw: _make_browser_result(url=final), + ) + + result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL)) + + assert result["success"] is True + assert result["url"] == final diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 33a1c8ef6..03aa6106b 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -237,6 +237,8 @@ _PROVIDER_REGISTRY: Dict[str, type] = { _cached_cloud_provider: Optional[CloudBrowserProvider] = None _cloud_provider_resolved = False +_allow_private_urls_resolved = False +_allow_private_urls: Optional[bool] = 
None def _get_cloud_provider() -> Optional[CloudBrowserProvider]: @@ -265,6 +267,31 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]: return _cached_cloud_provider +def _allow_private_urls() -> bool: + """Return whether the browser is allowed to navigate to private/internal addresses. + + Reads ``config["browser"]["allow_private_urls"]`` once and caches the result + for the process lifetime. Defaults to ``False`` (SSRF protection active). + """ + global _allow_private_urls, _allow_private_urls_resolved + if _allow_private_urls_resolved: + return _allow_private_urls + + _allow_private_urls_resolved = True + _allow_private_urls = False # safe default + try: + hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) + config_path = hermes_home / "config.yaml" + if config_path.exists(): + import yaml + with open(config_path) as f: + cfg = yaml.safe_load(f) or {} + _allow_private_urls = bool(cfg.get("browser", {}).get("allow_private_urls")) + except Exception as e: + logger.debug("Could not read allow_private_urls from config: %s", e) + return _allow_private_urls + + def _socket_safe_tmpdir() -> str: """Return a short temp directory path suitable for Unix domain sockets. @@ -1038,8 +1065,10 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: Returns: JSON string with navigation result (includes stealth features info on first nav) """ - # SSRF protection — block private/internal addresses before navigating - if not _is_safe_url(url): + # SSRF protection — block private/internal addresses before navigating. + # Can be opted out via ``browser.allow_private_urls`` in config for local + # development or LAN access use cases. 
+ if not _allow_private_urls() and not _is_safe_url(url): return json.dumps({ "success": False, "error": "Blocked: URL targets a private or internal address", @@ -1081,7 +1110,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: # Post-redirect SSRF check — if the browser followed a redirect to a # private/internal address, block the result so the model can't read # internal content via subsequent browser_snapshot calls. - if final_url and final_url != url and not _is_safe_url(final_url): + if not _allow_private_urls() and final_url and final_url != url and not _is_safe_url(final_url): # Navigate away to a blank page to prevent snapshot leaks _run_browser_command(effective_task_id, "open", ["about:blank"], timeout=10) return json.dumps({ -- 2.43.0 From 2ae50bddddfaab3f4599f5b8ec12a969bbc20e6b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 02:41:50 -0700 Subject: [PATCH 073/385] fix(telegram): enforce 32-char limit on command names with collision avoidance (#4211) Telegram Bot API requires command names to be 1-32 characters. Plugin and skill names that exceed this limit now get truncated. If truncation creates a collision (with core commands, other plugins, or other skills), the name is shortened to 31 chars and a digit 0-9 is appended. Adds _clamp_telegram_names() helper used for both plugin and skill entries in telegram_menu_commands(). Core CommandDef commands are tracked as reserved names so truncated plugin/skill names never shadow them. Addresses the fix from PR #4191 (sroecker) with collision-safe truncation. Tests: 9 new tests covering truncation, digit suffixes, exhaustion, dedup. 
--- hermes_cli/commands.py | 52 ++++++++++++++++++- tests/hermes_cli/test_commands.py | 83 +++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 2 deletions(-) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index a167c4ac5..c67d4e9db 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -368,6 +368,42 @@ def telegram_bot_commands() -> list[tuple[str, str]]: return result +_TG_NAME_LIMIT = 32 + + +def _clamp_telegram_names( + entries: list[tuple[str, str]], + reserved: set[str], +) -> list[tuple[str, str]]: + """Enforce Telegram's 32-char command name limit with collision avoidance. + + Names exceeding 32 chars are truncated. If truncation creates a duplicate + (against *reserved* names or earlier entries in the same batch), the name is + shortened to 31 chars and a digit ``0``-``9`` is appended to differentiate. + If all 10 digit slots are taken the entry is silently dropped. + """ + used: set[str] = set(reserved) + result: list[tuple[str, str]] = [] + for name, desc in entries: + if len(name) > _TG_NAME_LIMIT: + candidate = name[:_TG_NAME_LIMIT] + if candidate in used: + prefix = name[:_TG_NAME_LIMIT - 1] + for digit in range(10): + candidate = f"{prefix}{digit}" + if candidate not in used: + break + else: + # All 10 digit slots exhausted — skip entry + continue + name = candidate + if name in used: + continue + used.add(name) + result.append((name, desc)) + return result + + def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str]], int]: """Return Telegram menu commands capped to the Bot API limit. @@ -383,9 +419,13 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str (menu_commands, hidden_count) where hidden_count is the number of skill commands omitted due to the cap. 
""" - all_commands = list(telegram_bot_commands()) + core_commands = list(telegram_bot_commands()) + # Reserve core names so plugin/skill truncation can't collide with them + reserved_names = {n for n, _ in core_commands} + all_commands = list(core_commands) # Plugin slash commands get priority over skills + plugin_entries: list[tuple[str, str]] = [] try: from hermes_cli.plugins import get_plugin_manager pm = get_plugin_manager() @@ -395,10 +435,15 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str desc = "Plugin command" if len(desc) > 40: desc = desc[:37] + "..." - all_commands.append((tg_name, desc)) + plugin_entries.append((tg_name, desc)) except Exception: pass + # Clamp plugin names to 32 chars with collision avoidance + plugin_entries = _clamp_telegram_names(plugin_entries, reserved_names) + reserved_names.update(n for n, _ in plugin_entries) + all_commands.extend(plugin_entries) + # Remaining slots go to built-in skill commands (not hub-installed). 
skill_entries: list[tuple[str, str]] = [] try: @@ -424,6 +469,9 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str except Exception: pass + # Clamp skill names to 32 chars with collision avoidance + skill_entries = _clamp_telegram_names(skill_entries, reserved_names) + # Skills fill remaining slots — they're the only tier that gets trimmed remaining_slots = max(0, max_commands - len(all_commands)) hidden_count = max(0, len(skill_entries) - remaining_slots) diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index 2c7ef280a..321f8f161 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -12,10 +12,13 @@ from hermes_cli.commands import ( SUBCOMMANDS, SlashCommandAutoSuggest, SlashCommandCompleter, + _TG_NAME_LIMIT, + _clamp_telegram_names, gateway_help_lines, resolve_command, slack_subcommand_map, telegram_bot_commands, + telegram_menu_commands, ) @@ -504,3 +507,83 @@ class TestGhostText: def test_no_suggestion_for_non_slash(self): assert _suggestion("hello") is None + + +# --------------------------------------------------------------------------- +# Telegram command name clamping (32-char limit) +# --------------------------------------------------------------------------- + + +class TestClampTelegramNames: + """Tests for _clamp_telegram_names() — 32-char enforcement + collision.""" + + def test_short_names_unchanged(self): + entries = [("help", "Show help"), ("status", "Show status")] + result = _clamp_telegram_names(entries, set()) + assert result == entries + + def test_long_name_truncated(self): + long = "a" * 40 + result = _clamp_telegram_names([(long, "desc")], set()) + assert len(result) == 1 + assert result[0][0] == "a" * _TG_NAME_LIMIT + assert result[0][1] == "desc" + + def test_collision_with_reserved_gets_digit_suffix(self): + # The truncated form collides with a reserved name + prefix = "x" * _TG_NAME_LIMIT + long_name = "x" * 40 + result = 
_clamp_telegram_names([(long_name, "d")], reserved={prefix}) + assert len(result) == 1 + name = result[0][0] + assert len(name) == _TG_NAME_LIMIT + assert name == "x" * (_TG_NAME_LIMIT - 1) + "0" + + def test_collision_between_entries_gets_incrementing_digits(self): + # Two long names that truncate to the same 32-char prefix + base = "y" * 40 + entries = [(base + "_alpha", "d1"), (base + "_beta", "d2")] + result = _clamp_telegram_names(entries, set()) + assert len(result) == 2 + assert result[0][0] == "y" * _TG_NAME_LIMIT + assert result[1][0] == "y" * (_TG_NAME_LIMIT - 1) + "0" + + def test_collision_with_reserved_and_entries_skips_taken_digits(self): + prefix = "z" * _TG_NAME_LIMIT + digit0 = "z" * (_TG_NAME_LIMIT - 1) + "0" + # Reserve both the plain truncation and digit-0 + reserved = {prefix, digit0} + long_name = "z" * 50 + result = _clamp_telegram_names([(long_name, "d")], reserved) + assert len(result) == 1 + assert result[0][0] == "z" * (_TG_NAME_LIMIT - 1) + "1" + + def test_all_digits_exhausted_drops_entry(self): + prefix = "w" * _TG_NAME_LIMIT + # Reserve the plain truncation + all 10 digit slots + reserved = {prefix} | {"w" * (_TG_NAME_LIMIT - 1) + str(d) for d in range(10)} + long_name = "w" * 50 + result = _clamp_telegram_names([(long_name, "d")], reserved) + assert result == [] + + def test_exact_32_chars_not_truncated(self): + name = "a" * _TG_NAME_LIMIT + result = _clamp_telegram_names([(name, "desc")], set()) + assert result[0][0] == name + + def test_duplicate_short_name_deduplicated(self): + entries = [("foo", "d1"), ("foo", "d2")] + result = _clamp_telegram_names(entries, set()) + assert len(result) == 1 + assert result[0] == ("foo", "d1") + + +class TestTelegramMenuCommands: + """Integration: telegram_menu_commands enforces the 32-char limit.""" + + def test_all_names_within_limit(self): + menu, _ = telegram_menu_commands(max_commands=100) + for name, _desc in menu: + assert 1 <= len(name) <= _TG_NAME_LIMIT, ( + f"Command '{name}' is 
{len(name)} chars (limit {_TG_NAME_LIMIT})" + ) -- 2.43.0 From 8d59881a6246207baf0c5625c5a216b95b7994a5 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 03:10:01 -0700 Subject: [PATCH 074/385] feat(auth): same-provider credential pools with rotation, custom endpoint support, and interactive CLI (#2647) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(auth): add same-provider credential pools and rotation UX Add same-provider credential pooling so Hermes can rotate across multiple credentials for a single provider, recover from exhausted credentials without jumping providers immediately, and configure that behavior directly in hermes setup. - agent/credential_pool.py: persisted per-provider credential pools - hermes auth add/list/remove/reset CLI commands - 429/402/401 recovery with pool rotation in run_agent.py - Setup wizard integration for pool strategy configuration - Auto-seeding from env vars and existing OAuth state Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Salvaged from PR #2647 * fix(tests): prevent pool auto-seeding from host env in credential pool tests Tests for non-pool Anthropic paths and auth remove were failing when host env vars (ANTHROPIC_API_KEY) or file-backed OAuth credentials were present. The pool auto-seeding picked these up, causing unexpected pool entries in tests. 
- Mock _select_pool_entry in auxiliary_client OAuth flag tests - Clear Anthropic env vars and mock _seed_from_singletons in auth remove test * feat(auth): add thread safety, least_used strategy, and request counting - Add threading.Lock to CredentialPool for gateway thread safety (concurrent requests from multiple gateway sessions could race on pool state mutations without this) - Add 'least_used' rotation strategy that selects the credential with the lowest request_count, distributing load more evenly - Add request_count field to PooledCredential for usage tracking - Add mark_used() method to increment per-credential request counts - Wrap select(), mark_exhausted_and_rotate(), and try_refresh_current() with lock acquisition - Add tests: least_used selection, mark_used counting, concurrent thread safety (4 threads × 20 selects with no corruption) * feat(auth): add interactive mode for bare 'hermes auth' command When 'hermes auth' is called without a subcommand, it now launches an interactive wizard that: 1. Shows full credential pool status across all providers 2. Offers a menu: add, remove, reset cooldowns, set strategy 3. For OAuth-capable providers (anthropic, nous, openai-codex), the add flow explicitly asks 'API key or OAuth login?' — making it clear that both auth types are supported for the same provider 4. Strategy picker shows all 4 options (fill_first, round_robin, least_used, random) with the current selection marked 5. Remove flow shows entries with indices for easy selection The subcommand paths (hermes auth add/list/remove/reset) still work exactly as before for scripted/non-interactive use. * fix(tests): update runtime_provider tests for config.yaml source of truth (#4165) Tests were using OPENAI_BASE_URL env var which is no longer consulted after #4165. Updated to use model config (provider, base_url, api_key) which is the new single source of truth for custom endpoint URLs. 
* feat(auth): support custom endpoint credential pools keyed by provider name Custom OpenAI-compatible endpoints all share provider='custom', making the provider-keyed pool useless. Now pools for custom endpoints are keyed by 'custom:<name>' where the name comes from the custom_providers config list (auto-generated from URL hostname). - Pool key format: 'custom:together.ai', 'custom:local-(localhost:8080)' - load_pool('custom:name') seeds from custom_providers api_key AND model.api_key when base_url matches - hermes auth add/list now shows custom endpoints alongside registry providers - _resolve_openrouter_runtime and _resolve_named_custom_runtime check pool before falling back to single config key - 6 new tests covering custom pool keying, seeding, and listing * docs: add Excalidraw diagram of full credential pool flow Comprehensive architecture diagram showing: - Credential sources (env vars, auth.json OAuth, config.yaml, CLI) - Pool storage and auto-seeding - Runtime resolution paths (registry, custom, OpenRouter) - Error recovery (429 retry-then-rotate, 402 immediate, 401 refresh) - CLI management commands and strategy configuration Open at: https://excalidraw.com/#json=2Ycqhqpi6f12E_3ITyiwh,c7u9jSt5BwrmiVzHGbm87g * fix(tests): update setup wizard pool tests for unified select_provider_and_model flow The setup wizard now delegates to select_provider_and_model() instead of using its own prompt_choice-based provider picker. Tests needed: - Mock select_provider_and_model as no-op (provider pre-written to config) - Call _stub_tts BEFORE custom prompt_choice mock (it overwrites it) - Pre-write model.provider to config so the pool step is reached * docs: add comprehensive credential pool documentation - New page: website/docs/user-guide/features/credential-pools.md Full guide covering quick start, CLI commands, rotation strategies, error recovery, custom endpoint pools, auto-discovery, thread safety, architecture, and storage format.
- Updated fallback-providers.md to reference credential pools as the first layer of resilience (same-provider rotation before cross-provider) - Added hermes auth to CLI commands reference with usage examples - Added credential_pool_strategies to configuration guide * chore: remove excalidraw diagram from repo (external link only) * refactor: simplify credential pool code — extract helpers, collapse extras, dedup patterns - _load_config_safe(): replace 4 identical try/except/import blocks - _iter_custom_providers(): shared generator for custom provider iteration - PooledCredential.extra dict: collapse 11 round-trip-only fields (token_type, scope, client_id, portal_base_url, obtained_at, expires_in, agent_key_id, agent_key_expires_in, agent_key_reused, agent_key_obtained_at, tls) into a single extra dict with __getattr__ for backward-compatible access - _available_entries(): shared exhaustion-check between select and peek - Dedup anthropic OAuth seeding (hermes_pkce + claude_code identical) - SimpleNamespace replaces class _Args boilerplate in auth_commands - _try_resolve_from_custom_pool(): shared pool-check in runtime_provider Net -17 lines. All 383 targeted tests pass. 
--------- Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> --- agent/anthropic_adapter.py | 333 ++++-- agent/auxiliary_client.py | 124 ++- agent/credential_pool.py | 844 ++++++++++++++++ cli.py | 4 + gateway/run.py | 1 + hermes_cli/auth.py | 431 ++++++-- hermes_cli/auth_commands.py | 470 +++++++++ hermes_cli/config.py | 3 +- hermes_cli/main.py | 39 +- hermes_cli/runtime_provider.py | 272 ++++- hermes_cli/setup.py | 105 ++ run_agent.py | 97 ++ tests/agent/test_auxiliary_client.py | 86 +- tests/hermes_cli/test_setup_model_provider.py | 199 ++++ tests/test_auth_commands.py | 391 ++++++++ tests/test_credential_pool.py | 949 ++++++++++++++++++ tests/test_run_agent.py | 56 ++ tests/test_runtime_provider_resolution.py | 272 ++++- tests/tools/test_delegate.py | 9 +- tests/tools/test_transcription.py | 5 + website/docs/reference/cli-commands.md | 17 + website/docs/user-guide/configuration.md | 12 + .../user-guide/features/credential-pools.md | 230 +++++ .../user-guide/features/fallback-providers.md | 9 +- 24 files changed, 4757 insertions(+), 201 deletions(-) create mode 100644 agent/credential_pool.py create mode 100644 hermes_cli/auth_commands.py create mode 100644 tests/test_auth_commands.py create mode 100644 tests/test_credential_pool.py create mode 100644 website/docs/user-guide/features/credential-pools.md diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 76bc8ff2e..2fae12dde 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -307,74 +307,89 @@ def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool: return now_ms < (expires_at - 60_000) -def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]: - """Attempt to refresh an expired Claude Code OAuth token. - - Uses the same token endpoint and client_id as Claude Code / OpenCode. - Only works for credentials that have a refresh token (from claude /login - or claude setup-token with OAuth flow). 
- - Tries the new platform.claude.com endpoint first (Claude Code >=2.1.81), - then falls back to console.anthropic.com for older tokens. - - Returns the new access token, or None if refresh fails. - """ +def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]: + """Refresh an Anthropic OAuth token without mutating local credential files.""" import time + import urllib.parse import urllib.request + if not refresh_token: + raise ValueError("refresh_token is required") + + client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e" + if use_json: + data = json.dumps({ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": client_id, + }).encode() + content_type = "application/json" + else: + data = urllib.parse.urlencode({ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": client_id, + }).encode() + content_type = "application/x-www-form-urlencoded" + + token_endpoints = [ + "https://platform.claude.com/v1/oauth/token", + "https://console.anthropic.com/v1/oauth/token", + ] + last_error = None + for endpoint in token_endpoints: + req = urllib.request.Request( + endpoint, + data=data, + headers={ + "Content-Type": content_type, + "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)", + }, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=10) as resp: + result = json.loads(resp.read().decode()) + except Exception as exc: + last_error = exc + logger.debug("Anthropic token refresh failed at %s: %s", endpoint, exc) + continue + + access_token = result.get("access_token", "") + if not access_token: + raise ValueError("Anthropic refresh response was missing access_token") + next_refresh = result.get("refresh_token", refresh_token) + expires_in = result.get("expires_in", 3600) + return { + "access_token": access_token, + "refresh_token": next_refresh, + "expires_at_ms": int(time.time() * 1000) + (expires_in * 1000), + } + + if last_error is not 
None: + raise last_error + raise ValueError("Anthropic token refresh failed") + + +def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]: + """Attempt to refresh an expired Claude Code OAuth token.""" refresh_token = creds.get("refreshToken", "") if not refresh_token: logger.debug("No refresh token available — cannot refresh") return None - # Client ID used by Claude Code's OAuth flow - CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e" - - # Anthropic migrated OAuth from console.anthropic.com to platform.claude.com - # (Claude Code v2.1.81+). Try new endpoint first, fall back to old. - token_endpoints = [ - "https://platform.claude.com/v1/oauth/token", - "https://console.anthropic.com/v1/oauth/token", - ] - - payload = json.dumps({ - "grant_type": "refresh_token", - "refresh_token": refresh_token, - "client_id": CLIENT_ID, - }).encode() - - headers = { - "Content-Type": "application/json", - "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)", - } - - for endpoint in token_endpoints: - req = urllib.request.Request( - endpoint, data=payload, headers=headers, method="POST", + try: + refreshed = refresh_anthropic_oauth_pure(refresh_token, use_json=False) + _write_claude_code_credentials( + refreshed["access_token"], + refreshed["refresh_token"], + refreshed["expires_at_ms"], ) - try: - with urllib.request.urlopen(req, timeout=10) as resp: - result = json.loads(resp.read().decode()) - new_access = result.get("access_token", "") - new_refresh = result.get("refresh_token", refresh_token) - expires_in = result.get("expires_in", 3600) - - if new_access: - new_expires_ms = int(time.time() * 1000) + (expires_in * 1000) - # Parse scopes from refresh response — Claude Code >=2.1.81 - # requires a "scopes" field in the credential store and checks - # for "user:inference" before accepting the token as valid. 
- scope_str = result.get("scope", "") - scopes = scope_str.split() if scope_str else None - _write_claude_code_credentials( - new_access, new_refresh, new_expires_ms, scopes=scopes, - ) - logger.debug("Refreshed Claude Code OAuth token via %s", endpoint) - return new_access - except Exception as e: - logger.debug("Token refresh failed at %s: %s", endpoint, e) - - return None + logger.debug("Successfully refreshed Claude Code OAuth token") + return refreshed["access_token"] + except Exception as e: + logger.debug("Failed to refresh Claude Code token: %s", e) + return None def _write_claude_code_credentials( @@ -570,10 +585,208 @@ def run_oauth_setup_token() -> Optional[str]: return None +# ── Hermes-native PKCE OAuth flow ──────────────────────────────────────── +# Mirrors the flow used by Claude Code, pi-ai, and OpenCode. +# Stores credentials in ~/.hermes/.anthropic_oauth.json (our own file). + +_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e" +_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token" +_OAUTH_REDIRECT_URI = "https://console.anthropic.com/oauth/code/callback" +_OAUTH_SCOPES = "org:create_api_key user:profile user:inference" +_HERMES_OAUTH_FILE = get_hermes_home() / ".anthropic_oauth.json" +def _generate_pkce() -> tuple: + """Generate PKCE code_verifier and code_challenge (S256).""" + import base64 + import hashlib + import secrets + + verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode() + challenge = base64.urlsafe_b64encode( + hashlib.sha256(verifier.encode()).digest() + ).rstrip(b"=").decode() + return verifier, challenge +def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]: + """Run Hermes-native OAuth PKCE flow and return credential state.""" + import time + import webbrowser + + verifier, challenge = _generate_pkce() + + params = { + "code": "true", + "client_id": _OAUTH_CLIENT_ID, + "response_type": "code", + "redirect_uri": _OAUTH_REDIRECT_URI, + "scope": _OAUTH_SCOPES, + 
"code_challenge": challenge, + "code_challenge_method": "S256", + "state": verifier, + } + from urllib.parse import urlencode + + auth_url = f"https://claude.ai/oauth/authorize?{urlencode(params)}" + + print() + print("Authorize Hermes with your Claude Pro/Max subscription.") + print() + print("╭─ Claude Pro/Max Authorization ────────────────────╮") + print("│ │") + print("│ Open this link in your browser: │") + print("╰───────────────────────────────────────────────────╯") + print() + print(f" {auth_url}") + print() + + try: + webbrowser.open(auth_url) + print(" (Browser opened automatically)") + except Exception: + pass + + print() + print("After authorizing, you'll see a code. Paste it below.") + print() + try: + auth_code = input("Authorization code: ").strip() + except (KeyboardInterrupt, EOFError): + return None + + if not auth_code: + print("No code entered.") + return None + + splits = auth_code.split("#") + code = splits[0] + state = splits[1] if len(splits) > 1 else "" + + try: + import urllib.request + + exchange_data = json.dumps({ + "grant_type": "authorization_code", + "client_id": _OAUTH_CLIENT_ID, + "code": code, + "state": state, + "redirect_uri": _OAUTH_REDIRECT_URI, + "code_verifier": verifier, + }).encode() + + req = urllib.request.Request( + _OAUTH_TOKEN_URL, + data=exchange_data, + headers={ + "Content-Type": "application/json", + "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)", + }, + method="POST", + ) + + with urllib.request.urlopen(req, timeout=15) as resp: + result = json.loads(resp.read().decode()) + except Exception as e: + print(f"Token exchange failed: {e}") + return None + + access_token = result.get("access_token", "") + refresh_token = result.get("refresh_token", "") + expires_in = result.get("expires_in", 3600) + + if not access_token: + print("No access token in response.") + return None + + expires_at_ms = int(time.time() * 1000) + (expires_in * 1000) + return { + "access_token": access_token, + 
"refresh_token": refresh_token, + "expires_at_ms": expires_at_ms, + } + + +def run_hermes_oauth_login() -> Optional[str]: + """Run Hermes-native OAuth PKCE flow for Claude Pro/Max subscription. + + Opens a browser to claude.ai for authorization, prompts for the code, + exchanges it for tokens, and stores them in ~/.hermes/.anthropic_oauth.json. + + Returns the access token on success, None on failure. + """ + result = run_hermes_oauth_login_pure() + if not result: + return None + + access_token = result["access_token"] + refresh_token = result["refresh_token"] + expires_at_ms = result["expires_at_ms"] + + _save_hermes_oauth_credentials(access_token, refresh_token, expires_at_ms) + _write_claude_code_credentials(access_token, refresh_token, expires_at_ms) + + print("Authentication successful!") + return access_token + + +def _save_hermes_oauth_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None: + """Save OAuth credentials to ~/.hermes/.anthropic_oauth.json.""" + data = { + "accessToken": access_token, + "refreshToken": refresh_token, + "expiresAt": expires_at_ms, + } + try: + _HERMES_OAUTH_FILE.parent.mkdir(parents=True, exist_ok=True) + _HERMES_OAUTH_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8") + _HERMES_OAUTH_FILE.chmod(0o600) + except (OSError, IOError) as e: + logger.debug("Failed to save Hermes OAuth credentials: %s", e) + + +def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]: + """Read Hermes-managed OAuth credentials from ~/.hermes/.anthropic_oauth.json.""" + if _HERMES_OAUTH_FILE.exists(): + try: + data = json.loads(_HERMES_OAUTH_FILE.read_text(encoding="utf-8")) + if data.get("accessToken"): + return data + except (json.JSONDecodeError, OSError, IOError) as e: + logger.debug("Failed to read Hermes OAuth credentials: %s", e) + return None + + +def refresh_hermes_oauth_token() -> Optional[str]: + """Refresh the Hermes-managed OAuth token using the stored refresh token. 
+ + Returns the new access token, or None if refresh fails. + """ + creds = read_hermes_oauth_credentials() + if not creds or not creds.get("refreshToken"): + return None + + try: + refreshed = refresh_anthropic_oauth_pure( + creds["refreshToken"], + use_json=True, + ) + _save_hermes_oauth_credentials( + refreshed["access_token"], + refreshed["refresh_token"], + refreshed["expires_at_ms"], + ) + _write_claude_code_credentials( + refreshed["access_token"], + refreshed["refresh_token"], + refreshed["expires_at_ms"], + ) + logger.debug("Successfully refreshed Hermes OAuth token") + return refreshed["access_token"] + except Exception as e: + logger.debug("Failed to refresh Hermes OAuth token: %s", e) + + return None # --------------------------------------------------------------------------- @@ -1106,4 +1319,4 @@ def normalize_anthropic_response( reasoning_details=None, ), finish_reason, - ) + ) \ No newline at end of file diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 4126994bb..3b05e8d12 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -47,6 +47,7 @@ from typing import Any, Dict, List, Optional, Tuple from openai import OpenAI +from agent.credential_pool import load_pool from hermes_cli.config import get_hermes_home from hermes_constants import OPENROUTER_BASE_URL @@ -96,6 +97,45 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex" _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" +def _select_pool_entry(provider: str) -> Tuple[bool, Optional[Any]]: + """Return (pool_exists_for_provider, selected_entry).""" + try: + pool = load_pool(provider) + except Exception as exc: + logger.debug("Auxiliary client: could not load pool for %s: %s", provider, exc) + return False, None + if not pool or not pool.has_credentials(): + return False, None + try: + return True, pool.select() + except Exception as exc: + logger.debug("Auxiliary client: could not select pool entry for %s: %s", provider, exc) + return True, None + + +def 
_pool_runtime_api_key(entry: Any) -> str: + if entry is None: + return "" + # Use the PooledCredential.runtime_api_key property which handles + # provider-specific fallback (e.g. agent_key for nous). + key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "") + return str(key or "").strip() + + +def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str: + if entry is None: + return str(fallback or "").strip().rstrip("/") + # runtime_base_url handles provider-specific logic (e.g. nous prefers inference_base_url). + # Fall back through inference_base_url and base_url for non-PooledCredential entries. + url = ( + getattr(entry, "runtime_base_url", None) + or getattr(entry, "inference_base_url", None) + or getattr(entry, "base_url", None) + or fallback + ) + return str(url or "").strip().rstrip("/") + + # ── Codex Responses → chat.completions adapter ───────────────────────────── # All auxiliary consumers call client.chat.completions.create(**kwargs) and # read response.choices[0].message.content. This adapter translates those @@ -439,6 +479,22 @@ def _read_nous_auth() -> Optional[dict]: Returns the provider state dict if Nous is active with tokens, otherwise None. 
""" + pool_present, entry = _select_pool_entry("nous") + if pool_present: + if entry is None: + return None + return { + "access_token": getattr(entry, "access_token", ""), + "refresh_token": getattr(entry, "refresh_token", None), + "agent_key": getattr(entry, "agent_key", None), + "inference_base_url": _pool_runtime_base_url(entry, _NOUS_DEFAULT_BASE_URL), + "portal_base_url": getattr(entry, "portal_base_url", None), + "client_id": getattr(entry, "client_id", None), + "scope": getattr(entry, "scope", None), + "token_type": getattr(entry, "token_type", "Bearer"), + "source": "pool", + } + try: if not _AUTH_JSON_PATH.is_file(): return None @@ -467,6 +523,11 @@ def _nous_base_url() -> str: def _read_codex_access_token() -> Optional[str]: """Read a valid, non-expired Codex OAuth access token from Hermes auth store.""" + pool_present, entry = _select_pool_entry("openai-codex") + if pool_present: + token = _pool_runtime_api_key(entry) + return token or None + try: from hermes_cli.auth import _read_codex_tokens data = _read_codex_tokens() @@ -513,6 +574,24 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if provider_id == "anthropic": return _try_anthropic() + pool_present, entry = _select_pool_entry(provider_id) + if pool_present: + api_key = _pool_runtime_api_key(entry) + if not api_key: + continue + + base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url + model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default") + logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model) + extra = {} + if "api.kimi.com" in base_url.lower(): + extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"} + elif "api.githubcopilot.com" in base_url.lower(): + from hermes_cli.models import copilot_default_headers + + extra["default_headers"] = copilot_default_headers() + return OpenAI(api_key=api_key, base_url=base_url, **extra), model + creds = resolve_api_key_provider_credentials(provider_id) 
api_key = str(creds.get("api_key", "")).strip() if not api_key: @@ -562,6 +641,16 @@ def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]: def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]: + pool_present, entry = _select_pool_entry("openrouter") + if pool_present: + or_key = _pool_runtime_api_key(entry) + if not or_key: + return None, None + base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL + logger.debug("Auxiliary client: OpenRouter via pool") + return OpenAI(api_key=or_key, base_url=base_url, + default_headers=_OR_HEADERS), _OPENROUTER_MODEL + or_key = os.getenv("OPENROUTER_API_KEY") if not or_key: return None, None @@ -577,9 +666,13 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]: global auxiliary_is_nous auxiliary_is_nous = True logger.debug("Auxiliary client: Nous Portal") + model = "gemini-3-flash" if nous.get("source") == "pool" else _NOUS_MODEL return ( - OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()), - _NOUS_MODEL, + OpenAI( + api_key=_nous_api_key(nous), + base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"), + ), + model, ) @@ -655,11 +748,19 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: def _try_codex() -> Tuple[Optional[Any], Optional[str]]: - codex_token = _read_codex_access_token() - if not codex_token: - return None, None + pool_present, entry = _select_pool_entry("openai-codex") + if pool_present: + codex_token = _pool_runtime_api_key(entry) + if not codex_token: + return None, None + base_url = _pool_runtime_base_url(entry, _CODEX_AUX_BASE_URL) or _CODEX_AUX_BASE_URL + else: + codex_token = _read_codex_access_token() + if not codex_token: + return None, None + base_url = _CODEX_AUX_BASE_URL logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) - real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + real_client = OpenAI(api_key=codex_token, 
base_url=base_url) return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL @@ -669,14 +770,21 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]: except ImportError: return None, None - token = resolve_anthropic_token() + pool_present, entry = _select_pool_entry("anthropic") + if pool_present: + if entry is None: + return None, None + token = _pool_runtime_api_key(entry) + else: + entry = None + token = resolve_anthropic_token() if not token: return None, None # Allow base URL override from config.yaml model.base_url, but only # when the configured provider is anthropic — otherwise a non-Anthropic # base_url (e.g. Codex endpoint) would leak into Anthropic requests. - base_url = _ANTHROPIC_DEFAULT_BASE_URL + base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL try: from hermes_cli.config import load_config cfg = load_config() diff --git a/agent/credential_pool.py b/agent/credential_pool.py new file mode 100644 index 000000000..ad4dbcfc1 --- /dev/null +++ b/agent/credential_pool.py @@ -0,0 +1,844 @@ +"""Persistent multi-credential pool for same-provider failover.""" + +from __future__ import annotations + +import logging +import random +import threading +import time +import uuid +import os +from dataclasses import dataclass, fields, replace +from typing import Any, Dict, List, Optional, Set, Tuple + +from hermes_constants import OPENROUTER_BASE_URL +import hermes_cli.auth as auth_mod +from hermes_cli.auth import ( + ACCESS_TOKEN_REFRESH_SKEW_SECONDS, + CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, + DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, + PROVIDER_REGISTRY, + _agent_key_is_usable, + _codex_access_token_is_expiring, + _decode_jwt_claims, + _is_expiring, + _load_auth_store, + _load_provider_state, + read_credential_pool, + write_credential_pool, +) + +logger = logging.getLogger(__name__) + + +def _load_config_safe() -> Optional[dict]: + """Load config.yaml, returning None on any 
error.""" + try: + from hermes_cli.config import load_config + + return load_config() + except Exception: + return None + + +# --- Status and type constants --- + +STATUS_OK = "ok" +STATUS_EXHAUSTED = "exhausted" + +AUTH_TYPE_OAUTH = "oauth" +AUTH_TYPE_API_KEY = "api_key" + +SOURCE_MANUAL = "manual" + +STRATEGY_FILL_FIRST = "fill_first" +STRATEGY_ROUND_ROBIN = "round_robin" +STRATEGY_RANDOM = "random" +STRATEGY_LEAST_USED = "least_used" +SUPPORTED_POOL_STRATEGIES = { + STRATEGY_FILL_FIRST, + STRATEGY_ROUND_ROBIN, + STRATEGY_RANDOM, + STRATEGY_LEAST_USED, +} + +# Cooldown before retrying an exhausted credential. +# 429 (rate-limited) cools down faster since quotas reset frequently. +# 402 (billing/quota) and other codes use a longer default. +EXHAUSTED_TTL_429_SECONDS = 60 * 60 # 1 hour +EXHAUSTED_TTL_DEFAULT_SECONDS = 24 * 60 * 60 # 24 hours + +# Pool key prefix for custom OpenAI-compatible endpoints. +# Custom endpoints all share provider='custom' but are keyed by their +# custom_providers name: 'custom:'. +CUSTOM_POOL_PREFIX = "custom:" + + +# Fields that are only round-tripped through JSON — never used for logic as attributes. 
+_EXTRA_KEYS = frozenset({ + "token_type", "scope", "client_id", "portal_base_url", "obtained_at", + "expires_in", "agent_key_id", "agent_key_expires_in", "agent_key_reused", + "agent_key_obtained_at", "tls", +}) + + +@dataclass +class PooledCredential: + provider: str + id: str + label: str + auth_type: str + priority: int + source: str + access_token: str + refresh_token: Optional[str] = None + last_status: Optional[str] = None + last_status_at: Optional[float] = None + last_error_code: Optional[int] = None + base_url: Optional[str] = None + expires_at: Optional[str] = None + expires_at_ms: Optional[int] = None + last_refresh: Optional[str] = None + inference_base_url: Optional[str] = None + agent_key: Optional[str] = None + agent_key_expires_at: Optional[str] = None + request_count: int = 0 + extra: Dict[str, Any] = None # type: ignore[assignment] + + def __post_init__(self): + if self.extra is None: + self.extra = {} + + def __getattr__(self, name: str): + if name in _EXTRA_KEYS: + return self.extra.get(name) + raise AttributeError(f"'{type(self).__name__}' object has no attribute {name!r}") + + @classmethod + def from_dict(cls, provider: str, payload: Dict[str, Any]) -> "PooledCredential": + field_names = {f.name for f in fields(cls) if f.name != "provider"} + data = {k: payload.get(k) for k in field_names if k in payload} + extra = {k: payload[k] for k in _EXTRA_KEYS if k in payload and payload[k] is not None} + data["extra"] = extra + data.setdefault("id", uuid.uuid4().hex[:6]) + data.setdefault("label", payload.get("source", provider)) + data.setdefault("auth_type", AUTH_TYPE_API_KEY) + data.setdefault("priority", 0) + data.setdefault("source", SOURCE_MANUAL) + data.setdefault("access_token", "") + return cls(provider=provider, **data) + + def to_dict(self) -> Dict[str, Any]: + _ALWAYS_EMIT = {"last_status", "last_status_at", "last_error_code"} + result: Dict[str, Any] = {} + for field_def in fields(self): + if field_def.name in ("provider", "extra"): + 
continue + value = getattr(self, field_def.name) + if value is not None or field_def.name in _ALWAYS_EMIT: + result[field_def.name] = value + for k, v in self.extra.items(): + if v is not None: + result[k] = v + return result + + @property + def runtime_api_key(self) -> str: + if self.provider == "nous": + return str(self.agent_key or self.access_token or "") + return str(self.access_token or "") + + @property + def runtime_base_url(self) -> Optional[str]: + if self.provider == "nous": + return self.inference_base_url or self.base_url + return self.base_url + + +def label_from_token(token: str, fallback: str) -> str: + claims = _decode_jwt_claims(token) + for key in ("email", "preferred_username", "upn"): + value = claims.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + return fallback + + +def _next_priority(entries: List[PooledCredential]) -> int: + return max((entry.priority for entry in entries), default=-1) + 1 + + +def _is_manual_source(source: str) -> bool: + normalized = (source or "").strip().lower() + return normalized == SOURCE_MANUAL or normalized.startswith(f"{SOURCE_MANUAL}:") + + +def _exhausted_ttl(error_code: Optional[int]) -> int: + """Return cooldown seconds based on the HTTP status that caused exhaustion.""" + if error_code == 429: + return EXHAUSTED_TTL_429_SECONDS + return EXHAUSTED_TTL_DEFAULT_SECONDS + + +def _normalize_custom_pool_name(name: str) -> str: + """Normalize a custom provider name for use as a pool key suffix.""" + return name.strip().lower().replace(" ", "-") + + +def _iter_custom_providers(config: Optional[dict] = None): + """Yield (normalized_name, entry_dict) for each valid custom_providers entry.""" + if config is None: + config = _load_config_safe() + if config is None: + return + custom_providers = config.get("custom_providers") + if not isinstance(custom_providers, list): + return + for entry in custom_providers: + if not isinstance(entry, dict): + continue + name = entry.get("name") + if 
not isinstance(name, str): + continue + yield _normalize_custom_pool_name(name), entry + + +def get_custom_provider_pool_key(base_url: str) -> Optional[str]: + """Look up the custom_providers list in config.yaml and return 'custom:' for a matching base_url. + + Returns None if no match is found. + """ + if not base_url: + return None + normalized_url = base_url.strip().rstrip("/") + for norm_name, entry in _iter_custom_providers(): + entry_url = str(entry.get("base_url") or "").strip().rstrip("/") + if entry_url and entry_url == normalized_url: + return f"{CUSTOM_POOL_PREFIX}{norm_name}" + return None + + +def list_custom_pool_providers() -> List[str]: + """Return all 'custom:*' pool keys that have entries in auth.json.""" + pool_data = read_credential_pool(None) + return sorted( + key for key in pool_data + if key.startswith(CUSTOM_POOL_PREFIX) + and isinstance(pool_data.get(key), list) + and pool_data[key] + ) + + +def _get_custom_provider_config(pool_key: str) -> Optional[Dict[str, Any]]: + """Return the custom_providers config entry matching a pool key like 'custom:together.ai'.""" + if not pool_key.startswith(CUSTOM_POOL_PREFIX): + return None + suffix = pool_key[len(CUSTOM_POOL_PREFIX):] + for norm_name, entry in _iter_custom_providers(): + if norm_name == suffix: + return entry + return None + + +def get_pool_strategy(provider: str) -> str: + """Return the configured selection strategy for a provider.""" + config = _load_config_safe() + if config is None: + return STRATEGY_FILL_FIRST + + strategies = config.get("credential_pool_strategies") + if not isinstance(strategies, dict): + return STRATEGY_FILL_FIRST + + strategy = str(strategies.get(provider, "") or "").strip().lower() + if strategy in SUPPORTED_POOL_STRATEGIES: + return strategy + return STRATEGY_FILL_FIRST + + +class CredentialPool: + def __init__(self, provider: str, entries: List[PooledCredential]): + self.provider = provider + self._entries = sorted(entries, key=lambda entry: entry.priority) + 
self._current_id: Optional[str] = None + self._strategy = get_pool_strategy(provider) + self._lock = threading.Lock() + + def has_credentials(self) -> bool: + return bool(self._entries) + + def entries(self) -> List[PooledCredential]: + return list(self._entries) + + def current(self) -> Optional[PooledCredential]: + if not self._current_id: + return None + return next((entry for entry in self._entries if entry.id == self._current_id), None) + + def _replace_entry(self, old: PooledCredential, new: PooledCredential) -> None: + """Swap an entry in-place by id, preserving sort order.""" + for idx, entry in enumerate(self._entries): + if entry.id == old.id: + self._entries[idx] = new + return + + def _persist(self) -> None: + write_credential_pool( + self.provider, + [entry.to_dict() for entry in self._entries], + ) + + def _mark_exhausted(self, entry: PooledCredential, status_code: Optional[int]) -> PooledCredential: + updated = replace( + entry, + last_status=STATUS_EXHAUSTED, + last_status_at=time.time(), + last_error_code=status_code, + ) + self._replace_entry(entry, updated) + self._persist() + return updated + + def _refresh_entry(self, entry: PooledCredential, *, force: bool) -> Optional[PooledCredential]: + if entry.auth_type != AUTH_TYPE_OAUTH or not entry.refresh_token: + if force: + self._mark_exhausted(entry, None) + return None + + try: + if self.provider == "anthropic": + from agent.anthropic_adapter import refresh_anthropic_oauth_pure + + refreshed = refresh_anthropic_oauth_pure( + entry.refresh_token, + use_json=entry.source.endswith("hermes_pkce"), + ) + updated = replace( + entry, + access_token=refreshed["access_token"], + refresh_token=refreshed["refresh_token"], + expires_at_ms=refreshed["expires_at_ms"], + ) + elif self.provider == "openai-codex": + refreshed = auth_mod.refresh_codex_oauth_pure( + entry.access_token, + entry.refresh_token, + ) + updated = replace( + entry, + access_token=refreshed["access_token"], + 
refresh_token=refreshed["refresh_token"], + last_refresh=refreshed.get("last_refresh"), + ) + elif self.provider == "nous": + nous_state = { + "access_token": entry.access_token, + "refresh_token": entry.refresh_token, + "client_id": entry.client_id, + "portal_base_url": entry.portal_base_url, + "inference_base_url": entry.inference_base_url, + "token_type": entry.token_type, + "scope": entry.scope, + "obtained_at": entry.obtained_at, + "expires_at": entry.expires_at, + "agent_key": entry.agent_key, + "agent_key_expires_at": entry.agent_key_expires_at, + "tls": entry.tls, + } + refreshed = auth_mod.refresh_nous_oauth_from_state( + nous_state, + min_key_ttl_seconds=DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, + force_refresh=force, + force_mint=force, + ) + # Apply returned fields: dataclass fields via replace, extras via dict update + field_updates = {} + extra_updates = dict(entry.extra) + _field_names = {f.name for f in fields(entry)} + for k, v in refreshed.items(): + if k in _field_names: + field_updates[k] = v + elif k in _EXTRA_KEYS: + extra_updates[k] = v + updated = replace(entry, extra=extra_updates, **field_updates) + else: + return entry + except Exception as exc: + logger.debug("Credential refresh failed for %s/%s: %s", self.provider, entry.id, exc) + self._mark_exhausted(entry, None) + return None + + updated = replace(updated, last_status=STATUS_OK, last_status_at=None, last_error_code=None) + self._replace_entry(entry, updated) + self._persist() + return updated + + def _entry_needs_refresh(self, entry: PooledCredential) -> bool: + if entry.auth_type != AUTH_TYPE_OAUTH: + return False + if self.provider == "anthropic": + if entry.expires_at_ms is None: + return False + return int(entry.expires_at_ms) <= int(time.time() * 1000) + 120_000 + if self.provider == "openai-codex": + return _codex_access_token_is_expiring( + entry.access_token, + CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, + ) + if self.provider == "nous": + # Nous refresh/mint can require network access 
and should happen when + # runtime credentials are actually resolved, not merely when the pool + # is enumerated for listing, migration, or selection. + return False + return False + + def mark_used(self, entry_id: Optional[str] = None) -> None: + """Increment request_count for tracking. Used by least_used strategy.""" + target_id = entry_id or self._current_id + if not target_id: + return + with self._lock: + for idx, entry in enumerate(self._entries): + if entry.id == target_id: + self._entries[idx] = replace(entry, request_count=entry.request_count + 1) + return + + def select(self) -> Optional[PooledCredential]: + with self._lock: + return self._select_unlocked() + + def _available_entries(self, *, clear_expired: bool = False, refresh: bool = False) -> List[PooledCredential]: + """Return entries not currently in exhaustion cooldown. + + When *clear_expired* is True, entries whose cooldown has elapsed are + reset to STATUS_OK and persisted. When *refresh* is True, entries + that need a token refresh are refreshed (skipped on failure). 
+ """ + now = time.time() + cleared_any = False + available: List[PooledCredential] = [] + for entry in self._entries: + if entry.last_status == STATUS_EXHAUSTED: + ttl = _exhausted_ttl(entry.last_error_code) + if entry.last_status_at and now - entry.last_status_at < ttl: + continue + if clear_expired: + cleared = replace(entry, last_status=STATUS_OK, last_status_at=None, last_error_code=None) + self._replace_entry(entry, cleared) + entry = cleared + cleared_any = True + if refresh and self._entry_needs_refresh(entry): + refreshed = self._refresh_entry(entry, force=False) + if refreshed is None: + continue + entry = refreshed + available.append(entry) + if cleared_any: + self._persist() + return available + + def _select_unlocked(self) -> Optional[PooledCredential]: + available = self._available_entries(clear_expired=True, refresh=True) + if not available: + self._current_id = None + return None + + if self._strategy == STRATEGY_RANDOM: + entry = random.choice(available) + self._current_id = entry.id + return entry + + if self._strategy == STRATEGY_LEAST_USED and len(available) > 1: + entry = min(available, key=lambda e: e.request_count) + self._current_id = entry.id + return entry + + if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1: + entry = available[0] + rotated = [candidate for candidate in self._entries if candidate.id != entry.id] + rotated.append(replace(entry, priority=len(self._entries) - 1)) + self._entries = [replace(candidate, priority=idx) for idx, candidate in enumerate(rotated)] + self._persist() + self._current_id = entry.id + return self.current() or entry + + entry = available[0] + self._current_id = entry.id + return entry + + def peek(self) -> Optional[PooledCredential]: + current = self.current() + if current is not None: + return current + available = self._available_entries() + return available[0] if available else None + + def mark_exhausted_and_rotate(self, *, status_code: Optional[int]) -> Optional[PooledCredential]: + 
with self._lock: + entry = self.current() or self._select_unlocked() + if entry is None: + return None + self._mark_exhausted(entry, status_code) + self._current_id = None + return self._select_unlocked() + + def try_refresh_current(self) -> Optional[PooledCredential]: + with self._lock: + return self._try_refresh_current_unlocked() + + def _try_refresh_current_unlocked(self) -> Optional[PooledCredential]: + entry = self.current() + if entry is None: + return None + refreshed = self._refresh_entry(entry, force=True) + if refreshed is not None: + self._current_id = refreshed.id + return refreshed + + def reset_statuses(self) -> int: + count = 0 + new_entries = [] + for entry in self._entries: + if entry.last_status or entry.last_status_at or entry.last_error_code: + new_entries.append(replace(entry, last_status=None, last_status_at=None, last_error_code=None)) + count += 1 + else: + new_entries.append(entry) + if count: + self._entries = new_entries + self._persist() + return count + + def remove_index(self, index: int) -> Optional[PooledCredential]: + if index < 1 or index > len(self._entries): + return None + removed = self._entries.pop(index - 1) + self._entries = [ + replace(entry, priority=new_priority) + for new_priority, entry in enumerate(self._entries) + ] + self._persist() + if self._current_id == removed.id: + self._current_id = None + return removed + + def add_entry(self, entry: PooledCredential) -> PooledCredential: + entry = replace(entry, priority=_next_priority(self._entries)) + self._entries.append(entry) + self._persist() + return entry + + +def _upsert_entry(entries: List[PooledCredential], provider: str, source: str, payload: Dict[str, Any]) -> bool: + existing_idx = None + for idx, entry in enumerate(entries): + if entry.source == source: + existing_idx = idx + break + + if existing_idx is None: + payload.setdefault("id", uuid.uuid4().hex[:6]) + payload.setdefault("priority", _next_priority(entries)) + payload.setdefault("label", 
payload.get("label") or source) + entries.append(PooledCredential.from_dict(provider, payload)) + return True + + existing = entries[existing_idx] + field_updates = {} + extra_updates = {} + _field_names = {f.name for f in fields(existing)} + for key, value in payload.items(): + if key in {"id", "priority"} or value is None: + continue + if key == "label" and existing.label: + continue + if key in _field_names: + if getattr(existing, key) != value: + field_updates[key] = value + elif key in _EXTRA_KEYS: + if existing.extra.get(key) != value: + extra_updates[key] = value + if field_updates or extra_updates: + if extra_updates: + field_updates["extra"] = {**existing.extra, **extra_updates} + entries[existing_idx] = replace(existing, **field_updates) + return True + return False + + +def _normalize_pool_priorities(provider: str, entries: List[PooledCredential]) -> bool: + if provider != "anthropic": + return False + + source_rank = { + "env:ANTHROPIC_TOKEN": 0, + "env:CLAUDE_CODE_OAUTH_TOKEN": 1, + "hermes_pkce": 2, + "claude_code": 3, + "env:ANTHROPIC_API_KEY": 4, + } + manual_entries = sorted( + (entry for entry in entries if _is_manual_source(entry.source)), + key=lambda entry: entry.priority, + ) + seeded_entries = sorted( + (entry for entry in entries if not _is_manual_source(entry.source)), + key=lambda entry: ( + source_rank.get(entry.source, len(source_rank)), + entry.priority, + entry.label, + ), + ) + + ordered = [*manual_entries, *seeded_entries] + id_to_idx = {entry.id: idx for idx, entry in enumerate(entries)} + changed = False + for new_priority, entry in enumerate(ordered): + if entry.priority != new_priority: + entries[id_to_idx[entry.id]] = replace(entry, priority=new_priority) + changed = True + return changed + + +def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]: + changed = False + active_sources: Set[str] = set() + auth_store = _load_auth_store() + + if provider == "anthropic": + from 
agent.anthropic_adapter import read_claude_code_credentials, read_hermes_oauth_credentials + + for source_name, creds in ( + ("hermes_pkce", read_hermes_oauth_credentials()), + ("claude_code", read_claude_code_credentials()), + ): + if creds and creds.get("accessToken"): + active_sources.add(source_name) + changed |= _upsert_entry( + entries, + provider, + source_name, + { + "source": source_name, + "auth_type": AUTH_TYPE_OAUTH, + "access_token": creds.get("accessToken", ""), + "refresh_token": creds.get("refreshToken"), + "expires_at_ms": creds.get("expiresAt"), + "label": label_from_token(creds.get("accessToken", ""), source_name), + }, + ) + + elif provider == "nous": + state = _load_provider_state(auth_store, "nous") + if state: + active_sources.add("device_code") + changed |= _upsert_entry( + entries, + provider, + "device_code", + { + "source": "device_code", + "auth_type": AUTH_TYPE_OAUTH, + "access_token": state.get("access_token", ""), + "refresh_token": state.get("refresh_token"), + "expires_at": state.get("expires_at"), + "token_type": state.get("token_type"), + "scope": state.get("scope"), + "client_id": state.get("client_id"), + "portal_base_url": state.get("portal_base_url"), + "inference_base_url": state.get("inference_base_url"), + "agent_key": state.get("agent_key"), + "agent_key_expires_at": state.get("agent_key_expires_at"), + "tls": state.get("tls") if isinstance(state.get("tls"), dict) else None, + "label": label_from_token(state.get("access_token", ""), "device_code"), + }, + ) + + elif provider == "openai-codex": + state = _load_provider_state(auth_store, "openai-codex") + tokens = state.get("tokens") if isinstance(state, dict) else None + if isinstance(tokens, dict) and tokens.get("access_token"): + active_sources.add("device_code") + changed |= _upsert_entry( + entries, + provider, + "device_code", + { + "source": "device_code", + "auth_type": AUTH_TYPE_OAUTH, + "access_token": tokens.get("access_token", ""), + "refresh_token": 
tokens.get("refresh_token"), + "base_url": "https://chatgpt.com/backend-api/codex", + "last_refresh": state.get("last_refresh"), + "label": label_from_token(tokens.get("access_token", ""), "device_code"), + }, + ) + + return changed, active_sources + + +def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]: + changed = False + active_sources: Set[str] = set() + if provider == "openrouter": + token = os.getenv("OPENROUTER_API_KEY", "").strip() + if token: + source = "env:OPENROUTER_API_KEY" + active_sources.add(source) + changed |= _upsert_entry( + entries, + provider, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": token, + "base_url": OPENROUTER_BASE_URL, + "label": "OPENROUTER_API_KEY", + }, + ) + return changed, active_sources + + pconfig = PROVIDER_REGISTRY.get(provider) + if not pconfig or pconfig.auth_type != AUTH_TYPE_API_KEY: + return changed, active_sources + + env_url = "" + if pconfig.base_url_env_var: + env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/") + + env_vars = list(pconfig.api_key_env_vars) + if provider == "anthropic": + env_vars = [ + "ANTHROPIC_TOKEN", + "CLAUDE_CODE_OAUTH_TOKEN", + "ANTHROPIC_API_KEY", + ] + + for env_var in env_vars: + token = os.getenv(env_var, "").strip() + if not token: + continue + source = f"env:{env_var}" + active_sources.add(source) + auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY + base_url = env_url or pconfig.inference_base_url + changed |= _upsert_entry( + entries, + provider, + source, + { + "source": source, + "auth_type": auth_type, + "access_token": token, + "base_url": base_url, + "label": env_var, + }, + ) + return changed, active_sources + + +def _prune_stale_seeded_entries(entries: List[PooledCredential], active_sources: Set[str]) -> bool: + retained = [ + entry + for entry in entries + if _is_manual_source(entry.source) + or entry.source in 
active_sources + or not ( + entry.source.startswith("env:") + or entry.source in {"claude_code", "hermes_pkce"} + ) + ] + if len(retained) == len(entries): + return False + entries[:] = retained + return True + + +def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]: + """Seed a custom endpoint pool from custom_providers config and model config.""" + changed = False + active_sources: Set[str] = set() + + # Seed from the custom_providers config entry's api_key field + cp_config = _get_custom_provider_config(pool_key) + if cp_config: + api_key = str(cp_config.get("api_key") or "").strip() + base_url = str(cp_config.get("base_url") or "").strip().rstrip("/") + name = str(cp_config.get("name") or "").strip() + if api_key: + source = f"config:{name}" + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": api_key, + "base_url": base_url, + "label": name or source, + }, + ) + + # Seed from model.api_key if model.provider=='custom' and model.base_url matches + try: + config = _load_config_safe() + model_cfg = config.get("model") if config else None + if isinstance(model_cfg, dict): + model_provider = str(model_cfg.get("provider") or "").strip().lower() + model_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/") + model_api_key = "" + for k in ("api_key", "api"): + v = model_cfg.get(k) + if isinstance(v, str) and v.strip(): + model_api_key = v.strip() + break + if model_provider == "custom" and model_base_url and model_api_key: + # Check if this model's base_url matches our custom provider + matched_key = get_custom_provider_pool_key(model_base_url) + if matched_key == pool_key: + source = "model_config" + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": model_api_key, + "base_url": 
model_base_url, + "label": "model_config", + }, + ) + except Exception: + pass + + return changed, active_sources + + +def load_pool(provider: str) -> CredentialPool: + provider = (provider or "").strip().lower() + raw_entries = read_credential_pool(provider) + entries = [PooledCredential.from_dict(provider, payload) for payload in raw_entries] + + if provider.startswith(CUSTOM_POOL_PREFIX): + # Custom endpoint pool — seed from custom_providers config and model config + custom_changed, custom_sources = _seed_custom_pool(provider, entries) + changed = custom_changed + changed |= _prune_stale_seeded_entries(entries, custom_sources) + else: + singleton_changed, singleton_sources = _seed_from_singletons(provider, entries) + env_changed, env_sources = _seed_from_env(provider, entries) + changed = singleton_changed or env_changed + changed |= _prune_stale_seeded_entries(entries, singleton_sources | env_sources) + changed |= _normalize_pool_priorities(provider, entries) + + if changed: + write_credential_pool( + provider, + [entry.to_dict() for entry in sorted(entries, key=lambda item: item.priority)], + ) + return CredentialPool(provider, entries) diff --git a/cli.py b/cli.py index cf2a5f8c8..978b36091 100644 --- a/cli.py +++ b/cli.py @@ -1955,6 +1955,7 @@ class HermesCLI: resolved_api_mode = runtime.get("api_mode", self.api_mode) resolved_acp_command = runtime.get("command") resolved_acp_args = list(runtime.get("args") or []) + resolved_credential_pool = runtime.get("credential_pool") if not isinstance(api_key, str) or not api_key: # Custom / local endpoints (llama.cpp, ollama, vLLM, etc.) often # don't require authentication. 
When a base_url IS configured but @@ -1987,6 +1988,7 @@ class HermesCLI: self.api_mode = resolved_api_mode self.acp_command = resolved_acp_command self.acp_args = resolved_acp_args + self._credential_pool = resolved_credential_pool self._provider_source = runtime.get("source") self.api_key = api_key self.base_url = base_url @@ -2088,6 +2090,7 @@ class HermesCLI: "api_mode": self.api_mode, "command": self.acp_command, "args": list(self.acp_args or []), + "credential_pool": getattr(self, "_credential_pool", None), } effective_model = model_override or self.model self.agent = AIAgent( @@ -2098,6 +2101,7 @@ class HermesCLI: api_mode=runtime.get("api_mode"), acp_command=runtime.get("command"), acp_args=runtime.get("args"), + credential_pool=runtime.get("credential_pool"), max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, verbose_logging=self.verbose, diff --git a/gateway/run.py b/gateway/run.py index 48f5182cb..2fe929447 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -298,6 +298,7 @@ def _resolve_runtime_agent_kwargs() -> dict: "api_mode": runtime.get("api_mode"), "command": runtime.get("command"), "args": list(runtime.get("args") or []), + "credential_pool": runtime.get("credential_pool"), } diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index add83eff8..250f842c7 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -545,7 +545,11 @@ def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]: except Exception: return {"version": AUTH_STORE_VERSION, "providers": {}} - if isinstance(raw, dict) and isinstance(raw.get("providers"), dict): + if isinstance(raw, dict) and ( + isinstance(raw.get("providers"), dict) + or isinstance(raw.get("credential_pool"), dict) + ): + raw.setdefault("providers", {}) return raw # Migrate from PR's "systems" format if present @@ -613,6 +617,30 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di auth_store["active_provider"] = provider_id +def 
read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]: + """Return the persisted credential pool, or one provider slice.""" + auth_store = _load_auth_store() + pool = auth_store.get("credential_pool") + if not isinstance(pool, dict): + pool = {} + if provider_id is None: + return dict(pool) + provider_entries = pool.get(provider_id) + return list(provider_entries) if isinstance(provider_entries, list) else [] + + +def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path: + """Persist one provider's credential pool under auth.json.""" + with _auth_store_lock(): + auth_store = _load_auth_store() + pool = auth_store.get("credential_pool") + if not isinstance(pool, dict): + pool = {} + auth_store["credential_pool"] = pool + pool[provider_id] = list(entries) + return _save_auth_store(auth_store) + + def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]: """Return persisted auth state for a provider, or None.""" auth_store = _load_auth_store() @@ -638,10 +666,25 @@ def clear_provider_auth(provider_id: Optional[str] = None) -> bool: return False providers = auth_store.get("providers", {}) - if target not in providers: - return False + if not isinstance(providers, dict): + providers = {} + auth_store["providers"] = providers - del providers[target] + pool = auth_store.get("credential_pool") + if not isinstance(pool, dict): + pool = {} + auth_store["credential_pool"] = pool + + cleared = False + if target in providers: + del providers[target] + cleared = True + if target in pool: + del pool[target] + cleared = True + + if not cleared: + return False if auth_store.get("active_provider") == target: auth_store["active_provider"] = None _save_auth_store(auth_store) @@ -898,15 +941,14 @@ def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None _save_auth_store(auth_store) -def _refresh_codex_auth_tokens( - tokens: Dict[str, str], - timeout_seconds: float, -) -> Dict[str, str]: - """Refresh 
Codex access token using the refresh token. - - Saves the new tokens to Hermes auth store automatically. - """ - refresh_token = tokens.get("refresh_token") +def refresh_codex_oauth_pure( + access_token: str, + refresh_token: str, + *, + timeout_seconds: float = 20.0, +) -> Dict[str, Any]: + """Refresh Codex OAuth tokens without mutating Hermes auth state.""" + del access_token # Access token is only used by callers to decide whether to refresh. if not isinstance(refresh_token, str) or not refresh_token.strip(): raise AuthError( "Codex auth is missing refresh_token. Run `hermes login` to re-authenticate.", @@ -961,8 +1003,8 @@ def _refresh_codex_auth_tokens( relogin_required=True, ) from exc - access_token = refresh_payload.get("access_token") - if not isinstance(access_token, str) or not access_token.strip(): + refreshed_access = refresh_payload.get("access_token") + if not isinstance(refreshed_access, str) or not refreshed_access.strip(): raise AuthError( "Codex token refresh response was missing access_token.", provider="openai-codex", @@ -970,11 +1012,33 @@ def _refresh_codex_auth_tokens( relogin_required=True, ) - updated_tokens = dict(tokens) - updated_tokens["access_token"] = access_token.strip() + updated = { + "access_token": refreshed_access.strip(), + "refresh_token": refresh_token.strip(), + "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + } next_refresh = refresh_payload.get("refresh_token") if isinstance(next_refresh, str) and next_refresh.strip(): - updated_tokens["refresh_token"] = next_refresh.strip() + updated["refresh_token"] = next_refresh.strip() + return updated + + +def _refresh_codex_auth_tokens( + tokens: Dict[str, str], + timeout_seconds: float, +) -> Dict[str, str]: + """Refresh Codex access token using the refresh token. + + Saves the new tokens to Hermes auth store automatically. 
+ """ + refreshed = refresh_codex_oauth_pure( + str(tokens.get("access_token", "") or ""), + str(tokens.get("refresh_token", "") or ""), + timeout_seconds=timeout_seconds, + ) + updated_tokens = dict(tokens) + updated_tokens["access_token"] = refreshed["access_token"] + updated_tokens["refresh_token"] = refreshed["refresh_token"] _save_codex_tokens(updated_tokens) return updated_tokens @@ -1313,6 +1377,122 @@ def _agent_key_is_usable(state: Dict[str, Any], min_ttl_seconds: int) -> bool: return not _is_expiring(state.get("agent_key_expires_at"), min_ttl_seconds) +def refresh_nous_oauth_pure( + access_token: str, + refresh_token: str, + client_id: str, + portal_base_url: str, + inference_base_url: str, + *, + token_type: str = "Bearer", + scope: str = DEFAULT_NOUS_SCOPE, + obtained_at: Optional[str] = None, + expires_at: Optional[str] = None, + agent_key: Optional[str] = None, + agent_key_expires_at: Optional[str] = None, + min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, + timeout_seconds: float = 15.0, + insecure: Optional[bool] = None, + ca_bundle: Optional[str] = None, + force_refresh: bool = False, + force_mint: bool = False, +) -> Dict[str, Any]: + """Refresh Nous OAuth state without mutating auth.json.""" + state: Dict[str, Any] = { + "access_token": access_token, + "refresh_token": refresh_token, + "client_id": client_id or DEFAULT_NOUS_CLIENT_ID, + "portal_base_url": (portal_base_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/"), + "inference_base_url": (inference_base_url or DEFAULT_NOUS_INFERENCE_URL).rstrip("/"), + "token_type": token_type or "Bearer", + "scope": scope or DEFAULT_NOUS_SCOPE, + "obtained_at": obtained_at, + "expires_at": expires_at, + "agent_key": agent_key, + "agent_key_expires_at": agent_key_expires_at, + "tls": { + "insecure": bool(insecure), + "ca_bundle": ca_bundle, + }, + } + verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state) + timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 
15.0) + + with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client: + if force_refresh or _is_expiring(state.get("expires_at"), ACCESS_TOKEN_REFRESH_SKEW_SECONDS): + refreshed = _refresh_access_token( + client=client, + portal_base_url=state["portal_base_url"], + client_id=state["client_id"], + refresh_token=state["refresh_token"], + ) + now = datetime.now(timezone.utc) + access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in")) + state["access_token"] = refreshed["access_token"] + state["refresh_token"] = refreshed.get("refresh_token") or state["refresh_token"] + state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer" + state["scope"] = refreshed.get("scope") or state.get("scope") + refreshed_url = _optional_base_url(refreshed.get("inference_base_url")) + if refreshed_url: + state["inference_base_url"] = refreshed_url + state["obtained_at"] = now.isoformat() + state["expires_in"] = access_ttl + state["expires_at"] = datetime.fromtimestamp( + now.timestamp() + access_ttl, tz=timezone.utc + ).isoformat() + + if force_mint or not _agent_key_is_usable(state, max(60, int(min_key_ttl_seconds))): + mint_payload = _mint_agent_key( + client=client, + portal_base_url=state["portal_base_url"], + access_token=state["access_token"], + min_ttl_seconds=min_key_ttl_seconds, + ) + now = datetime.now(timezone.utc) + state["agent_key"] = mint_payload.get("api_key") + state["agent_key_id"] = mint_payload.get("key_id") + state["agent_key_expires_at"] = mint_payload.get("expires_at") + state["agent_key_expires_in"] = mint_payload.get("expires_in") + state["agent_key_reused"] = bool(mint_payload.get("reused", False)) + state["agent_key_obtained_at"] = now.isoformat() + minted_url = _optional_base_url(mint_payload.get("inference_base_url")) + if minted_url: + state["inference_base_url"] = minted_url + + return state + + +def refresh_nous_oauth_from_state( + state: Dict[str, Any], + *, + min_key_ttl_seconds: 
int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, + timeout_seconds: float = 15.0, + force_refresh: bool = False, + force_mint: bool = False, +) -> Dict[str, Any]: + """Refresh Nous OAuth from a state dict. Thin wrapper around refresh_nous_oauth_pure.""" + tls = state.get("tls") or {} + return refresh_nous_oauth_pure( + state.get("access_token", ""), + state.get("refresh_token", ""), + state.get("client_id", "hermes-cli"), + state.get("portal_base_url", DEFAULT_NOUS_PORTAL_URL), + state.get("inference_base_url", DEFAULT_NOUS_INFERENCE_URL), + token_type=state.get("token_type", "Bearer"), + scope=state.get("scope", DEFAULT_NOUS_SCOPE), + obtained_at=state.get("obtained_at"), + expires_at=state.get("expires_at"), + agent_key=state.get("agent_key"), + agent_key_expires_at=state.get("agent_key_expires_at"), + min_key_ttl_seconds=min_key_ttl_seconds, + timeout_seconds=timeout_seconds, + insecure=tls.get("insecure"), + ca_bundle=tls.get("ca_bundle"), + force_refresh=force_refresh, + force_mint=force_mint, + ) + + def resolve_nous_runtime_credentials( *, min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, @@ -2180,34 +2360,36 @@ def _codex_device_code_login() -> Dict[str, Any]: } -def _login_nous(args, pconfig: ProviderConfig) -> None: - """Nous Portal device authorization flow.""" +def _nous_device_code_login( + *, + portal_base_url: Optional[str] = None, + inference_base_url: Optional[str] = None, + client_id: Optional[str] = None, + scope: Optional[str] = None, + open_browser: bool = True, + timeout_seconds: float = 15.0, + insecure: bool = False, + ca_bundle: Optional[str] = None, + min_key_ttl_seconds: int = 5 * 60, +) -> Dict[str, Any]: + """Run the Nous device-code flow and return full OAuth state without persisting.""" + pconfig = PROVIDER_REGISTRY["nous"] portal_base_url = ( - getattr(args, "portal_url", None) + portal_base_url or os.getenv("HERMES_PORTAL_BASE_URL") or os.getenv("NOUS_PORTAL_BASE_URL") or pconfig.portal_base_url ).rstrip("/") 
requested_inference_url = ( - getattr(args, "inference_url", None) + inference_base_url or os.getenv("NOUS_INFERENCE_BASE_URL") or pconfig.inference_base_url ).rstrip("/") - client_id = getattr(args, "client_id", None) or pconfig.client_id - scope = getattr(args, "scope", None) or pconfig.scope - open_browser = not getattr(args, "no_browser", False) - timeout_seconds = getattr(args, "timeout", None) or 15.0 + client_id = client_id or pconfig.client_id + scope = scope or pconfig.scope timeout = httpx.Timeout(timeout_seconds) - - insecure = bool(getattr(args, "insecure", False)) - ca_bundle = ( - getattr(args, "ca_bundle", None) - or os.getenv("HERMES_CA_BUNDLE") - or os.getenv("SSL_CERT_FILE") - ) verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True) - # Skip browser open in SSH sessions if _is_remote_session(): open_browser = False @@ -2218,74 +2400,109 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: elif ca_bundle: print(f"TLS verification: custom CA bundle ({ca_bundle})") - try: - with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client: - device_data = _request_device_code( - client=client, portal_base_url=portal_base_url, - client_id=client_id, scope=scope, - ) - - verification_url = str(device_data["verification_uri_complete"]) - user_code = str(device_data["user_code"]) - expires_in = int(device_data["expires_in"]) - interval = int(device_data["interval"]) - - print() - print("To continue:") - print(f" 1. Open: {verification_url}") - print(f" 2. 
If prompted, enter code: {user_code}") - - if open_browser: - opened = webbrowser.open(verification_url) - if opened: - print(" (Opened browser for verification)") - else: - print(" Could not open browser automatically — use the URL above.") - - effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS)) - print(f"Waiting for approval (polling every {effective_interval}s)...") - - token_data = _poll_for_token( - client=client, portal_base_url=portal_base_url, - client_id=client_id, device_code=str(device_data["device_code"]), - expires_in=expires_in, poll_interval=interval, - ) - - # Process token response - now = datetime.now(timezone.utc) - token_expires_in = _coerce_ttl_seconds(token_data.get("expires_in", 0)) - expires_at = now.timestamp() + token_expires_in - inference_base_url = ( - _optional_base_url(token_data.get("inference_base_url")) - or requested_inference_url + with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client: + device_data = _request_device_code( + client=client, + portal_base_url=portal_base_url, + client_id=client_id, + scope=scope, ) - if inference_base_url != requested_inference_url: - print(f"Using portal-provided inference URL: {inference_base_url}") - auth_state = { - "portal_base_url": portal_base_url, - "inference_base_url": inference_base_url, - "client_id": client_id, - "scope": token_data.get("scope") or scope, - "token_type": token_data.get("token_type", "Bearer"), - "access_token": token_data["access_token"], - "refresh_token": token_data.get("refresh_token"), - "obtained_at": now.isoformat(), - "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(), - "expires_in": token_expires_in, - "tls": { - "insecure": verify is False, - "ca_bundle": verify if isinstance(verify, str) else None, - }, - "agent_key": None, - "agent_key_id": None, - "agent_key_expires_at": None, - "agent_key_expires_in": None, - "agent_key_reused": None, - 
"agent_key_obtained_at": None, - } + verification_url = str(device_data["verification_uri_complete"]) + user_code = str(device_data["user_code"]) + expires_in = int(device_data["expires_in"]) + interval = int(device_data["interval"]) + + print() + print("To continue:") + print(f" 1. Open: {verification_url}") + print(f" 2. If prompted, enter code: {user_code}") + + if open_browser: + opened = webbrowser.open(verification_url) + if opened: + print(" (Opened browser for verification)") + else: + print(" Could not open browser automatically — use the URL above.") + + effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS)) + print(f"Waiting for approval (polling every {effective_interval}s)...") + + token_data = _poll_for_token( + client=client, + portal_base_url=portal_base_url, + client_id=client_id, + device_code=str(device_data["device_code"]), + expires_in=expires_in, + poll_interval=interval, + ) + + now = datetime.now(timezone.utc) + token_expires_in = _coerce_ttl_seconds(token_data.get("expires_in", 0)) + expires_at = now.timestamp() + token_expires_in + resolved_inference_url = ( + _optional_base_url(token_data.get("inference_base_url")) + or requested_inference_url + ) + if resolved_inference_url != requested_inference_url: + print(f"Using portal-provided inference URL: {resolved_inference_url}") + + auth_state = { + "portal_base_url": portal_base_url, + "inference_base_url": resolved_inference_url, + "client_id": client_id, + "scope": token_data.get("scope") or scope, + "token_type": token_data.get("token_type", "Bearer"), + "access_token": token_data["access_token"], + "refresh_token": token_data.get("refresh_token"), + "obtained_at": now.isoformat(), + "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(), + "expires_in": token_expires_in, + "tls": { + "insecure": verify is False, + "ca_bundle": verify if isinstance(verify, str) else None, + }, + "agent_key": None, + "agent_key_id": None, + 
"agent_key_expires_at": None, + "agent_key_expires_in": None, + "agent_key_reused": None, + "agent_key_obtained_at": None, + } + return refresh_nous_oauth_from_state( + auth_state, + min_key_ttl_seconds=min_key_ttl_seconds, + timeout_seconds=timeout_seconds, + force_refresh=False, + force_mint=True, + ) + + +def _login_nous(args, pconfig: ProviderConfig) -> None: + """Nous Portal device authorization flow.""" + timeout_seconds = getattr(args, "timeout", None) or 15.0 + insecure = bool(getattr(args, "insecure", False)) + ca_bundle = ( + getattr(args, "ca_bundle", None) + or os.getenv("HERMES_CA_BUNDLE") + or os.getenv("SSL_CERT_FILE") + ) + + try: + auth_state = _nous_device_code_login( + portal_base_url=getattr(args, "portal_url", None) or pconfig.portal_base_url, + inference_base_url=getattr(args, "inference_url", None) or pconfig.inference_base_url, + client_id=getattr(args, "client_id", None) or pconfig.client_id, + scope=getattr(args, "scope", None) or pconfig.scope, + open_browser=not getattr(args, "no_browser", False), + timeout_seconds=timeout_seconds, + insecure=insecure, + ca_bundle=ca_bundle, + min_key_ttl_seconds=5 * 60, + ) + inference_base_url = auth_state["inference_base_url"] + verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True) - # Save auth state with _auth_store_lock(): auth_store = _load_auth_store() _save_provider_state(auth_store, "nous", auth_state) @@ -2297,18 +2514,14 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: print(f" Auth state: {saved_to}") print(f" Config updated: {config_path} (model.provider=nous)") - # Mint an initial agent key and list available models try: - runtime_creds = resolve_nous_runtime_credentials( - min_key_ttl_seconds=5 * 60, - timeout_seconds=timeout_seconds, - insecure=insecure, ca_bundle=ca_bundle, - ) - runtime_key = runtime_creds.get("api_key") - runtime_base_url = runtime_creds.get("base_url") or inference_base_url + runtime_key = auth_state.get("agent_key") or 
auth_state.get("access_token") if not isinstance(runtime_key, str) or not runtime_key: - raise AuthError("No runtime API key available to fetch models", - provider="nous", code="invalid_token") + raise AuthError( + "No runtime API key available to fetch models", + provider="nous", + code="invalid_token", + ) # Use curated model list (same as OpenRouter defaults) instead # of the full /models dump which returns hundreds of models. diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py new file mode 100644 index 000000000..096387746 --- /dev/null +++ b/hermes_cli/auth_commands.py @@ -0,0 +1,470 @@ +"""Credential-pool auth subcommands.""" + +from __future__ import annotations + +from getpass import getpass +import math +import time +from types import SimpleNamespace +import uuid + +from agent.credential_pool import ( + AUTH_TYPE_API_KEY, + AUTH_TYPE_OAUTH, + CUSTOM_POOL_PREFIX, + SOURCE_MANUAL, + STATUS_EXHAUSTED, + STRATEGY_FILL_FIRST, + STRATEGY_ROUND_ROBIN, + STRATEGY_RANDOM, + STRATEGY_LEAST_USED, + SUPPORTED_POOL_STRATEGIES, + PooledCredential, + _normalize_custom_pool_name, + get_pool_strategy, + label_from_token, + list_custom_pool_providers, + load_pool, + _exhausted_ttl, +) +import hermes_cli.auth as auth_mod +from hermes_cli.auth import PROVIDER_REGISTRY +from hermes_constants import OPENROUTER_BASE_URL + + +# Providers that support OAuth login in addition to API keys. 
+_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex"} + + +def _get_custom_provider_names() -> list: + """Return list of (display_name, pool_key) tuples for custom_providers in config.""" + try: + from hermes_cli.config import load_config + + config = load_config() + except Exception: + return [] + custom_providers = config.get("custom_providers") + if not isinstance(custom_providers, list): + return [] + result = [] + for entry in custom_providers: + if not isinstance(entry, dict): + continue + name = entry.get("name") + if not isinstance(name, str) or not name.strip(): + continue + pool_key = f"{CUSTOM_POOL_PREFIX}{_normalize_custom_pool_name(name)}" + result.append((name.strip(), pool_key)) + return result + + +def _resolve_custom_provider_input(raw: str) -> str | None: + """If raw input matches a custom_providers entry name (case-insensitive), return its pool key.""" + normalized = (raw or "").strip().lower().replace(" ", "-") + if not normalized: + return None + # Direct match on 'custom:name' format + if normalized.startswith(CUSTOM_POOL_PREFIX): + return normalized + for display_name, pool_key in _get_custom_provider_names(): + if _normalize_custom_pool_name(display_name) == normalized: + return pool_key + return None + + +def _normalize_provider(provider: str) -> str: + normalized = (provider or "").strip().lower() + if normalized in {"or", "open-router"}: + return "openrouter" + # Check if it matches a custom provider name + custom_key = _resolve_custom_provider_input(normalized) + if custom_key: + return custom_key + return normalized + + +def _provider_base_url(provider: str) -> str: + if provider == "openrouter": + return OPENROUTER_BASE_URL + if provider.startswith(CUSTOM_POOL_PREFIX): + from agent.credential_pool import _get_custom_provider_config + + cp_config = _get_custom_provider_config(provider) + if cp_config: + return str(cp_config.get("base_url") or "").strip() + return "" + pconfig = PROVIDER_REGISTRY.get(provider) + return 
pconfig.inference_base_url if pconfig else "" + + +def _oauth_default_label(provider: str, count: int) -> str: + return f"{provider}-oauth-{count}" + + +def _api_key_default_label(count: int) -> str: + return f"api-key-{count}" + + +def _display_source(source: str) -> str: + return source.split(":", 1)[1] if source.startswith("manual:") else source + + +def _format_exhausted_status(entry) -> str: + if entry.last_status != STATUS_EXHAUSTED: + return "" + code = f" ({entry.last_error_code})" if entry.last_error_code else "" + if not entry.last_status_at: + return f" exhausted{code}" + remaining = max(0, int(math.ceil((entry.last_status_at + _exhausted_ttl(entry.last_error_code)) - time.time()))) + if remaining <= 0: + return f" exhausted{code} (ready to retry)" + minutes, seconds = divmod(remaining, 60) + hours, minutes = divmod(minutes, 60) + if hours: + wait = f"{hours}h {minutes}m" + elif minutes: + wait = f"{minutes}m {seconds}s" + else: + wait = f"{seconds}s" + return f" exhausted{code} ({wait} left)" + + +def auth_add_command(args) -> None: + provider = _normalize_provider(getattr(args, "provider", "")) + if provider not in PROVIDER_REGISTRY and provider != "openrouter" and not provider.startswith(CUSTOM_POOL_PREFIX): + raise SystemExit(f"Unknown provider: {provider}") + + requested_type = str(getattr(args, "auth_type", "") or "").strip().lower() + if requested_type in {AUTH_TYPE_API_KEY, "api-key"}: + requested_type = AUTH_TYPE_API_KEY + if not requested_type: + if provider.startswith(CUSTOM_POOL_PREFIX): + requested_type = AUTH_TYPE_API_KEY + else: + requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex"} else AUTH_TYPE_API_KEY + + pool = load_pool(provider) + + if requested_type == AUTH_TYPE_API_KEY: + token = (getattr(args, "api_key", None) or "").strip() + if not token: + token = getpass("Paste your API key: ").strip() + if not token: + raise SystemExit("No API key provided.") + default_label = 
_api_key_default_label(len(pool.entries()) + 1) + label = (getattr(args, "label", None) or "").strip() + if not label: + label = input(f"Label (optional, default: {default_label}): ").strip() or default_label + entry = PooledCredential( + provider=provider, + id=uuid.uuid4().hex[:6], + label=label, + auth_type=AUTH_TYPE_API_KEY, + priority=0, + source=SOURCE_MANUAL, + access_token=token, + base_url=_provider_base_url(provider), + ) + pool.add_entry(entry) + print(f'Added {provider} credential #{len(pool.entries())}: "{label}"') + return + + if provider == "anthropic": + from agent import anthropic_adapter as anthropic_mod + + creds = anthropic_mod.run_hermes_oauth_login_pure() + if not creds: + raise SystemExit("Anthropic OAuth login did not return credentials.") + label = (getattr(args, "label", None) or "").strip() or label_from_token( + creds["access_token"], + _oauth_default_label(provider, len(pool.entries()) + 1), + ) + entry = PooledCredential( + provider=provider, + id=uuid.uuid4().hex[:6], + label=label, + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source=f"{SOURCE_MANUAL}:hermes_pkce", + access_token=creds["access_token"], + refresh_token=creds.get("refresh_token"), + expires_at_ms=creds.get("expires_at_ms"), + base_url=_provider_base_url(provider), + ) + pool.add_entry(entry) + print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') + return + + if provider == "nous": + creds = auth_mod._nous_device_code_login( + portal_base_url=getattr(args, "portal_url", None), + inference_base_url=getattr(args, "inference_url", None), + client_id=getattr(args, "client_id", None), + scope=getattr(args, "scope", None), + open_browser=not getattr(args, "no_browser", False), + timeout_seconds=getattr(args, "timeout", None) or 15.0, + insecure=bool(getattr(args, "insecure", False)), + ca_bundle=getattr(args, "ca_bundle", None), + min_key_ttl_seconds=max(60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))), + ) + label = (getattr(args, "label", 
None) or "").strip() or label_from_token( + creds.get("access_token", ""), + _oauth_default_label(provider, len(pool.entries()) + 1), + ) + entry = PooledCredential.from_dict(provider, { + **creds, + "label": label, + "auth_type": AUTH_TYPE_OAUTH, + "source": f"{SOURCE_MANUAL}:device_code", + "base_url": creds.get("inference_base_url"), + }) + pool.add_entry(entry) + print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') + return + + if provider == "openai-codex": + creds = auth_mod._codex_device_code_login() + label = (getattr(args, "label", None) or "").strip() or label_from_token( + creds["tokens"]["access_token"], + _oauth_default_label(provider, len(pool.entries()) + 1), + ) + entry = PooledCredential( + provider=provider, + id=uuid.uuid4().hex[:6], + label=label, + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source=f"{SOURCE_MANUAL}:device_code", + access_token=creds["tokens"]["access_token"], + refresh_token=creds["tokens"].get("refresh_token"), + base_url=creds.get("base_url"), + last_refresh=creds.get("last_refresh"), + ) + pool.add_entry(entry) + print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') + return + + raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.") + + +def auth_list_command(args) -> None: + provider_filter = _normalize_provider(getattr(args, "provider", "") or "") + if provider_filter: + providers = [provider_filter] + else: + providers = sorted({ + *PROVIDER_REGISTRY.keys(), + "openrouter", + *list_custom_pool_providers(), + }) + for provider in providers: + pool = load_pool(provider) + entries = pool.entries() + if not entries: + continue + current = pool.peek() + print(f"{provider} ({len(entries)} credentials):") + for idx, entry in enumerate(entries, start=1): + marker = " " + if current is not None and entry.id == current.id: + marker = "← " + status = _format_exhausted_status(entry) + source = _display_source(entry.source) + 
print(f" #{idx} {entry.label:<20} {entry.auth_type:<7} {source}{status} {marker}".rstrip()) + print() + + +def auth_remove_command(args) -> None: + provider = _normalize_provider(getattr(args, "provider", "")) + index = int(getattr(args, "index")) + pool = load_pool(provider) + removed = pool.remove_index(index) + if removed is None: + raise SystemExit(f"No credential #{index} for provider {provider}.") + print(f"Removed {provider} credential #{index} ({removed.label})") + + +def auth_reset_command(args) -> None: + provider = _normalize_provider(getattr(args, "provider", "")) + pool = load_pool(provider) + count = pool.reset_statuses() + print(f"Reset status on {count} {provider} credentials") + + +def _interactive_auth() -> None: + """Interactive credential pool management when `hermes auth` is called bare.""" + # Show current pool status first + print("Credential Pool Status") + print("=" * 50) + + auth_list_command(SimpleNamespace(provider=None)) + print() + + # Main menu + choices = [ + "Add a credential", + "Remove a credential", + "Reset cooldowns for a provider", + "Set rotation strategy for a provider", + "Exit", + ] + print("What would you like to do?") + for i, choice in enumerate(choices, 1): + print(f" {i}. 
{choice}") + + try: + raw = input("\nChoice: ").strip() + except (EOFError, KeyboardInterrupt): + return + + if not raw or raw == str(len(choices)): + return + + if raw == "1": + _interactive_add() + elif raw == "2": + _interactive_remove() + elif raw == "3": + _interactive_reset() + elif raw == "4": + _interactive_strategy() + + +def _pick_provider(prompt: str = "Provider") -> str: + """Prompt for a provider name with auto-complete hints.""" + known = sorted(set(list(PROVIDER_REGISTRY.keys()) + ["openrouter"])) + custom_names = _get_custom_provider_names() + if custom_names: + custom_display = [name for name, _key in custom_names] + print(f"\nKnown providers: {', '.join(known)}") + print(f"Custom endpoints: {', '.join(custom_display)}") + else: + print(f"\nKnown providers: {', '.join(known)}") + try: + raw = input(f"{prompt}: ").strip() + except (EOFError, KeyboardInterrupt): + raise SystemExit() + return _normalize_provider(raw) + + +def _interactive_add() -> None: + provider = _pick_provider("Provider to add credential for") + if provider not in PROVIDER_REGISTRY and provider != "openrouter" and not provider.startswith(CUSTOM_POOL_PREFIX): + raise SystemExit(f"Unknown provider: {provider}") + + # For OAuth-capable providers, ask which type + if provider in _OAUTH_CAPABLE_PROVIDERS: + print(f"\n{provider} supports both API keys and OAuth login.") + print(" 1. API key (paste a key from the provider dashboard)") + print(" 2. 
OAuth login (authenticate via browser)") + try: + type_choice = input("Type [1/2]: ").strip() + except (EOFError, KeyboardInterrupt): + return + if type_choice == "2": + auth_type = "oauth" + else: + auth_type = "api_key" + else: + auth_type = "api_key" + + auth_add_command(SimpleNamespace( + provider=provider, auth_type=auth_type, label=None, api_key=None, + portal_url=None, inference_url=None, client_id=None, scope=None, + no_browser=False, timeout=None, insecure=False, ca_bundle=None, + )) + + +def _interactive_remove() -> None: + provider = _pick_provider("Provider to remove credential from") + pool = load_pool(provider) + if not pool.has_credentials(): + print(f"No credentials for {provider}.") + return + + # Show entries with indices + for i, e in enumerate(pool.entries(), 1): + exhausted = _format_exhausted_status(e) + print(f" #{i} {e.label:25s} {e.auth_type:10s} {e.source}{exhausted}") + + try: + raw = input("Remove # (or blank to cancel): ").strip() + except (EOFError, KeyboardInterrupt): + return + if not raw: + return + + try: + index = int(raw) + except ValueError: + print("Invalid number.") + return + + auth_remove_command(SimpleNamespace(provider=provider, index=index)) + + +def _interactive_reset() -> None: + provider = _pick_provider("Provider to reset cooldowns for") + + auth_reset_command(SimpleNamespace(provider=provider)) + + +def _interactive_strategy() -> None: + provider = _pick_provider("Provider to set strategy for") + current = get_pool_strategy(provider) + strategies = [STRATEGY_FILL_FIRST, STRATEGY_ROUND_ROBIN, STRATEGY_LEAST_USED, STRATEGY_RANDOM] + + print(f"\nCurrent strategy for {provider}: {current}") + print() + descriptions = { + STRATEGY_FILL_FIRST: "Use first key until exhausted, then next", + STRATEGY_ROUND_ROBIN: "Cycle through keys evenly", + STRATEGY_LEAST_USED: "Always pick the least-used key", + STRATEGY_RANDOM: "Random selection", + } + for i, s in enumerate(strategies, 1): + marker = " ←" if s == current else "" + 
print(f" {i}. {s:15s} — {descriptions.get(s, '')}{marker}") + + try: + raw = input("\nStrategy [1-4]: ").strip() + except (EOFError, KeyboardInterrupt): + return + if not raw: + return + + try: + idx = int(raw) - 1 + strategy = strategies[idx] + except (ValueError, IndexError): + print("Invalid choice.") + return + + from hermes_cli.config import load_config, save_config + cfg = load_config() + pool_strategies = cfg.get("credential_pool_strategies") or {} + if not isinstance(pool_strategies, dict): + pool_strategies = {} + pool_strategies[provider] = strategy + cfg["credential_pool_strategies"] = pool_strategies + save_config(cfg) + print(f"Set {provider} strategy to: {strategy}") + + +def auth_command(args) -> None: + action = getattr(args, "auth_action", "") + if action == "add": + auth_add_command(args) + return + if action == "list": + auth_list_command(args) + return + if action == "remove": + auth_remove_command(args) + return + if action == "reset": + auth_reset_command(args) + return + # No subcommand — launch interactive mode + _interactive_auth() diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 9d7f545b2..51b8b9af7 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -198,6 +198,7 @@ def ensure_hermes_home(): DEFAULT_CONFIG = { "model": "anthropic/claude-opus-4.6", "fallback_providers": [], + "credential_pool_strategies": {}, "toolsets": ["hermes-cli"], "agent": { "max_turns": 90, @@ -503,7 +504,7 @@ DEFAULT_CONFIG = { }, # Config schema version - bump this when adding new required fields - "_config_version": 10, + "_config_version": 11, } # ============================================================================= diff --git a/hermes_cli/main.py b/hermes_cli/main.py index a209ea11c..3c7142b5e 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2434,6 +2434,12 @@ def cmd_logout(args): logout_command(args) +def cmd_auth(args): + """Manage pooled credentials.""" + from hermes_cli.auth_commands import auth_command + 
auth_command(args) + + def cmd_status(args): """Show status of all components.""" from hermes_cli.status import show_status @@ -3339,7 +3345,7 @@ def _coalesce_session_name_args(argv: list) -> list: or a known top-level subcommand. """ _SUBCOMMANDS = { - "chat", "model", "gateway", "setup", "whatsapp", "login", "logout", + "chat", "model", "gateway", "setup", "whatsapp", "login", "logout", "auth", "status", "cron", "doctor", "config", "pairing", "skills", "tools", "mcp", "sessions", "insights", "version", "update", "uninstall", "profile", @@ -3628,6 +3634,10 @@ Examples: hermes --resume Resume a specific session by ID hermes setup Run setup wizard hermes logout Clear stored authentication + hermes auth add Add a pooled credential + hermes auth list List pooled credentials + hermes auth remove

Remove pooled credential by index + hermes auth reset Clear exhaustion status for a provider hermes model Select default model hermes config View configuration hermes config edit Edit config in $EDITOR @@ -3946,6 +3956,33 @@ For more help on a command: ) logout_parser.set_defaults(func=cmd_logout) + auth_parser = subparsers.add_parser( + "auth", + help="Manage pooled provider credentials", + ) + auth_subparsers = auth_parser.add_subparsers(dest="auth_action") + auth_add = auth_subparsers.add_parser("add", help="Add a pooled credential") + auth_add.add_argument("provider", help="Provider id (for example: anthropic, openai-codex, openrouter)") + auth_add.add_argument("--type", dest="auth_type", choices=["oauth", "api-key", "api_key"], help="Credential type to add") + auth_add.add_argument("--label", help="Optional display label") + auth_add.add_argument("--api-key", help="API key value (otherwise prompted securely)") + auth_add.add_argument("--portal-url", help="Nous portal base URL") + auth_add.add_argument("--inference-url", help="Nous inference base URL") + auth_add.add_argument("--client-id", help="OAuth client id") + auth_add.add_argument("--scope", help="OAuth scope override") + auth_add.add_argument("--no-browser", action="store_true", help="Do not auto-open a browser for OAuth login") + auth_add.add_argument("--timeout", type=float, help="OAuth/network timeout in seconds") + auth_add.add_argument("--insecure", action="store_true", help="Disable TLS verification for OAuth login") + auth_add.add_argument("--ca-bundle", help="Custom CA bundle for OAuth login") + auth_list = auth_subparsers.add_parser("list", help="List pooled credentials") + auth_list.add_argument("provider", nargs="?", help="Optional provider filter") + auth_remove = auth_subparsers.add_parser("remove", help="Remove a pooled credential by index") + auth_remove.add_argument("provider", help="Provider id") + auth_remove.add_argument("index", type=int, help="1-based credential index") + auth_reset 
= auth_subparsers.add_parser("reset", help="Clear exhaustion status for all credentials for a provider") + auth_reset.add_argument("provider", help="Provider id") + auth_parser.set_defaults(func=cmd_auth) + # ========================================================================= # status command # ========================================================================= diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 644331baa..bb5f4758a 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -6,8 +6,10 @@ import os from typing import Any, Dict, Optional from hermes_cli import auth as auth_mod +from agent.credential_pool import CredentialPool, PooledCredential, get_custom_provider_pool_key, load_pool from hermes_cli.auth import ( AuthError, + DEFAULT_CODEX_BASE_URL, PROVIDER_REGISTRY, format_auth_error, resolve_provider, @@ -109,6 +111,50 @@ def _parse_api_mode(raw: Any) -> Optional[str]: return None +def _resolve_runtime_from_pool_entry( + *, + provider: str, + entry: PooledCredential, + requested_provider: str, + model_cfg: Optional[Dict[str, Any]] = None, + pool: Optional[CredentialPool] = None, +) -> Dict[str, Any]: + model_cfg = model_cfg or _get_model_config() + base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/") + api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "") + api_mode = "chat_completions" + if provider == "openai-codex": + api_mode = "codex_responses" + base_url = base_url or DEFAULT_CODEX_BASE_URL + elif provider == "anthropic": + api_mode = "anthropic_messages" + cfg_provider = str(model_cfg.get("provider") or "").strip().lower() + cfg_base_url = "" + if cfg_provider == "anthropic": + cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/") + base_url = cfg_base_url or base_url or "https://api.anthropic.com" + elif provider == "nous": + api_mode = "chat_completions" + elif 
provider == "copilot": + api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", "")) + else: + configured_mode = _parse_api_mode(model_cfg.get("api_mode")) + if configured_mode: + api_mode = configured_mode + elif base_url.rstrip("/").endswith("/anthropic"): + api_mode = "anthropic_messages" + + return { + "provider": provider, + "api_mode": api_mode, + "base_url": base_url, + "api_key": api_key, + "source": getattr(entry, "source", "pool"), + "credential_pool": pool, + "requested_provider": requested_provider, + } + + def resolve_requested_provider(requested: Optional[str] = None) -> str: """Resolve provider request from explicit arg, config, then env.""" if requested and requested.strip(): @@ -128,6 +174,37 @@ def resolve_requested_provider(requested: Optional[str] = None) -> str: return "auto" +def _try_resolve_from_custom_pool( + base_url: str, + provider_label: str, + api_mode_override: Optional[str] = None, +) -> Optional[Dict[str, Any]]: + """Check if a credential pool exists for a custom endpoint and return a runtime dict if so.""" + pool_key = get_custom_provider_pool_key(base_url) + if not pool_key: + return None + try: + pool = load_pool(pool_key) + if not pool.has_credentials(): + return None + entry = pool.select() + if entry is None: + return None + pool_api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "") + if not pool_api_key: + return None + return { + "provider": provider_label, + "api_mode": api_mode_override or _detect_api_mode_for_url(base_url) or "chat_completions", + "base_url": base_url, + "api_key": pool_api_key, + "source": f"pool:{pool_key}", + "credential_pool": pool, + } + except Exception: + return None + + def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]: requested_norm = _normalize_custom_provider_name(requested_provider or "") if not requested_norm or requested_norm == "custom": @@ -192,6 +269,11 @@ def _resolve_named_custom_runtime( if 
not base_url: return None + # Check if a credential pool exists for this custom endpoint + pool_result = _try_resolve_from_custom_pool(base_url, "custom", custom_provider.get("api_mode")) + if pool_result: + return pool_result + api_key_candidates = [ (explicit_api_key or "").strip(), str(custom_provider.get("api_key", "") or "").strip(), @@ -281,6 +363,15 @@ def _resolve_openrouter_runtime( # Also provide a placeholder API key for local servers that don't require # authentication — the OpenAI SDK requires a non-empty api_key string. effective_provider = "custom" if requested_norm == "custom" else "openrouter" + + # For custom endpoints, check if a credential pool exists + if effective_provider == "custom" and base_url: + pool_result = _try_resolve_from_custom_pool( + base_url, effective_provider, _parse_api_mode(model_cfg.get("api_mode")), + ) + if pool_result: + return pool_result + if effective_provider == "custom" and not api_key and not _is_openrouter_url: api_key = "no-key-required" @@ -295,6 +386,134 @@ def _resolve_openrouter_runtime( } +def _resolve_explicit_runtime( + *, + provider: str, + requested_provider: str, + model_cfg: Dict[str, Any], + explicit_api_key: Optional[str] = None, + explicit_base_url: Optional[str] = None, +) -> Optional[Dict[str, Any]]: + explicit_api_key = str(explicit_api_key or "").strip() + explicit_base_url = str(explicit_base_url or "").strip().rstrip("/") + if not explicit_api_key and not explicit_base_url: + return None + + if provider == "anthropic": + cfg_provider = str(model_cfg.get("provider") or "").strip().lower() + cfg_base_url = "" + if cfg_provider == "anthropic": + cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/") + base_url = explicit_base_url or cfg_base_url or "https://api.anthropic.com" + api_key = explicit_api_key + if not api_key: + from agent.anthropic_adapter import resolve_anthropic_token + + api_key = resolve_anthropic_token() + if not api_key: + raise AuthError( + "No Anthropic 
credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, " + "run 'claude setup-token', or authenticate with 'claude /login'." + ) + return { + "provider": "anthropic", + "api_mode": "anthropic_messages", + "base_url": base_url, + "api_key": api_key, + "source": "explicit", + "requested_provider": requested_provider, + } + + if provider == "openai-codex": + base_url = explicit_base_url or DEFAULT_CODEX_BASE_URL + api_key = explicit_api_key + last_refresh = None + if not api_key: + creds = resolve_codex_runtime_credentials() + api_key = creds.get("api_key", "") + last_refresh = creds.get("last_refresh") + if not explicit_base_url: + base_url = creds.get("base_url", "").rstrip("/") or base_url + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": base_url, + "api_key": api_key, + "source": "explicit", + "last_refresh": last_refresh, + "requested_provider": requested_provider, + } + + if provider == "nous": + state = auth_mod.get_provider_auth_state("nous") or {} + base_url = ( + explicit_base_url + or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/") + ) + api_key = explicit_api_key or str(state.get("agent_key") or state.get("access_token") or "").strip() + expires_at = state.get("agent_key_expires_at") or state.get("expires_at") + if not api_key: + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + ) + api_key = creds.get("api_key", "") + expires_at = creds.get("expires_at") + if not explicit_base_url: + base_url = creds.get("base_url", "").rstrip("/") or base_url + return { + "provider": "nous", + "api_mode": "chat_completions", + "base_url": base_url, + "api_key": api_key, + "source": "explicit", + "expires_at": expires_at, + "requested_provider": requested_provider, + } + + pconfig = PROVIDER_REGISTRY.get(provider) + if pconfig 
and pconfig.auth_type == "api_key": + env_url = "" + if pconfig.base_url_env_var: + env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/") + + base_url = explicit_base_url + if not base_url: + if provider == "kimi-coding": + creds = resolve_api_key_provider_credentials(provider) + base_url = creds.get("base_url", "").rstrip("/") + else: + base_url = env_url or pconfig.inference_base_url + + api_key = explicit_api_key + if not api_key: + creds = resolve_api_key_provider_credentials(provider) + api_key = creds.get("api_key", "") + if not base_url: + base_url = creds.get("base_url", "").rstrip("/") + + api_mode = "chat_completions" + if provider == "copilot": + api_mode = _copilot_runtime_api_mode(model_cfg, api_key) + else: + configured_mode = _parse_api_mode(model_cfg.get("api_mode")) + if configured_mode: + api_mode = configured_mode + elif base_url.rstrip("/").endswith("/anthropic"): + api_mode = "anthropic_messages" + + return { + "provider": provider, + "api_mode": api_mode, + "base_url": base_url.rstrip("/"), + "api_key": api_key, + "source": "explicit", + "requested_provider": requested_provider, + } + + return None + + def resolve_runtime_provider( *, requested: Optional[str] = None, @@ -318,6 +537,57 @@ def resolve_runtime_provider( explicit_api_key=explicit_api_key, explicit_base_url=explicit_base_url, ) + model_cfg = _get_model_config() + explicit_runtime = _resolve_explicit_runtime( + provider=provider, + requested_provider=requested_provider, + model_cfg=model_cfg, + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + ) + if explicit_runtime: + return explicit_runtime + + should_use_pool = provider != "openrouter" + if provider == "openrouter": + cfg_provider = str(model_cfg.get("provider") or "").strip().lower() + cfg_base_url = str(model_cfg.get("base_url") or "").strip() + env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip() + env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip() + 
has_custom_endpoint = bool( + explicit_base_url + or env_openai_base_url + or env_openrouter_base_url + ) + if cfg_base_url and cfg_provider in {"auto", "custom"}: + has_custom_endpoint = True + has_runtime_override = bool(explicit_api_key or explicit_base_url) + should_use_pool = ( + requested_provider in {"openrouter", "auto"} + and not has_custom_endpoint + and not has_runtime_override + ) + + try: + pool = load_pool(provider) if should_use_pool else None + except Exception: + pool = None + if pool and pool.has_credentials(): + entry = pool.select() + pool_api_key = "" + if entry is not None: + pool_api_key = ( + getattr(entry, "runtime_api_key", None) + or getattr(entry, "access_token", "") + ) + if entry is not None and pool_api_key: + return _resolve_runtime_from_pool_entry( + provider=provider, + entry=entry, + requested_provider=requested_provider, + model_cfg=model_cfg, + pool=pool, + ) if provider == "nous": creds = resolve_nous_runtime_credentials( @@ -371,7 +641,6 @@ def resolve_runtime_provider( # Allow base URL override from config.yaml model.base_url, but only # when the configured provider is anthropic — otherwise a non-Anthropic # base_url (e.g. Codex endpoint) would leak into Anthropic requests. 
- model_cfg = _get_model_config() cfg_provider = str(model_cfg.get("provider") or "").strip().lower() cfg_base_url = "" if cfg_provider == "anthropic": @@ -390,7 +659,6 @@ def resolve_runtime_provider( pconfig = PROVIDER_REGISTRY.get(provider) if pconfig and pconfig.auth_type == "api_key": creds = resolve_api_key_provider_credentials(provider) - model_cfg = _get_model_config() base_url = creds.get("base_url", "").rstrip("/") api_mode = "chat_completions" if provider == "copilot": diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 50368915c..bd64c75f8 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -54,6 +54,32 @@ def _set_default_model(config: Dict[str, Any], model_name: str) -> None: config["model"] = model_cfg +def _get_credential_pool_strategies(config: Dict[str, Any]) -> Dict[str, str]: + strategies = config.get("credential_pool_strategies") + return dict(strategies) if isinstance(strategies, dict) else {} + + +def _set_credential_pool_strategy(config: Dict[str, Any], provider: str, strategy: str) -> None: + if not provider: + return + strategies = _get_credential_pool_strategies(config) + strategies[provider] = strategy + config["credential_pool_strategies"] = strategies + + +def _supports_same_provider_pool_setup(provider: str) -> bool: + if not provider or provider == "custom": + return False + if provider == "openrouter": + return True + from hermes_cli.auth import PROVIDER_REGISTRY + + pconfig = PROVIDER_REGISTRY.get(provider) + if not pconfig: + return False + return pconfig.auth_type in {"api_key", "oauth_device_code"} + + # Default model lists per provider — used as fallback when the live # /models endpoint can't be reached. 
_DEFAULT_PROVIDER_MODELS = { @@ -849,6 +875,85 @@ def setup_model_provider(config: dict): selected_provider = _m.get("provider") + # ── Same-provider fallback & rotation setup ── + if _supports_same_provider_pool_setup(selected_provider): + try: + from types import SimpleNamespace + from agent.credential_pool import load_pool + from hermes_cli.auth_commands import auth_add_command + + pool = load_pool(selected_provider) + entries = pool.entries() + entry_count = len(entries) + manual_count = sum(1 for entry in entries if str(getattr(entry, "source", "")).startswith("manual")) + auto_count = entry_count - manual_count + print() + print_header("Same-Provider Fallback & Rotation") + print_info( + "Hermes can keep multiple credentials for one provider and rotate between" + ) + print_info( + "them when a credential is exhausted or rate-limited. This preserves" + ) + print_info( + "your primary provider while reducing interruptions from quota issues." + ) + print() + if auto_count > 0: + print_info( + f"Current pooled credentials for {selected_provider}: {entry_count} " + f"({manual_count} manual, {auto_count} auto-detected from env/shared auth)" + ) + else: + print_info(f"Current pooled credentials for {selected_provider}: {entry_count}") + + while prompt_yes_no("Add another credential for same-provider fallback?", False): + auth_add_command( + SimpleNamespace( + provider=selected_provider, + auth_type="", + label=None, + api_key=None, + portal_url=None, + inference_url=None, + client_id=None, + scope=None, + no_browser=False, + timeout=15.0, + insecure=False, + ca_bundle=None, + min_key_ttl_seconds=5 * 60, + ) + ) + pool = load_pool(selected_provider) + entry_count = len(pool.entries()) + print_info(f"Provider pool now has {entry_count} credential(s).") + + if entry_count > 1: + strategy_labels = [ + "Fill-first / sticky — keep using the first healthy credential until it is exhausted", + "Round robin — rotate to the next healthy credential after each selection", + 
"Random — pick a random healthy credential each time", + ] + current_strategy = _get_credential_pool_strategies(config).get(selected_provider, "fill_first") + default_strategy_idx = { + "fill_first": 0, + "round_robin": 1, + "random": 2, + }.get(current_strategy, 0) + strategy_idx = prompt_choice( + "Select same-provider rotation strategy:", + strategy_labels, + default_strategy_idx, + ) + strategy_value = ["fill_first", "round_robin", "random"][strategy_idx] + _set_credential_pool_strategy(config, selected_provider, strategy_value) + print_success(f"Saved {selected_provider} rotation strategy: {strategy_value}") + else: + _set_credential_pool_strategy(config, selected_provider, "fill_first") + except Exception as exc: + logger.debug("Could not configure same-provider fallback in setup: %s", exc) + # ── Vision & Image Analysis Setup ── # Keep setup aligned with the actual runtime resolver the vision tools use. try: diff --git a/run_agent.py b/run_agent.py index 6e8b23f24..13278d94c 100644 --- a/run_agent.py +++ b/run_agent.py @@ -505,6 +505,7 @@ class AIAgent: honcho_config=None, iteration_budget: "IterationBudget" = None, fallback_model: Dict[str, Any] = None, + credential_pool=None, checkpoints_enabled: bool = False, checkpoint_max_snapshots: int = 50, pass_session_id: bool = False, @@ -575,6 +576,7 @@ class AIAgent: self.skip_context_files = skip_context_files self.pass_session_id = pass_session_id self.persist_session = persist_session + self._credential_pool = credential_pool self.log_prefix_chars = log_prefix_chars self.log_prefix = f"{log_prefix} " if log_prefix else "" # Store effective base URL for feature detection (prompt caching, reasoning, etc.) 
@@ -3775,6 +3777,93 @@ class AIAgent: self._is_anthropic_oauth = _is_oauth_token(new_token) return True + def _apply_client_headers_for_base_url(self, base_url: str) -> None: + from agent.auxiliary_client import _OR_HEADERS + + normalized = (base_url or "").lower() + if "openrouter" in normalized: + self._client_kwargs["default_headers"] = dict(_OR_HEADERS) + elif "api.githubcopilot.com" in normalized: + from hermes_cli.models import copilot_default_headers + + self._client_kwargs["default_headers"] = copilot_default_headers() + elif "api.kimi.com" in normalized: + self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.3"} + else: + self._client_kwargs.pop("default_headers", None) + + def _swap_credential(self, entry) -> None: + runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "") + runtime_base = getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or self.base_url + + if self.api_mode == "anthropic_messages": + from agent.anthropic_adapter import build_anthropic_client, _is_oauth_token + + try: + self._anthropic_client.close() + except Exception: + pass + + self._anthropic_api_key = runtime_key + self._anthropic_base_url = runtime_base + self._anthropic_client = build_anthropic_client(runtime_key, runtime_base) + self._is_anthropic_oauth = _is_oauth_token(runtime_key) if self.provider == "anthropic" else False + self.api_key = runtime_key + self.base_url = runtime_base + return + + self.api_key = runtime_key + self.base_url = runtime_base.rstrip("/") if isinstance(runtime_base, str) else runtime_base + self._client_kwargs["api_key"] = self.api_key + self._client_kwargs["base_url"] = self.base_url + self._apply_client_headers_for_base_url(self.base_url) + self._replace_primary_openai_client(reason="credential_rotation") + + def _recover_with_credential_pool( + self, + *, + status_code: Optional[int], + has_retried_429: bool, + ) -> tuple[bool, bool]: + """Attempt credential recovery via 
pool rotation. + + Returns (recovered, has_retried_429). + On 429: first occurrence retries same credential (sets flag True). + second consecutive 429 rotates to next credential (resets flag). + On 402: immediately rotates (billing exhaustion won't resolve with retry). + On 401: attempts token refresh before rotating. + """ + pool = self._credential_pool + if pool is None or status_code is None: + return False, has_retried_429 + + if status_code == 402: + next_entry = pool.mark_exhausted_and_rotate(status_code=402) + if next_entry is not None: + logger.info(f"Credential 402 (billing) — rotated to pool entry {getattr(next_entry, 'id', '?')}") + self._swap_credential(next_entry) + return True, False + return False, has_retried_429 + + if status_code == 429: + if not has_retried_429: + return False, True + next_entry = pool.mark_exhausted_and_rotate(status_code=429) + if next_entry is not None: + logger.info(f"Credential 429 (rate limit) — rotated to pool entry {getattr(next_entry, 'id', '?')}") + self._swap_credential(next_entry) + return True, False + return False, True + + if status_code == 401: + refreshed = pool.try_refresh_current() + if refreshed is not None: + logger.info(f"Credential 401 — refreshed pool entry {getattr(refreshed, 'id', '?')}") + self._swap_credential(refreshed) + return True, has_retried_429 + + return False, has_retried_429 + def _anthropic_messages_create(self, api_kwargs: dict): if self.api_mode == "anthropic_messages": self._try_refresh_anthropic_client_credentials() @@ -6460,6 +6549,7 @@ class AIAgent: codex_auth_retry_attempted = False anthropic_auth_retry_attempted = False nous_auth_retry_attempted = False + has_retried_429 = False restart_with_compressed_messages = False restart_with_length_continuation = False @@ -6895,6 +6985,7 @@ class AIAgent: if not self.quiet_mode: self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") + has_retried_429 = False # Reset on success break 
# Success, exit retry loop except InterruptedError: @@ -6937,6 +7028,12 @@ class AIAgent: # prompt or prefill. Fall through to normal error path. status_code = getattr(api_error, "status_code", None) + recovered_with_pool, has_retried_429 = self._recover_with_credential_pool( + status_code=status_code, + has_retried_429=has_retried_429, + ) + if recovered_with_pool: + continue if ( self.api_mode == "codex_responses" and self.provider == "openai-codex" diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index a8197e574..b9f71674a 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -198,7 +198,8 @@ class TestAnthropicOAuthFlag: def test_api_key_no_oauth_flag(self, monkeypatch): """Regular API keys (sk-ant-api-*) should create client with is_oauth=False.""" with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-testkey1234"), \ - patch("agent.anthropic_adapter.build_anthropic_client") as mock_build: + patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ + patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): mock_build.return_value = MagicMock() from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient client, model = _try_anthropic() @@ -207,6 +208,31 @@ class TestAnthropicOAuthFlag: adapter = client.chat.completions assert adapter._is_oauth is False + def test_pool_entry_takes_priority_over_legacy_resolution(self): + class _Entry: + access_token = "sk-ant-oat01-pooled" + base_url = "https://api.anthropic.com" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + with ( + patch("agent.auxiliary_client.load_pool", return_value=_Pool()), + patch("agent.anthropic_adapter.resolve_anthropic_token", side_effect=AssertionError("legacy path should not run")), + patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()) as 
mock_build, + ): + from agent.auxiliary_client import _try_anthropic + + client, model = _try_anthropic() + + assert client is not None + assert model == "claude-haiku-4-5-20251001" + assert mock_build.call_args.args[0] == "sk-ant-oat01-pooled" + class TestExpiredCodexFallback: """Test that expired Codex tokens don't block the auto chain.""" @@ -392,7 +418,8 @@ class TestExplicitProviderRouting: def test_explicit_anthropic_api_key(self, monkeypatch): """provider='anthropic' + regular API key should work with is_oauth=False.""" with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api-regular-key"), \ - patch("agent.anthropic_adapter.build_anthropic_client") as mock_build: + patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ + patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): mock_build.return_value = MagicMock() client, model = resolve_provider_client("anthropic") assert client is not None @@ -542,6 +569,32 @@ class TestGetTextAuxiliaryClient: from agent.auxiliary_client import CodexAuxiliaryClient assert isinstance(client, CodexAuxiliaryClient) + def test_codex_pool_entry_takes_priority_over_auth_store(self): + class _Entry: + access_token = "pooled-codex-token" + base_url = "https://chatgpt.com/backend-api/codex" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + with ( + patch("agent.auxiliary_client.load_pool", return_value=_Pool()), + patch("agent.auxiliary_client.OpenAI"), + patch("hermes_cli.auth._read_codex_tokens", side_effect=AssertionError("legacy codex store should not run")), + ): + from agent.auxiliary_client import _try_codex + + client, model = _try_codex() + + from agent.auxiliary_client import CodexAuxiliaryClient + + assert isinstance(client, CodexAuxiliaryClient) + assert model == "gpt-5.2-codex" + def test_returns_none_when_nothing_available(self, monkeypatch): monkeypatch.delenv("OPENAI_BASE_URL", raising=False) 
monkeypatch.delenv("OPENAI_API_KEY", raising=False) @@ -590,6 +643,35 @@ class TestVisionClientFallback: assert client.__class__.__name__ == "AnthropicAuxiliaryClient" assert model == "claude-haiku-4-5-20251001" + +class TestAuxiliaryPoolAwareness: + def test_try_nous_uses_pool_entry(self): + class _Entry: + access_token = "pooled-access-token" + agent_key = "pooled-agent-key" + inference_base_url = "https://inference.pool.example/v1" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + with ( + patch("agent.auxiliary_client.load_pool", return_value=_Pool()), + patch("agent.auxiliary_client.OpenAI") as mock_openai, + ): + from agent.auxiliary_client import _try_nous + + client, model = _try_nous() + + assert client is not None + assert model == "gemini-3-flash" + call_kwargs = mock_openai.call_args.kwargs + assert call_kwargs["api_key"] == "pooled-agent-key" + assert call_kwargs["base_url"] == "https://inference.pool.example/v1" + def test_resolve_provider_client_copilot_uses_runtime_credentials(self, monkeypatch): monkeypatch.delenv("GITHUB_TOKEN", raising=False) monkeypatch.delenv("GH_TOKEN", raising=False) diff --git a/tests/hermes_cli/test_setup_model_provider.py b/tests/hermes_cli/test_setup_model_provider.py index 09116bc95..eb59360a0 100644 --- a/tests/hermes_cli/test_setup_model_provider.py +++ b/tests/hermes_cli/test_setup_model_provider.py @@ -113,6 +113,205 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu( assert reloaded["model"]["provider"] == "zai" +def test_setup_same_provider_rotation_strategy_saved_for_multi_credential_pool(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + save_env_value("OPENROUTER_API_KEY", "or-key") + + # Pre-write config so the pool step sees provider="openrouter" + _write_model_config("openrouter", "", "anthropic/claude-opus-4.6") + + config = load_config() + + class _Entry: + def 
__init__(self, label): + self.label = label + + class _Pool: + def entries(self): + return [_Entry("primary"), _Entry("secondary")] + + def fake_select(): + pass # no-op — config already has provider set + + def fake_prompt_choice(question, choices, default=0): + if "rotation strategy" in question: + return 1 # round robin + tts_idx = _maybe_keep_current_tts(question, choices) + if tts_idx is not None: + return tts_idx + return default + + def fake_prompt_yes_no(question, default=True): + return False + + # Patch directly on the module objects to ensure local imports pick them up. + import hermes_cli.main as _main_mod + import hermes_cli.setup as _setup_mod + import agent.credential_pool as _pool_mod + import agent.auxiliary_client as _aux_mod + + monkeypatch.setattr(_main_mod, "select_provider_and_model", fake_select) + # NOTE: _stub_tts overwrites prompt_choice, so set our mock AFTER it. + _stub_tts(monkeypatch) + monkeypatch.setattr(_setup_mod, "prompt_choice", fake_prompt_choice) + monkeypatch.setattr(_setup_mod, "prompt_yes_no", fake_prompt_yes_no) + monkeypatch.setattr(_setup_mod, "prompt", lambda *args, **kwargs: "") + monkeypatch.setattr(_pool_mod, "load_pool", lambda provider: _Pool()) + monkeypatch.setattr(_aux_mod, "get_available_vision_backends", lambda: []) + + setup_model_provider(config) + + # The pool has 2 entries, so the strategy prompt should fire + strategy = config.get("credential_pool_strategies", {}).get("openrouter") + assert strategy == "round_robin", f"Expected round_robin but got {strategy}" + + +def test_setup_same_provider_fallback_can_add_another_credential(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + save_env_value("OPENROUTER_API_KEY", "or-key") + + # Pre-write config so the pool step sees provider="openrouter" + _write_model_config("openrouter", "", "anthropic/claude-opus-4.6") + + config = load_config() + pool_sizes = iter([1, 2]) + add_calls = [] + + class _Entry: 
+ def __init__(self, label): + self.label = label + + class _Pool: + def __init__(self, size): + self._size = size + + def entries(self): + return [_Entry(f"cred-{idx}") for idx in range(self._size)] + + def fake_load_pool(provider): + return _Pool(next(pool_sizes)) + + def fake_auth_add_command(args): + add_calls.append(args.provider) + + def fake_select(): + pass # no-op — config already has provider set + + def fake_prompt_choice(question, choices, default=0): + if question == "Select same-provider rotation strategy:": + return 0 + tts_idx = _maybe_keep_current_tts(question, choices) + if tts_idx is not None: + return tts_idx + return default + + yes_no_answers = iter([True, False]) + + def fake_prompt_yes_no(question, default=True): + if question == "Add another credential for same-provider fallback?": + return next(yes_no_answers) + return False + + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) + _stub_tts(monkeypatch) + monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) + monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", fake_prompt_yes_no) + monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "") + monkeypatch.setattr("agent.credential_pool.load_pool", fake_load_pool) + monkeypatch.setattr("hermes_cli.auth_commands.auth_add_command", fake_auth_add_command) + monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: []) + + setup_model_provider(config) + + assert add_calls == ["openrouter"] + assert config.get("credential_pool_strategies", {}).get("openrouter") == "fill_first" + + +def test_setup_pool_step_shows_manual_vs_auto_detected_counts(tmp_path, monkeypatch, capsys): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + save_env_value("OPENROUTER_API_KEY", "or-key") + + # Pre-write config so the pool step sees provider="openrouter" + _write_model_config("openrouter", "", "anthropic/claude-opus-4.6") + + config = 
load_config() + + class _Entry: + def __init__(self, label, source): + self.label = label + self.source = source + + class _Pool: + def entries(self): + return [ + _Entry("primary", "manual"), + _Entry("secondary", "manual"), + _Entry("OPENROUTER_API_KEY", "env:OPENROUTER_API_KEY"), + ] + + def fake_select(): + pass # no-op — config already has provider set + + def fake_prompt_choice(question, choices, default=0): + if "rotation strategy" in question: + return 0 + tts_idx = _maybe_keep_current_tts(question, choices) + if tts_idx is not None: + return tts_idx + return default + + monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select) + _stub_tts(monkeypatch) + monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) + monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False) + monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "") + monkeypatch.setattr("agent.credential_pool.load_pool", lambda provider: _Pool()) + monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: []) + + setup_model_provider(config) + + out = capsys.readouterr().out + assert "Current pooled credentials for openrouter: 3 (2 manual, 1 auto-detected from env/shared auth)" in out + + +def test_setup_copilot_acp_skips_same_provider_pool_step(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + + config = load_config() + + def fake_prompt_choice(question, choices, default=0): + if question == "Select your inference provider:": + return 15 # GitHub Copilot ACP + if question == "Select default model:": + return 0 + if question == "Configure vision:": + return len(choices) - 1 + tts_idx = _maybe_keep_current_tts(question, choices) + if tts_idx is not None: + return tts_idx + raise AssertionError(f"Unexpected prompt_choice call: {question}") + + def fake_prompt_yes_no(question, default=True): + if question == "Add another credential 
for same-provider fallback?": + raise AssertionError("same-provider pool prompt should not appear for copilot-acp") + return False + + monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) + monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", fake_prompt_yes_no) + monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "") + monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None) + monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) + monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: []) + + setup_model_provider(config) + + assert config.get("credential_pool_strategies", {}) == {} + + def test_setup_copilot_uses_gh_auth_and_saves_provider(tmp_path, monkeypatch): """Copilot provider saves correctly through delegation.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) diff --git a/tests/test_auth_commands.py b/tests/test_auth_commands.py new file mode 100644 index 000000000..c55629404 --- /dev/null +++ b/tests/test_auth_commands.py @@ -0,0 +1,391 @@ +"""Tests for auth subcommands backed by the credential pool.""" + +from __future__ import annotations + +import base64 +import json + +import pytest + + +def _write_auth_store(tmp_path, payload: dict) -> None: + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps(payload, indent=2)) + + +def _jwt_with_email(email: str) -> str: + header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode() + payload = base64.urlsafe_b64encode( + json.dumps({"email": email}).encode() + ).rstrip(b"=").decode() + return f"{header}.{payload}.signature" + + +@pytest.fixture(autouse=True) +def _clear_provider_env(monkeypatch): + for key in ( + "OPENROUTER_API_KEY", + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", + "CLAUDE_CODE_OAUTH_TOKEN", + ): + monkeypatch.delenv(key, raising=False) + + +def 
test_auth_add_api_key_persists_manual_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + _write_auth_store(tmp_path, {"version": 1, "providers": {}}) + + from hermes_cli.auth_commands import auth_add_command + + class _Args: + provider = "openrouter" + auth_type = "api-key" + api_key = "sk-or-manual" + label = "personal" + + auth_add_command(_Args()) + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entries = payload["credential_pool"]["openrouter"] + entry = next(item for item in entries if item["source"] == "manual") + assert entry["label"] == "personal" + assert entry["auth_type"] == "api_key" + assert entry["source"] == "manual" + assert entry["access_token"] == "sk-or-manual" + + +def test_auth_add_anthropic_oauth_persists_pool_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False) + monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) + _write_auth_store(tmp_path, {"version": 1, "providers": {}}) + token = _jwt_with_email("claude@example.com") + monkeypatch.setattr( + "agent.anthropic_adapter.run_hermes_oauth_login_pure", + lambda: { + "access_token": token, + "refresh_token": "refresh-token", + "expires_at_ms": 1711234567000, + }, + ) + + from hermes_cli.auth_commands import auth_add_command + + class _Args: + provider = "anthropic" + auth_type = "oauth" + api_key = None + label = None + + auth_add_command(_Args()) + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entries = payload["credential_pool"]["anthropic"] + entry = next(item for item in entries if item["source"] == "manual:hermes_pkce") + assert entry["label"] == "claude@example.com" + assert entry["source"] == "manual:hermes_pkce" + assert 
entry["refresh_token"] == "refresh-token" + assert entry["expires_at_ms"] == 1711234567000 + + +def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store(tmp_path, {"version": 1, "providers": {}}) + token = _jwt_with_email("nous@example.com") + monkeypatch.setattr( + "hermes_cli.auth._nous_device_code_login", + lambda **kwargs: { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "scope": "inference:mint_agent_key", + "token_type": "Bearer", + "access_token": token, + "refresh_token": "refresh-token", + "obtained_at": "2026-03-23T10:00:00+00:00", + "expires_at": "2026-03-23T11:00:00+00:00", + "expires_in": 3600, + "agent_key": "ak-test", + "agent_key_id": "ak-id", + "agent_key_expires_at": "2026-03-23T10:30:00+00:00", + "agent_key_expires_in": 1800, + "agent_key_reused": False, + "agent_key_obtained_at": "2026-03-23T10:00:10+00:00", + "tls": {"insecure": False, "ca_bundle": None}, + }, + ) + + from hermes_cli.auth_commands import auth_add_command + + class _Args: + provider = "nous" + auth_type = "oauth" + api_key = None + label = None + portal_url = None + inference_url = None + client_id = None + scope = None + no_browser = False + timeout = None + insecure = False + ca_bundle = None + + auth_add_command(_Args()) + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entries = payload["credential_pool"]["nous"] + entry = next(item for item in entries if item["source"] == "manual:device_code") + assert entry["label"] == "nous@example.com" + assert entry["source"] == "manual:device_code" + assert entry["agent_key"] == "ak-test" + assert entry["portal_base_url"] == "https://portal.example.com" + + +def test_auth_add_codex_oauth_persists_pool_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store(tmp_path, 
{"version": 1, "providers": {}}) + token = _jwt_with_email("codex@example.com") + monkeypatch.setattr( + "hermes_cli.auth._codex_device_code_login", + lambda: { + "tokens": { + "access_token": token, + "refresh_token": "refresh-token", + }, + "base_url": "https://chatgpt.com/backend-api/codex", + "last_refresh": "2026-03-23T10:00:00Z", + }, + ) + + from hermes_cli.auth_commands import auth_add_command + + class _Args: + provider = "openai-codex" + auth_type = "oauth" + api_key = None + label = None + + auth_add_command(_Args()) + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entries = payload["credential_pool"]["openai-codex"] + entry = next(item for item in entries if item["source"] == "manual:device_code") + assert entry["label"] == "codex@example.com" + assert entry["source"] == "manual:device_code" + assert entry["refresh_token"] == "refresh-token" + assert entry["base_url"] == "https://chatgpt.com/backend-api/codex" + + +def test_auth_remove_reindexes_priorities(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + # Prevent pool auto-seeding from host env vars and file-backed sources + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False) + monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) + monkeypatch.setattr( + "agent.credential_pool._seed_from_singletons", + lambda provider, entries: (False, set()), + ) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-ant-api-primary", + }, + { + "id": "cred-2", + "label": "secondary", + "auth_type": "api_key", + "priority": 1, + "source": "manual", + "access_token": "sk-ant-api-secondary", + }, + ] + }, + }, + ) + + from hermes_cli.auth_commands import auth_remove_command + + class _Args: + provider = "anthropic" + index = 
1 + + auth_remove_command(_Args()) + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entries = payload["credential_pool"]["anthropic"] + assert len(entries) == 1 + assert entries[0]["label"] == "secondary" + assert entries[0]["priority"] == 0 + + +def test_auth_reset_clears_provider_statuses(tmp_path, monkeypatch, capsys): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-ant-api-primary", + "last_status": "exhausted", + "last_status_at": 1711230000.0, + "last_error_code": 402, + } + ] + }, + }, + ) + + from hermes_cli.auth_commands import auth_reset_command + + class _Args: + provider = "anthropic" + + auth_reset_command(_Args()) + + out = capsys.readouterr().out + assert "Reset status" in out + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entry = payload["credential_pool"]["anthropic"][0] + assert entry["last_status"] is None + assert entry["last_status_at"] is None + assert entry["last_error_code"] is None + + +def test_clear_provider_auth_removes_provider_pool_entries(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "anthropic", + "providers": { + "anthropic": {"access_token": "legacy-token"}, + }, + "credential_pool": { + "anthropic": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "oauth", + "priority": 0, + "source": "manual:hermes_pkce", + "access_token": "pool-token", + } + ], + "openrouter": [ + { + "id": "cred-2", + "label": "other-provider", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-or-test", + } + ], + }, + }, + ) + + from hermes_cli.auth import clear_provider_auth + + assert 
clear_provider_auth("anthropic") is True + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + assert payload["active_provider"] is None + assert "anthropic" not in payload.get("providers", {}) + assert "anthropic" not in payload.get("credential_pool", {}) + assert "openrouter" in payload.get("credential_pool", {}) + + +def test_auth_list_does_not_call_mutating_select(monkeypatch, capsys): + from hermes_cli.auth_commands import auth_list_command + + class _Entry: + id = "cred-1" + label = "primary" + auth_type="***" + source = "manual" + last_status = None + last_error_code = None + last_status_at = None + + class _Pool: + def entries(self): + return [_Entry()] + + def peek(self): + return _Entry() + + def select(self): + raise AssertionError("auth_list_command should not call select()") + + monkeypatch.setattr( + "hermes_cli.auth_commands.load_pool", + lambda provider: _Pool() if provider == "openrouter" else type("_EmptyPool", (), {"entries": lambda self: []})(), + ) + + class _Args: + provider = "openrouter" + + auth_list_command(_Args()) + + out = capsys.readouterr().out + assert "openrouter (1 credentials):" in out + assert "primary" in out + + +def test_auth_list_shows_exhausted_cooldown(monkeypatch, capsys): + from hermes_cli.auth_commands import auth_list_command + + class _Entry: + id = "cred-1" + label = "primary" + auth_type = "api_key" + source = "manual" + last_status = "exhausted" + last_error_code = 429 + last_status_at = 1000.0 + + class _Pool: + def entries(self): + return [_Entry()] + + def peek(self): + return None + + monkeypatch.setattr("hermes_cli.auth_commands.load_pool", lambda provider: _Pool()) + monkeypatch.setattr("hermes_cli.auth_commands.time.time", lambda: 1030.0) + + class _Args: + provider = "openrouter" + + auth_list_command(_Args()) + + out = capsys.readouterr().out + assert "exhausted (429)" in out + assert "59m 30s left" in out diff --git a/tests/test_credential_pool.py b/tests/test_credential_pool.py new 
file mode 100644 index 000000000..14302ab13 --- /dev/null +++ b/tests/test_credential_pool.py @@ -0,0 +1,949 @@ +"""Tests for multi-credential runtime pooling and rotation.""" + +from __future__ import annotations + +import json +import time + +import pytest + + +def _write_auth_store(tmp_path, payload: dict) -> None: + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps(payload, indent=2)) + + +def test_fill_first_selection_skips_recently_exhausted_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "***", + "last_status": "exhausted", + "last_status_at": time.time(), + "last_error_code": 402, + }, + { + "id": "cred-2", + "label": "secondary", + "auth_type": "api_key", + "priority": 1, + "source": "manual", + "access_token": "***", + "last_status": "ok", + "last_status_at": None, + "last_error_code": None, + }, + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("anthropic") + entry = pool.select() + + assert entry is not None + assert entry.id == "cred-2" + assert pool.current().id == "cred-2" + + +def test_select_clears_expired_exhaustion(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "cred-1", + "label": "old", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "***", + "last_status": "exhausted", + "last_status_at": time.time() - 90000, + "last_error_code": 402, + } + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("anthropic") + entry = pool.select() + + assert entry is not None 
+ assert entry.last_status == "ok" + + +def test_round_robin_strategy_rotates_priorities(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "openrouter": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "***", + }, + { + "id": "cred-2", + "label": "secondary", + "auth_type": "api_key", + "priority": 1, + "source": "manual", + "access_token": "***", + }, + ] + }, + }, + ) + config_path = tmp_path / "hermes" / "config.yaml" + config_path.write_text("credential_pool_strategies:\n openrouter: round_robin\n") + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + first = pool.select() + assert first is not None + assert first.id == "cred-1" + + reloaded = load_pool("openrouter") + second = reloaded.select() + assert second is not None + assert second.id == "cred-2" + + +def test_random_strategy_uses_random_choice(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "openrouter": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "***", + }, + { + "id": "cred-2", + "label": "secondary", + "auth_type": "api_key", + "priority": 1, + "source": "manual", + "access_token": "***", + }, + ] + }, + }, + ) + config_path = tmp_path / "hermes" / "config.yaml" + config_path.write_text("credential_pool_strategies:\n openrouter: random\n") + + monkeypatch.setattr("agent.credential_pool.random.choice", lambda entries: entries[-1]) + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + selected = pool.select() + assert selected is not None + assert selected.id == "cred-2" + + + +def 
test_exhausted_entry_resets_after_ttl(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "openrouter": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-or-primary", + "base_url": "https://openrouter.ai/api/v1", + "last_status": "exhausted", + "last_status_at": time.time() - 90000, + "last_error_code": 429, + } + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + entry = pool.select() + + assert entry is not None + assert entry.id == "cred-1" + assert entry.last_status == "ok" + + +def test_mark_exhausted_and_rotate_persists_status(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-ant-api-primary", + }, + { + "id": "cred-2", + "label": "secondary", + "auth_type": "api_key", + "priority": 1, + "source": "manual", + "access_token": "sk-ant-api-secondary", + }, + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("anthropic") + assert pool.select().id == "cred-1" + + next_entry = pool.mark_exhausted_and_rotate(status_code=402) + + assert next_entry is not None + assert next_entry.id == "cred-2" + + auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + persisted = auth_payload["credential_pool"]["anthropic"][0] + assert persisted["last_status"] == "exhausted" + assert persisted["last_error_code"] == 402 + + +def test_try_refresh_current_updates_only_current_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + 
"credential_pool": { + "openai-codex": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "oauth", + "priority": 0, + "source": "device_code", + "access_token": "access-old", + "refresh_token": "refresh-old", + "base_url": "https://chatgpt.com/backend-api/codex", + }, + { + "id": "cred-2", + "label": "secondary", + "auth_type": "oauth", + "priority": 1, + "source": "device_code", + "access_token": "access-other", + "refresh_token": "refresh-other", + "base_url": "https://chatgpt.com/backend-api/codex", + }, + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + monkeypatch.setattr( + "hermes_cli.auth.refresh_codex_oauth_pure", + lambda access_token, refresh_token, timeout_seconds=20.0: { + "access_token": "access-new", + "refresh_token": "refresh-new", + }, + ) + + pool = load_pool("openai-codex") + current = pool.select() + assert current.id == "cred-1" + + refreshed = pool.try_refresh_current() + + assert refreshed is not None + assert refreshed.access_token == "access-new" + + auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + primary, secondary = auth_payload["credential_pool"]["openai-codex"] + assert primary["access_token"] == "access-new" + assert primary["refresh_token"] == "refresh-new" + assert secondary["access_token"] == "access-other" + assert secondary["refresh_token"] == "refresh-other" + + +def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-seeded") + _write_auth_store(tmp_path, {"version": 1, "providers": {}}) + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + entry = pool.select() + + assert entry is not None + assert entry.source == "env:OPENROUTER_API_KEY" + assert entry.access_token == "sk-or-seeded" + + +def test_load_pool_removes_stale_seeded_env_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + 
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "openrouter": [ + { + "id": "seeded-env", + "label": "OPENROUTER_API_KEY", + "auth_type": "api_key", + "priority": 0, + "source": "env:OPENROUTER_API_KEY", + "access_token": "stale-token", + "base_url": "https://openrouter.ai/api/v1", + } + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + + assert pool.entries() == [] + + auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + assert auth_payload["credential_pool"]["openrouter"] == [] + + +def test_load_pool_migrates_nous_provider_state(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-token", + "refresh_token": "refresh-token", + "expires_at": "2026-03-24T12:00:00+00:00", + "agent_key": "agent-key", + "agent_key_expires_at": "2026-03-24T13:30:00+00:00", + } + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("nous") + entry = pool.select() + + assert entry is not None + assert entry.source == "device_code" + assert entry.portal_base_url == "https://portal.example.com" + assert entry.agent_key == "agent-key" + + +def test_load_pool_removes_stale_file_backed_singleton_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False) + monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + 
"anthropic": [ + { + "id": "seeded-file", + "label": "claude-code", + "auth_type": "oauth", + "priority": 0, + "source": "claude_code", + "access_token": "stale-access-token", + "refresh_token": "stale-refresh-token", + "expires_at_ms": int(time.time() * 1000) + 60_000, + } + ] + }, + }, + ) + + monkeypatch.setattr( + "agent.anthropic_adapter.read_hermes_oauth_credentials", + lambda: None, + ) + monkeypatch.setattr( + "agent.anthropic_adapter.read_claude_code_credentials", + lambda: None, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("anthropic") + + assert pool.entries() == [] + + auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + assert auth_payload["credential_pool"]["anthropic"] == [] + + +def test_load_pool_migrates_nous_provider_state_preserves_tls(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-token", + "refresh_token": "refresh-token", + "expires_at": "2026-03-24T12:00:00+00:00", + "agent_key": "agent-key", + "agent_key_expires_at": "2026-03-24T13:30:00+00:00", + "tls": { + "insecure": True, + "ca_bundle": "/tmp/nous-ca.pem", + }, + } + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("nous") + entry = pool.select() + + assert entry is not None + assert entry.tls == { + "insecure": True, + "ca_bundle": "/tmp/nous-ca.pem", + } + + auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + assert auth_payload["credential_pool"]["nous"][0]["tls"] == { + "insecure": True, + "ca_bundle": "/tmp/nous-ca.pem", + } + + +def test_singleton_seed_does_not_clobber_manual_oauth_entry(tmp_path, 
monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False) + monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "manual-1", + "label": "manual-pkce", + "auth_type": "oauth", + "priority": 0, + "source": "manual:hermes_pkce", + "access_token": "manual-token", + "refresh_token": "manual-refresh", + "expires_at_ms": 1711234567000, + } + ] + }, + }, + ) + + monkeypatch.setattr( + "agent.anthropic_adapter.read_hermes_oauth_credentials", + lambda: { + "accessToken": "seeded-token", + "refreshToken": "seeded-refresh", + "expiresAt": 1711234999000, + }, + ) + monkeypatch.setattr( + "agent.anthropic_adapter.read_claude_code_credentials", + lambda: None, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("anthropic") + entries = pool.entries() + + assert len(entries) == 2 + assert {entry.source for entry in entries} == {"manual:hermes_pkce", "hermes_pkce"} + + +def test_load_pool_prefers_anthropic_env_token_over_file_backed_oauth(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.setenv("ANTHROPIC_TOKEN", "env-override-token") + monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) + _write_auth_store(tmp_path, {"version": 1, "providers": {}}) + + monkeypatch.setattr( + "agent.anthropic_adapter.read_hermes_oauth_credentials", + lambda: { + "accessToken": "file-backed-token", + "refreshToken": "refresh-token", + "expiresAt": int(time.time() * 1000) + 3_600_000, + }, + ) + monkeypatch.setattr( + "agent.anthropic_adapter.read_claude_code_credentials", + lambda: None, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("anthropic") + entry = pool.select() + + assert entry is not None + 
assert entry.source == "env:ANTHROPIC_TOKEN" + assert entry.access_token == "env-override-token" + + +def test_least_used_strategy_selects_lowest_count(tmp_path, monkeypatch): + """least_used strategy should select the credential with the lowest request_count.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.setattr( + "agent.credential_pool.get_pool_strategy", + lambda _provider: "least_used", + ) + monkeypatch.setattr( + "agent.credential_pool._seed_from_singletons", + lambda provider, entries: (False, set()), + ) + monkeypatch.setattr( + "agent.credential_pool._seed_from_env", + lambda provider, entries: (False, set()), + ) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "openrouter": [ + { + "id": "key-a", + "label": "heavy", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-or-heavy", + "request_count": 100, + }, + { + "id": "key-b", + "label": "light", + "auth_type": "api_key", + "priority": 1, + "source": "manual", + "access_token": "sk-or-light", + "request_count": 10, + }, + { + "id": "key-c", + "label": "medium", + "auth_type": "api_key", + "priority": 2, + "source": "manual", + "access_token": "sk-or-medium", + "request_count": 50, + }, + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + entry = pool.select() + assert entry is not None + assert entry.id == "key-b" + assert entry.access_token == "sk-or-light" + + +def test_mark_used_increments_request_count(tmp_path, monkeypatch): + """mark_used should increment the request_count of the current entry.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.setattr( + "agent.credential_pool.get_pool_strategy", + lambda _provider: "fill_first", + ) + monkeypatch.setattr( + "agent.credential_pool._seed_from_singletons", + lambda provider, entries: (False, set()), + ) + monkeypatch.setattr( + "agent.credential_pool._seed_from_env", + lambda 
provider, entries: (False, set()), + ) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "openrouter": [ + { + "id": "key-a", + "label": "test", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-or-test", + "request_count": 5, + }, + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + entry = pool.select() + assert entry is not None + assert entry.request_count == 5 + pool.mark_used() + updated = pool.current() + assert updated is not None + assert updated.request_count == 6 + + +def test_thread_safety_concurrent_select(tmp_path, monkeypatch): + """Concurrent select() calls should not corrupt pool state.""" + import threading as _threading + + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.setattr( + "agent.credential_pool.get_pool_strategy", + lambda _provider: "round_robin", + ) + monkeypatch.setattr( + "agent.credential_pool._seed_from_singletons", + lambda provider, entries: (False, set()), + ) + monkeypatch.setattr( + "agent.credential_pool._seed_from_env", + lambda provider, entries: (False, set()), + ) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "openrouter": [ + { + "id": f"key-{i}", + "label": f"key-{i}", + "auth_type": "api_key", + "priority": i, + "source": "manual", + "access_token": f"sk-or-{i}", + } + for i in range(5) + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + results = [] + errors = [] + + def worker(): + try: + for _ in range(20): + entry = pool.select() + if entry: + results.append(entry.id) + pool.mark_used(entry.id) + except Exception as exc: + errors.append(exc) + + threads = [_threading.Thread(target=worker) for _ in range(4)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert not errors, f"Thread errors: {errors}" + assert len(results) == 80 # 4 threads * 20 selects + + +def 
test_custom_endpoint_pool_keyed_by_name(tmp_path, monkeypatch): + """Verify load_pool('custom:together.ai') works and returns entries from auth.json.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + # Disable seeding so we only test stored entries + monkeypatch.setattr( + "agent.credential_pool._seed_custom_pool", + lambda pool_key, entries: (False, set()), + ) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "custom:together.ai": [ + { + "id": "cred-1", + "label": "together-key", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-together-xxx", + "base_url": "https://api.together.ai/v1", + }, + { + "id": "cred-2", + "label": "together-key-2", + "auth_type": "api_key", + "priority": 1, + "source": "manual", + "access_token": "sk-together-yyy", + "base_url": "https://api.together.ai/v1", + }, + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("custom:together.ai") + assert pool.has_credentials() + entries = pool.entries() + assert len(entries) == 2 + assert entries[0].access_token == "sk-together-xxx" + assert entries[1].access_token == "sk-together-yyy" + + # Select should return the first entry (fill_first default) + entry = pool.select() + assert entry is not None + assert entry.id == "cred-1" + + +def test_custom_endpoint_pool_seeds_from_config(tmp_path, monkeypatch): + """Verify seeding from custom_providers api_key in config.yaml.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store(tmp_path, {"version": 1}) + + # Write config.yaml with a custom_providers entry + config_path = tmp_path / "hermes" / "config.yaml" + import yaml + config_path.write_text(yaml.dump({ + "custom_providers": [ + { + "name": "Together.ai", + "base_url": "https://api.together.ai/v1", + "api_key": "sk-config-seeded", + } + ] + })) + + from agent.credential_pool import load_pool + + pool = load_pool("custom:together.ai") + assert 
pool.has_credentials() + entries = pool.entries() + assert len(entries) == 1 + assert entries[0].access_token == "sk-config-seeded" + assert entries[0].source == "config:Together.ai" + + +def test_custom_endpoint_pool_seeds_from_model_config(tmp_path, monkeypatch): + """Verify seeding from model.api_key when model.provider=='custom' and base_url matches.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store(tmp_path, {"version": 1}) + + import yaml + config_path = tmp_path / "hermes" / "config.yaml" + config_path.write_text(yaml.dump({ + "custom_providers": [ + { + "name": "Together.ai", + "base_url": "https://api.together.ai/v1", + } + ], + "model": { + "provider": "custom", + "base_url": "https://api.together.ai/v1", + "api_key": "sk-model-key", + }, + })) + + from agent.credential_pool import load_pool + + pool = load_pool("custom:together.ai") + assert pool.has_credentials() + entries = pool.entries() + # Should have the model_config entry + model_entries = [e for e in entries if e.source == "model_config"] + assert len(model_entries) == 1 + assert model_entries[0].access_token == "sk-model-key" + + +def test_custom_pool_does_not_break_existing_providers(tmp_path, monkeypatch): + """Existing registry providers work exactly as before with custom pool support.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test") + _write_auth_store(tmp_path, {"version": 1, "providers": {}}) + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + entry = pool.select() + assert entry is not None + assert entry.source == "env:OPENROUTER_API_KEY" + assert entry.access_token == "sk-or-test" + + +def test_get_custom_provider_pool_key(tmp_path, monkeypatch): + """get_custom_provider_pool_key maps base_url to custom: pool key.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + (tmp_path / "hermes").mkdir(parents=True, exist_ok=True) + import 
yaml + config_path = tmp_path / "hermes" / "config.yaml" + config_path.write_text(yaml.dump({ + "custom_providers": [ + { + "name": "Together.ai", + "base_url": "https://api.together.ai/v1", + "api_key": "sk-xxx", + }, + { + "name": "My Local Server", + "base_url": "http://localhost:8080/v1", + }, + ] + })) + + from agent.credential_pool import get_custom_provider_pool_key + + assert get_custom_provider_pool_key("https://api.together.ai/v1") == "custom:together.ai" + assert get_custom_provider_pool_key("https://api.together.ai/v1/") == "custom:together.ai" + assert get_custom_provider_pool_key("http://localhost:8080/v1") == "custom:my-local-server" + assert get_custom_provider_pool_key("https://unknown.example.com/v1") is None + assert get_custom_provider_pool_key("") is None + + +def test_list_custom_pool_providers(tmp_path, monkeypatch): + """list_custom_pool_providers returns custom: pool keys from auth.json.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "a1", + "label": "test", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-ant-xxx", + } + ], + "custom:together.ai": [ + { + "id": "c1", + "label": "together", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-tog-xxx", + } + ], + "custom:fireworks": [ + { + "id": "c2", + "label": "fireworks", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-fw-xxx", + } + ], + "custom:empty": [], + }, + }, + ) + + from agent.credential_pool import list_custom_pool_providers + + result = list_custom_pool_providers() + assert result == ["custom:fireworks", "custom:together.ai"] + # "custom:empty" not included because it's empty diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index c42ee29f2..7ea3a63fe 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1771,6 
+1771,62 @@ class TestNousCredentialRefresh: assert isinstance(agent.client, _RebuiltClient) +class TestCredentialPoolRecovery: + def test_recover_with_pool_rotates_on_402(self, agent): + current = SimpleNamespace(label="primary") + next_entry = SimpleNamespace(label="secondary") + + class _Pool: + def current(self): + return current + + def mark_exhausted_and_rotate(self, *, status_code): + assert status_code == 402 + return next_entry + + agent._credential_pool = _Pool() + agent._swap_credential = MagicMock() + + recovered, retry_same = agent._recover_with_credential_pool( + status_code=402, + has_retried_429=False, + ) + + assert recovered is True + assert retry_same is False + agent._swap_credential.assert_called_once_with(next_entry) + + def test_recover_with_pool_retries_first_429_then_rotates(self, agent): + next_entry = SimpleNamespace(label="secondary") + + class _Pool: + def current(self): + return SimpleNamespace(label="primary") + + def mark_exhausted_and_rotate(self, *, status_code): + assert status_code == 429 + return next_entry + + agent._credential_pool = _Pool() + agent._swap_credential = MagicMock() + + recovered, retry_same = agent._recover_with_credential_pool( + status_code=429, + has_retried_429=False, + ) + assert recovered is False + assert retry_same is True + agent._swap_credential.assert_not_called() + + recovered, retry_same = agent._recover_with_credential_pool( + status_code=429, + has_retried_429=True, + ) + assert recovered is True + assert retry_same is False + agent._swap_credential.assert_called_once_with(next_entry) + + class TestMaxTokensParam: """Verify _max_tokens_param returns the correct key for each provider.""" diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py index 6976d071a..1a65aa31b 100644 --- a/tests/test_runtime_provider_resolution.py +++ b/tests/test_runtime_provider_resolution.py @@ -1,6 +1,123 @@ from hermes_cli import runtime_provider as rp +def 
test_resolve_runtime_provider_uses_credential_pool(monkeypatch): + class _Entry: + access_token = "pool-token" + source = "manual" + base_url = "https://chatgpt.com/backend-api/codex" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex") + monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool()) + + resolved = rp.resolve_runtime_provider(requested="openai-codex") + + assert resolved["provider"] == "openai-codex" + assert resolved["api_key"] == "pool-token" + assert resolved["credential_pool"] is not None + assert resolved["source"] == "manual" + + +def test_resolve_runtime_provider_anthropic_pool_respects_config_base_url(monkeypatch): + class _Entry: + access_token = "pool-token" + source = "manual" + base_url = "https://api.anthropic.com" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "anthropic") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "anthropic", + "base_url": "https://proxy.example.com/anthropic", + }, + ) + monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool()) + + resolved = rp.resolve_runtime_provider(requested="anthropic") + + assert resolved["provider"] == "anthropic" + assert resolved["api_mode"] == "anthropic_messages" + assert resolved["api_key"] == "pool-token" + assert resolved["base_url"] == "https://proxy.example.com/anthropic" + + +def test_resolve_runtime_provider_anthropic_explicit_override_skips_pool(monkeypatch): + def _unexpected_pool(provider): + raise AssertionError(f"load_pool should not be called for {provider}") + + def _unexpected_anthropic_token(): + raise AssertionError("resolve_anthropic_token should not be called") + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "anthropic") + monkeypatch.setattr( + rp, + "_get_model_config", + 
lambda: { + "provider": "anthropic", + "base_url": "https://config.example.com/anthropic", + }, + ) + monkeypatch.setattr(rp, "load_pool", _unexpected_pool) + monkeypatch.setattr( + "agent.anthropic_adapter.resolve_anthropic_token", + _unexpected_anthropic_token, + ) + + resolved = rp.resolve_runtime_provider( + requested="anthropic", + explicit_api_key="anthropic-explicit-token", + explicit_base_url="https://proxy.example.com/anthropic/", + ) + + assert resolved["provider"] == "anthropic" + assert resolved["api_mode"] == "anthropic_messages" + assert resolved["api_key"] == "anthropic-explicit-token" + assert resolved["base_url"] == "https://proxy.example.com/anthropic" + assert resolved["source"] == "explicit" + assert resolved.get("credential_pool") is None + + +def test_resolve_runtime_provider_falls_back_when_pool_empty(monkeypatch): + class _Pool: + def has_credentials(self): + return False + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex") + monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool()) + monkeypatch.setattr( + rp, + "resolve_codex_runtime_credentials", + lambda: { + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "codex-token", + "source": "hermes-auth-store", + "last_refresh": "2026-02-26T00:00:00Z", + }, + ) + + resolved = rp.resolve_runtime_provider(requested="openai-codex") + + assert resolved["api_key"] == "codex-token" + assert resolved.get("credential_pool") is None + + def test_resolve_runtime_provider_codex(monkeypatch): monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex") monkeypatch.setattr( @@ -40,6 +157,36 @@ def test_resolve_runtime_provider_ai_gateway(monkeypatch): assert resolved["requested_provider"] == "ai-gateway" +def test_resolve_runtime_provider_ai_gateway_explicit_override_skips_pool(monkeypatch): + def _unexpected_pool(provider): + raise AssertionError(f"load_pool should not be called for {provider}") + + def 
_unexpected_provider_resolution(provider): + raise AssertionError(f"resolve_api_key_provider_credentials should not be called for {provider}") + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "ai-gateway") + monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + monkeypatch.setattr(rp, "load_pool", _unexpected_pool) + monkeypatch.setattr( + rp, + "resolve_api_key_provider_credentials", + _unexpected_provider_resolution, + ) + + resolved = rp.resolve_runtime_provider( + requested="ai-gateway", + explicit_api_key="ai-gateway-explicit-token", + explicit_base_url="https://proxy.example.com/v1/", + ) + + assert resolved["provider"] == "ai-gateway" + assert resolved["api_mode"] == "chat_completions" + assert resolved["api_key"] == "ai-gateway-explicit-token" + assert resolved["base_url"] == "https://proxy.example.com/v1" + assert resolved["source"] == "explicit" + assert resolved.get("credential_pool") is None + + def test_resolve_runtime_provider_openrouter_explicit(monkeypatch): monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") monkeypatch.setattr(rp, "_get_model_config", lambda: {}) @@ -61,6 +208,69 @@ def test_resolve_runtime_provider_openrouter_explicit(monkeypatch): assert resolved["source"] == "explicit" +def test_resolve_runtime_provider_auto_uses_openrouter_pool(monkeypatch): + class _Entry: + access_token = "pool-key" + source = "manual" + base_url = "https://openrouter.ai/api/v1" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool()) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = 
rp.resolve_runtime_provider(requested="auto") + + assert resolved["provider"] == "openrouter" + assert resolved["api_key"] == "pool-key" + assert resolved["base_url"] == "https://openrouter.ai/api/v1" + assert resolved["source"] == "manual" + assert resolved.get("credential_pool") is not None + + +def test_resolve_runtime_provider_openrouter_explicit_api_key_skips_pool(monkeypatch): + class _Entry: + access_token = "pool-key" + source = "manual" + base_url = "https://openrouter.ai/api/v1" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool()) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider( + requested="openrouter", + explicit_api_key="explicit-key", + ) + + assert resolved["provider"] == "openrouter" + assert resolved["api_key"] == "explicit-key" + assert resolved["base_url"] == rp.OPENROUTER_BASE_URL + assert resolved["source"] == "explicit" + assert resolved.get("credential_pool") is None + + def test_resolve_runtime_provider_openrouter_ignores_codex_config_base_url(monkeypatch): monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") monkeypatch.setattr( @@ -136,16 +346,19 @@ def test_openai_key_used_when_no_openrouter_key(monkeypatch): def test_custom_endpoint_prefers_openai_key(monkeypatch): - """Custom endpoint should use OPENAI_API_KEY, not OPENROUTER_API_KEY. + """Custom endpoint should use config api_key over OPENROUTER_API_KEY. 
- Regression test for #560: when base_url is a non-OpenRouter endpoint, - OPENROUTER_API_KEY was being sent as the auth header instead of OPENAI_API_KEY. + Updated for #4165: config.yaml is now the source of truth for endpoint URLs, + OPENAI_BASE_URL env var is no longer consulted. """ monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") - monkeypatch.setattr(rp, "_get_model_config", lambda: {}) - monkeypatch.setenv("OPENAI_BASE_URL", "https://api.z.ai/api/coding/paas/v4") + monkeypatch.setattr(rp, "_get_model_config", lambda: { + "provider": "custom", + "base_url": "https://api.z.ai/api/coding/paas/v4", + "api_key": "zai-key", + }) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) - monkeypatch.setenv("OPENAI_API_KEY", "zai-key") monkeypatch.setenv("OPENROUTER_API_KEY", "openrouter-key") resolved = rp.resolve_runtime_provider(requested="custom") @@ -221,19 +434,22 @@ def test_custom_endpoint_uses_config_api_field_when_no_api_key(monkeypatch): assert resolved["api_key"] == "config-api-field" -def test_custom_endpoint_auto_provider_prefers_openai_key(monkeypatch): - """Auto provider with non-OpenRouter base_url should prefer OPENAI_API_KEY. +def test_custom_endpoint_explicit_custom_prefers_config_key(monkeypatch): + """Explicit 'custom' provider with config base_url+api_key should use them. - Same as #560 but via 'hermes model' flow which sets provider to 'auto'. + Updated for #4165: config.yaml is the source of truth, not OPENAI_BASE_URL. 
""" monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") - monkeypatch.setattr(rp, "_get_model_config", lambda: {}) - monkeypatch.setenv("OPENAI_BASE_URL", "https://my-vllm-server.example.com/v1") + monkeypatch.setattr(rp, "_get_model_config", lambda: { + "provider": "custom", + "base_url": "https://my-vllm-server.example.com/v1", + "api_key": "sk-vllm-key", + }) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) - monkeypatch.setenv("OPENAI_API_KEY", "sk-vllm-key") monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-...leak") - resolved = rp.resolve_runtime_provider(requested="auto") + resolved = rp.resolve_runtime_provider(requested="custom") assert resolved["base_url"] == "https://my-vllm-server.example.com/v1" assert resolved["api_key"] == "sk-vllm-key" @@ -359,6 +575,36 @@ def test_explicit_openrouter_skips_openai_base_url(monkeypatch): assert resolved["api_key"] == "or-test-key" +def test_explicit_openrouter_honors_openrouter_base_url_over_pool(monkeypatch): + class _Entry: + access_token = "pool-key" + source = "manual" + base_url = "https://openrouter.ai/api/v1" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool()) + monkeypatch.setenv("OPENROUTER_BASE_URL", "https://mirror.example.com/v1") + monkeypatch.setenv("OPENROUTER_API_KEY", "mirror-key") + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider(requested="openrouter") + + assert resolved["provider"] == "openrouter" + assert resolved["base_url"] == "https://mirror.example.com/v1" + assert resolved["api_key"] == "mirror-key" + assert resolved["source"] == "env/config" + assert 
resolved.get("credential_pool") is None + + def test_resolve_requested_provider_precedence(monkeypatch): monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous") monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"}) diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 1a779f8a0..d86a8c488 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -593,7 +593,14 @@ class TestDelegationCredentialResolution(unittest.TestCase): "model": "qwen2.5-coder", "base_url": "http://localhost:1234/v1", } - with patch.dict(os.environ, {"OPENROUTER_API_KEY": "env-openrouter-key"}, clear=False): + with patch.dict( + os.environ, + { + "OPENROUTER_API_KEY": "env-openrouter-key", + "OPENAI_API_KEY": "", + }, + clear=False, + ): with self.assertRaises(ValueError) as ctx: _resolve_delegation_credentials(cfg, parent) self.assertIn("OPENAI_API_KEY", str(ctx.exception)) diff --git a/tests/tools/test_transcription.py b/tests/tools/test_transcription.py index 0ce3f2468..5f42272a5 100644 --- a/tests/tools/test_transcription.py +++ b/tests/tools/test_transcription.py @@ -18,6 +18,11 @@ import pytest # --------------------------------------------------------------------------- +@pytest.fixture(autouse=True) +def _clear_openai_env(monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + + class TestGetProvider: """_get_provider() picks the right backend based on config + availability.""" diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index cd0cff39c..d10c29e03 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -38,6 +38,7 @@ hermes [global-options] [subcommand/options] | `hermes setup` | Interactive setup wizard for all or part of the configuration. | | `hermes whatsapp` | Configure and pair the WhatsApp bridge. | | `hermes login` / `logout` | Authenticate with OAuth-backed providers. 
| +| `hermes auth` | Manage credential pools — add, list, remove, reset, set strategy. | | `hermes status` | Show agent, auth, and platform status. | | `hermes cron` | Inspect and tick the cron scheduler. | | `hermes webhook` | Manage dynamic webhook subscriptions for event-driven activation. | @@ -192,6 +193,22 @@ Useful options for `login`: - `--ca-bundle ` - `--insecure` +## `hermes auth` + +Manage credential pools for same-provider key rotation. See [Credential Pools](/docs/user-guide/features/credential-pools) for full documentation. + +```bash +hermes auth # Interactive wizard +hermes auth list # Show all pools +hermes auth list openrouter # Show specific provider +hermes auth add openrouter --api-key sk-or-v1-xxx # Add API key +hermes auth add anthropic --type oauth # Add OAuth credential +hermes auth remove openrouter 2 # Remove by index +hermes auth reset openrouter # Clear cooldowns +``` + +Subcommands: `add`, `list`, `remove`, `reset`. When called with no subcommand, launches the interactive management wizard. + ## `hermes status` ```bash diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 4900fc05b..107e82395 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -478,6 +478,18 @@ If auto-compression is disabled, the warning tells you context may be truncated Context pressure is automatic — no configuration needed. It fires purely as a user-facing notification and does not modify the message stream or inject anything into the model's context. +## Credential Pool Strategies + +When you have multiple API keys or OAuth tokens for the same provider, configure the rotation strategy: + +```yaml +credential_pool_strategies: + openrouter: round_robin # cycle through keys evenly + anthropic: least_used # always pick the least-used key +``` + +Options: `fill_first` (default), `round_robin`, `least_used`, `random`. 
See [Credential Pools](/docs/user-guide/features/credential-pools) for full documentation. + ## Auxiliary Models Hermes uses lightweight "auxiliary" models for side tasks like image analysis, web page summarization, and browser screenshot analysis. By default, these use **Gemini Flash** via auto-detection — you don't need to configure anything. diff --git a/website/docs/user-guide/features/credential-pools.md b/website/docs/user-guide/features/credential-pools.md new file mode 100644 index 000000000..275e08a04 --- /dev/null +++ b/website/docs/user-guide/features/credential-pools.md @@ -0,0 +1,230 @@ +--- +title: Credential Pools +description: Pool multiple API keys or OAuth tokens per provider for automatic rotation and rate limit recovery. +sidebar_label: Credential Pools +sidebar_position: 9 +--- + +# Credential Pools + +Credential pools let you register multiple API keys or OAuth tokens for the same provider. When one key hits a rate limit or billing quota, Hermes automatically rotates to the next healthy key — keeping your session alive without switching providers. + +This is different from [fallback providers](./fallback-providers.md), which switch to a *different* provider entirely. Credential pools are same-provider rotation; fallback providers are cross-provider failover. Pools are tried first — if all pool keys are exhausted, *then* the fallback provider activates. + +## How It Works + +``` +Your request + → Pick key from pool (round_robin / least_used / fill_first / random) + → Send to provider + → 429 rate limit? + → Retry same key once (transient blip) + → Second 429 → rotate to next pool key + → All keys exhausted → fallback_model (different provider) + → 402 billing error? + → Immediately rotate to next pool key (24h cooldown) + → 401 auth expired? 
+ → Try refreshing the token (OAuth) + → Refresh failed → rotate to next pool key + → Success → continue normally +``` + +## Quick Start + +If you already have an API key set in `.env`, Hermes auto-discovers it as a 1-key pool. To benefit from pooling, add more keys: + +```bash +# Add a second OpenRouter key +hermes auth add openrouter --api-key sk-or-v1-your-second-key + +# Add a second Anthropic key +hermes auth add anthropic --type api-key --api-key sk-ant-api03-your-second-key + +# Add an Anthropic OAuth credential (Claude Code subscription) +hermes auth add anthropic --type oauth +# Opens browser for OAuth login +``` + +Check your pools: + +```bash +hermes auth list +``` + +Output: +``` +openrouter (2 credentials): + #1 OPENROUTER_API_KEY api_key env:OPENROUTER_API_KEY ← + #2 backup-key api_key manual + +anthropic (3 credentials): + #1 hermes_pkce oauth hermes_pkce ← + #2 claude_code oauth claude_code + #3 ANTHROPIC_API_KEY api_key env:ANTHROPIC_API_KEY +``` + +The `←` marks the currently selected credential. + +## Interactive Management + +Run `hermes auth` with no subcommand for an interactive wizard: + +```bash +hermes auth +``` + +This shows your full pool status and offers a menu: + +``` +What would you like to do? + 1. Add a credential + 2. Remove a credential + 3. Reset cooldowns for a provider + 4. Set rotation strategy for a provider + 5. Exit +``` + +For providers that support both API keys and OAuth (Anthropic, Nous, Codex), the add flow asks which type: + +``` +anthropic supports both API keys and OAuth login. + 1. API key (paste a key from the provider dashboard) + 2. 
OAuth login (authenticate via browser) +Type [1/2]: +``` + +## CLI Commands + +| Command | Description | +|---------|-------------| +| `hermes auth` | Interactive pool management wizard | +| `hermes auth list` | Show all pools and credentials | +| `hermes auth list ` | Show a specific provider's pool | +| `hermes auth add ` | Add a credential (prompts for type and key) | +| `hermes auth add --type api-key --api-key ` | Add an API key non-interactively | +| `hermes auth add --type oauth` | Add an OAuth credential via browser login | +| `hermes auth remove ` | Remove credential by 1-based index | +| `hermes auth reset ` | Clear all cooldowns/exhaustion status | + +## Rotation Strategies + +Configure via `hermes auth` → "Set rotation strategy" or in `config.yaml`: + +```yaml +credential_pool_strategies: + openrouter: round_robin + anthropic: least_used +``` + +| Strategy | Behavior | +|----------|----------| +| `fill_first` (default) | Use the first healthy key until it's exhausted, then move to the next | +| `round_robin` | Cycle through keys evenly, rotating after each selection | +| `least_used` | Always pick the key with the lowest request count | +| `random` | Random selection among healthy keys | + +## Error Recovery + +The pool handles different errors differently: + +| Error | Behavior | Cooldown | +|-------|----------|----------| +| **429 Rate Limit** | Retry same key once (transient). Second consecutive 429 rotates to next key | 1 hour | +| **402 Billing/Quota** | Immediately rotate to next key | 24 hours | +| **401 Auth Expired** | Try refreshing the OAuth token first. Rotate only if refresh fails | — | +| **All keys exhausted** | Fall through to `fallback_model` if configured | — | + +The `has_retried_429` flag resets on every successful API call, so a single transient 429 doesn't trigger rotation. 
+ +## Custom Endpoint Pools + +Custom OpenAI-compatible endpoints (Together.ai, RunPod, local servers) get their own pools, keyed by the endpoint name from `custom_providers` in config.yaml. + +When you set up a custom endpoint via `hermes model`, it auto-generates a name like "Together.ai" or "Local (localhost:8080)". This name becomes the pool key. + +```bash +# After setting up a custom endpoint via hermes model: +hermes auth list +# Shows: +# Together.ai (1 credential): +# #1 config key api_key config:Together.ai ← + +# Add a second key for the same endpoint: +hermes auth add Together.ai --api-key sk-together-second-key +``` + +Custom endpoint pools are stored in `auth.json` under `credential_pool` with a `custom:` prefix: + +```json +{ + "credential_pool": { + "openrouter": [...], + "custom:together.ai": [...] + } +} +``` + +## Auto-Discovery + +Hermes automatically discovers credentials from multiple sources and seeds the pool on startup: + +| Source | Example | Auto-seeded? | +|--------|---------|-------------| +| Environment variables | `OPENROUTER_API_KEY`, `ANTHROPIC_API_KEY` | Yes | +| OAuth tokens (auth.json) | Codex device code, Nous device code | Yes | +| Claude Code credentials | `~/.claude/.credentials.json` | Yes (Anthropic) | +| Hermes PKCE OAuth | `~/.hermes/auth.json` | Yes (Anthropic) | +| Custom endpoint config | `model.api_key` in config.yaml | Yes (custom endpoints) | +| Manual entries | Added via `hermes auth add` | Persisted in auth.json | + +Auto-seeded entries are updated on each pool load — if you remove an env var, its pool entry is automatically pruned. Manual entries (added via `hermes auth add`) are never auto-pruned. + +## Thread Safety + +The credential pool uses a threading lock for all state mutations (`select()`, `mark_exhausted_and_rotate()`, `try_refresh_current()`, `mark_used()`). This ensures safe concurrent access when the gateway handles multiple chat sessions simultaneously. 
+ +## Architecture + +For the full data flow diagram, see [`docs/credential-pool-flow.excalidraw`](https://excalidraw.com/#json=2Ycqhqpi6f12E_3ITyiwh,c7u9jSt5BwrmiVzHGbm87g) in the repository. + +The credential pool integrates at the provider resolution layer: + +1. **`agent/credential_pool.py`** — Pool manager: storage, selection, rotation, cooldowns +2. **`hermes_cli/auth_commands.py`** — CLI commands and interactive wizard +3. **`hermes_cli/runtime_provider.py`** — Pool-aware credential resolution +4. **`run_agent.py`** — Error recovery: 429/402/401 → pool rotation → fallback + +## Storage + +Pool state is stored in `~/.hermes/auth.json` under the `credential_pool` key: + +```json +{ + "version": 1, + "credential_pool": { + "openrouter": [ + { + "id": "abc123", + "label": "OPENROUTER_API_KEY", + "auth_type": "api_key", + "priority": 0, + "source": "env:OPENROUTER_API_KEY", + "access_token": "sk-or-v1-...", + "last_status": "ok", + "request_count": 142 + } + ] + }, + "credential_pool_strategies": { + "openrouter": "round_robin" + } +} +``` + +Strategies are stored in `config.yaml` (not `auth.json`): + +```yaml +credential_pool_strategies: + openrouter: round_robin + anthropic: least_used +``` diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index e46f69e35..315866378 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -7,12 +7,13 @@ sidebar_position: 8 # Fallback Providers -Hermes Agent has two separate fallback systems that keep your sessions running when providers hit issues: +Hermes Agent has three layers of resilience that keep your sessions running when providers hit issues: -1. **Primary model fallback** — automatically switches to a backup provider:model when your main model fails -2. 
**Auxiliary task fallback** — independent provider resolution for side tasks like vision, compression, and web extraction +1. **[Credential pools](./credential-pools.md)** — rotate across multiple API keys for the *same* provider (tried first) +2. **Primary model fallback** — automatically switches to a *different* provider:model when your main model fails +3. **Auxiliary task fallback** — independent provider resolution for side tasks like vision, compression, and web extraction -Both are optional and work independently. +Credential pools handle same-provider rotation (e.g., multiple OpenRouter keys). This page covers cross-provider fallback. Both are optional and work independently. ## Primary Model Fallback -- 2.43.0 From 79b2694b9a02806592ea5cf6aeaa272a2e9d4028 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 03:16:40 -0700 Subject: [PATCH 075/385] fix: _allow_private_urls name collision + stale OPENAI_BASE_URL test (#4217) 1. browser_tool.py: _allow_private_urls() used 'global _allow_private_urls' then assigned a bool to it, replacing the function in the module namespace. After first call, subsequent calls hit TypeError: 'bool' object is not callable. Renamed cache variable to _cached_allow_private_urls. 2. test_provider_parity.py: test_custom_endpoint_when_no_nous relied on OPENAI_BASE_URL env var (removed in config refactor). Mock _resolve_custom_runtime directly instead. 
--- tests/test_provider_parity.py | 9 ++++++++- tools/browser_tool.py | 12 ++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py index b34c9cd70..deb657340 100644 --- a/tests/test_provider_parity.py +++ b/tests/test_provider_parity.py @@ -559,11 +559,18 @@ class TestAuxiliaryClientProviderPriority: assert model == "google/gemini-3-flash-preview" def test_custom_endpoint_when_no_nous(self, monkeypatch): + """Custom endpoint is used when no OpenRouter/Nous keys are available. + + Since the March 2026 config refactor, OPENAI_BASE_URL env var is no + longer consulted — base_url comes from config.yaml via + resolve_runtime_provider. Mock _resolve_custom_runtime directly. + """ monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) - monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1") monkeypatch.setenv("OPENAI_API_KEY", "local-key") from agent.auxiliary_client import get_text_auxiliary_client with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._resolve_custom_runtime", + return_value=("http://localhost:1234/v1", "local-key")), \ patch("agent.auxiliary_client.OpenAI") as mock: client, model = get_text_auxiliary_client() assert mock.call_args.kwargs["base_url"] == "http://localhost:1234/v1" diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 03aa6106b..1861152e3 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -238,7 +238,7 @@ _PROVIDER_REGISTRY: Dict[str, type] = { _cached_cloud_provider: Optional[CloudBrowserProvider] = None _cloud_provider_resolved = False _allow_private_urls_resolved = False -_allow_private_urls: Optional[bool] = None +_cached_allow_private_urls: Optional[bool] = None def _get_cloud_provider() -> Optional[CloudBrowserProvider]: @@ -273,12 +273,12 @@ def _allow_private_urls() -> bool: Reads ``config["browser"]["allow_private_urls"]`` once and caches the result for the 
process lifetime. Defaults to ``False`` (SSRF protection active). """ - global _allow_private_urls, _allow_private_urls_resolved + global _cached_allow_private_urls, _allow_private_urls_resolved if _allow_private_urls_resolved: - return _allow_private_urls + return _cached_allow_private_urls _allow_private_urls_resolved = True - _allow_private_urls = False # safe default + _cached_allow_private_urls = False # safe default try: hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) config_path = hermes_home / "config.yaml" @@ -286,10 +286,10 @@ def _allow_private_urls() -> bool: import yaml with open(config_path) as f: cfg = yaml.safe_load(f) or {} - _allow_private_urls = bool(cfg.get("browser", {}).get("allow_private_urls")) + _cached_allow_private_urls = bool(cfg.get("browser", {}).get("allow_private_urls")) except Exception as e: logger.debug("Could not read allow_private_urls from config: %s", e) - return _allow_private_urls + return _cached_allow_private_urls def _socket_safe_tmpdir() -> str: -- 2.43.0 From 344239c2dbfe6c03c9020a4faa9552c8769be20a Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 03:29:00 -0700 Subject: [PATCH 076/385] feat: auto-detect models from server probe in custom endpoint setup (#4218) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Custom endpoint setup (_model_flow_custom) now probes the server first and presents detected models instead of asking users to type blind: - Single model: auto-confirms with Y/n prompt - Multiple models: numbered list picker, or type a name - No models / probe failed: falls back to manual input Context length prompt also moved after model selection so the user sees the verified endpoint before being asked for details. All recent fixes preserved: config dict sync (#4172), api_key persistence (#4182), no save_env_value for URLs (#4165). 
Inspired by PR #4194 by sudoingX — re-implemented against current main. Co-authored-by: Xpress AI (Dip KD) <200180104+sudoingX@users.noreply.github.com> --- hermes_cli/main.py | 50 ++++++++++++++++++++------- tests/test_cli_provider_resolution.py | 5 ++- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 3c7142b5e..9b4b3ccac 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1242,22 +1242,10 @@ def _model_flow_custom(config): try: base_url = input(f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: ").strip() api_key = input(f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: ").strip() - model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip() - context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip() except (KeyboardInterrupt, EOFError): print("\nCancelled.") return - context_length = None - if context_length_str: - try: - context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000")) - if context_length <= 0: - context_length = None - except ValueError: - print(f"Invalid context length: {context_length_str} — will auto-detect.") - context_length = None - if not base_url and not current_url: print("No URL provided. Cancelled.") return @@ -1294,6 +1282,44 @@ def _model_flow_custom(config): if probe.get("suggested_base_url"): print(f" If this server expects /v1, try base URL: {probe['suggested_base_url']}") + # Select model — use probe results when available, fall back to manual input + model_name = "" + detected_models = probe.get("models") or [] + try: + if len(detected_models) == 1: + print(f" Detected model: {detected_models[0]}") + confirm = input(" Use this model? [Y/n]: ").strip().lower() + if confirm in ("", "y", "yes"): + model_name = detected_models[0] + else: + model_name = input("Model name (e.g. 
gpt-4, llama-3-70b): ").strip() + elif len(detected_models) > 1: + print(" Available models:") + for i, m in enumerate(detected_models, 1): + print(f" {i}. {m}") + pick = input(f" Select model [1-{len(detected_models)}] or type name: ").strip() + if pick.isdigit() and 1 <= int(pick) <= len(detected_models): + model_name = detected_models[int(pick) - 1] + elif pick: + model_name = pick + else: + model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip() + + context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + + context_length = None + if context_length_str: + try: + context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000")) + if context_length <= 0: + context_length = None + except ValueError: + print(f"Invalid context length: {context_length_str} — will auto-detect.") + context_length = None + if model_name: _save_model_choice(model_name) diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 943a45a55..3c9b31f5f 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -460,13 +460,16 @@ def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys): ) monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None) - answers = iter(["http://localhost:8000", "local-key", "llm", ""]) + # After the probe detects a single model ("llm"), the flow asks + # "Use this model? [Y/n]:" — confirm with Enter, then context length. 
+ answers = iter(["http://localhost:8000", "local-key", "", ""]) monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) hermes_main._model_flow_custom({}) output = capsys.readouterr().out assert "Saving the working base URL instead" in output + assert "Detected model: llm" in output # OPENAI_BASE_URL is no longer saved to .env — config.yaml is authoritative assert "OPENAI_BASE_URL" not in saved_env assert saved_env["MODEL"] == "llm" \ No newline at end of file -- 2.43.0 From 6dcc3330b3313dd27dd21a2f233e48fee0e8fee5 Mon Sep 17 00:00:00 2001 From: Dilee Date: Mon, 30 Mar 2026 17:54:55 +0300 Subject: [PATCH 077/385] fix(security): add missing GitHub OAuth token patterns and snapshot redact flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add gho_, ghu_, ghs_, ghr_ prefix patterns (OAuth, user-to-server, server-to-server, and refresh tokens) — all four types used by GitHub Apps and Copilot auth flows were absent from _PREFIX_PATTERNS - Snapshot HERMES_REDACT_SECRETS at module import time instead of re-reading os.getenv() on every call, preventing runtime env mutations (e.g. LLM-generated export commands) from disabling redaction --- agent/redact.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/agent/redact.py b/agent/redact.py index 895e3265f..2906d920e 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -13,11 +13,19 @@ import re logger = logging.getLogger(__name__) +# Snapshot at import time so runtime env mutations (e.g. LLM-generated +# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session. 
+_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off") + # Known API key prefixes -- match the prefix + contiguous token chars _PREFIX_PATTERNS = [ r"sk-[A-Za-z0-9_-]{10,}", # OpenAI / OpenRouter / Anthropic (sk-ant-*) r"ghp_[A-Za-z0-9]{10,}", # GitHub PAT (classic) r"github_pat_[A-Za-z0-9_]{10,}", # GitHub PAT (fine-grained) + r"gho_[A-Za-z0-9]{10,}", # GitHub OAuth access token + r"ghu_[A-Za-z0-9]{10,}", # GitHub user-to-server token + r"ghs_[A-Za-z0-9]{10,}", # GitHub server-to-server token + r"ghr_[A-Za-z0-9]{10,}", # GitHub refresh token r"xox[baprs]-[A-Za-z0-9-]{10,}", # Slack tokens r"AIza[A-Za-z0-9_-]{30,}", # Google API keys r"pplx-[A-Za-z0-9]{10,}", # Perplexity @@ -109,7 +117,7 @@ def redact_sensitive_text(text: str) -> str: text = str(text) if not text: return text - if os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("0", "false", "no", "off"): + if not _REDACT_ENABLED: return text # Known prefixes (sk-, ghp_, etc.) -- 2.43.0 From fad3f338d1a9e68f923f35566beaa45548796041 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 31 Mar 2026 10:30:15 -0700 Subject: [PATCH 078/385] fix: patch _REDACT_ENABLED in test fixture for module-level snapshot The _REDACT_ENABLED constant is snapshotted at import time, so monkeypatch.delenv() alone doesn't re-enable redaction during tests when HERMES_REDACT_SECRETS=false is set in the host environment. 
--- tests/agent/test_redact.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py index 27098ee6d..6b7cfa586 100644 --- a/tests/agent/test_redact.py +++ b/tests/agent/test_redact.py @@ -12,6 +12,8 @@ from agent.redact import redact_sensitive_text, RedactingFormatter def _ensure_redaction_enabled(monkeypatch): """Ensure HERMES_REDACT_SECRETS is not disabled by prior test imports.""" monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False) + # Also patch the module-level snapshot so it reflects the cleared env var + monkeypatch.setattr("agent.redact._REDACT_ENABLED", True) class TestKnownPrefixes: -- 2.43.0 From cca0996a28aa57a892bb5e9fe3657eb825345b48 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 10:40:13 -0700 Subject: [PATCH 079/385] fix(browser): skip SSRF check for local backends (Camofox, headless Chromium) (#4292) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SSRF protection added in #3041 blocks all private/internal addresses unconditionally in browser_navigate(). This prevents legitimate local use cases (localhost apps, LAN devices) when using Camofox or the built-in headless Chromium without a cloud provider. The check is only meaningful for cloud backends (Browserbase, BrowserUse) where the agent could reach internal resources on a remote machine. Local backends give the user full terminal and network access already — the SSRF check adds zero security value. Add _is_local_backend() helper that returns True when Camofox is active or no cloud provider is configured. Both the pre-navigation and post-redirect SSRF checks now skip when running locally. The browser.allow_private_urls config option remains available as an explicit opt-out for cloud mode. 
--- tests/tools/test_browser_ssrf_local.py | 126 ++++++++++++++++++++----- tools/browser_tool.py | 24 ++++- 2 files changed, 120 insertions(+), 30 deletions(-) diff --git a/tests/tools/test_browser_ssrf_local.py b/tests/tools/test_browser_ssrf_local.py index 44d3b8ea1..27b6e3933 100644 --- a/tests/tools/test_browser_ssrf_local.py +++ b/tests/tools/test_browser_ssrf_local.py @@ -1,8 +1,12 @@ -"""Tests that browser_navigate SSRF checks respect the allow_private_urls setting. +"""Tests that browser_navigate SSRF checks respect local-backend mode and +the allow_private_urls setting. -When ``browser.allow_private_urls`` is ``False`` (default), private/internal -addresses are blocked. When set to ``True``, they are allowed — useful for -local development, LAN access, and Hermes self-testing. +Local backends (Camofox, headless Chromium without a cloud provider) skip +SSRF checks entirely — the agent already has full local-network access via +the terminal tool. + +Cloud backends (Browserbase, BrowserUse) enforce SSRF by default. Users +can opt out for cloud mode via ``browser.allow_private_urls: true``. 
""" import json @@ -47,8 +51,11 @@ class TestPreNavigationSsrf: lambda *a, **kw: _make_browser_result(), ) - def test_blocks_private_url_by_default(self, monkeypatch, _common_patches): - """SSRF protection is on when allow_private_urls is not set (False).""" + # -- Cloud mode: SSRF active ----------------------------------------------- + + def test_cloud_blocks_private_url_by_default(self, monkeypatch, _common_patches): + """SSRF protection blocks private URLs in cloud mode.""" + monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False) monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: False) @@ -57,27 +64,19 @@ class TestPreNavigationSsrf: assert result["success"] is False assert "private or internal address" in result["error"] - def test_blocks_private_url_when_setting_false(self, monkeypatch, _common_patches): - """SSRF protection is on when allow_private_urls is explicitly False.""" - monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) - monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: False) - - result = json.loads(browser_tool.browser_navigate(self.PRIVATE_URL)) - - assert result["success"] is False - - def test_allows_private_url_when_setting_true(self, monkeypatch, _common_patches): - """Private URLs are allowed when allow_private_urls is True.""" + def test_cloud_allows_private_url_when_setting_true(self, monkeypatch, _common_patches): + """Private URLs pass in cloud mode when allow_private_urls is True.""" + monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False) monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: True) - # _is_safe_url would block this, but the setting overrides it monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: False) result = json.loads(browser_tool.browser_navigate(self.PRIVATE_URL)) assert result["success"] is True - def 
test_allows_public_url_regardless_of_setting(self, monkeypatch, _common_patches): - """Public URLs always pass regardless of the allow_private_urls setting.""" + def test_cloud_allows_public_url(self, monkeypatch, _common_patches): + """Public URLs always pass in cloud mode.""" + monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False) monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: True) @@ -85,6 +84,56 @@ class TestPreNavigationSsrf: assert result["success"] is True + # -- Local mode: SSRF skipped ---------------------------------------------- + + def test_local_allows_private_url(self, monkeypatch, _common_patches): + """Local backends skip SSRF — private URLs are always allowed.""" + monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: True) + monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) + monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: False) + + result = json.loads(browser_tool.browser_navigate(self.PRIVATE_URL)) + + assert result["success"] is True + + def test_local_allows_public_url(self, monkeypatch, _common_patches): + """Local backends pass public URLs too (sanity check).""" + monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: True) + monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) + monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: True) + + result = json.loads(browser_tool.browser_navigate("https://example.com")) + + assert result["success"] is True + + +# --------------------------------------------------------------------------- +# _is_local_backend() unit tests +# --------------------------------------------------------------------------- + + +class TestIsLocalBackend: + def test_camofox_is_local(self, monkeypatch): + """Camofox mode counts as a local backend.""" + monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: True) + 
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: "anything") + + assert browser_tool._is_local_backend() is True + + def test_no_cloud_provider_is_local(self, monkeypatch): + """No cloud provider configured → local backend.""" + monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: False) + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: None) + + assert browser_tool._is_local_backend() is True + + def test_cloud_provider_is_not_local(self, monkeypatch): + """Cloud provider configured and not Camofox → NOT local.""" + monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: False) + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: "bb") + + assert browser_tool._is_local_backend() is False + # --------------------------------------------------------------------------- # Post-redirect SSRF check @@ -112,8 +161,11 @@ class TestPostRedirectSsrf: }, ) - def test_blocks_redirect_to_private_by_default(self, monkeypatch, _common_patches): - """Redirects to private addresses are blocked when setting is False.""" + # -- Cloud mode: redirect SSRF active -------------------------------------- + + def test_cloud_blocks_redirect_to_private(self, monkeypatch, _common_patches): + """Redirects to private addresses are blocked in cloud mode.""" + monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False) monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) monkeypatch.setattr( browser_tool, "_is_safe_url", lambda url: "192.168" not in url, @@ -129,8 +181,9 @@ class TestPostRedirectSsrf: assert result["success"] is False assert "redirect landed on a private/internal address" in result["error"] - def test_allows_redirect_to_private_when_setting_true(self, monkeypatch, _common_patches): - """Redirects to private addresses are allowed when setting is True.""" + def test_cloud_allows_redirect_to_private_when_setting_true(self, monkeypatch, _common_patches): + """Redirects to private addresses pass 
in cloud mode with allow_private_urls.""" + monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False) monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: True) monkeypatch.setattr( browser_tool, "_is_safe_url", lambda url: "192.168" not in url, @@ -146,9 +199,30 @@ class TestPostRedirectSsrf: assert result["success"] is True assert result["url"] == self.PRIVATE_FINAL_URL - def test_allows_redirect_to_public_regardless_of_setting(self, monkeypatch, _common_patches): - """Redirects to public addresses always pass.""" + # -- Local mode: redirect SSRF skipped ------------------------------------- + + def test_local_allows_redirect_to_private(self, monkeypatch, _common_patches): + """Redirects to private addresses pass in local mode.""" + monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: True) + monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) + monkeypatch.setattr( + browser_tool, "_is_safe_url", lambda url: "192.168" not in url, + ) + monkeypatch.setattr( + browser_tool, + "_run_browser_command", + lambda *a, **kw: _make_browser_result(url=self.PRIVATE_FINAL_URL), + ) + + result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL)) + + assert result["success"] is True + assert result["url"] == self.PRIVATE_FINAL_URL + + def test_cloud_allows_redirect_to_public(self, monkeypatch, _common_patches): + """Redirects to public addresses always pass (cloud mode).""" final = "https://example.com/final" + monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False) monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False) monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: True) monkeypatch.setattr( diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 1861152e3..441dc21f6 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -267,6 +267,19 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]: return _cached_cloud_provider +def 
_is_local_backend() -> bool: + """Return True when the browser runs locally (no cloud provider). + + SSRF protection is only meaningful for cloud backends (Browserbase, + BrowserUse) where the agent could reach internal resources on a remote + machine. For local backends — Camofox, or the built-in headless + Chromium without a cloud provider — the user already has full terminal + and network access on the same machine, so the check adds no security + value. + """ + return _is_camofox_mode() or _get_cloud_provider() is None + + def _allow_private_urls() -> bool: """Return whether the browser is allowed to navigate to private/internal addresses. @@ -1066,9 +1079,11 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: JSON string with navigation result (includes stealth features info on first nav) """ # SSRF protection — block private/internal addresses before navigating. - # Can be opted out via ``browser.allow_private_urls`` in config for local - # development or LAN access use cases. - if not _allow_private_urls() and not _is_safe_url(url): + # Skipped for local backends (Camofox, headless Chromium without a cloud + # provider) because the agent already has full local network access via + # the terminal tool. Can also be opted out for cloud mode via + # ``browser.allow_private_urls`` in config. + if not _is_local_backend() and not _allow_private_urls() and not _is_safe_url(url): return json.dumps({ "success": False, "error": "Blocked: URL targets a private or internal address", @@ -1110,7 +1125,8 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: # Post-redirect SSRF check — if the browser followed a redirect to a # private/internal address, block the result so the model can't read # internal content via subsequent browser_snapshot calls. - if not _allow_private_urls() and final_url and final_url != url and not _is_safe_url(final_url): + # Skipped for local backends (same rationale as the pre-nav check). 
+ if not _is_local_backend() and not _allow_private_urls() and final_url and final_url != url and not _is_safe_url(final_url): # Navigate away to a blank page to prevent snapshot leaks _run_browser_command(effective_task_id, "open", ["about:blank"], timeout=10) return json.dumps({ -- 2.43.0 From 84a541b619238427d038e92746102c87a6ac5c36 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 10:42:03 -0700 Subject: [PATCH 080/385] feat: support * wildcard in platform allowlists and improve WhatsApp docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: clarify WhatsApp allowlist behavior and document WHATSAPP_ALLOW_ALL_USERS - Add WHATSAPP_ALLOW_ALL_USERS and WHATSAPP_DEBUG to env vars reference - Warn that * is not a wildcard and silently blocks all messages - Show WHATSAPP_ALLOWED_USERS as optional, not required - Update troubleshooting with the * trap and debug mode tip - Fix Security section to mention the allow-all alternative Prompted by a user report in Discord where WHATSAPP_ALLOWED_USERS=* caused all incoming messages to be silently dropped at the bridge level. * feat: support * wildcard in platform allowlists Follow the precedent set by SIGNAL_GROUP_ALLOWED_USERS which already supports * as an allow-all wildcard. Bridge (allowlist.js): matchesAllowedUser() now checks for * in the allowedUsers set before iterating sender aliases. Gateway (run.py): _is_authorized() checks for * in allowed_ids after parsing the allowlist. This is generic — works for all platforms, not just WhatsApp. Updated docs to document * as a supported value instead of warning against it. Added WHATSAPP_ALLOW_ALL_USERS and WHATSAPP_DEBUG to the env vars reference. Tests: JS allowlist test + 2 Python gateway tests (WhatsApp + Telegram to verify cross-platform behavior). 
--- gateway/run.py | 5 +++ scripts/whatsapp-bridge/allowlist.js | 5 +++ scripts/whatsapp-bridge/allowlist.test.mjs | 12 ++++++ .../gateway/test_unauthorized_dm_behavior.py | 40 +++++++++++++++++++ .../docs/reference/environment-variables.md | 4 +- website/docs/user-guide/messaging/whatsapp.md | 20 ++++++++-- 6 files changed, 81 insertions(+), 5 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 2fe929447..cc1a6666f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1650,6 +1650,11 @@ class GatewayRunner: if global_allowlist: allowed_ids.update(uid.strip() for uid in global_allowlist.split(",") if uid.strip()) + # "*" in any allowlist means allow everyone (consistent with + # SIGNAL_GROUP_ALLOWED_USERS precedent) + if "*" in allowed_ids: + return True + check_ids = {user_id} if "@" in user_id: check_ids.add(user_id.split("@")[0]) diff --git a/scripts/whatsapp-bridge/allowlist.js b/scripts/whatsapp-bridge/allowlist.js index 760e413f2..4cbd82d0d 100644 --- a/scripts/whatsapp-bridge/allowlist.js +++ b/scripts/whatsapp-bridge/allowlist.js @@ -68,6 +68,11 @@ export function matchesAllowedUser(senderId, allowedUsers, sessionDir) { return true; } + // "*" means allow everyone (consistent with SIGNAL_GROUP_ALLOWED_USERS) + if (allowedUsers.has('*')) { + return true; + } + const aliases = expandWhatsAppIdentifiers(senderId, sessionDir); for (const alias of aliases) { if (allowedUsers.has(alias)) { diff --git a/scripts/whatsapp-bridge/allowlist.test.mjs b/scripts/whatsapp-bridge/allowlist.test.mjs index 7eea7399c..86e1f1d6b 100644 --- a/scripts/whatsapp-bridge/allowlist.test.mjs +++ b/scripts/whatsapp-bridge/allowlist.test.mjs @@ -45,3 +45,15 @@ test('matchesAllowedUser accepts mapped lid sender when allowlist only contains rmSync(sessionDir, { recursive: true, force: true }); } }); + +test('matchesAllowedUser treats * as allow-all wildcard', () => { + const sessionDir = mkdtempSync(path.join(os.tmpdir(), 'hermes-wa-allowlist-')); + + try { + const allowedUsers 
= parseAllowedUsers('*'); + assert.equal(matchesAllowedUser('19175395595@s.whatsapp.net', allowedUsers, sessionDir), true); + assert.equal(matchesAllowedUser('267383306489914@lid', allowedUsers, sessionDir), true); + } finally { + rmSync(sessionDir, { recursive: true, force: true }); + } +}); diff --git a/tests/gateway/test_unauthorized_dm_behavior.py b/tests/gateway/test_unauthorized_dm_behavior.py index 25b51dc2f..5f898b5e6 100644 --- a/tests/gateway/test_unauthorized_dm_behavior.py +++ b/tests/gateway/test_unauthorized_dm_behavior.py @@ -90,6 +90,46 @@ def test_whatsapp_lid_user_matches_phone_allowlist_via_session_mapping(monkeypat assert runner._is_user_authorized(source) is True +def test_star_wildcard_in_allowlist_authorizes_any_user(monkeypatch): + """WHATSAPP_ALLOWED_USERS=* should act as allow-all wildcard.""" + _clear_auth_env(monkeypatch) + monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "*") + + runner, _adapter = _make_runner( + Platform.WHATSAPP, + GatewayConfig(platforms={Platform.WHATSAPP: PlatformConfig(enabled=True)}), + ) + + source = SessionSource( + platform=Platform.WHATSAPP, + user_id="99998887776@s.whatsapp.net", + chat_id="99998887776@s.whatsapp.net", + user_name="stranger", + chat_type="dm", + ) + assert runner._is_user_authorized(source) is True + + +def test_star_wildcard_works_for_any_platform(monkeypatch): + """The * wildcard should work generically, not just for WhatsApp.""" + _clear_auth_env(monkeypatch) + monkeypatch.setenv("TELEGRAM_ALLOWED_USERS", "*") + + runner, _adapter = _make_runner( + Platform.TELEGRAM, + GatewayConfig(platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="t")}), + ) + + source = SessionSource( + platform=Platform.TELEGRAM, + user_id="123456789", + chat_id="123456789", + user_name="stranger", + chat_type="dm", + ) + assert runner._is_user_authorized(source) is True + + @pytest.mark.asyncio async def test_unauthorized_dm_pairs_by_default(monkeypatch): _clear_auth_env(monkeypatch) diff --git 
a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index fd57ffb02..10b6367be 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -170,7 +170,9 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `SLACK_HOME_CHANNEL_NAME` | Display name for the Slack home channel | | `WHATSAPP_ENABLED` | Enable the WhatsApp bridge (`true`/`false`) | | `WHATSAPP_MODE` | `bot` (separate number) or `self-chat` (message yourself) | -| `WHATSAPP_ALLOWED_USERS` | Comma-separated phone numbers (with country code, no `+`) | +| `WHATSAPP_ALLOWED_USERS` | Comma-separated phone numbers (with country code, no `+`), or `*` to allow all senders | +| `WHATSAPP_ALLOW_ALL_USERS` | Allow all WhatsApp senders without an allowlist (`true`/`false`) | +| `WHATSAPP_DEBUG` | Log raw message events in the bridge for troubleshooting (`true`/`false`) | | `SIGNAL_HTTP_URL` | signal-cli daemon HTTP endpoint (for example `http://127.0.0.1:8080`) | | `SIGNAL_ACCOUNT` | Bot phone number in E.164 format | | `SIGNAL_ALLOWED_USERS` | Comma-separated E.164 phone numbers or UUIDs | diff --git a/website/docs/user-guide/messaging/whatsapp.md b/website/docs/user-guide/messaging/whatsapp.md index 1c5226813..6011992ec 100644 --- a/website/docs/user-guide/messaging/whatsapp.md +++ b/website/docs/user-guide/messaging/whatsapp.md @@ -94,9 +94,20 @@ Add the following to your `~/.hermes/.env` file: # Required WHATSAPP_ENABLED=true WHATSAPP_MODE=bot # "bot" or "self-chat" + +# Access control — pick ONE of these options: WHATSAPP_ALLOWED_USERS=15551234567 # Comma-separated phone numbers (with country code, no +) +# WHATSAPP_ALLOWED_USERS=* # OR use * to allow everyone +# WHATSAPP_ALLOW_ALL_USERS=true # OR set this flag instead (same effect as *) ``` +:::tip Allow-all shorthand +Setting `WHATSAPP_ALLOWED_USERS=*` allows **all** senders (equivalent to `WHATSAPP_ALLOW_ALL_USERS=true`). 
+This is consistent with [Signal group allowlists](/docs/reference/environment-variables). +To use the pairing flow instead, remove both variables and rely on the +[DM pairing system](/docs/user-guide/security#dm-pairing-system). +::: + Optional behavior settings in `~/.hermes/config.yaml`: ```yaml @@ -174,7 +185,7 @@ whatsapp: | **Bridge crashes or reconnect loops** | Restart the gateway, update Hermes, and re-pair if the session was invalidated by a WhatsApp protocol change. | | **Bot stops working after WhatsApp update** | Update Hermes to get the latest bridge version, then re-pair. | | **macOS: "Node.js not installed" but node works in terminal** | launchd services don't inherit your shell PATH. Run `hermes gateway install` to re-snapshot your current PATH into the plist, then `hermes gateway start`. See the [Gateway Service docs](./index.md#macos-launchd) for details. | -| **Messages not being received** | Verify `WHATSAPP_ALLOWED_USERS` includes the sender's number (with country code, no `+` or spaces). | +| **Messages not being received** | Verify `WHATSAPP_ALLOWED_USERS` includes the sender's number (with country code, no `+` or spaces), or set it to `*` to allow everyone. Set `WHATSAPP_DEBUG=true` in `.env` and restart the gateway to see raw message events in `bridge.log`. | | **Bot replies to strangers with a pairing code** | Set `whatsapp.unauthorized_dm_behavior: ignore` in `~/.hermes/config.yaml` if you want unauthorized DMs to be silently ignored instead. | --- @@ -182,9 +193,10 @@ whatsapp: ## Security :::warning -**Always set `WHATSAPP_ALLOWED_USERS`** with phone numbers (including country code, without the `+`) -of authorized users. Without this setting, the gateway will **deny all incoming messages** as a -safety measure. +**Configure access control** before going live. Set `WHATSAPP_ALLOWED_USERS` with specific +phone numbers (including country code, without the `+`), use `*` to allow everyone, or set +`WHATSAPP_ALLOW_ALL_USERS=true`. 
Without any of these, the gateway **denies all incoming +messages** as a safety measure. ::: By default, unauthorized DMs still receive a pairing code reply. If you want a private WhatsApp number to stay completely silent to strangers, set: -- 2.43.0 From 49d7210fede960796d4d0d80f5a88bfb8d45e3de Mon Sep 17 00:00:00 2001 From: MacroAnarchy Date: Mon, 30 Mar 2026 16:10:32 +0200 Subject: [PATCH 081/385] fix(gateway): parse thread_id from delivery target format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The delivery target parser uses split(':', 1) which only splits on the first colon. For the documented format platform:chat_id:thread_id (e.g. 'telegram:-1001234567890:17585'), thread_id gets munged into chat_id and is never extracted. Fix: split(':', 2) to correctly extract all three parts. Also fix to_string() to include thread_id for proper round-tripping. The downstream plumbing in _deliver_to_platform() already handles thread_id correctly (line 292-293) — it just never received a value. 
--- gateway/delivery.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/gateway/delivery.py b/gateway/delivery.py index 5adb3c2c1..fff0aeadf 100644 --- a/gateway/delivery.py +++ b/gateway/delivery.py @@ -70,12 +70,15 @@ class DeliveryTarget: if target == "local": return cls(platform=Platform.LOCAL) - # Check for platform:chat_id format + # Check for platform:chat_id or platform:chat_id:thread_id format if ":" in target: - platform_str, chat_id = target.split(":", 1) + parts = target.split(":", 2) + platform_str = parts[0] + chat_id = parts[1] if len(parts) > 1 else None + thread_id = parts[2] if len(parts) > 2 else None try: platform = Platform(platform_str) - return cls(platform=platform, chat_id=chat_id, is_explicit=True) + return cls(platform=platform, chat_id=chat_id, thread_id=thread_id, is_explicit=True) except ValueError: # Unknown platform, treat as local return cls(platform=Platform.LOCAL) @@ -94,6 +97,8 @@ class DeliveryTarget: return "origin" if self.platform == Platform.LOCAL: return "local" + if self.chat_id and self.thread_id: + return f"{self.platform.value}:{self.chat_id}:{self.thread_id}" if self.chat_id: return f"{self.platform.value}:{self.chat_id}" return self.platform.value -- 2.43.0 From c1606aed69f3685a6cc5d866f2d2c80fadcedbef Mon Sep 17 00:00:00 2001 From: Dakota Secula-Rosell Date: Tue, 31 Mar 2026 13:32:54 -0400 Subject: [PATCH 082/385] fix(cli): allow empty strings and falsy values in config set `hermes config set KEY ""` and `hermes config set KEY 0` were rejected because the guard used `not value` which is truthy for empty strings, zero, and False. Changed to `value is None` so only truly missing arguments are rejected. 
Closes #4277 Co-Authored-By: Claude Opus 4.6 (1M context) --- hermes_cli/config.py | 2 +- tests/hermes_cli/test_set_config_value.py | 42 ++++++++++++++++++++++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 51b8b9af7..e62a4cdc1 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -2040,7 +2040,7 @@ def config_command(args): elif subcmd == "set": key = getattr(args, 'key', None) value = getattr(args, 'value', None) - if not key or not value: + if not key or value is None: print("Usage: hermes config set ") print() print("Examples:") diff --git a/tests/hermes_cli/test_set_config_value.py b/tests/hermes_cli/test_set_config_value.py index 4eae64d6e..fbd71dbb5 100644 --- a/tests/hermes_cli/test_set_config_value.py +++ b/tests/hermes_cli/test_set_config_value.py @@ -1,12 +1,13 @@ """Tests for set_config_value — verifying secrets route to .env and config to config.yaml.""" +import argparse import os from pathlib import Path from unittest.mock import patch, call import pytest -from hermes_cli.config import set_config_value +from hermes_cli.config import set_config_value, config_command @pytest.fixture(autouse=True) @@ -125,3 +126,42 @@ class TestConfigYamlRouting: "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE=true" in env_content or "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE=True" in env_content ) + + +# --------------------------------------------------------------------------- +# Empty / falsy values — regression tests for #4277 +# --------------------------------------------------------------------------- + +class TestFalsyValues: + """config set should accept empty strings and falsy values like '0'.""" + + def test_empty_string_routes_to_env(self, _isolated_hermes_home): + """Blanking an API key should write an empty value to .env.""" + set_config_value("OPENROUTER_API_KEY", "") + env_content = _read_env(_isolated_hermes_home) + assert "OPENROUTER_API_KEY=" in env_content + + def 
test_empty_string_routes_to_config(self, _isolated_hermes_home): + """Blanking a config key should write an empty string to config.yaml.""" + set_config_value("model", "") + config = _read_config(_isolated_hermes_home) + assert "model: ''" in config or "model: \"\"" in config + + def test_zero_routes_to_config(self, _isolated_hermes_home): + """Setting a config key to '0' should write 0 to config.yaml.""" + set_config_value("verbose", "0") + config = _read_config(_isolated_hermes_home) + assert "verbose: 0" in config + + def test_config_command_rejects_missing_value(self): + """config set with no value arg (None) should still exit.""" + args = argparse.Namespace(config_command="set", key="model", value=None) + with pytest.raises(SystemExit): + config_command(args) + + def test_config_command_accepts_empty_string(self, _isolated_hermes_home): + """config set KEY '' should not exit — it should set the value.""" + args = argparse.Namespace(config_command="set", key="model", value="") + config_command(args) + config = _read_config(_isolated_hermes_home) + assert "model" in config -- 2.43.0 From 0240baa357522654026e4aa04c716d209f79b704 Mon Sep 17 00:00:00 2001 From: arasovic Date: Tue, 31 Mar 2026 19:42:44 +0300 Subject: [PATCH 083/385] fix: strip orphaned think/reasoning tags from user-facing responses Some models (e.g. Kimi K2.5 on Alibaba OpenAI-compatible endpoint) emit reasoning text followed by a closing without a matching opening tag. The existing paired-tag regexes in _strip_think_blocks() cannot match these orphaned tags, so leaks into user-facing responses on all platforms. Add a catch-all regex that strips any remaining opening or closing think/thinking/reasoning/REASONING_SCRATCHPAD tags after the existing paired-block removal pass. 
Closes #4285 --- run_agent.py | 1 + tests/test_run_agent.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/run_agent.py b/run_agent.py index 13278d94c..717c26b4a 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1389,6 +1389,7 @@ class AIAgent: content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) content = re.sub(r'.*?', '', content, flags=re.DOTALL) content = re.sub(r'.*?', '', content, flags=re.DOTALL) + content = re.sub(r'\s*', '', content, flags=re.IGNORECASE) return content def _looks_like_codex_intermediate_ack( diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 7ea3a63fe..aa74164a7 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -230,6 +230,27 @@ class TestStripThinkBlocks: assert "line1" not in result assert "visible" in result + def test_orphaned_closing_think_tag(self, agent): + result = agent._strip_think_blocks("some reasoningactual answer") + assert "" not in result + assert "actual answer" in result + + def test_orphaned_closing_thinking_tag(self, agent): + result = agent._strip_think_blocks("reasoninganswer") + assert "" not in result + assert "answer" in result + + def test_orphaned_opening_think_tag(self, agent): + result = agent._strip_think_blocks("orphaned reasoning without close") + assert "" not in result + + def test_mixed_orphaned_and_paired_tags(self, agent): + text = "straypaired reasoning visible" + result = agent._strip_think_blocks(text) + assert "" not in result + assert "" not in result + assert "visible" in result + class TestExtractReasoning: def test_reasoning_field(self, agent): -- 2.43.0 From 57625329a218775b70b51237d8dbe5f632c864c2 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 11:42:48 -0700 Subject: [PATCH 084/385] docs+feat: comprehensive local LLM provider guides and context length warning (#4294) * docs: update llama.cpp section with --jinja flag and tool calling guide The 
llama.cpp docs were missing the --jinja flag which is required for tool calling to work. Without it, models output tool calls as raw JSON text instead of structured API responses, making Hermes unable to execute them. Changes: - Add --jinja and -fa flags to the server startup example - Replace deprecated env vars (OPENAI_BASE_URL, LLM_MODEL) with hermes model interactive setup - Add caution block explaining the --jinja requirement and symptoms - List models with native tool calling support - Add /props endpoint verification tip * docs+feat: comprehensive local LLM provider guides and context length warning Docs (providers.md): - Rewrote Ollama section with context length warning (defaults to 4k on <24GB VRAM), three methods to increase it, and verification steps - Rewrote vLLM section with --max-model-len, tool calling flags (--enable-auto-tool-choice, --tool-call-parser), and context guidance - Rewrote SGLang section with --context-length, --tool-call-parser, and warning about 128-token default max output - Added LM Studio section (port 1234, context length defaults to 2048, tool calling since 0.3.6) - Added llama.cpp context length flag (-c) and GPU offload (-ngl) - Added Troubleshooting Local Models section covering: - Tool calls appearing as text (with per-server fix table) - Silent context truncation and diagnosis commands - Low detected context at startup - Truncated responses - Replaced all deprecated env vars (OPENAI_BASE_URL, LLM_MODEL) with hermes model interactive setup and config.yaml examples - Added deprecation warning for legacy env vars in General Setup Code (cli.py): - Added context length warning in show_banner() when detected context is <= 8192 tokens, with server-specific fix hints: - Ollama (port 11434): suggests OLLAMA_CONTEXT_LENGTH env var - LM Studio (port 1234): suggests model settings adjustment - Other servers: suggests config.yaml override Tests: - 9 new tests covering warning thresholds, server-specific hints, and no-warning cases --- 
cli.py | 26 ++- tests/test_cli_context_warning.py | 147 ++++++++++++ website/docs/integrations/providers.md | 299 ++++++++++++++++++++----- 3 files changed, 417 insertions(+), 55 deletions(-) create mode 100644 tests/test_cli_context_warning.py diff --git a/cli.py b/cli.py index 978b36091..e5f88e752 100644 --- a/cli.py +++ b/cli.py @@ -2192,7 +2192,31 @@ class HermesCLI: # Show tool availability warnings if any tools are disabled self._show_tool_availability_warnings() - + + # Warn about very low context lengths (common with local servers) + if ctx_len and ctx_len <= 8192: + self.console.print() + self.console.print( + f"[yellow]⚠️ Context length is only {ctx_len:,} tokens — " + f"this is likely too low for agent use with tools.[/]" + ) + self.console.print( + "[dim] Hermes needs 16k–32k minimum. Tool schemas + system prompt alone use ~4k–8k.[/]" + ) + base_url = getattr(self, "base_url", "") or "" + if "11434" in base_url or "ollama" in base_url.lower(): + self.console.print( + "[dim] Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]" + ) + elif "1234" in base_url: + self.console.print( + "[dim] LM Studio fix: Set context length in model settings → reload model[/]" + ) + else: + self.console.print( + "[dim] Fix: Set model.context_length in config.yaml, or increase your server's context setting[/]" + ) + self.console.print() def _preload_resumed_session(self) -> bool: diff --git a/tests/test_cli_context_warning.py b/tests/test_cli_context_warning.py new file mode 100644 index 000000000..fa0305a27 --- /dev/null +++ b/tests/test_cli_context_warning.py @@ -0,0 +1,147 @@ +"""Tests for the low context length warning in the CLI banner.""" + +import os +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.fixture +def _isolate(tmp_path, monkeypatch): + """Isolate HERMES_HOME so tests don't touch real config.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + + 
+@pytest.fixture +def cli_obj(_isolate): + """Create a minimal HermesCLI instance for banner testing.""" + with patch("cli.load_cli_config", return_value={ + "display": {"tool_progress": "new"}, + "terminal": {}, + }), patch("cli.get_tool_definitions", return_value=[]), \ + patch("cli.build_welcome_banner"): + from cli import HermesCLI + obj = HermesCLI.__new__(HermesCLI) + obj.model = "test-model" + obj.enabled_toolsets = ["hermes-core"] + obj.compact = False + obj.console = MagicMock() + obj.session_id = None + obj.api_key = "test" + obj.base_url = "" + # Mock agent with context compressor + obj.agent = SimpleNamespace( + context_compressor=SimpleNamespace(context_length=None) + ) + return obj + + +class TestLowContextWarning: + """Tests that the CLI warns about low context lengths.""" + + def test_no_warning_for_normal_context(self, cli_obj): + """No warning when context is 32k+.""" + cli_obj.agent.context_compressor.context_length = 32768 + with patch("cli.get_tool_definitions", return_value=[]), \ + patch("cli.build_welcome_banner"): + cli_obj.show_banner() + + # Check that no yellow warning was printed + calls = [str(c) for c in cli_obj.console.print.call_args_list] + warning_calls = [c for c in calls if "too low" in c] + assert len(warning_calls) == 0 + + def test_warning_for_low_context(self, cli_obj): + """Warning shown when context is 4096 (Ollama default).""" + cli_obj.agent.context_compressor.context_length = 4096 + with patch("cli.get_tool_definitions", return_value=[]), \ + patch("cli.build_welcome_banner"): + cli_obj.show_banner() + + calls = [str(c) for c in cli_obj.console.print.call_args_list] + warning_calls = [c for c in calls if "too low" in c] + assert len(warning_calls) == 1 + assert "4,096" in warning_calls[0] + + def test_warning_for_2048_context(self, cli_obj): + """Warning shown for 2048 tokens (common LM Studio default).""" + cli_obj.agent.context_compressor.context_length = 2048 + with patch("cli.get_tool_definitions", return_value=[]), 
\ + patch("cli.build_welcome_banner"): + cli_obj.show_banner() + + calls = [str(c) for c in cli_obj.console.print.call_args_list] + warning_calls = [c for c in calls if "too low" in c] + assert len(warning_calls) == 1 + + def test_no_warning_at_boundary(self, cli_obj): + """Warning still shown at exactly 8192 — the threshold is inclusive (ctx_len <= 8192).""" + cli_obj.agent.context_compressor.context_length = 8192 + with patch("cli.get_tool_definitions", return_value=[]), \ + patch("cli.build_welcome_banner"): + cli_obj.show_banner() + + calls = [str(c) for c in cli_obj.console.print.call_args_list] + warning_calls = [c for c in calls if "too low" in c] + assert len(warning_calls) == 1 # 8192 is still warned about + + def test_no_warning_above_boundary(self, cli_obj): + """No warning at 16384.""" + cli_obj.agent.context_compressor.context_length = 16384 + with patch("cli.get_tool_definitions", return_value=[]), \ + patch("cli.build_welcome_banner"): + cli_obj.show_banner() + + calls = [str(c) for c in cli_obj.console.print.call_args_list] + warning_calls = [c for c in calls if "too low" in c] + assert len(warning_calls) == 0 + + def test_ollama_specific_hint(self, cli_obj): + """Ollama-specific fix shown when port 11434 detected.""" + cli_obj.agent.context_compressor.context_length = 4096 + cli_obj.base_url = "http://localhost:11434/v1" + with patch("cli.get_tool_definitions", return_value=[]), \ + patch("cli.build_welcome_banner"): + cli_obj.show_banner() + + calls = [str(c) for c in cli_obj.console.print.call_args_list] + ollama_hints = [c for c in calls if "OLLAMA_CONTEXT_LENGTH" in c] + assert len(ollama_hints) == 1 + + def test_lm_studio_specific_hint(self, cli_obj): + """LM Studio-specific fix shown when port 1234 detected.""" + cli_obj.agent.context_compressor.context_length = 2048 + cli_obj.base_url = "http://localhost:1234/v1" + with patch("cli.get_tool_definitions", return_value=[]), \ + patch("cli.build_welcome_banner"): + cli_obj.show_banner() + + calls = [str(c)
for c in cli_obj.console.print.call_args_list] + lms_hints = [c for c in calls if "LM Studio" in c] + assert len(lms_hints) == 1 + + def test_generic_hint_for_other_servers(self, cli_obj): + """Generic fix shown for unknown servers.""" + cli_obj.agent.context_compressor.context_length = 4096 + cli_obj.base_url = "http://localhost:8080/v1" + with patch("cli.get_tool_definitions", return_value=[]), \ + patch("cli.build_welcome_banner"): + cli_obj.show_banner() + + calls = [str(c) for c in cli_obj.console.print.call_args_list] + generic_hints = [c for c in calls if "config.yaml" in c] + assert len(generic_hints) == 1 + + def test_no_warning_when_no_context_length(self, cli_obj): + """No warning when context length is not yet known.""" + cli_obj.agent.context_compressor.context_length = None + with patch("cli.get_tool_definitions", return_value=[]), \ + patch("cli.build_welcome_banner"): + cli_obj.show_banner() + + calls = [str(c) for c in cli_obj.console.print.call_args_list] + warning_calls = [c for c in calls if "too low" in c] + assert len(warning_calls) == 0 diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index ab4c8f354..7740e36db 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -218,15 +218,11 @@ model: api_key: your-key-or-leave-empty-for-local ``` -**Environment variables (`.env` file):** -```bash -# Add to ~/.hermes/.env -OPENAI_BASE_URL=http://localhost:8000/v1 -OPENAI_API_KEY=your-key # Any non-empty string for local servers -LLM_MODEL=your-model-name -``` +:::warning Legacy env vars +`OPENAI_BASE_URL` and `LLM_MODEL` in `.env` are **deprecated**. The CLI ignores `LLM_MODEL` entirely (only the gateway reads it). Use `hermes model` or edit `config.yaml` directly — both persist correctly across restarts and Docker containers. +::: -All three approaches end up in the same runtime path. 
`hermes model` persists provider, model, and base URL to `config.yaml` so later sessions keep using that endpoint even if env vars are not set. +Both approaches persist to `config.yaml`, which is the source of truth for model, provider, and base URL. ### Switching Models with `/model` @@ -257,23 +253,73 @@ Everything below follows this same pattern — just change the URL, key, and mod ### Ollama — Local Models, Zero Config -[Ollama](https://ollama.com/) runs open-weight models locally with one command. Best for: quick local experimentation, privacy-sensitive work, offline use. +[Ollama](https://ollama.com/) runs open-weight models locally with one command. Best for: quick local experimentation, privacy-sensitive work, offline use. Supports tool calling via the OpenAI-compatible API. ```bash # Install and run a model -ollama pull llama3.1:70b +ollama pull qwen2.5-coder:32b ollama serve # Starts on port 11434 - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:11434/v1 -OPENAI_API_KEY=ollama # Any non-empty string -LLM_MODEL=llama3.1:70b ``` -Ollama's OpenAI-compatible endpoint supports chat completions, streaming, and tool calling (for supported models). No GPU required for smaller models — Ollama handles CPU inference automatically. +Then configure Hermes: + +```bash +hermes model +# Select "Custom endpoint (self-hosted / VLLM / etc.)" +# Enter URL: http://localhost:11434/v1 +# Skip API key (Ollama doesn't need one) +# Enter model name (e.g. qwen2.5-coder:32b) +``` + +Or configure `config.yaml` directly: + +```yaml +model: + default: qwen2.5-coder:32b + provider: custom + base_url: http://localhost:11434/v1 + context_length: 32768 # See warning below +``` + +:::caution Ollama defaults to very low context lengths +Ollama does **not** use your model's full context window by default. 
Depending on your VRAM, the default is: + +| Available VRAM | Default context | +|----------------|----------------| +| Less than 24 GB | **4,096 tokens** | +| 24–48 GB | 32,768 tokens | +| 48+ GB | 256,000 tokens | + +For agent use with tools, **you need at least 16k–32k context**. At 4k, the system prompt + tool schemas alone can fill the window, leaving no room for conversation. + +**How to increase it** (pick one): + +```bash +# Option 1: Set server-wide via environment variable (recommended) +OLLAMA_CONTEXT_LENGTH=32768 ollama serve + +# Option 2: For systemd-managed Ollama +sudo systemctl edit ollama.service +# Add: Environment="OLLAMA_CONTEXT_LENGTH=32768" +# Then: sudo systemctl daemon-reload && sudo systemctl restart ollama + +# Option 3: Bake it into a custom model (persistent per-model) +echo -e "FROM qwen2.5-coder:32b\nPARAMETER num_ctx 32768" > Modelfile +ollama create qwen2.5-coder-32k -f Modelfile +``` + +**You cannot set context length through the OpenAI-compatible API** (`/v1/chat/completions`). It must be configured server-side or via a Modelfile. This is the #1 source of confusion when integrating Ollama with tools like Hermes. +::: + +**Verify your context is set correctly:** + +```bash +ollama ps +# Look at the CONTEXT column — it should show your configured value +``` :::tip -List available models with `ollama list`. Pull any model from the [Ollama library](https://ollama.com/library) with `ollama pull `. +List available models with `ollama list`. Pull any model from the [Ollama library](https://ollama.com/library) with `ollama pull `. Ollama handles GPU offloading automatically — no configuration needed for most setups. ::: --- @@ -283,19 +329,39 @@ List available models with `ollama list`. Pull any model from the [Ollama librar [vLLM](https://docs.vllm.ai/) is the standard for production LLM serving. Best for: maximum throughput on GPU hardware, serving large models, continuous batching. 
```bash -# Start vLLM server pip install vllm vllm serve meta-llama/Llama-3.1-70B-Instruct \ --port 8000 \ - --tensor-parallel-size 2 # Multi-GPU - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:8000/v1 -OPENAI_API_KEY=dummy -LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct + --max-model-len 65536 \ + --tensor-parallel-size 2 \ + --enable-auto-tool-choice \ + --tool-call-parser hermes ``` -vLLM supports tool calling, structured output, and multi-modal models. Use `--enable-auto-tool-choice` and `--tool-call-parser hermes` for Hermes-format tool calling with NousResearch models. +Then configure Hermes: + +```bash +hermes model +# Select "Custom endpoint (self-hosted / VLLM / etc.)" +# Enter URL: http://localhost:8000/v1 +# Skip API key (or enter one if you configured vLLM with --api-key) +# Enter model name: meta-llama/Llama-3.1-70B-Instruct +``` + +**Context length:** vLLM reads the model's `max_position_embeddings` by default. If that exceeds your GPU memory, it errors and asks you to set `--max-model-len` lower. You can also use `--max-model-len auto` to automatically find the maximum that fits. Set `--gpu-memory-utilization 0.95` (default 0.9) to squeeze more context into VRAM. + +**Tool calling requires explicit flags:** + +| Flag | Purpose | +|------|---------| +| `--enable-auto-tool-choice` | Required for `tool_choice: "auto"` (the default in Hermes) | +| `--tool-call-parser ` | Parser for the model's tool call format | + +Supported parsers: `hermes` (Qwen 2.5, Hermes 2/3), `llama3_json` (Llama 3.x), `mistral`, `deepseek_v3`, `deepseek_v31`, `xlam`, `pythonic`. Without these flags, tool calls won't work — the model will output tool calls as text. + +:::tip +vLLM supports human-readable sizes: `--max-model-len 64k` (lowercase k = 1000, uppercase K = 1024). +::: --- @@ -304,19 +370,32 @@ vLLM supports tool calling, structured output, and multi-modal models. 
Use `--en [SGLang](https://github.com/sgl-project/sglang) is an alternative to vLLM with RadixAttention for KV cache reuse. Best for: multi-turn conversations (prefix caching), constrained decoding, structured output. ```bash -# Start SGLang server pip install "sglang[all]" python -m sglang.launch_server \ --model meta-llama/Llama-3.1-70B-Instruct \ - --port 8000 \ - --tp 2 - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:8000/v1 -OPENAI_API_KEY=dummy -LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct + --port 30000 \ + --context-length 65536 \ + --tp 2 \ + --tool-call-parser qwen ``` +Then configure Hermes: + +```bash +hermes model +# Select "Custom endpoint (self-hosted / VLLM / etc.)" +# Enter URL: http://localhost:30000/v1 +# Enter model name: meta-llama/Llama-3.1-70B-Instruct +``` + +**Context length:** SGLang reads from the model's config by default. Use `--context-length` to override. If you need to exceed the model's declared maximum, set `SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1`. + +**Tool calling:** Use `--tool-call-parser` with the appropriate parser for your model family: `qwen` (Qwen 2.5), `llama3`, `llama4`, `deepseekv3`, `mistral`, `glm`. Without this flag, tool calls come back as plain text. + +:::caution SGLang defaults to 128 max output tokens +If responses seem truncated, add `max_tokens` to your requests or set `--default-max-tokens` on the server. SGLang's default is only 128 tokens per response if not specified in the request. 
+::: + --- ### llama.cpp / llama-server — CPU & Metal Inference @@ -327,21 +406,136 @@ LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct # Build and start llama-server cmake -B build && cmake --build build --config Release ./build/bin/llama-server \ - -m models/llama-3.1-8b-instruct-Q4_K_M.gguf \ + --jinja -fa \ + -c 32768 \ + -ngl 99 \ + -m models/qwen2.5-coder-32b-instruct-Q4_K_M.gguf \ --port 8080 --host 0.0.0.0 - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:8080/v1 -OPENAI_API_KEY=dummy -LLM_MODEL=llama-3.1-8b-instruct ``` +**Context length (`-c`):** Recent builds default to `0` which reads the model's training context from the GGUF metadata. For models with 128k+ training context, this can OOM trying to allocate the full KV cache. Set `-c` explicitly to what you need (32k–64k is a good range for agent use). If using parallel slots (`-np`), the total context is divided among slots — with `-c 32768 -np 4`, each slot only gets 8k. + +Then configure Hermes to point at it: + +```bash +hermes model +# Select "Custom endpoint (self-hosted / VLLM / etc.)" +# Enter URL: http://localhost:8080/v1 +# Skip API key (local servers don't need one) +# Enter model name — or leave blank to auto-detect if only one model is loaded +``` + +This saves the endpoint to `config.yaml` so it persists across sessions. + +:::caution `--jinja` is required for tool calling +Without `--jinja`, llama-server ignores the `tools` parameter entirely. The model will try to call tools by writing JSON in its response text, but Hermes won't recognize it as a tool call — you'll see raw JSON like `{"name": "web_search", ...}` printed as a message instead of an actual search. + +Native tool calling support (best performance): Llama 3.x, Qwen 2.5 (including Coder), Hermes 2/3, Mistral, DeepSeek, Functionary. All other models use a generic handler that works but may be less efficient. 
See the [llama.cpp function calling docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/function-calling.md) for the full list. + +You can verify tool support is active by checking `http://localhost:8080/props` — the `chat_template` field should be present. +::: + :::tip Download GGUF models from [Hugging Face](https://huggingface.co/models?library=gguf). Q4_K_M quantization offers the best balance of quality vs. memory usage. ::: --- +### LM Studio — Desktop App with Local Models + +[LM Studio](https://lmstudio.ai/) is a desktop app for running local models with a GUI. Best for: users who prefer a visual interface, quick model testing, developers on macOS/Windows/Linux. + +Start the server from the LM Studio app (Developer tab → Start Server), or use the CLI: + +```bash +lms server start # Starts on port 1234 +lms load qwen2.5-coder --context-length 32768 +``` + +Then configure Hermes: + +```bash +hermes model +# Select "Custom endpoint (self-hosted / VLLM / etc.)" +# Enter URL: http://localhost:1234/v1 +# Skip API key (LM Studio doesn't require one) +# Enter model name +``` + +:::caution Context length often defaults to 2048 +LM Studio reads context length from the model's metadata, but many GGUF models report low defaults (2048 or 4096). **Always set context length explicitly** in the LM Studio model settings: + +1. Click the gear icon next to the model picker +2. Set "Context Length" to at least 16384 (preferably 32768) +3. Reload the model for the change to take effect + +Alternatively, use the CLI: `lms load model-name --context-length 32768` + +To set persistent per-model defaults: My Models tab → gear icon on the model → set context size. +::: + +**Tool calling:** Supported since LM Studio 0.3.6. Models with native tool-calling training (Qwen 2.5, Llama 3.x, Mistral, Hermes) are auto-detected and shown with a tool badge. Other models use a generic fallback that may be less reliable. 
+ +--- + +### Troubleshooting Local Models + +These issues affect **all** local inference servers when used with Hermes. + +#### Tool calls appear as text instead of executing + +The model outputs something like `{"name": "web_search", "arguments": {...}}` as a message instead of actually calling the tool. + +**Cause:** Your server doesn't have tool calling enabled, or the model doesn't support it through the server's tool calling implementation. + +| Server | Fix | +|--------|-----| +| **llama.cpp** | Add `--jinja` to the startup command | +| **vLLM** | Add `--enable-auto-tool-choice --tool-call-parser hermes` | +| **SGLang** | Add `--tool-call-parser qwen` (or appropriate parser) | +| **Ollama** | Tool calling is enabled by default — make sure your model supports it (check with `ollama show model-name`) | +| **LM Studio** | Update to 0.3.6+ and use a model with native tool support | + +#### Model seems to forget context or give incoherent responses + +**Cause:** Context window is too small. When the conversation exceeds the context limit, most servers silently drop older messages. Hermes's system prompt + tool schemas alone can use 4k–8k tokens. + +**Diagnosis:** + +```bash +# Check what Hermes thinks the context is +# Look at startup line: "Context limit: X tokens" + +# Check your server's actual context +# Ollama: ollama ps (CONTEXT column) +# llama.cpp: curl http://localhost:8080/props | jq '.default_generation_settings.n_ctx' +# vLLM: check --max-model-len in startup args +``` + +**Fix:** Set context to at least **32,768 tokens** for agent use. See each server's section above for the specific flag. + +#### "Context limit: 2048 tokens" at startup + +Hermes auto-detects context length from your server's `/v1/models` endpoint. If the server reports a low value (or doesn't report one at all), Hermes uses the model's declared limit which may be wrong. 
+ +**Fix:** Set it explicitly in `config.yaml`: + +```yaml +model: + default: your-model + provider: custom + base_url: http://localhost:11434/v1 + context_length: 32768 +``` + +#### Responses get cut off mid-sentence + +**Possible causes:** +1. **Low `max_tokens` on the server** — SGLang defaults to 128 tokens per response. Set `--default-max-tokens` on the server or configure Hermes with `model.max_tokens` in config.yaml. +2. **Context exhaustion** — The model filled its context window. Increase context length or enable [context compression](/docs/user-guide/configuration#context-compression) in Hermes. + +--- + ### LiteLLM Proxy — Multi-Provider Gateway [LiteLLM](https://docs.litellm.ai/) is an OpenAI-compatible proxy that unifies 100+ LLM providers behind a single API. Best for: switching between providers without config changes, load balancing, fallback chains, budget controls. @@ -353,13 +547,10 @@ litellm --model anthropic/claude-sonnet-4 --port 4000 # Or with a config file for multiple models: litellm --config litellm_config.yaml --port 4000 - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:4000/v1 -OPENAI_API_KEY=sk-your-litellm-key -LLM_MODEL=anthropic/claude-sonnet-4 ``` +Then configure Hermes with `hermes model` → Custom endpoint → `http://localhost:4000/v1`. + Example `litellm_config.yaml` with fallback: ```yaml model_list: @@ -384,13 +575,10 @@ router_settings: ```bash # Install and start npx @blockrun/clawrouter # Starts on port 8402 - -# Configure Hermes -OPENAI_BASE_URL=http://localhost:8402/v1 -OPENAI_API_KEY=dummy -LLM_MODEL=blockrun/auto # or: blockrun/eco, blockrun/premium, blockrun/agentic ``` +Then configure Hermes with `hermes model` → Custom endpoint → `http://localhost:8402/v1` → model name `blockrun/auto`. + Routing profiles: | Profile | Strategy | Savings | |---------|----------|---------| @@ -423,11 +611,14 @@ Any service with an OpenAI-compatible API works. 
Some popular options: | [LocalAI](https://localai.io) | `http://localhost:8080/v1` | Self-hosted, multi-model | | [Jan](https://jan.ai) | `http://localhost:1337/v1` | Desktop app with local models | -```bash -# Example: Together AI -OPENAI_BASE_URL=https://api.together.xyz/v1 -OPENAI_API_KEY=your-together-key -LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct-Turbo +Configure any of these with `hermes model` → Custom endpoint, or in `config.yaml`: + +```yaml +model: + default: meta-llama/Llama-3.1-70B-Instruct-Turbo + provider: custom + base_url: https://api.together.xyz/v1 + api_key: your-together-key ``` --- -- 2.43.0 From 143b74ec00b41a7b7e949b9cb4f2b303b27e5fa6 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 11:42:52 -0700 Subject: [PATCH 085/385] fix: first-run guard stuck in loop when provider configured via config.yaml (#4298) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _has_any_provider_configured() guard only checked env vars, .env file, and auth.json — missing config.yaml model.provider/base_url/api_key entirely. Users who configured a provider through setup (saving to config.yaml) but had empty API key placeholders in .env from the install template were permanently blocked by the 'not configured' message. Changes: - _has_any_provider_configured() now checks config.yaml model section for explicit provider, base_url, or api_key — covers custom endpoints and providers that store credentials in config rather than env vars - .env.example: comment out all empty API key placeholders so they don't pollute the environment when copied to .env by the installer - .env.example: mark LLM_MODEL as deprecated (config.yaml is source of truth) - 4 new tests for the config.yaml detection path Reported by OkadoOP on Discord. 
--- .env.example | 43 +++++++++--------- hermes_cli/main.py | 11 +++++ tests/test_api_key_providers.py | 77 +++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 21 deletions(-) diff --git a/.env.example b/.env.example index 3df76497e..13aacade6 100644 --- a/.env.example +++ b/.env.example @@ -7,18 +7,19 @@ # OpenRouter provides access to many models through one API # All LLM calls go through OpenRouter - no direct provider keys needed # Get your key at: https://openrouter.ai/keys -OPENROUTER_API_KEY= +# OPENROUTER_API_KEY= -# Default model to use (OpenRouter format: provider/model) -# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus -LLM_MODEL=anthropic/claude-opus-4.6 +# Default model is configured in ~/.hermes/config.yaml (model.default). +# Use 'hermes model' or 'hermes setup' to change it. +# LLM_MODEL is no longer read from .env — this line is kept for reference only. +# LLM_MODEL=anthropic/claude-opus-4.6 # ============================================================================= # LLM PROVIDER (z.ai / GLM) # ============================================================================= # z.ai provides access to ZhipuAI GLM models (GLM-4-Plus, etc.) # Get your key at: https://z.ai or https://open.bigmodel.cn -GLM_API_KEY= +# GLM_API_KEY= # GLM_BASE_URL=https://api.z.ai/api/paas/v4 # Override default base URL # ============================================================================= @@ -28,7 +29,7 @@ GLM_API_KEY= # Get your key at: https://platform.kimi.ai (Kimi Code console) # Keys prefixed sk-kimi- use the Kimi Code API (api.kimi.com) by default. # Legacy keys from platform.moonshot.ai need KIMI_BASE_URL override below. 
-KIMI_API_KEY= +# KIMI_API_KEY= # KIMI_BASE_URL=https://api.kimi.com/coding/v1 # Default for sk-kimi- keys # KIMI_BASE_URL=https://api.moonshot.ai/v1 # For legacy Moonshot keys # KIMI_BASE_URL=https://api.moonshot.cn/v1 # For Moonshot China keys @@ -38,11 +39,11 @@ KIMI_API_KEY= # ============================================================================= # MiniMax provides access to MiniMax models (global endpoint) # Get your key at: https://www.minimax.io -MINIMAX_API_KEY= +# MINIMAX_API_KEY= # MINIMAX_BASE_URL=https://api.minimax.io/v1 # Override default base URL # MiniMax China endpoint (for users in mainland China) -MINIMAX_CN_API_KEY= +# MINIMAX_CN_API_KEY= # MINIMAX_CN_BASE_URL=https://api.minimaxi.com/v1 # Override default base URL # ============================================================================= @@ -50,7 +51,7 @@ MINIMAX_CN_API_KEY= # ============================================================================= # OpenCode Zen provides curated, tested models (GPT, Claude, Gemini, MiniMax, GLM, Kimi) # Pay-as-you-go pricing. Get your key at: https://opencode.ai/auth -OPENCODE_ZEN_API_KEY= +# OPENCODE_ZEN_API_KEY= # OPENCODE_ZEN_BASE_URL=https://opencode.ai/zen/v1 # Override default base URL # ============================================================================= @@ -58,7 +59,7 @@ OPENCODE_ZEN_API_KEY= # ============================================================================= # OpenCode Go provides access to open models (GLM-5, Kimi K2.5, MiniMax M2.5) # $10/month subscription. Get your key at: https://opencode.ai/auth -OPENCODE_GO_API_KEY= +# OPENCODE_GO_API_KEY= # ============================================================================= # LLM PROVIDER (Hugging Face Inference Providers) @@ -67,7 +68,7 @@ OPENCODE_GO_API_KEY= # Free tier included ($0.10/month), no markup on provider rates. 
# Get your token at: https://huggingface.co/settings/tokens # Required permission: "Make calls to Inference Providers" -HF_TOKEN= +# HF_TOKEN= # OPENCODE_GO_BASE_URL=https://opencode.ai/zen/go/v1 # Override default base URL # ============================================================================= @@ -76,26 +77,26 @@ HF_TOKEN= # Exa API Key - AI-native web search and contents # Get at: https://exa.ai -EXA_API_KEY= +# EXA_API_KEY= # Parallel API Key - AI-native web search and extract # Get at: https://parallel.ai -PARALLEL_API_KEY= +# PARALLEL_API_KEY= # Firecrawl API Key - Web search, extract, and crawl # Get at: https://firecrawl.dev/ -FIRECRAWL_API_KEY= +# FIRECRAWL_API_KEY= # FAL.ai API Key - Image generation # Get at: https://fal.ai/ -FAL_KEY= +# FAL_KEY= # Honcho - Cross-session AI-native user modeling (optional) # Builds a persistent understanding of the user across sessions and tools. # Get at: https://app.honcho.dev # Also requires ~/.honcho/config.json with enabled=true (see README). -HONCHO_API_KEY= +# HONCHO_API_KEY= # ============================================================================= # TERMINAL TOOL CONFIGURATION @@ -181,10 +182,10 @@ TERMINAL_LIFETIME_SECONDS=300 # Browserbase API Key - Cloud browser execution # Get at: https://browserbase.com/ -BROWSERBASE_API_KEY= +# BROWSERBASE_API_KEY= # Browserbase Project ID - From your Browserbase dashboard -BROWSERBASE_PROJECT_ID= +# BROWSERBASE_PROJECT_ID= # Enable residential proxies for better CAPTCHA solving (default: true) # Routes traffic through residential IPs, significantly improves success rate @@ -216,7 +217,7 @@ BROWSER_INACTIVITY_TIMEOUT=120 # Uses OpenAI's API directly (not via OpenRouter). # Named VOICE_TOOLS_OPENAI_KEY to avoid interference with OpenRouter. 
# Get at: https://platform.openai.com/api-keys -VOICE_TOOLS_OPENAI_KEY= +# VOICE_TOOLS_OPENAI_KEY= # ============================================================================= # SLACK INTEGRATION @@ -302,11 +303,11 @@ IMAGE_TOOLS_DEBUG=false # Tinker API Key - RL training service # Get at: https://tinker-console.thinkingmachines.ai/keys -TINKER_API_KEY= +# TINKER_API_KEY= # Weights & Biases API Key - Experiment tracking and metrics # Get at: https://wandb.ai/authorize -WANDB_API_KEY= +# WANDB_API_KEY= # RL API Server URL (default: http://localhost:8080) # Change if running the rl-server on a different host/port diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 9b4b3ccac..315e0f974 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -246,6 +246,17 @@ def _has_any_provider_configured() -> bool: pass + # Check config.yaml — if model is a dict with an explicit provider set, + # the user has gone through setup (fresh installs have model as a plain + # string). Also covers custom endpoints that store api_key/base_url in + # config rather than .env. + if isinstance(model_cfg, dict): + cfg_provider = (model_cfg.get("provider") or "").strip() + cfg_base_url = (model_cfg.get("base_url") or "").strip() + cfg_api_key = (model_cfg.get("api_key") or "").strip() + if cfg_provider or cfg_base_url or cfg_api_key: + return True + # Check for Claude Code OAuth credentials (~/.claude/.credentials.json) # Only count these if Hermes has been explicitly configured — Claude Code # being installed doesn't mean the user wants Hermes to use their tokens. 
diff --git a/tests/test_api_key_providers.py b/tests/test_api_key_providers.py index e250bbb25..da191496d 100644 --- a/tests/test_api_key_providers.py +++ b/tests/test_api_key_providers.py @@ -645,6 +645,83 @@ class TestHasAnyProviderConfigured: from hermes_cli.main import _has_any_provider_configured assert _has_any_provider_configured() is False + def test_config_provider_counts(self, monkeypatch, tmp_path): + """config.yaml with model.provider set should count as configured.""" + import yaml + from hermes_cli import config as config_module + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + config_file = hermes_home / "config.yaml" + config_file.write_text(yaml.dump({ + "model": {"default": "anthropic/claude-opus-4.6", "provider": "openrouter"}, + })) + monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env") + monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + # Clear all provider env vars + for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"): + monkeypatch.delenv(var, raising=False) + from hermes_cli.main import _has_any_provider_configured + assert _has_any_provider_configured() is True + + def test_config_base_url_counts(self, monkeypatch, tmp_path): + """config.yaml with model.base_url set (custom endpoint) should count.""" + import yaml + from hermes_cli import config as config_module + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + config_file = hermes_home / "config.yaml" + config_file.write_text(yaml.dump({ + "model": {"default": "my-model", "base_url": "http://localhost:11434/v1"}, + })) + monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env") + monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", 
"ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"): + monkeypatch.delenv(var, raising=False) + from hermes_cli.main import _has_any_provider_configured + assert _has_any_provider_configured() is True + + def test_config_api_key_counts(self, monkeypatch, tmp_path): + """config.yaml with model.api_key set should count.""" + import yaml + from hermes_cli import config as config_module + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + config_file = hermes_home / "config.yaml" + config_file.write_text(yaml.dump({ + "model": {"default": "my-model", "api_key": "sk-test-key"}, + })) + monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env") + monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"): + monkeypatch.delenv(var, raising=False) + from hermes_cli.main import _has_any_provider_configured + assert _has_any_provider_configured() is True + + def test_config_dict_no_provider_no_creds_still_false(self, monkeypatch, tmp_path): + """config.yaml model dict with only 'default' key and no creds stays false.""" + import yaml + from hermes_cli import config as config_module + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + config_file = hermes_home / "config.yaml" + config_file.write_text(yaml.dump({ + "model": {"default": "anthropic/claude-opus-4.6"}, + })) + monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env") + monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"): + monkeypatch.delenv(var, raising=False) + from hermes_cli.main import _has_any_provider_configured + assert _has_any_provider_configured() is False + def 
test_claude_code_creds_counted_when_hermes_configured(self, monkeypatch, tmp_path): """Claude Code credentials should count when Hermes has been explicitly configured.""" import yaml -- 2.43.0 From 161acb0086274e30c806e6abfbcbe0d3a8740873 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 12:02:29 -0700 Subject: [PATCH 086/385] fix: credential pool 401 recovery rotates to next credential after failed refresh (#4300) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an OAuth token refresh fails on a 401 error, the pool recovery would return 'not recovered' without trying the next credential in the pool. This meant users who added a second valid credential via 'hermes auth add' would never see it used when the primary credential was dead. Now: try refresh first (handles expired tokens quickly), and if that fails, rotate to the next available credential — same as 429/402 already did. Adds three tests covering 401 refresh success, refresh-fail-then-rotate, and refresh-fail-with-no-remaining-credentials. --- run_agent.py | 7 +++++ tests/test_run_agent.py | 65 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/run_agent.py b/run_agent.py index 717c26b4a..3cfcc12af 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3862,6 +3862,13 @@ class AIAgent: logger.info(f"Credential 401 — refreshed pool entry {getattr(refreshed, 'id', '?')}") self._swap_credential(refreshed) return True, has_retried_429 + # Refresh failed — rotate to next credential instead of giving up. + # The failed entry is already marked exhausted by try_refresh_current(). 
+ next_entry = pool.mark_exhausted_and_rotate(status_code=401) + if next_entry is not None: + logger.info(f"Credential 401 (refresh failed) — rotated to pool entry {getattr(next_entry, 'id', '?')}") + self._swap_credential(next_entry) + return True, False return False, has_retried_429 diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index aa74164a7..99905bb56 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1848,6 +1848,71 @@ class TestCredentialPoolRecovery: agent._swap_credential.assert_called_once_with(next_entry) + def test_recover_with_pool_refreshes_on_401(self, agent): + """401 with successful refresh should swap to refreshed credential.""" + refreshed_entry = SimpleNamespace(label="refreshed-primary", id="abc") + + class _Pool: + def try_refresh_current(self): + return refreshed_entry + + agent._credential_pool = _Pool() + agent._swap_credential = MagicMock() + + recovered, retry_same = agent._recover_with_credential_pool( + status_code=401, + has_retried_429=False, + ) + + assert recovered is True + agent._swap_credential.assert_called_once_with(refreshed_entry) + + def test_recover_with_pool_rotates_on_401_when_refresh_fails(self, agent): + """401 with failed refresh should rotate to next credential.""" + next_entry = SimpleNamespace(label="secondary", id="def") + + class _Pool: + def try_refresh_current(self): + return None # refresh failed + + def mark_exhausted_and_rotate(self, *, status_code): + assert status_code == 401 + return next_entry + + agent._credential_pool = _Pool() + agent._swap_credential = MagicMock() + + recovered, retry_same = agent._recover_with_credential_pool( + status_code=401, + has_retried_429=False, + ) + + assert recovered is True + assert retry_same is False + agent._swap_credential.assert_called_once_with(next_entry) + + def test_recover_with_pool_401_refresh_fails_no_more_credentials(self, agent): + """401 with failed refresh and no other credentials returns not recovered.""" + + class 
_Pool: + def try_refresh_current(self): + return None + + def mark_exhausted_and_rotate(self, *, status_code): + return None # no more credentials + + agent._credential_pool = _Pool() + agent._swap_credential = MagicMock() + + recovered, retry_same = agent._recover_with_credential_pool( + status_code=401, + has_retried_429=False, + ) + + assert recovered is False + agent._swap_credential.assert_not_called() + + class TestMaxTokensParam: """Verify _max_tokens_param returns the correct key for each provider.""" -- 2.43.0 From e75964d46dad9e95bd4333027a96e8a7bb61f8fb Mon Sep 17 00:00:00 2001 From: curtitoo Date: Tue, 31 Mar 2026 09:25:08 -0700 Subject: [PATCH 087/385] fix: harden codex responses transport handling --- run_agent.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index 3cfcc12af..670f21007 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3238,9 +3238,10 @@ class AIAgent: "model": model, "instructions": instructions, "input": normalized_input, - "tools": normalized_tools, "store": False, } + if normalized_tools is not None: + normalized["tools"] = normalized_tools # Pass through reasoning config reasoning = api_kwargs.get("reasoning") @@ -3583,6 +3584,8 @@ class AIAgent: def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None): """Execute one streaming Responses API request and return the final response.""" + import httpx as _httpx + active_client = client or self._ensure_primary_openai_client(reason="codex_stream_direct") max_stream_retries = 1 has_tool_calls = False @@ -3616,6 +3619,22 @@ class AIAgent: if reasoning_text: self._fire_reasoning_delta(reasoning_text) return stream.get_final_response() + except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc: + if attempt < max_stream_retries: + logger.debug( + "Codex Responses stream transport failed (attempt %s/%s); retrying. 
%s error=%s", + attempt + 1, + max_stream_retries + 1, + self._client_log_context(), + exc, + ) + continue + logger.debug( + "Codex Responses stream transport failed; falling back to create(stream=True). %s error=%s", + self._client_log_context(), + exc, + ) + return self._run_codex_create_stream_fallback(api_kwargs, client=active_client) except RuntimeError as exc: err_text = str(exc) missing_completed = "response.completed" in err_text -- 2.43.0 From cac9d20c4f7c9fc1d5176f347595ba124a6c7e1b Mon Sep 17 00:00:00 2001 From: curtitoo Date: Tue, 31 Mar 2026 09:25:31 -0700 Subject: [PATCH 088/385] test: add codex transport drop regression --- tests/test_streaming.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/test_streaming.py b/tests/test_streaming.py index 107a8a4d4..37a61ac37 100644 --- a/tests/test_streaming.py +++ b/tests/test_streaming.py @@ -782,3 +782,35 @@ class TestCodexStreamCallbacks: response = agent._run_codex_stream({}, client=mock_client) assert "Hello from Codex!" 
in deltas + + def test_codex_remote_protocol_error_falls_back_to_create_stream(self): + from run_agent import AIAgent + import httpx + + fallback_response = SimpleNamespace( + output=[SimpleNamespace( + type="message", + content=[SimpleNamespace(type="output_text", text="fallback from create stream")], + )], + status="completed", + ) + + mock_client = MagicMock() + mock_client.responses.stream.side_effect = httpx.RemoteProtocolError( + "peer closed connection without sending complete message body" + ) + + agent = AIAgent( + model="test/model", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + agent.api_mode = "codex_responses" + agent._interrupt_requested = False + + with patch.object(agent, "_run_codex_create_stream_fallback", return_value=fallback_response) as mock_fallback: + response = agent._run_codex_stream({}, client=mock_client) + + assert response is fallback_response + mock_fallback.assert_called_once_with({}, client=mock_client) -- 2.43.0 From 7f670a06cff300ab0cec44c2dade9fe29fcd7a49 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 12:10:12 -0700 Subject: [PATCH 089/385] feat: add --max-turns CLI flag to hermes chat Exposes the existing max_turns parameter (cli.py main()) as a CLI flag so programmatic callers (Paperclip adapter, scripts) can control the agent's tool-calling iteration limit without editing config.yaml. Priority chain unchanged: CLI flag > config agent.max_turns > env HERMES_MAX_ITERATIONS > default 90. 
--- hermes_cli/main.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 315e0f974..a420aafcc 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -643,6 +643,7 @@ def cmd_chat(args): "worktree": getattr(args, "worktree", False), "checkpoints": getattr(args, "checkpoints", False), "pass_session_id": getattr(args, "pass_session_id", False), + "max_turns": getattr(args, "max_turns", None), } # Filter out None values kwargs = {k: v for k, v in kwargs.items() if v is not None} @@ -3808,6 +3809,13 @@ For more help on a command: default=False, help="Enable filesystem checkpoints before destructive file operations (use /rollback to restore)" ) + chat_parser.add_argument( + "--max-turns", + type=int, + default=None, + metavar="N", + help="Maximum tool-calling iterations per conversation turn (default: 90, or agent.max_turns in config)" + ) chat_parser.add_argument( "--yolo", action="store_true", -- 2.43.0 From 08171c1c316722b5a38ea3aef38351441613bd26 Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:30:26 +0300 Subject: [PATCH 090/385] fix: allow voice mode in WSL when PulseAudio bridge is configured WSL detection was treated as a hard fail, blocking voice mode even when audio worked via PulseAudio bridge. Now PULSE_SERVER env var presence makes WSL a soft notice instead of a blocking warning. Device query failures in WSL with PULSE_SERVER are also treated as non-blocking. 
--- tests/tools/test_voice_mode.py | 128 +++++++++++++++++++++++++++++++++ tools/voice_mode.py | 30 ++++++-- 2 files changed, 153 insertions(+), 5 deletions(-) diff --git a/tests/tools/test_voice_mode.py b/tests/tools/test_voice_mode.py index 013ed6635..933393f85 100644 --- a/tests/tools/test_voice_mode.py +++ b/tests/tools/test_voice_mode.py @@ -56,6 +56,134 @@ def mock_sd(monkeypatch): return mock +# ============================================================================ +# detect_audio_environment — WSL / SSH / Docker detection +# ============================================================================ + +class TestDetectAudioEnvironment: + def test_clean_environment_is_available(self, monkeypatch): + """No SSH, Docker, or WSL — should be available.""" + monkeypatch.delenv("SSH_CLIENT", raising=False) + monkeypatch.delenv("SSH_TTY", raising=False) + monkeypatch.delenv("SSH_CONNECTION", raising=False) + monkeypatch.setattr("tools.voice_mode._import_audio", + lambda: (MagicMock(), MagicMock())) + + from tools.voice_mode import detect_audio_environment + result = detect_audio_environment() + assert result["available"] is True + assert result["warnings"] == [] + + def test_ssh_blocks_voice(self, monkeypatch): + """SSH environment should block voice mode.""" + monkeypatch.setenv("SSH_CLIENT", "1.2.3.4 54321 22") + monkeypatch.setattr("tools.voice_mode._import_audio", + lambda: (MagicMock(), MagicMock())) + + from tools.voice_mode import detect_audio_environment + result = detect_audio_environment() + assert result["available"] is False + assert any("SSH" in w for w in result["warnings"]) + + def test_wsl_without_pulse_blocks_voice(self, monkeypatch, tmp_path): + """WSL without PULSE_SERVER should block voice mode.""" + monkeypatch.delenv("SSH_CLIENT", raising=False) + monkeypatch.delenv("SSH_TTY", raising=False) + monkeypatch.delenv("SSH_CONNECTION", raising=False) + monkeypatch.delenv("PULSE_SERVER", raising=False) + 
monkeypatch.setattr("tools.voice_mode._import_audio", + lambda: (MagicMock(), MagicMock())) + + proc_version = tmp_path / "proc_version" + proc_version.write_text("Linux 5.15.0-microsoft-standard-WSL2") + + _real_open = open + def _fake_open(f, *a, **kw): + if f == "/proc/version": + return _real_open(str(proc_version), *a, **kw) + return _real_open(f, *a, **kw) + + with patch("builtins.open", side_effect=_fake_open): + from tools.voice_mode import detect_audio_environment + result = detect_audio_environment() + + assert result["available"] is False + assert any("WSL" in w for w in result["warnings"]) + assert any("PulseAudio" in w for w in result["warnings"]) + + def test_wsl_with_pulse_allows_voice(self, monkeypatch, tmp_path): + """WSL with PULSE_SERVER set should NOT block voice mode.""" + monkeypatch.delenv("SSH_CLIENT", raising=False) + monkeypatch.delenv("SSH_TTY", raising=False) + monkeypatch.delenv("SSH_CONNECTION", raising=False) + monkeypatch.setenv("PULSE_SERVER", "unix:/mnt/wslg/PulseServer") + monkeypatch.setattr("tools.voice_mode._import_audio", + lambda: (MagicMock(), MagicMock())) + + proc_version = tmp_path / "proc_version" + proc_version.write_text("Linux 5.15.0-microsoft-standard-WSL2") + + _real_open = open + def _fake_open(f, *a, **kw): + if f == "/proc/version": + return _real_open(str(proc_version), *a, **kw) + return _real_open(f, *a, **kw) + + with patch("builtins.open", side_effect=_fake_open): + from tools.voice_mode import detect_audio_environment + result = detect_audio_environment() + + assert result["available"] is True + assert result["warnings"] == [] + assert any("WSL" in n for n in result.get("notices", [])) + + def test_wsl_device_query_fails_with_pulse_continues(self, monkeypatch, tmp_path): + """WSL device query failure should not block if PULSE_SERVER is set.""" + monkeypatch.delenv("SSH_CLIENT", raising=False) + monkeypatch.delenv("SSH_TTY", raising=False) + monkeypatch.delenv("SSH_CONNECTION", raising=False) + 
monkeypatch.setenv("PULSE_SERVER", "unix:/mnt/wslg/PulseServer") + + mock_sd = MagicMock() + mock_sd.query_devices.side_effect = Exception("device query failed") + monkeypatch.setattr("tools.voice_mode._import_audio", + lambda: (mock_sd, MagicMock())) + + proc_version = tmp_path / "proc_version" + proc_version.write_text("Linux 5.15.0-microsoft-standard-WSL2") + + _real_open = open + def _fake_open(f, *a, **kw): + if f == "/proc/version": + return _real_open(str(proc_version), *a, **kw) + return _real_open(f, *a, **kw) + + with patch("builtins.open", side_effect=_fake_open): + from tools.voice_mode import detect_audio_environment + result = detect_audio_environment() + + assert result["available"] is True + assert any("device query failed" in n for n in result.get("notices", [])) + + def test_device_query_fails_without_pulse_blocks(self, monkeypatch): + """Device query failure without PULSE_SERVER should block.""" + monkeypatch.delenv("SSH_CLIENT", raising=False) + monkeypatch.delenv("SSH_TTY", raising=False) + monkeypatch.delenv("SSH_CONNECTION", raising=False) + monkeypatch.delenv("PULSE_SERVER", raising=False) + + mock_sd = MagicMock() + mock_sd.query_devices.side_effect = Exception("device query failed") + monkeypatch.setattr("tools.voice_mode._import_audio", + lambda: (mock_sd, MagicMock())) + + from tools.voice_mode import detect_audio_environment + result = detect_audio_environment() + + assert result["available"] is False + assert any("PortAudio" in w for w in result["warnings"]) + + # ============================================================================ # check_voice_requirements # ============================================================================ diff --git a/tools/voice_mode.py b/tools/voice_mode.py index 6df6a54bc..53d9ecb00 100644 --- a/tools/voice_mode.py +++ b/tools/voice_mode.py @@ -51,9 +51,12 @@ def _audio_available() -> bool: def detect_audio_environment() -> dict: """Detect if the current environment supports audio I/O. 
- Returns dict with 'available' (bool) and 'warnings' (list of strings). + Returns dict with 'available' (bool), 'warnings' (list of hard-fail + reasons that block voice mode), and 'notices' (list of informational + messages that do NOT block voice mode). """ - warnings = [] + warnings = [] # hard-fail: these block voice mode + notices = [] # informational: logged but don't block # SSH detection if any(os.environ.get(v) for v in ('SSH_CLIENT', 'SSH_TTY', 'SSH_CONNECTION')): @@ -63,11 +66,20 @@ def detect_audio_environment() -> dict: if os.path.exists('/.dockerenv'): warnings.append("Running inside Docker container -- no audio devices") - # WSL detection + # WSL detection — PulseAudio bridge makes audio work in WSL. + # Only block if PULSE_SERVER is not configured. try: with open('/proc/version', 'r') as f: if 'microsoft' in f.read().lower(): - warnings.append("Running in WSL -- audio requires PulseAudio bridge to Windows") + if os.environ.get('PULSE_SERVER'): + notices.append("Running in WSL with PulseAudio bridge") + else: + warnings.append( + "Running in WSL -- audio requires PulseAudio bridge.\n" + " 1. Set PULSE_SERVER=unix:/mnt/wslg/PulseServer\n" + " 2. Create ~/.asoundrc pointing ALSA at PulseAudio\n" + " 3. Verify with: arecord -d 3 /tmp/test.wav && aplay /tmp/test.wav" + ) except (FileNotFoundError, PermissionError, OSError): pass @@ -79,7 +91,12 @@ def detect_audio_environment() -> dict: if not devices: warnings.append("No audio input/output devices detected") except Exception: - warnings.append("Audio subsystem error (PortAudio cannot query devices)") + # In WSL with PulseAudio, device queries can fail even though + # recording/playback works fine. Don't block if PULSE_SERVER is set. 
+ if os.environ.get('PULSE_SERVER'): + notices.append("Audio device query failed but PULSE_SERVER is set -- continuing") + else: + warnings.append("Audio subsystem error (PortAudio cannot query devices)") except ImportError: warnings.append("Audio libraries not installed (pip install sounddevice numpy)") except OSError: @@ -93,6 +110,7 @@ def detect_audio_environment() -> dict: return { "available": len(warnings) == 0, "warnings": warnings, + "notices": notices, } # --------------------------------------------------------------------------- @@ -748,6 +766,8 @@ def check_voice_requirements() -> Dict[str, Any]: for warning in env_check["warnings"]: details_parts.append(f"Environment: {warning}") + for notice in env_check.get("notices", []): + details_parts.append(f"Environment: {notice}") return { "available": available, -- 2.43.0 From 0f2ea2062bc0041b6c954e1ec8b4be0fbd45734e Mon Sep 17 00:00:00 2001 From: Gutslabs Date: Tue, 31 Mar 2026 12:13:07 -0700 Subject: [PATCH 091/385] fix(profiles): validate tar archive member paths on import Fixes a zip-slip path traversal vulnerability in hermes profile import. shutil.unpack_archive() on untrusted tar members allows entries like ../../escape.txt to write files outside ~/.hermes/profiles/. 
- Add _normalize_profile_archive_parts() to reject absolute paths (POSIX and Windows), traversal (..), empty paths, backslash tricks - Add _safe_extract_profile_archive() for manual per-member extraction that only allows regular files and directories (rejects symlinks) - Replace shutil.unpack_archive() with the safe extraction path - Add regression tests for traversal and absolute-path attacks Co-authored-by: Gutslabs --- hermes_cli/profiles.py | 69 +++++++++++++++++++++++++++++-- tests/hermes_cli/test_profiles.py | 35 ++++++++++++++++ 2 files changed, 100 insertions(+), 4 deletions(-) diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py index 30da7eb1a..5809186f5 100644 --- a/hermes_cli/profiles.py +++ b/hermes_cli/profiles.py @@ -27,7 +27,7 @@ import stat import subprocess import sys from dataclasses import dataclass, field -from pathlib import Path +from pathlib import Path, PurePosixPath, PureWindowsPath from typing import List, Optional _PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$") @@ -702,6 +702,58 @@ def export_profile(name: str, output_path: str) -> Path: return Path(result) +def _normalize_profile_archive_parts(member_name: str) -> List[str]: + """Return safe path parts for a profile archive member.""" + normalized_name = member_name.replace("\\", "/") + posix_path = PurePosixPath(normalized_name) + windows_path = PureWindowsPath(member_name) + + if ( + not normalized_name + or posix_path.is_absolute() + or windows_path.is_absolute() + or windows_path.drive + ): + raise ValueError(f"Unsafe archive member path: {member_name}") + + parts = [part for part in posix_path.parts if part not in ("", ".")] + if not parts or any(part == ".." 
for part in parts): + raise ValueError(f"Unsafe archive member path: {member_name}") + return parts + + +def _safe_extract_profile_archive(archive: Path, destination: Path) -> None: + """Extract a profile archive without allowing path escapes or links.""" + import tarfile + + with tarfile.open(archive, "r:gz") as tf: + for member in tf.getmembers(): + parts = _normalize_profile_archive_parts(member.name) + target = destination.joinpath(*parts) + + if member.isdir(): + target.mkdir(parents=True, exist_ok=True) + continue + + if not member.isfile(): + raise ValueError( + f"Unsupported archive member type: {member.name}" + ) + + target.parent.mkdir(parents=True, exist_ok=True) + extracted = tf.extractfile(member) + if extracted is None: + raise ValueError(f"Cannot read archive member: {member.name}") + + with extracted, open(target, "wb") as dst: + shutil.copyfileobj(extracted, dst) + + try: + os.chmod(target, member.mode & 0o777) + except OSError: + pass + + def import_profile(archive_path: str, name: Optional[str] = None) -> Path: """Import a profile from a tar.gz archive. 
@@ -716,9 +768,18 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path: # Peek at the archive to find the top-level directory name with tarfile.open(archive, "r:gz") as tf: - top_dirs = {m.name.split("/")[0] for m in tf.getmembers() if "/" in m.name} + top_dirs = { + parts[0] + for member in tf.getmembers() + for parts in [_normalize_profile_archive_parts(member.name)] + if len(parts) > 1 or member.isdir() + } if not top_dirs: - top_dirs = {m.name for m in tf.getmembers() if m.isdir()} + top_dirs = { + _normalize_profile_archive_parts(member.name)[0] + for member in tf.getmembers() + if member.isdir() + } inferred_name = name or (top_dirs.pop() if len(top_dirs) == 1 else None) if not inferred_name: @@ -735,7 +796,7 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path: profiles_root = _get_profiles_root() profiles_root.mkdir(parents=True, exist_ok=True) - shutil.unpack_archive(str(archive), str(profiles_root)) + _safe_extract_profile_archive(archive, profiles_root) # If the archive extracted under a different name, rename extracted = profiles_root / (top_dirs.pop() if top_dirs else inferred_name) diff --git a/tests/hermes_cli/test_profiles.py b/tests/hermes_cli/test_profiles.py index 80152a4a0..4e59d250e 100644 --- a/tests/hermes_cli/test_profiles.py +++ b/tests/hermes_cli/test_profiles.py @@ -6,6 +6,7 @@ and shell completion generation. 
""" import json +import io import os import tarfile from pathlib import Path @@ -449,6 +450,40 @@ class TestExportImport: with pytest.raises(FileExistsError): import_profile(str(archive_path), name="coder") + def test_import_rejects_traversal_archive_member(self, profile_env, tmp_path): + archive_path = tmp_path / "export" / "evil.tar.gz" + archive_path.parent.mkdir(parents=True, exist_ok=True) + escape_path = tmp_path / "escape.txt" + + with tarfile.open(archive_path, "w:gz") as tf: + info = tarfile.TarInfo("../../escape.txt") + data = b"pwned" + info.size = len(data) + tf.addfile(info, io.BytesIO(data)) + + with pytest.raises(ValueError, match="Unsafe archive member path"): + import_profile(str(archive_path), name="coder") + + assert not escape_path.exists() + assert not get_profile_dir("coder").exists() + + def test_import_rejects_absolute_archive_member(self, profile_env, tmp_path): + archive_path = tmp_path / "export" / "evil-abs.tar.gz" + archive_path.parent.mkdir(parents=True, exist_ok=True) + absolute_target = tmp_path / "abs-escape.txt" + + with tarfile.open(archive_path, "w:gz") as tf: + info = tarfile.TarInfo(str(absolute_target)) + data = b"pwned" + info.size = len(data) + tf.addfile(info, io.BytesIO(data)) + + with pytest.raises(ValueError, match="Unsafe archive member path"): + import_profile(str(archive_path), name="coder") + + assert not absolute_target.exists() + assert not get_profile_dir("coder").exists() + def test_export_nonexistent_raises(self, profile_env, tmp_path): with pytest.raises(FileNotFoundError): export_profile("nonexistent", str(tmp_path / "out.tar.gz")) -- 2.43.0 From a97641b9f2b90399c81a1242fc7845808611d021 Mon Sep 17 00:00:00 2001 From: maymuneth Date: Mon, 30 Mar 2026 15:06:35 +0300 Subject: [PATCH 092/385] fix(security): reject path traversal in credential file registration --- tests/tools/test_credential_files.py | 107 +++++++++++++++++++++++++++ tools/credential_files.py | 39 +++++++++- 2 files changed, 142 insertions(+), 4 
deletions(-) diff --git a/tests/tools/test_credential_files.py b/tests/tools/test_credential_files.py index c46f73fae..b6e43d4a8 100644 --- a/tests/tools/test_credential_files.py +++ b/tests/tools/test_credential_files.py @@ -197,3 +197,110 @@ class TestIterSkillsFiles: with patch.dict(os.environ, {"HERMES_HOME": str(hermes_home)}): assert iter_skills_files() == [] + +class TestPathTraversalSecurity: + """Path traversal and absolute path rejection. + + A malicious skill could declare:: + + required_credential_files: + - path: '../../.ssh/id_rsa' + + Without containment checks, this would mount the host's SSH private key + into the container sandbox, leaking it to the skill's execution environment. + """ + + def test_dotdot_traversal_rejected(self, tmp_path, monkeypatch): + """'../sensitive' must not escape HERMES_HOME.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) + (tmp_path / ".hermes").mkdir() + + # Create a sensitive file one level above hermes_home + sensitive = tmp_path / "sensitive.json" + sensitive.write_text('{"secret": "value"}') + + result = register_credential_file("../sensitive.json") + + assert result is False + assert get_credential_file_mounts() == [] + + def test_deep_traversal_rejected(self, tmp_path, monkeypatch): + """'../../etc/passwd' style traversal must be rejected.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Create a fake sensitive file outside hermes_home + ssh_dir = tmp_path / ".ssh" + ssh_dir.mkdir() + (ssh_dir / "id_rsa").write_text("PRIVATE KEY") + + result = register_credential_file("../../.ssh/id_rsa") + + assert result is False + assert get_credential_file_mounts() == [] + + def test_absolute_path_rejected(self, tmp_path, monkeypatch): + """Absolute paths must be rejected regardless of whether they exist.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Create a 
file at an absolute path + sensitive = tmp_path / "absolute.json" + sensitive.write_text("{}") + + result = register_credential_file(str(sensitive)) + + assert result is False + assert get_credential_file_mounts() == [] + + def test_legitimate_file_still_works(self, tmp_path, monkeypatch): + """Normal files inside HERMES_HOME must still be registered.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + (hermes_home / "token.json").write_text('{"token": "abc"}') + + result = register_credential_file("token.json") + + assert result is True + mounts = get_credential_file_mounts() + assert len(mounts) == 1 + assert "token.json" in mounts[0]["container_path"] + + def test_nested_subdir_inside_hermes_home_allowed(self, tmp_path, monkeypatch): + """Files in subdirectories of HERMES_HOME must be allowed.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + subdir = hermes_home / "creds" + subdir.mkdir() + (subdir / "oauth.json").write_text("{}") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + result = register_credential_file("creds/oauth.json") + + assert result is True + + def test_symlink_traversal_rejected(self, tmp_path, monkeypatch): + """A symlink inside HERMES_HOME pointing outside must be rejected.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Create a sensitive file outside hermes_home + sensitive = tmp_path / "sensitive.json" + sensitive.write_text('{"secret": "value"}') + + # Create a symlink inside hermes_home pointing outside + symlink = hermes_home / "evil_link.json" + try: + symlink.symlink_to(sensitive) + except (OSError, NotImplementedError): + pytest.skip("Symlinks not supported on this platform") + + result = register_credential_file("evil_link.json") + + # The resolved path escapes HERMES_HOME — must be rejected + assert result is False + assert get_credential_file_mounts() == [] diff --git 
a/tools/credential_files.py b/tools/credential_files.py index 53ddd79d5..95f068a81 100644 --- a/tools/credential_files.py +++ b/tools/credential_files.py @@ -55,16 +55,47 @@ def register_credential_file( *relative_path* is relative to ``HERMES_HOME`` (e.g. ``google_token.json``). Returns True if the file exists on the host and was registered. + + Security: rejects absolute paths and path traversal sequences (``..``). + The resolved host path must remain inside HERMES_HOME so that a malicious + skill cannot declare ``required_credential_files: ['../../.ssh/id_rsa']`` + and exfiltrate sensitive host files into a container sandbox. """ hermes_home = _resolve_hermes_home() + + # Reject absolute paths — they bypass the HERMES_HOME sandbox entirely. + if os.path.isabs(relative_path): + logger.warning( + "credential_files: rejected absolute path %r (must be relative to HERMES_HOME)", + relative_path, + ) + return False + host_path = hermes_home / relative_path - if not host_path.is_file(): - logger.debug("credential_files: skipping %s (not found)", host_path) + + # Resolve symlinks and normalise ``..`` before the containment check so + # that traversal like ``../. ssh/id_rsa`` cannot escape HERMES_HOME. 
+ try: + resolved = host_path.resolve() + hermes_home_resolved = hermes_home.resolve() + resolved.relative_to(hermes_home_resolved) # raises ValueError if outside + except ValueError: + logger.warning( + "credential_files: rejected path traversal %r " + "(resolves to %s, outside HERMES_HOME %s)", + relative_path, + resolved, + hermes_home_resolved, + ) + return False + + if not resolved.is_file(): + logger.debug("credential_files: skipping %s (not found)", resolved) return False container_path = f"{container_base.rstrip('/')}/{relative_path}" - _registered_files[container_path] = str(host_path) - logger.debug("credential_files: registered %s -> %s", host_path, container_path) + _registered_files[container_path] = str(resolved) + logger.debug("credential_files: registered %s -> %s", resolved, container_path) return True -- 2.43.0 From 7f78deebe76447ea218a2363063bddc77edbf274 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 31 Mar 2026 12:06:16 -0700 Subject: [PATCH 093/385] fix: apply same path traversal checks to config-based credential files _load_config_files() had the same hermes_home / item pattern without containment checks. While config.yaml is user-controlled (lower threat than skill frontmatter), defense in depth prevents exploitation via config injection or copy-paste mistakes. 
--- tests/tools/test_credential_files.py | 54 ++++++++++++++++++++++++++++ tools/credential_files.py | 20 +++++++++-- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/tests/tools/test_credential_files.py b/tests/tools/test_credential_files.py index b6e43d4a8..7449c1db4 100644 --- a/tests/tools/test_credential_files.py +++ b/tests/tools/test_credential_files.py @@ -304,3 +304,57 @@ class TestPathTraversalSecurity: # The resolved path escapes HERMES_HOME — must be rejected assert result is False assert get_credential_file_mounts() == [] + + +# --------------------------------------------------------------------------- +# Config-based credential files — same containment checks +# --------------------------------------------------------------------------- + +class TestConfigPathTraversal: + """terminal.credential_files in config.yaml must also reject traversal.""" + + def _write_config(self, hermes_home: Path, cred_files: list): + import yaml + config_path = hermes_home / "config.yaml" + config_path.write_text(yaml.dump({"terminal": {"credential_files": cred_files}})) + + def test_config_traversal_rejected(self, tmp_path, monkeypatch): + """'../secret' in config.yaml must not escape HERMES_HOME.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + sensitive = tmp_path / "secret.json" + sensitive.write_text("{}") + self._write_config(hermes_home, ["../secret.json"]) + + mounts = get_credential_file_mounts() + host_paths = [m["host_path"] for m in mounts] + assert str(sensitive) not in host_paths + assert str(sensitive.resolve()) not in host_paths + + def test_config_absolute_path_rejected(self, tmp_path, monkeypatch): + """Absolute paths in config.yaml must be rejected.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + sensitive = tmp_path / "abs.json" + sensitive.write_text("{}") + self._write_config(hermes_home, 
[str(sensitive)]) + + mounts = get_credential_file_mounts() + assert mounts == [] + + def test_config_legitimate_file_works(self, tmp_path, monkeypatch): + """Normal files inside HERMES_HOME via config must still mount.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + (hermes_home / "oauth.json").write_text("{}") + self._write_config(hermes_home, ["oauth.json"]) + + mounts = get_credential_file_mounts() + assert len(mounts) == 1 + assert "oauth.json" in mounts[0]["container_path"] diff --git a/tools/credential_files.py b/tools/credential_files.py index 95f068a81..af4d13a4e 100644 --- a/tools/credential_files.py +++ b/tools/credential_files.py @@ -141,11 +141,27 @@ def _load_config_files() -> List[Dict[str, str]]: cfg = yaml.safe_load(f) or {} cred_files = cfg.get("terminal", {}).get("credential_files") if isinstance(cred_files, list): + hermes_home_resolved = hermes_home.resolve() for item in cred_files: if isinstance(item, str) and item.strip(): - host_path = hermes_home / item.strip() + rel = item.strip() + if os.path.isabs(rel): + logger.warning( + "credential_files: rejected absolute config path %r", rel, + ) + continue + host_path = (hermes_home / rel).resolve() + try: + host_path.relative_to(hermes_home_resolved) + except ValueError: + logger.warning( + "credential_files: rejected config path traversal %r " + "(resolves to %s, outside HERMES_HOME %s)", + rel, host_path, hermes_home_resolved, + ) + continue if host_path.is_file(): - container_path = f"/root/.hermes/{item.strip()}" + container_path = f"/root/.hermes/{rel}" result.append({ "host_path": str(host_path), "container_path": container_path, -- 2.43.0 From c94a5fa1b2cbf6074e6feb56622020647987abe5 Mon Sep 17 00:00:00 2001 From: binhnt92 Date: Tue, 31 Mar 2026 12:19:10 -0700 Subject: [PATCH 094/385] fix(cli): use atomic write in save_config_value to prevent config loss on interrupt save_config_value() used bare open(path, 'w') + 
yaml.dump() which truncates the file to zero bytes on open. If the process is interrupted mid-write, config.yaml is left empty. Replace with atomic_yaml_write() (temp file + fsync + os.replace), matching the gateway config write path. Co-authored-by: Hermes Agent --- cli.py | 7 +-- tests/test_cli_save_config_value.py | 80 +++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 tests/test_cli_save_config_value.py diff --git a/cli.py b/cli.py index e5f88e752..1f72207aa 100644 --- a/cli.py +++ b/cli.py @@ -991,9 +991,10 @@ def save_config_value(key_path: str, value: any) -> bool: current = current[key] current[keys[-1]] = value - # Save back - with open(config_path, 'w') as f: - yaml.dump(config, f, default_flow_style=False, sort_keys=False) + # Save back atomically — write to temp file + fsync + os.replace + # so an interrupt never leaves config.yaml truncated or empty. + from utils import atomic_yaml_write + atomic_yaml_write(config_path, config) # Enforce owner-only permissions on config files (contain API keys) try: diff --git a/tests/test_cli_save_config_value.py b/tests/test_cli_save_config_value.py new file mode 100644 index 000000000..7d030c03c --- /dev/null +++ b/tests/test_cli_save_config_value.py @@ -0,0 +1,80 @@ +"""Tests for save_config_value() in cli.py — atomic write behavior.""" + +import os +import yaml +from pathlib import Path +from unittest.mock import patch, MagicMock + +import pytest + + +class TestSaveConfigValueAtomic: + """save_config_value() must use atomic_yaml_write to avoid data loss.""" + + @pytest.fixture + def config_env(self, tmp_path, monkeypatch): + """Isolated config environment with a writable config.yaml.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + config_path = hermes_home / "config.yaml" + config_path.write_text(yaml.dump({ + "model": {"default": "test-model", "provider": "openrouter"}, + "display": {"skin": "default"}, + })) + monkeypatch.setattr("cli._hermes_home", 
hermes_home) + return config_path + + def test_calls_atomic_yaml_write(self, config_env, monkeypatch): + """save_config_value must route through atomic_yaml_write, not bare open().""" + mock_atomic = MagicMock() + monkeypatch.setattr("utils.atomic_yaml_write", mock_atomic) + + from cli import save_config_value + save_config_value("display.skin", "mono") + + mock_atomic.assert_called_once() + written_path, written_data = mock_atomic.call_args[0] + assert Path(written_path) == config_env + assert written_data["display"]["skin"] == "mono" + + def test_preserves_existing_keys(self, config_env): + """Writing a new key must not clobber existing config entries.""" + from cli import save_config_value + save_config_value("agent.max_turns", 50) + + result = yaml.safe_load(config_env.read_text()) + assert result["model"]["default"] == "test-model" + assert result["model"]["provider"] == "openrouter" + assert result["display"]["skin"] == "default" + assert result["agent"]["max_turns"] == 50 + + def test_creates_nested_keys(self, config_env): + """Dot-separated paths create intermediate dicts as needed.""" + from cli import save_config_value + save_config_value("compression.summary_model", "google/gemini-3-flash-preview") + + result = yaml.safe_load(config_env.read_text()) + assert result["compression"]["summary_model"] == "google/gemini-3-flash-preview" + + def test_overwrites_existing_value(self, config_env): + """Updating an existing key replaces the value.""" + from cli import save_config_value + save_config_value("display.skin", "ares") + + result = yaml.safe_load(config_env.read_text()) + assert result["display"]["skin"] == "ares" + + def test_file_not_truncated_on_error(self, config_env, monkeypatch): + """If atomic_yaml_write raises, the original file is untouched.""" + original_content = config_env.read_text() + + def exploding_write(*args, **kwargs): + raise OSError("disk full") + + monkeypatch.setattr("utils.atomic_yaml_write", exploding_write) + + from cli import 
save_config_value + result = save_config_value("display.skin", "broken") + + assert result is False + assert config_env.read_text() == original_content -- 2.43.0 From 655eea2db88e3da31bb7655ffefe291b7abcc24b Mon Sep 17 00:00:00 2001 From: maymuneth Date: Tue, 31 Mar 2026 21:08:06 +0300 Subject: [PATCH 095/385] fix(security): protect .docker, .azure, and .config/gh from read and write --- agent/context_references.py | 2 +- tools/file_operations.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/agent/context_references.py b/agent/context_references.py index 09ba982df..d0985605d 100644 --- a/agent/context_references.py +++ b/agent/context_references.py @@ -17,7 +17,7 @@ REFERENCE_PATTERN = re.compile( r"(?diff|staged)\b|(?Pfile|folder|git|url):(?P\S+))" ) TRAILING_PUNCTUATION = ",.;!?" -_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube") +_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube", ".docker", ".azure") _SENSITIVE_HERMES_DIRS = (Path("skills") / ".hub",) _SENSITIVE_HOME_FILES = ( Path(".ssh") / "authorized_keys", diff --git a/tools/file_operations.py b/tools/file_operations.py index 96bdc2d53..d0e3ad3c8 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -71,6 +71,9 @@ WRITE_DENIED_PREFIXES = [ os.path.join(_HOME, ".kube"), "/etc/sudoers.d", "/etc/systemd", + os.path.join(_HOME, ".docker"), + os.path.join(_HOME, ".azure"), + os.path.join(_HOME, ".config", "gh"), ] ] -- 2.43.0 From d3f1987a051c8592ded99e5654dfd58c394835e8 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 12:48:30 -0700 Subject: [PATCH 096/385] fix(security): add .config/gh to read protection for @file references (#4327) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to PR #4305 — .config/gh was added to the write-deny list but missed from _SENSITIVE_HOME_DIRS, leaving GitHub CLI OAuth tokens exposed via @file:~/.config/gh/hosts.yml 
context injection. --- agent/context_references.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/context_references.py b/agent/context_references.py index d0985605d..8222dc33a 100644 --- a/agent/context_references.py +++ b/agent/context_references.py @@ -17,7 +17,7 @@ REFERENCE_PATTERN = re.compile( r"(?diff|staged)\b|(?Pfile|folder|git|url):(?P\S+))" ) TRAILING_PUNCTUATION = ",.;!?" -_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube", ".docker", ".azure") +_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube", ".docker", ".azure", ".config/gh") _SENSITIVE_HERMES_DIRS = (Path("skills") / ".hub",) _SENSITIVE_HOME_FILES = ( Path(".ssh") / "authorized_keys", -- 2.43.0 From e3f8347be30a068b91662818a70d0c3c42513b96 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 12:53:19 -0700 Subject: [PATCH 097/385] feat(file_tools): harden read_file with size guard, dedup, and device blocking (#4315) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(file_tools): harden read_file with size guard, dedup, and device blocking Three improvements to read_file_tool to reduce wasted context tokens and prevent process hangs: 1. Character-count guard: reads that produce more than 100K characters (≈25-35K tokens across tokenisers) are rejected with an error that tells the model to use offset+limit for a smaller range. The effective cap is min(file_size, 100K) so small files that happen to have long lines aren't over-penalised. Large truncated files also get a hint nudging toward targeted reads. 2. File-read deduplication: when the same (path, offset, limit) is read a second time and the file hasn't been modified (mtime unchanged), return a lightweight stub instead of re-sending the full content. Writes and patches naturally change mtime, so post-edit reads always return fresh content. 
The dedup cache is cleared on context compression — after compression the original read content is summarised away, so the model needs the full content again. 3. Device path blocking: paths like /dev/zero, /dev/random, /dev/stdin etc. are rejected before any I/O to prevent process hangs from infinite-output or blocking-input devices. Tests: 17 new tests covering all three features plus the dedup-reset- on-compression integration. All 52 file-read tests pass (35 existing + 17 new). Full tool suite (2124 tests) passes with 0 failures. * feat: make file_read_max_chars configurable, add docs Add file_read_max_chars to DEFAULT_CONFIG (default 100K). read_file_tool reads this on first call and caches for the process lifetime. Users on large-context models can raise it; users on small local models can lower it. Also adds a 'File Read Safety' section to the configuration docs explaining the char limit, dedup behavior, and example values. --- hermes_cli/config.py | 5 + run_agent.py | 9 + tests/tools/test_file_read_guards.py | 378 +++++++++++++++++++++++ tools/file_tools.py | 203 +++++++++++- website/docs/user-guide/configuration.md | 20 ++ 5 files changed, 605 insertions(+), 10 deletions(-) create mode 100644 tests/tools/test_file_read_guards.py diff --git a/hermes_cli/config.py b/hermes_cli/config.py index e62a4cdc1..e5cf73d3f 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -256,6 +256,11 @@ DEFAULT_CONFIG = { "enabled": True, "max_snapshots": 50, # Max checkpoints to keep per directory }, + + # Maximum characters returned by a single read_file call. Reads that + # exceed this are rejected with guidance to use offset+limit. + # 100K chars ≈ 25–35K tokens across typical tokenisers. 
+ "file_read_max_chars": 100_000, "compression": { "enabled": True, diff --git a/run_agent.py b/run_agent.py index 670f21007..5ed40500b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5361,6 +5361,15 @@ class AIAgent: if _post_progress < 0.85: self._context_pressure_warned = False + # Clear the file-read dedup cache. After compression the original + # read content is summarised away — if the model re-reads the same + # file it needs the full content, not a "file unchanged" stub. + try: + from tools.file_tools import reset_file_dedup + reset_file_dedup(task_id) + except Exception: + pass + return compressed, new_system_prompt def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: diff --git a/tests/tools/test_file_read_guards.py b/tests/tools/test_file_read_guards.py new file mode 100644 index 000000000..b4a688aa6 --- /dev/null +++ b/tests/tools/test_file_read_guards.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +""" +Tests for read_file_tool safety guards: device-path blocking, +character-count limits, file deduplication, and dedup reset on +context compression. 
+ +Run with: python -m pytest tests/tools/test_file_read_guards.py -v +""" + +import json +import os +import tempfile +import time +import unittest +from unittest.mock import patch, MagicMock + +from tools.file_tools import ( + read_file_tool, + clear_read_tracker, + reset_file_dedup, + _is_blocked_device, + _get_max_read_chars, + _DEFAULT_MAX_READ_CHARS, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +class _FakeReadResult: + """Minimal stand-in for FileOperations.read_file return value.""" + def __init__(self, content="line1\nline2\n", total_lines=2, file_size=100): + self.content = content + self._total_lines = total_lines + self._file_size = file_size + + def to_dict(self): + return { + "content": self.content, + "total_lines": self._total_lines, + "file_size": self._file_size, + } + + +def _make_fake_ops(content="hello\n", total_lines=1, file_size=6): + fake = MagicMock() + fake.read_file = lambda path, offset=1, limit=500: _FakeReadResult( + content=content, total_lines=total_lines, file_size=file_size, + ) + return fake + + +# --------------------------------------------------------------------------- +# Device path blocking +# --------------------------------------------------------------------------- + +class TestDevicePathBlocking(unittest.TestCase): + """Paths like /dev/zero should be rejected before any I/O.""" + + def test_blocked_device_detection(self): + for dev in ("/dev/zero", "/dev/random", "/dev/urandom", "/dev/stdin", + "/dev/tty", "/dev/console", "/dev/stdout", "/dev/stderr", + "/dev/fd/0", "/dev/fd/1", "/dev/fd/2"): + self.assertTrue(_is_blocked_device(dev), f"{dev} should be blocked") + + def test_safe_device_not_blocked(self): + self.assertFalse(_is_blocked_device("/dev/null")) + self.assertFalse(_is_blocked_device("/dev/sda1")) + + def test_proc_fd_blocked(self): + 
self.assertTrue(_is_blocked_device("/proc/self/fd/0")) + self.assertTrue(_is_blocked_device("/proc/12345/fd/2")) + + def test_proc_fd_other_not_blocked(self): + self.assertFalse(_is_blocked_device("/proc/self/fd/3")) + self.assertFalse(_is_blocked_device("/proc/self/maps")) + + def test_normal_files_not_blocked(self): + self.assertFalse(_is_blocked_device("/tmp/test.py")) + self.assertFalse(_is_blocked_device("/home/user/.bashrc")) + + def test_read_file_tool_rejects_device(self): + """read_file_tool returns an error without any file I/O.""" + result = json.loads(read_file_tool("/dev/zero", task_id="dev_test")) + self.assertIn("error", result) + self.assertIn("device file", result["error"]) + + +# --------------------------------------------------------------------------- +# Character-count limits +# --------------------------------------------------------------------------- + +class TestCharacterCountGuard(unittest.TestCase): + """Large reads should be rejected with guidance to use offset/limit.""" + + def setUp(self): + clear_read_tracker() + + def tearDown(self): + clear_read_tracker() + + @patch("tools.file_tools._get_file_ops") + @patch("tools.file_tools._get_max_read_chars", return_value=_DEFAULT_MAX_READ_CHARS) + def test_oversized_read_rejected(self, _mock_limit, mock_ops): + """A read that returns >max chars is rejected.""" + big_content = "x" * (_DEFAULT_MAX_READ_CHARS + 1) + mock_ops.return_value = _make_fake_ops( + content=big_content, + total_lines=5000, + file_size=len(big_content) + 100, # bigger than content + ) + result = json.loads(read_file_tool("/tmp/huge.txt", task_id="big")) + self.assertIn("error", result) + self.assertIn("safety limit", result["error"]) + self.assertIn("offset and limit", result["error"]) + self.assertIn("total_lines", result) + + @patch("tools.file_tools._get_file_ops") + def test_small_read_not_rejected(self, mock_ops): + """Normal-sized reads pass through fine.""" + mock_ops.return_value = 
_make_fake_ops(content="short\n", file_size=6) + result = json.loads(read_file_tool("/tmp/small.txt", task_id="small")) + self.assertNotIn("error", result) + self.assertIn("content", result) + + @patch("tools.file_tools._get_file_ops") + @patch("tools.file_tools._get_max_read_chars", return_value=_DEFAULT_MAX_READ_CHARS) + def test_content_under_limit_passes(self, _mock_limit, mock_ops): + """Content just under the limit should pass through fine.""" + mock_ops.return_value = _make_fake_ops( + content="y" * (_DEFAULT_MAX_READ_CHARS - 1), + file_size=_DEFAULT_MAX_READ_CHARS - 1, + ) + result = json.loads(read_file_tool("/tmp/justunder.txt", task_id="under")) + self.assertNotIn("error", result) + self.assertIn("content", result) + + +# --------------------------------------------------------------------------- +# File deduplication +# --------------------------------------------------------------------------- + +class TestFileDedup(unittest.TestCase): + """Re-reading an unchanged file should return a lightweight stub.""" + + def setUp(self): + clear_read_tracker() + self._tmpdir = tempfile.mkdtemp() + self._tmpfile = os.path.join(self._tmpdir, "dedup_test.txt") + with open(self._tmpfile, "w") as f: + f.write("line one\nline two\n") + + def tearDown(self): + clear_read_tracker() + try: + os.unlink(self._tmpfile) + os.rmdir(self._tmpdir) + except OSError: + pass + + @patch("tools.file_tools._get_file_ops") + def test_second_read_returns_dedup_stub(self, mock_ops): + """Second read of same file+range returns dedup stub.""" + mock_ops.return_value = _make_fake_ops( + content="line one\nline two\n", file_size=20, + ) + # First read — full content + r1 = json.loads(read_file_tool(self._tmpfile, task_id="dup")) + self.assertNotIn("dedup", r1) + + # Second read — should get dedup stub + r2 = json.loads(read_file_tool(self._tmpfile, task_id="dup")) + self.assertTrue(r2.get("dedup"), "Second read should return dedup stub") + self.assertIn("unchanged", r2.get("content", "")) + + 
@patch("tools.file_tools._get_file_ops") + def test_modified_file_not_deduped(self, mock_ops): + """After the file is modified, dedup returns full content.""" + mock_ops.return_value = _make_fake_ops( + content="line one\nline two\n", file_size=20, + ) + read_file_tool(self._tmpfile, task_id="mod") + + # Modify the file — ensure mtime changes + time.sleep(0.05) + with open(self._tmpfile, "w") as f: + f.write("changed content\n") + + r2 = json.loads(read_file_tool(self._tmpfile, task_id="mod")) + self.assertNotEqual(r2.get("dedup"), True, "Modified file should not dedup") + + @patch("tools.file_tools._get_file_ops") + def test_different_range_not_deduped(self, mock_ops): + """Same file but different offset/limit should not dedup.""" + mock_ops.return_value = _make_fake_ops( + content="line one\nline two\n", file_size=20, + ) + read_file_tool(self._tmpfile, offset=1, limit=500, task_id="rng") + + r2 = json.loads(read_file_tool( + self._tmpfile, offset=10, limit=500, task_id="rng", + )) + self.assertNotEqual(r2.get("dedup"), True) + + @patch("tools.file_tools._get_file_ops") + def test_different_task_not_deduped(self, mock_ops): + """Different task_ids have separate dedup caches.""" + mock_ops.return_value = _make_fake_ops( + content="line one\nline two\n", file_size=20, + ) + read_file_tool(self._tmpfile, task_id="task_a") + + r2 = json.loads(read_file_tool(self._tmpfile, task_id="task_b")) + self.assertNotEqual(r2.get("dedup"), True) + + +# --------------------------------------------------------------------------- +# Dedup reset on compression +# --------------------------------------------------------------------------- + +class TestDedupResetOnCompression(unittest.TestCase): + """reset_file_dedup should clear the dedup cache so post-compression + reads return full content.""" + + def setUp(self): + clear_read_tracker() + self._tmpdir = tempfile.mkdtemp() + self._tmpfile = os.path.join(self._tmpdir, "compress_test.txt") + with open(self._tmpfile, "w") as f: + 
f.write("original content\n") + + def tearDown(self): + clear_read_tracker() + try: + os.unlink(self._tmpfile) + os.rmdir(self._tmpdir) + except OSError: + pass + + @patch("tools.file_tools._get_file_ops") + def test_reset_clears_dedup(self, mock_ops): + """After reset_file_dedup, the same read returns full content.""" + mock_ops.return_value = _make_fake_ops( + content="original content\n", file_size=18, + ) + # First read — populates dedup cache + read_file_tool(self._tmpfile, task_id="comp") + + # Verify dedup works before reset + r_dedup = json.loads(read_file_tool(self._tmpfile, task_id="comp")) + self.assertTrue(r_dedup.get("dedup"), "Should dedup before reset") + + # Simulate compression + reset_file_dedup("comp") + + # Read again — should get full content + r_post = json.loads(read_file_tool(self._tmpfile, task_id="comp")) + self.assertNotEqual(r_post.get("dedup"), True, + "Post-compression read should return full content") + + @patch("tools.file_tools._get_file_ops") + def test_reset_all_tasks(self, mock_ops): + """reset_file_dedup(None) clears all tasks.""" + mock_ops.return_value = _make_fake_ops( + content="original content\n", file_size=18, + ) + read_file_tool(self._tmpfile, task_id="t1") + read_file_tool(self._tmpfile, task_id="t2") + + reset_file_dedup() # no task_id — clear all + + r1 = json.loads(read_file_tool(self._tmpfile, task_id="t1")) + r2 = json.loads(read_file_tool(self._tmpfile, task_id="t2")) + self.assertNotEqual(r1.get("dedup"), True) + self.assertNotEqual(r2.get("dedup"), True) + + @patch("tools.file_tools._get_file_ops") + def test_reset_preserves_loop_detection(self, mock_ops): + """reset_file_dedup does NOT affect the consecutive-read counter.""" + mock_ops.return_value = _make_fake_ops( + content="original content\n", file_size=18, + ) + # Build up consecutive count (read 1 and 2) + read_file_tool(self._tmpfile, task_id="loop") + # 2nd read is deduped — doesn't increment consecutive counter + read_file_tool(self._tmpfile, 
task_id="loop") + + reset_file_dedup("loop") + + # 3rd read — counter should still be at 2 from before reset + # (dedup was hit for read 2, but consecutive counter was 1 for that) + # After reset, this read goes through full path, incrementing to 2 + r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop")) + # Should NOT be blocked or warned — counter restarted since dedup + # intercepted reads before they reached the counter + self.assertNotIn("error", r3) + + +# --------------------------------------------------------------------------- +# Large-file hint +# --------------------------------------------------------------------------- + +class TestLargeFileHint(unittest.TestCase): + """Large truncated files should include a hint about targeted reads.""" + + def setUp(self): + clear_read_tracker() + + def tearDown(self): + clear_read_tracker() + + @patch("tools.file_tools._get_file_ops") + def test_large_truncated_file_gets_hint(self, mock_ops): + content = "line\n" * 400 # 2000 chars, small enough to pass char guard + fake = _make_fake_ops(content=content, total_lines=10000, file_size=600_000) + # Make to_dict return truncated=True + orig_read = fake.read_file + def patched_read(path, offset=1, limit=500): + r = orig_read(path, offset, limit) + orig_to_dict = r.to_dict + def new_to_dict(): + d = orig_to_dict() + d["truncated"] = True + return d + r.to_dict = new_to_dict + return r + fake.read_file = patched_read + mock_ops.return_value = fake + + result = json.loads(read_file_tool("/tmp/bigfile.log", task_id="hint")) + self.assertIn("_hint", result) + self.assertIn("section you need", result["_hint"]) + + +# --------------------------------------------------------------------------- +# Config override +# --------------------------------------------------------------------------- + +class TestConfigOverride(unittest.TestCase): + """file_read_max_chars in config.yaml should control the char guard.""" + + def setUp(self): + clear_read_tracker() + # Reset the 
cached value so each test gets a fresh lookup + import tools.file_tools as _ft + _ft._max_read_chars_cached = None + + def tearDown(self): + clear_read_tracker() + import tools.file_tools as _ft + _ft._max_read_chars_cached = None + + @patch("tools.file_tools._get_file_ops") + @patch("hermes_cli.config.load_config", return_value={"file_read_max_chars": 50}) + def test_custom_config_lowers_limit(self, _mock_cfg, mock_ops): + """A config value of 50 should reject reads over 50 chars.""" + mock_ops.return_value = _make_fake_ops(content="x" * 60, file_size=60) + result = json.loads(read_file_tool("/tmp/cfgtest.txt", task_id="cfg1")) + self.assertIn("error", result) + self.assertIn("safety limit", result["error"]) + self.assertIn("50", result["error"]) # should show the configured limit + + @patch("tools.file_tools._get_file_ops") + @patch("hermes_cli.config.load_config", return_value={"file_read_max_chars": 500_000}) + def test_custom_config_raises_limit(self, _mock_cfg, mock_ops): + """A config value of 500K should allow reads up to 500K chars.""" + # 200K chars would be rejected at the default 100K but passes at 500K + mock_ops.return_value = _make_fake_ops( + content="y" * 200_000, file_size=200_000, + ) + result = json.loads(read_file_tool("/tmp/cfgtest2.txt", task_id="cfg2")) + self.assertNotIn("error", result) + self.assertIn("content", result) + + +if __name__ == "__main__": + unittest.main() diff --git a/tools/file_tools.py b/tools/file_tools.py index 6226e7657..1245e68de 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -15,6 +15,80 @@ logger = logging.getLogger(__name__) _EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS} +# --------------------------------------------------------------------------- +# Read-size guard: cap the character count returned to the model. +# We're model-agnostic so we can't count tokens; characters are a safe proxy. +# 100K chars ≈ 25–35K tokens across typical tokenisers. 
Files larger than +# this in a single read are a context-window hazard — the model should use +# offset+limit to read the relevant section. +# +# Configurable via config.yaml: file_read_max_chars: 200000 +# --------------------------------------------------------------------------- +_DEFAULT_MAX_READ_CHARS = 100_000 +_max_read_chars_cached: int | None = None + + +def _get_max_read_chars() -> int: + """Return the configured max characters per file read. + + Reads ``file_read_max_chars`` from config.yaml on first call, caches + the result for the lifetime of the process. Falls back to the + built-in default if the config is missing or invalid. + """ + global _max_read_chars_cached + if _max_read_chars_cached is not None: + return _max_read_chars_cached + try: + from hermes_cli.config import load_config + cfg = load_config() + val = cfg.get("file_read_max_chars") + if isinstance(val, (int, float)) and val > 0: + _max_read_chars_cached = int(val) + return _max_read_chars_cached + except Exception: + pass + _max_read_chars_cached = _DEFAULT_MAX_READ_CHARS + return _max_read_chars_cached + +# If the total file size exceeds this AND the caller didn't specify a narrow +# range (limit <= 200), we include a hint encouraging targeted reads. +_LARGE_FILE_HINT_BYTES = 512_000 # 512 KB + +# --------------------------------------------------------------------------- +# Device path blocklist — reading these hangs the process (infinite output +# or blocking on input). Checked by path only (no I/O). 
+# --------------------------------------------------------------------------- +_BLOCKED_DEVICE_PATHS = frozenset({ + # Infinite output — never reach EOF + "/dev/zero", "/dev/random", "/dev/urandom", "/dev/full", + # Blocks waiting for input + "/dev/stdin", "/dev/tty", "/dev/console", + # Nonsensical to read + "/dev/stdout", "/dev/stderr", + # fd aliases + "/dev/fd/0", "/dev/fd/1", "/dev/fd/2", +}) + + +def _is_blocked_device(filepath: str) -> bool: + """Return True if the path would hang the process (infinite output or blocking input). + + Uses the *literal* path — no symlink resolution — because the model + specifies paths directly and realpath follows symlinks all the way + through (e.g. /dev/stdin → /proc/self/fd/0 → /dev/pts/0), defeating + the check. + """ + normalized = os.path.expanduser(filepath) + if normalized in _BLOCKED_DEVICE_PATHS: + return True + # /proc/self/fd/0-2 and /proc//fd/0-2 are Linux aliases for stdio + if normalized.startswith("/proc/") and normalized.endswith( + ("/fd/0", "/fd/1", "/fd/2") + ): + return True + return False + + # Paths that file tools should refuse to write to without going through the # terminal tool's approval system. These match prefixes after os.path.realpath. _SENSITIVE_PATH_PREFIXES = ("/etc/", "/boot/", "/usr/lib/systemd/") @@ -53,11 +127,15 @@ def _is_expected_write_exception(exc: Exception) -> bool: _file_ops_lock = threading.Lock() _file_ops_cache: dict = {} -# Track files read per task to detect re-read loops after context compression. +# Track files read per task to detect re-read loops and deduplicate reads. # Per task_id we store: # "last_key": the key of the most recent read/search call (or None) # "consecutive": how many times that exact call has been repeated in a row # "read_history": set of (path, offset, limit) tuples for get_read_files_summary +# "dedup": dict mapping (resolved_path, offset, limit) → mtime float +# Used to skip re-reads of unchanged files. 
Reset on +# context compression (the original content is summarised +# away so the model needs the full content again). _read_tracker_lock = threading.Lock() _read_tracker: dict = {} @@ -195,8 +273,19 @@ def clear_file_ops_cache(task_id: str = None): def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str: """Read a file with pagination and line numbers.""" try: - # Security: block direct reads of internal Hermes cache/index files - # to prevent prompt injection via catalog or hub metadata files. + # ── Device path guard ───────────────────────────────────────── + # Block paths that would hang the process (infinite output, + # blocking on input). Pure path check — no I/O. + if _is_blocked_device(path): + return json.dumps({ + "error": ( + f"Cannot read '{path}': this is a device file that would " + "block or produce infinite output." + ), + }) + + # ── Hermes internal path guard ──────────────────────────────── + # Prevent prompt injection via catalog or hub metadata files. import pathlib as _pathlib from hermes_constants import get_hermes_home as _get_hh _resolved = _pathlib.Path(path).expanduser().resolve() @@ -217,20 +306,83 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = }) except ValueError: pass + + # ── Dedup check ─────────────────────────────────────────────── + # If we already read this exact (path, offset, limit) and the + # file hasn't been modified since, return a lightweight stub + # instead of re-sending the same content. Saves context tokens. 
+ resolved_str = str(_resolved) + dedup_key = (resolved_str, offset, limit) + with _read_tracker_lock: + task_data = _read_tracker.setdefault(task_id, { + "last_key": None, "consecutive": 0, + "read_history": set(), "dedup": {}, + }) + cached_mtime = task_data.get("dedup", {}).get(dedup_key) + + if cached_mtime is not None: + try: + current_mtime = os.path.getmtime(resolved_str) + if current_mtime == cached_mtime: + return json.dumps({ + "content": ( + "File unchanged since last read. The content from " + "the earlier read_file result in this conversation is " + "still current — refer to that instead of re-reading." + ), + "path": path, + "dedup": True, + }, ensure_ascii=False) + except OSError: + pass # stat failed — fall through to full read + + # ── Perform the read ────────────────────────────────────────── file_ops = _get_file_ops(task_id) result = file_ops.read_file(path, offset, limit) if result.content: result.content = redact_sensitive_text(result.content) result_dict = result.to_dict() - # Track reads to detect *consecutive* re-read loops. - # The counter resets whenever any other tool is called in between, - # so only truly back-to-back identical reads trigger warnings/blocks. + # ── Character-count guard ───────────────────────────────────── + # We're model-agnostic so we can't count tokens; characters are + # the best proxy we have. If the read produced an unreasonable + # amount of content, reject it and tell the model to narrow down. + # Note: we check the formatted content (with line-number prefixes), + # not the raw file size, because that's what actually enters context. + content_len = len(result.content or "") + file_size = result_dict.get("file_size", 0) + max_chars = _get_max_read_chars() + if content_len > max_chars: + total_lines = result_dict.get("total_lines", "unknown") + return json.dumps({ + "error": ( + f"Read produced {content_len:,} characters which exceeds " + f"the safety limit ({max_chars:,} chars). 
" + "Use offset and limit to read a smaller range. " + f"The file has {total_lines} lines total." + ), + "path": path, + "total_lines": total_lines, + "file_size": file_size, + }, ensure_ascii=False) + + # Large-file hint: if the file is big and the caller didn't ask + # for a narrow window, nudge toward targeted reads. + if (file_size and file_size > _LARGE_FILE_HINT_BYTES + and limit > 200 + and result_dict.get("truncated")): + result_dict.setdefault("_hint", ( + f"This file is large ({file_size:,} bytes). " + "Consider reading only the section you need with offset and limit " + "to keep context usage efficient." + )) + + # ── Track for consecutive-loop detection ────────────────────── read_key = ("read", path, offset, limit) with _read_tracker_lock: - task_data = _read_tracker.setdefault(task_id, { - "last_key": None, "consecutive": 0, "read_history": set(), - }) + # Ensure "dedup" key exists (backward compat with old tracker state) + if "dedup" not in task_data: + task_data["dedup"] = {} task_data["read_history"].add((path, offset, limit)) if task_data["last_key"] == read_key: task_data["consecutive"] += 1 @@ -239,6 +391,15 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = task_data["consecutive"] = 1 count = task_data["consecutive"] + # Store dedup entry (mtime at read time). + # Writes/patches will naturally change mtime, so subsequent + # dedup checks after edits will see a different mtime and + # return the full content — no special handling needed. + try: + task_data["dedup"][dedup_key] = os.path.getmtime(resolved_str) + except OSError: + pass # Can't stat — skip dedup for this entry + if count >= 4: # Hard block: stop returning content to break the loop return json.dumps({ @@ -296,6 +457,28 @@ def clear_read_tracker(task_id: str = None): _read_tracker.clear() +def reset_file_dedup(task_id: str = None): + """Clear the deduplication cache for file reads. 
+ + Called after context compression — the original read content has been + summarised away, so the model needs the full content if it reads the + same file again. Without this, reads after compression would return + a "file unchanged" stub pointing at content that no longer exists in + context. + + Call with a task_id to clear just that task, or without to clear all. + """ + with _read_tracker_lock: + if task_id: + task_data = _read_tracker.get(task_id) + if task_data and "dedup" in task_data: + task_data["dedup"].clear() + else: + for task_data in _read_tracker.values(): + if "dedup" in task_data: + task_data["dedup"].clear() + + def notify_other_tool_call(task_id: str = "default"): """Reset consecutive read/search counter for a task. @@ -466,7 +649,7 @@ def _check_file_reqs(): READ_FILE_SCHEMA = { "name": "read_file", - "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. NOTE: Cannot read images or binary files — use vision_analyze for images.", + "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. Reads exceeding ~100K characters are rejected; use offset and limit to read specific sections of large files. NOTE: Cannot read images or binary files — use vision_analyze for images.", "parameters": { "type": "object", "properties": { diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 107e82395..d6ef5b05b 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -360,6 +360,26 @@ memory: user_char_limit: 1375 # ~500 tokens ``` +## File Read Safety + +Controls how much content a single `read_file` call can return. 
Reads that exceed the limit are rejected with an error telling the agent to use `offset` and `limit` for a smaller range. This prevents a single read of a minified JS bundle or large data file from flooding the context window. + +```yaml +file_read_max_chars: 100000 # default — ~25-35K tokens +``` + +Raise it if you're on a model with a large context window and frequently read big files. Lower it for small-context models to keep reads efficient: + +```yaml +# Large context model (200K+) +file_read_max_chars: 200000 + +# Small local model (16K context) +file_read_max_chars: 30000 +``` + +The agent also deduplicates file reads automatically — if the same file region is read twice and the file hasn't changed, a lightweight stub is returned instead of re-sending the content. This resets on context compression so the agent can re-read files after their content is summarized away. + ## Git Worktree Isolation Enable isolated git worktrees for running multiple agents in parallel on the same repo: -- 2.43.0 From 1b62ad9de71bd769e7a28276979188c05d936e64 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 12:54:22 -0700 Subject: [PATCH 098/385] fix: root-level provider in config.yaml no longer overrides model.provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit load_cli_config() had a priority inversion: a stale root-level 'provider' key in config.yaml would OVERRIDE the canonical 'model.provider' set by 'hermes model'. The gateway reads model.provider directly from YAML and worked correctly, but 'hermes chat -q' and the interactive CLI went through the merge logic and picked up the stale root-level key. Fix: root-level provider/base_url are now only used as a fallback when model.provider/model.base_url is not set (never as an override). 
Also added _normalize_root_model_keys() to config.py load_config() and save_config() — migrates root-level provider/base_url into the model section and removes the root-level keys permanently. Reported by (≧▽≦) in Discord: opencode-go provider persisted as a root-level key and overrode the correct model.provider=openrouter, causing 401 errors. --- cli.py | 25 +++++++------ hermes_cli/config.py | 34 ++++++++++++++++- tests/test_cli_init.py | 85 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 13 deletions(-) diff --git a/cli.py b/cli.py index 1f72207aa..2f6214989 100644 --- a/cli.py +++ b/cli.py @@ -263,17 +263,20 @@ def load_cli_config() -> Dict[str, Any]: # Old format: model is a dict with default/base_url defaults["model"].update(file_config["model"]) - # Root-level provider and base_url override model config. - # Users may write: - # model: kimi-k2.5:cloud - # provider: custom - # base_url: http://localhost:11434/v1 - # These root-level keys must be merged into defaults["model"] so - # they are picked up by CLI provider resolution. - if "provider" in file_config and file_config["provider"]: - defaults["model"]["provider"] = file_config["provider"] - if "base_url" in file_config and file_config["base_url"]: - defaults["model"]["base_url"] = file_config["base_url"] + # Legacy root-level provider/base_url fallback. + # Some users (or old code) put provider: / base_url: at the + # config root instead of inside the model: section. These are + # only used as a FALLBACK when model.provider / model.base_url + # is not already set — never as an override. The canonical + # location is model.provider (written by `hermes model`). 
+ if not defaults["model"].get("provider"): + root_provider = file_config.get("provider") + if root_provider: + defaults["model"]["provider"] = root_provider + if not defaults["model"].get("base_url"): + root_base_url = file_config.get("base_url") + if root_base_url: + defaults["model"]["base_url"] = root_base_url # Deep merge file_config into defaults. # First: merge keys that exist in both (deep-merge dicts, overwrite scalars) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index e5cf73d3f..c2a8774ea 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1373,6 +1373,36 @@ def _expand_env_vars(obj): return obj +def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]: + """Move stale root-level provider/base_url into model section. + + Some users (or older code) placed ``provider:`` and ``base_url:`` at the + config root instead of inside ``model:``. These root-level keys are only + used as a fallback when the corresponding ``model.*`` key is empty — they + never override an existing ``model.provider`` or ``model.base_url``. + After migration the root-level keys are removed so they can't cause + confusion on subsequent loads. 
+ """ + # Only act if there are root-level keys to migrate + has_root = any(config.get(k) for k in ("provider", "base_url")) + if not has_root: + return config + + config = dict(config) + model = config.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + config["model"] = model + + for key in ("provider", "base_url"): + root_val = config.get(key) + if root_val and not model.get(key): + model[key] = root_val + config.pop(key, None) + + return config + + def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]: """Normalize legacy root-level max_turns into agent.max_turns.""" config = dict(config) @@ -1414,7 +1444,7 @@ def load_config() -> Dict[str, Any]: except Exception as e: print(f"Warning: Failed to load config: {e}") - return _expand_env_vars(_normalize_max_turns_config(config)) + return _expand_env_vars(_normalize_root_model_keys(_normalize_max_turns_config(config))) _SECURITY_COMMENT = """ @@ -1521,7 +1551,7 @@ def save_config(config: Dict[str, Any]): ensure_hermes_home() config_path = get_config_path() - normalized = _normalize_max_turns_config(config) + normalized = _normalize_root_model_keys(_normalize_max_turns_config(config)) # Build optional commented-out sections for features that are off by # default or only relevant when explicitly configured. diff --git a/tests/test_cli_init.py b/tests/test_cli_init.py index b5598aed1..9e0409690 100644 --- a/tests/test_cli_init.py +++ b/tests/test_cli_init.py @@ -192,6 +192,91 @@ class TestHistoryDisplay: assert "A" * 250 + "..." 
not in output +class TestRootLevelProviderOverride: + """Root-level provider/base_url in config.yaml must NOT override model.provider.""" + + def test_model_provider_wins_over_root_provider(self, tmp_path, monkeypatch): + """model.provider takes priority — root-level provider is only a fallback.""" + import yaml + + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + config_path = hermes_home / "config.yaml" + config_path.write_text(yaml.safe_dump({ + "provider": "opencode-go", # stale root-level key + "model": { + "default": "google/gemini-3-flash-preview", + "provider": "openrouter", # correct canonical key + }, + })) + + import cli + monkeypatch.setattr(cli, "_hermes_home", hermes_home) + cfg = cli.load_cli_config() + + assert cfg["model"]["provider"] == "openrouter" + + def test_root_provider_ignored_when_default_model_provider_exists(self, tmp_path, monkeypatch): + """Even when model.provider is the default 'auto', root-level provider is ignored.""" + import yaml + + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + config_path = hermes_home / "config.yaml" + config_path.write_text(yaml.safe_dump({ + "provider": "opencode-go", # stale root key + "model": { + "default": "google/gemini-3-flash-preview", + # no explicit model.provider — defaults provide "auto" + }, + })) + + import cli + monkeypatch.setattr(cli, "_hermes_home", hermes_home) + cfg = cli.load_cli_config() + + # Root-level "opencode-go" must NOT leak through + assert cfg["model"]["provider"] != "opencode-go" + + def test_normalize_root_model_keys_moves_to_model(self): + """_normalize_root_model_keys migrates root keys into model section.""" + from hermes_cli.config import _normalize_root_model_keys + + config = { + "provider": "opencode-go", + "base_url": "https://example.com/v1", + "model": { + "default": "some-model", + }, + } + result = _normalize_root_model_keys(config) 
+ # Root keys removed + assert "provider" not in result + assert "base_url" not in result + # Migrated into model section + assert result["model"]["provider"] == "opencode-go" + assert result["model"]["base_url"] == "https://example.com/v1" + + def test_normalize_root_model_keys_does_not_override_existing(self): + """Existing model.provider is never overridden by root-level key.""" + from hermes_cli.config import _normalize_root_model_keys + + config = { + "provider": "stale-provider", + "model": { + "default": "some-model", + "provider": "correct-provider", + }, + } + result = _normalize_root_model_keys(config) + assert result["model"]["provider"] == "correct-provider" + assert "provider" not in result # root key still cleaned up + + class TestProviderResolution: def test_api_key_is_string_or_none(self): cli = _make_cli() -- 2.43.0 From f5cc597afced7c3ad661ee576f41ebf5e2eb3d19 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 13:38:22 -0700 Subject: [PATCH 099/385] fix: add CAMOFOX_PORT=9377 to Docker commands for camofox-browser (#4340) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The camofox-browser image defaults to port 3000 internally, not 9377. Without -e CAMOFOX_PORT=9377, the -p 9377:9377 mapping silently fails because nothing listens on 9377 inside the container. E2E verified: -p 9377:9377 alone → connection reset, -p 9377:9377 -e CAMOFOX_PORT=9377 → healthy and functional. 
--- hermes_cli/tools_config.py | 4 ++-- tools/browser_camofox.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 8b443d5dc..2150420f1 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -364,10 +364,10 @@ def _run_post_setup(post_setup_key: str): _print_info(" Start the Camofox server:") _print_info(" npx @askjo/camoufox-browser") _print_info(" First run downloads the Camoufox engine (~300MB)") - _print_info(" Or use Docker: docker run -p 9377:9377 jo-inc/camofox-browser") + _print_info(" Or use Docker: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser") elif not shutil.which("npm"): _print_warning(" Node.js not found. Install Camofox via Docker:") - _print_info(" docker run -p 9377:9377 jo-inc/camofox-browser") + _print_info(" docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser") elif post_setup_key == "rl_training": try: diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py index b1925d2c6..9b11ef0d0 100644 --- a/tools/browser_camofox.py +++ b/tools/browser_camofox.py @@ -15,7 +15,7 @@ Setup:: npm install && npm start # downloads Camoufox (~300MB) on first run # Option 2: Docker - docker run -p 9377:9377 jo-inc/camofox-browser + docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``. """ @@ -184,7 +184,7 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) -> str: "success": False, "error": f"Cannot connect to Camofox at {get_camofox_url()}. " "Is the server running? 
Start with: npm start (in camofox-browser dir) " - "or: docker run -p 9377:9377 jo-inc/camofox-browser", + "or: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser", }) except Exception as e: return json.dumps({"success": False, "error": str(e)}) -- 2.43.0 From f04986029c55bb570f78a1051ea18f8d1619e2dd Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 14:49:00 -0700 Subject: [PATCH 100/385] feat(file_tools): detect stale files on write and patch (#4345) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track file mtime when read_file is called. When write_file or patch subsequently targets the same file, compare the current mtime against the recorded one. If they differ (external edit, concurrent agent, user change), include a _warning in the result advising the agent to re-read. The write still proceeds — this is a soft signal, not a hard block. Key design points: - Per-task isolation: task A's reads don't affect task B's writes. - Files never read produce no warning (not enforcing read-before-write). - mtime naturally updates after the agent's own writes, so the warning only fires on external changes, not the agent's own edits. - V4A multi-file patches check all target paths. Tests: 10 new tests covering write staleness, patch staleness, never-read files, cross-task isolation, and the helper function. --- tests/tools/test_file_staleness.py | 241 +++++++++++++++++++++++++++++ tools/file_tools.py | 63 +++++++- 2 files changed, 297 insertions(+), 7 deletions(-) create mode 100644 tests/tools/test_file_staleness.py diff --git a/tests/tools/test_file_staleness.py b/tests/tools/test_file_staleness.py new file mode 100644 index 000000000..46e7aac9f --- /dev/null +++ b/tests/tools/test_file_staleness.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +""" +Tests for file staleness detection in write_file and patch. 
+ +When a file is modified externally between the agent's read and write, +the write should include a warning so the agent can re-read and verify. + +Run with: python -m pytest tests/tools/test_file_staleness.py -v +""" + +import json +import os +import tempfile +import time +import unittest +from unittest.mock import patch, MagicMock + +from tools.file_tools import ( + read_file_tool, + write_file_tool, + patch_tool, + clear_read_tracker, + _check_file_staleness, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +class _FakeReadResult: + def __init__(self, content="line1\nline2\n", total_lines=2, file_size=100): + self.content = content + self._total_lines = total_lines + self._file_size = file_size + + def to_dict(self): + return { + "content": self.content, + "total_lines": self._total_lines, + "file_size": self._file_size, + } + + +class _FakeWriteResult: + def __init__(self): + self.bytes_written = 10 + + def to_dict(self): + return {"bytes_written": self.bytes_written} + + +class _FakePatchResult: + def __init__(self): + self.success = True + + def to_dict(self): + return {"success": True, "diff": "--- a\n+++ b\n@@ ...\n"} + + +def _make_fake_ops(read_content="hello\n", file_size=6): + fake = MagicMock() + fake.read_file = lambda path, offset=1, limit=500: _FakeReadResult( + content=read_content, total_lines=1, file_size=file_size, + ) + fake.write_file = lambda path, content: _FakeWriteResult() + fake.patch_replace = lambda path, old, new, replace_all=False: _FakePatchResult() + return fake + + +# --------------------------------------------------------------------------- +# Core staleness check +# --------------------------------------------------------------------------- + +class TestStalenessCheck(unittest.TestCase): + + def setUp(self): + clear_read_tracker() + self._tmpdir = tempfile.mkdtemp() + self._tmpfile = 
os.path.join(self._tmpdir, "stale_test.txt") + with open(self._tmpfile, "w") as f: + f.write("original content\n") + + def tearDown(self): + clear_read_tracker() + try: + os.unlink(self._tmpfile) + os.rmdir(self._tmpdir) + except OSError: + pass + + @patch("tools.file_tools._get_file_ops") + def test_no_warning_when_file_unchanged(self, mock_ops): + """Read then write with no external modification — no warning.""" + mock_ops.return_value = _make_fake_ops("original content\n", 18) + read_file_tool(self._tmpfile, task_id="t1") + + result = json.loads(write_file_tool(self._tmpfile, "new content", task_id="t1")) + self.assertNotIn("_warning", result) + + @patch("tools.file_tools._get_file_ops") + def test_warning_when_file_modified_externally(self, mock_ops): + """Read, then external modify, then write — should warn.""" + mock_ops.return_value = _make_fake_ops("original content\n", 18) + read_file_tool(self._tmpfile, task_id="t1") + + # Simulate external modification + time.sleep(0.05) + with open(self._tmpfile, "w") as f: + f.write("someone else changed this\n") + + result = json.loads(write_file_tool(self._tmpfile, "new content", task_id="t1")) + self.assertIn("_warning", result) + self.assertIn("modified since you last read", result["_warning"]) + + @patch("tools.file_tools._get_file_ops") + def test_no_warning_when_file_never_read(self, mock_ops): + """Writing a file that was never read — no warning.""" + mock_ops.return_value = _make_fake_ops() + result = json.loads(write_file_tool(self._tmpfile, "new content", task_id="t2")) + self.assertNotIn("_warning", result) + + @patch("tools.file_tools._get_file_ops") + def test_no_warning_for_new_file(self, mock_ops): + """Creating a new file — no warning.""" + mock_ops.return_value = _make_fake_ops() + new_path = os.path.join(self._tmpdir, "brand_new.txt") + result = json.loads(write_file_tool(new_path, "content", task_id="t3")) + self.assertNotIn("_warning", result) + try: + os.unlink(new_path) + except OSError: + pass + 
+ @patch("tools.file_tools._get_file_ops") + def test_different_task_isolated(self, mock_ops): + """Task A reads, file changes, Task B writes — no warning for B.""" + mock_ops.return_value = _make_fake_ops("original content\n", 18) + read_file_tool(self._tmpfile, task_id="task_a") + + time.sleep(0.05) + with open(self._tmpfile, "w") as f: + f.write("changed\n") + + result = json.loads(write_file_tool(self._tmpfile, "new", task_id="task_b")) + self.assertNotIn("_warning", result) + + +# --------------------------------------------------------------------------- +# Staleness in patch +# --------------------------------------------------------------------------- + +class TestPatchStaleness(unittest.TestCase): + + def setUp(self): + clear_read_tracker() + self._tmpdir = tempfile.mkdtemp() + self._tmpfile = os.path.join(self._tmpdir, "patch_test.txt") + with open(self._tmpfile, "w") as f: + f.write("original line\n") + + def tearDown(self): + clear_read_tracker() + try: + os.unlink(self._tmpfile) + os.rmdir(self._tmpdir) + except OSError: + pass + + @patch("tools.file_tools._get_file_ops") + def test_patch_warns_on_stale_file(self, mock_ops): + """Patch should warn if the target file changed since last read.""" + mock_ops.return_value = _make_fake_ops("original line\n", 15) + read_file_tool(self._tmpfile, task_id="p1") + + time.sleep(0.05) + with open(self._tmpfile, "w") as f: + f.write("externally modified\n") + + result = json.loads(patch_tool( + mode="replace", path=self._tmpfile, + old_string="original", new_string="patched", + task_id="p1", + )) + self.assertIn("_warning", result) + self.assertIn("modified since you last read", result["_warning"]) + + @patch("tools.file_tools._get_file_ops") + def test_patch_no_warning_when_fresh(self, mock_ops): + """Patch with no external changes — no warning.""" + mock_ops.return_value = _make_fake_ops("original line\n", 15) + read_file_tool(self._tmpfile, task_id="p2") + + result = json.loads(patch_tool( + mode="replace", 
path=self._tmpfile, + old_string="original", new_string="patched", + task_id="p2", + )) + self.assertNotIn("_warning", result) + + +# --------------------------------------------------------------------------- +# Unit test for the helper +# --------------------------------------------------------------------------- + +class TestCheckFileStalenessHelper(unittest.TestCase): + + def setUp(self): + clear_read_tracker() + + def tearDown(self): + clear_read_tracker() + + def test_returns_none_for_unknown_task(self): + self.assertIsNone(_check_file_staleness("/tmp/x.py", "nonexistent")) + + def test_returns_none_for_unread_file(self): + # Populate tracker with a different file + from tools.file_tools import _read_tracker, _read_tracker_lock + with _read_tracker_lock: + _read_tracker["t1"] = { + "last_key": None, "consecutive": 0, + "read_history": set(), "dedup": {}, + "file_mtimes": {"/tmp/other.py": 12345.0}, + } + self.assertIsNone(_check_file_staleness("/tmp/x.py", "t1")) + + def test_returns_none_when_stat_fails(self): + from tools.file_tools import _read_tracker, _read_tracker_lock + with _read_tracker_lock: + _read_tracker["t1"] = { + "last_key": None, "consecutive": 0, + "read_history": set(), "dedup": {}, + "file_mtimes": {"/nonexistent/path": 99999.0}, + } + # File doesn't exist → stat fails → returns None (let write handle it) + self.assertIsNone(_check_file_staleness("/nonexistent/path", "t1")) + + +if __name__ == "__main__": + unittest.main() diff --git a/tools/file_tools.py b/tools/file_tools.py index 1245e68de..07fb86d1a 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -136,6 +136,9 @@ _file_ops_cache: dict = {} # Used to skip re-reads of unchanged files. Reset on # context compression (the original content is summarised # away so the model needs the full content again). +# "file_mtimes": dict mapping resolved_path → mtime float at last read. 
+# Used by write_file and patch to detect when a file was +# modified externally between the agent's read and write. _read_tracker_lock = threading.Lock() _read_tracker: dict = {} @@ -391,14 +394,16 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = task_data["consecutive"] = 1 count = task_data["consecutive"] - # Store dedup entry (mtime at read time). - # Writes/patches will naturally change mtime, so subsequent - # dedup checks after edits will see a different mtime and - # return the full content — no special handling needed. + # Store mtime at read time for two purposes: + # 1. Dedup: skip identical re-reads of unchanged files. + # 2. Staleness: warn on write/patch if the file changed since + # the agent last read it (external edit, concurrent agent, etc.). try: - task_data["dedup"][dedup_key] = os.path.getmtime(resolved_str) + _mtime_now = os.path.getmtime(resolved_str) + task_data["dedup"][dedup_key] = _mtime_now + task_data.setdefault("file_mtimes", {})[resolved_str] = _mtime_now except OSError: - pass # Can't stat — skip dedup for this entry + pass # Can't stat — skip tracking for this entry if count >= 4: # Hard block: stop returning content to break the loop @@ -495,15 +500,50 @@ def notify_other_tool_call(task_id: str = "default"): task_data["consecutive"] = 0 +def _check_file_staleness(filepath: str, task_id: str) -> str | None: + """Check whether a file was modified since the agent last read it. + + Returns a warning string if the file is stale (mtime changed since + the last read_file call for this task), or None if the file is fresh + or was never read. Does not block — the write still proceeds. 
+ """ + try: + resolved = str(Path(filepath).expanduser().resolve()) + except (OSError, ValueError): + return None + with _read_tracker_lock: + task_data = _read_tracker.get(task_id) + if not task_data: + return None + read_mtime = task_data.get("file_mtimes", {}).get(resolved) + if read_mtime is None: + return None # File was never read — nothing to compare against + try: + current_mtime = os.path.getmtime(resolved) + except OSError: + return None # Can't stat — file may have been deleted, let write handle it + if current_mtime != read_mtime: + return ( + f"Warning: {filepath} was modified since you last read it " + "(external edit or concurrent agent). The content you read may be " + "stale. Consider re-reading the file to verify before writing." + ) + return None + + def write_file_tool(path: str, content: str, task_id: str = "default") -> str: """Write content to a file.""" sensitive_err = _check_sensitive_path(path) if sensitive_err: return json.dumps({"error": sensitive_err}, ensure_ascii=False) try: + stale_warning = _check_file_staleness(path, task_id) file_ops = _get_file_ops(task_id) result = file_ops.write_file(path, content) - return json.dumps(result.to_dict(), ensure_ascii=False) + result_dict = result.to_dict() + if stale_warning: + result_dict["_warning"] = stale_warning + return json.dumps(result_dict, ensure_ascii=False) except Exception as e: if _is_expected_write_exception(e): logger.debug("write_file expected denial: %s: %s", type(e).__name__, e) @@ -529,6 +569,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, if sensitive_err: return json.dumps({"error": sensitive_err}, ensure_ascii=False) try: + # Check staleness for all files this patch will touch. 
+ stale_warnings = [] + for _p in _paths_to_check: + _sw = _check_file_staleness(_p, task_id) + if _sw: + stale_warnings.append(_sw) + file_ops = _get_file_ops(task_id) if mode == "replace": @@ -545,6 +592,8 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, return json.dumps({"error": f"Unknown mode: {mode}"}) result_dict = result.to_dict() + if stale_warnings: + result_dict["_warning"] = stale_warnings[0] if len(stale_warnings) == 1 else " | ".join(stale_warnings) result_json = json.dumps(result_dict, ensure_ascii=False) # Hint when old_string not found — saves iterations where the agent # retries with stale content instead of re-reading the file. -- 2.43.0 From b118f607b2a0be299c4d45d62bc87764ccfb3d6f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 14:49:20 -0700 Subject: [PATCH 101/385] feat(skills): unify hermes-agent and hermes-agent-setup into single skill (#4332) Merges the hermes-agent-spawning skill (autonomous-ai-agents/) and hermes-agent-setup skill (dogfood/) into a single comprehensive skills/hermes-agent/ skill. 
The unified skill covers: - What Hermes Agent is and how it compares to Claude Code/Codex/OpenClaw - Complete CLI reference (all subcommands and flags) - Slash command reference - Configuration guide (providers, toolsets, config sections) - Voice/STT/TTS setup - Spawning additional agent instances (one-shot and interactive PTY) - Multi-agent coordination patterns - Troubleshooting guide - Where-to-find-things lookup table with docs links - Concise contributor quick reference Removes: - skills/autonomous-ai-agents/hermes-agent/ (hermes-agent-spawning) - skills/dogfood/hermes-agent-setup/ --- .../hermes-agent/SKILL.md | 203 ------ skills/dogfood/hermes-agent-setup/SKILL.md | 300 -------- skills/hermes-agent/SKILL.md | 655 ++++++++++++++++++ 3 files changed, 655 insertions(+), 503 deletions(-) delete mode 100644 skills/autonomous-ai-agents/hermes-agent/SKILL.md delete mode 100644 skills/dogfood/hermes-agent-setup/SKILL.md create mode 100644 skills/hermes-agent/SKILL.md diff --git a/skills/autonomous-ai-agents/hermes-agent/SKILL.md b/skills/autonomous-ai-agents/hermes-agent/SKILL.md deleted file mode 100644 index a0678b0a2..000000000 --- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md +++ /dev/null @@ -1,203 +0,0 @@ ---- -name: hermes-agent-spawning -description: Spawn additional Hermes Agent instances as autonomous subprocesses for independent long-running tasks. Supports non-interactive one-shot mode (-q) and interactive PTY mode for multi-turn collaboration. Different from delegate_task — this runs a full separate hermes process. -version: 1.1.0 -author: Hermes Agent -license: MIT -metadata: - hermes: - tags: [Agent, Hermes, Multi-Agent, Orchestration, Subprocess, Interactive] - homepage: https://github.com/NousResearch/hermes-agent - related_skills: [claude-code, codex] ---- - -# Spawning Hermes Agent Instances - -Run additional Hermes Agent processes as autonomous subprocesses. 
Unlike `delegate_task` (which spawns lightweight subagents sharing the same process), this launches fully independent `hermes` CLI processes with their own sessions, tools, and terminal environments. - -## When to Use This vs delegate_task - -| Feature | `delegate_task` | Spawning `hermes` process | -|---------|-----------------|--------------------------| -| Context isolation | Separate conversation, shared process | Fully independent process | -| Tool access | Subset of parent's tools | Full tool access (all toolsets) | -| Session persistence | Ephemeral (no DB entry) | Full session logging + DB | -| Duration | Minutes (bounded by parent's loop) | Hours/days (runs independently) | -| Monitoring | Parent waits for result | Background process, monitor via `process` tool | -| Interactive | No | Yes (PTY mode supports back-and-forth) | -| Use case | Quick parallel subtasks | Long autonomous missions, interactive collaboration | - -## Prerequisites - -- `hermes` CLI installed and on PATH -- API key configured in `~/.hermes/.env` - -### Installation - -Requires an interactive shell (the installer runs a setup wizard): - -``` -curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash -``` - -This installs uv, Python 3.11, clones the repo, sets up the venv, and launches an interactive setup wizard to configure your API provider and model. See the [GitHub repo](https://github.com/NousResearch/hermes-agent) for details. - -## Resuming Previous Sessions - -Resume a prior CLI session instead of starting fresh. Useful for continuing long tasks across process restarts: - -``` -# Resume the most recent CLI session -terminal(command="hermes --continue", background=true, pty=true) - -# Resume a specific session by ID (shown on exit) -terminal(command="hermes --resume 20260225_143052_a1b2c3", background=true, pty=true) -``` - -The full conversation history (messages, tool calls, responses) is restored from SQLite. 
The agent sees everything from the previous session. - -## Mode 1: One-Shot Query (-q flag) - -Run a single query non-interactively. The agent executes, does its work, and exits: - -``` -terminal(command="hermes chat -q 'Research the latest GRPO training papers and write a summary to ~/research/grpo.md'", timeout=300) -``` - -Background for long tasks: -``` -terminal(command="hermes chat -q 'Set up CI/CD for ~/myapp'", background=true) -# Returns session_id, monitor with process tool -``` - -## Mode 2: Interactive PTY Session - -Launch a full interactive Hermes session with PTY for back-and-forth collaboration. You can send messages, review its work, give feedback, and steer it. - -Note: Hermes uses prompt_toolkit for its CLI UI. Through a PTY, this works because ptyprocess provides a real terminal — input sent via `submit` arrives as keystrokes. The output log will contain ANSI escape sequences from the UI rendering — focus on the text content, not the formatting. - -``` -# Start interactive hermes in background with PTY -terminal(command="hermes", workdir="~/project", background=true, pty=true) -# Returns session_id - -# Send it a task -process(action="submit", session_id="", data="Set up a Python project with FastAPI, add auth endpoints, and write tests") - -# Wait for it to work, then check progress -process(action="log", session_id="") - -# Give feedback on what it produced -process(action="submit", session_id="", data="The tests look good but add edge cases for invalid tokens") - -# Check its response -process(action="log", session_id="") - -# Ask it to iterate -process(action="submit", session_id="", data="Now add rate limiting middleware") - -# When done, exit the session -process(action="submit", session_id="", data="/exit") -``` - -### Interactive Collaboration Patterns - -**Code review loop** — spawn hermes, send code for review, iterate on feedback: -``` -terminal(command="hermes", workdir="~/project", background=true, pty=true) 
-process(action="submit", session_id="", data="Review the changes in src/auth.py and suggest improvements") -# ... read its review ... -process(action="submit", session_id="", data="Good points. Go ahead and implement suggestions 1 and 3") -# ... it makes changes ... -process(action="submit", session_id="", data="Run the tests to make sure nothing broke") -``` - -**Research with steering** — start broad, narrow down based on findings: -``` -terminal(command="hermes", background=true, pty=true) -process(action="submit", session_id="", data="Search for the latest papers on KV cache compression techniques") -# ... read its findings ... -process(action="submit", session_id="", data="The MQA approach looks promising. Dig deeper into that one and compare with GQA") -# ... more detailed research ... -process(action="submit", session_id="", data="Write up everything you found to ~/research/kv-cache-compression.md") -``` - -**Multi-agent coordination** — spawn two agents working on related tasks, pass context between them: -``` -# Agent A: backend -terminal(command="hermes", workdir="~/project/backend", background=true, pty=true) -process(action="submit", session_id="", data="Build a REST API for user management with CRUD endpoints") - -# Agent B: frontend -terminal(command="hermes", workdir="~/project/frontend", background=true, pty=true) -process(action="submit", session_id="", data="Build a React dashboard that will connect to a REST API at localhost:8000/api/users") - -# Check Agent A's progress, relay API schema to Agent B -process(action="log", session_id="") -process(action="submit", session_id="", data="Here's the API schema Agent A built: GET /api/users, POST /api/users, etc. 
Update your fetch calls to match.") -``` - -## Parallel Non-Interactive Instances - -Spawn multiple independent agents for unrelated tasks: - -``` -terminal(command="hermes chat -q 'Research competitor landing pages and write a report to ~/research/competitors.md'", background=true) -terminal(command="hermes chat -q 'Audit security of ~/myapp and write findings to ~/myapp/SECURITY_AUDIT.md'", background=true) -process(action="list") -``` - -## With Custom Model - -``` -terminal(command="hermes chat -q 'Summarize this codebase' --model google/gemini-2.5-pro", workdir="~/project", background=true) -``` - -## Gateway Cron Integration - -For scheduled autonomous tasks, use the unified `cronjob` tool instead of spawning processes — cron jobs handle delivery, retry, and persistence automatically. - -## Key Differences Between Modes - -| | `-q` (one-shot) | Interactive (PTY) | `--continue` / `--resume` | -|---|---|---|---| -| User interaction | None | Full back-and-forth | Full back-and-forth | -| PTY required | No | Yes (`pty=true`) | Yes (`pty=true`) | -| Multi-turn | Single query | Unlimited turns | Continues previous turns | -| Best for | Fire-and-forget tasks | Iterative work, steering | Picking up where you left off | -| Exit | Automatic after completion | Send `/exit` or kill | Send `/exit` or kill | - -## Known Issues - -- **Interactive PTY + prompt_toolkit**: The `submit` action sends `\n` (line feed) but prompt_toolkit in raw mode expects `\r` (carriage return) for Enter. Text appears in the prompt but never submits. **Workaround**: Use **tmux** instead of raw PTY mode. 
tmux's `send-keys Enter` sends the correct `\r`: - -``` -# Start hermes inside tmux -tmux new-session -d -s hermes-session -x 120 -y 40 "hermes" -sleep 10 # Wait for banner/startup - -# Send messages -tmux send-keys -t hermes-session "your message here" Enter - -# Read output -sleep 15 # Wait for LLM response -tmux capture-pane -t hermes-session -p - -# Multi-turn: just send more messages and capture again -tmux send-keys -t hermes-session "follow-up message" Enter - -# Exit when done -tmux send-keys -t hermes-session "/exit" Enter -tmux kill-session -t hermes-session -``` - -## Rules - -1. **Use `-q` for autonomous tasks** — agent works independently and exits -2. **Use `pty=true` for interactive sessions** — required for the full CLI UI -3. **Use `submit` not `write`** — `submit` adds a newline (Enter), `write` doesn't -4. **Read logs before sending more** — check what the agent produced before giving next instruction -5. **Set timeouts for `-q` mode** — complex tasks may take 5-10 minutes -6. **Prefer `delegate_task` for quick subtasks** — spawning a full process has more overhead -7. **Each instance is independent** — they don't share conversation context with the parent -8. **Check results** — after completion, read the output files or logs the agent produced diff --git a/skills/dogfood/hermes-agent-setup/SKILL.md b/skills/dogfood/hermes-agent-setup/SKILL.md deleted file mode 100644 index 73980a1e6..000000000 --- a/skills/dogfood/hermes-agent-setup/SKILL.md +++ /dev/null @@ -1,300 +0,0 @@ ---- -name: hermes-agent-setup -description: Help users configure Hermes Agent — CLI usage, setup wizard, model/provider selection, tools, skills, voice/STT/TTS, gateway, and troubleshooting. Use when someone asks to enable features, configure settings, or needs help with Hermes itself. 
-version: 1.1.0 -author: Hermes Agent -tags: [setup, configuration, tools, stt, tts, voice, hermes, cli, skills] ---- - -# Hermes Agent Setup & Configuration - -Use this skill when a user asks about configuring Hermes, enabling features, setting up voice, managing tools/skills, or troubleshooting. - -## Key Paths - -- Config: `~/.hermes/config.yaml` -- API keys: `~/.hermes/.env` -- Skills: `~/.hermes/skills/` -- Hermes install: `~/.hermes/hermes-agent/` -- Venv: `~/.hermes/hermes-agent/venv/` - -## CLI Overview - -Hermes is used via the `hermes` command (or `python -m hermes_cli.main` from the repo). - -### Core commands: - -``` -hermes Interactive chat (default) -hermes chat -q "question" Single query, then exit -hermes chat -m MODEL Chat with a specific model -hermes -c Resume most recent session -hermes -c "project name" Resume session by name -hermes --resume SESSION_ID Resume by exact ID -hermes -w Isolated git worktree mode -hermes -s skill1,skill2 Preload skills for the session -hermes --yolo Skip dangerous command approval -``` - -### Configuration & setup: - -``` -hermes setup Interactive setup wizard (provider, API keys, model) -hermes model Interactive model/provider selection -hermes config View current configuration -hermes config edit Open config.yaml in $EDITOR -hermes config set KEY VALUE Set a config value directly -hermes login Authenticate with a provider -hermes logout Clear stored auth -hermes doctor Check configuration and dependencies -``` - -### Tools & skills: - -``` -hermes tools Interactive tool enable/disable per platform -hermes skills list List installed skills -hermes skills search QUERY Search the skills hub -hermes skills install NAME Install a skill from the hub -hermes skills config Enable/disable skills per platform -``` - -### Gateway (messaging platforms): - -``` -hermes gateway run Start the messaging gateway -hermes gateway install Install gateway as background service -hermes gateway status Check gateway status -``` - -### 
Session management: - -``` -hermes sessions list List past sessions -hermes sessions browse Interactive session picker -hermes sessions rename ID TITLE Rename a session -hermes sessions export ID Export session as markdown -hermes sessions prune Clean up old sessions -``` - -### Other: - -``` -hermes status Show status of all components -hermes cron list List cron jobs -hermes insights Usage analytics -hermes update Update to latest version -hermes pairing Manage DM authorization codes -``` - -## Setup Wizard (`hermes setup`) - -The interactive setup wizard walks through: -1. **Provider selection** — OpenRouter, Anthropic, OpenAI, Google, DeepSeek, and many more -2. **API key entry** — stores securely in the env file -3. **Model selection** — picks from available models for the chosen provider -4. **Basic settings** — reasoning effort, tool preferences - -Run it from terminal: -```bash -cd ~/.hermes/hermes-agent -source venv/bin/activate -python -m hermes_cli.main setup -``` - -To change just the model/provider later: `hermes model` - -## Skills Configuration (`hermes skills`) - -Skills are reusable instruction sets that extend what Hermes can do. - -### Managing skills: - -```bash -hermes skills list # Show installed skills -hermes skills search "docker" # Search the hub -hermes skills install NAME # Install from hub -hermes skills config # Enable/disable per platform -``` - -### Per-platform skill control: - -`hermes skills config` opens an interactive UI where you can enable or disable specific skills for each platform (cli, telegram, discord, etc.). Disabled skills won't appear in the agent's available skills list for that platform. - -### Loading skills in a session: - -- CLI: `hermes -s skill-name` or `hermes -s skill1,skill2` -- Chat: `/skill skill-name` -- Gateway: type `/skill skill-name` in any chat - -## Voice Messages (STT) - -Voice messages from Telegram/Discord/WhatsApp/Slack/Signal are auto-transcribed when an STT provider is available. 
- -### Provider priority (auto-detected): -1. **Local faster-whisper** — free, no API key, runs on CPU/GPU -2. **Groq Whisper** — free tier, needs GROQ_API_KEY -3. **OpenAI Whisper** — paid, needs VOICE_TOOLS_OPENAI_KEY - -### Setup local STT (recommended): - -```bash -cd ~/.hermes/hermes-agent -source venv/bin/activate -pip install faster-whisper -``` - -Add to config.yaml under the `stt:` section: -```yaml -stt: - enabled: true - provider: local - local: - model: base # Options: tiny, base, small, medium, large-v3 -``` - -Model downloads automatically on first use (~150 MB for base). - -### Setup Groq STT (free cloud): - -1. Get free key from https://console.groq.com -2. Add GROQ_API_KEY to the env file -3. Set provider to groq in config.yaml stt section - -### Verify STT: - -After config changes, restart the gateway (send /restart in chat, or restart `hermes gateway run`). Then send a voice message. - -## Voice Replies (TTS) - -Hermes can reply with voice when users send voice messages. - -### TTS providers (set API key in env file): - -| Provider | Env var | Free? | -|----------|---------|-------| -| ElevenLabs | ELEVENLABS_API_KEY | Free tier | -| OpenAI | VOICE_TOOLS_OPENAI_KEY | Paid | -| Kokoro (local) | None needed | Free | -| Fish Audio | FISH_AUDIO_API_KEY | Free tier | - -### Voice commands (in any chat): -- `/voice on` — voice reply to voice messages only -- `/voice tts` — voice reply to all messages -- `/voice off` — text only (default) - -## Enabling/Disabling Tools (`hermes tools`) - -### Interactive tool config: - -```bash -cd ~/.hermes/hermes-agent -source venv/bin/activate -python -m hermes_cli.main tools -``` - -This opens a curses UI to enable/disable toolsets per platform (cli, telegram, discord, slack, etc.). - -### After changing tools: - -Use `/reset` in the chat to start a fresh session with the new toolset. Tool changes do NOT take effect mid-conversation (this preserves prompt caching and avoids cost spikes). 
- -### Common toolsets: - -| Toolset | What it provides | -|---------|-----------------| -| terminal | Shell command execution | -| file | File read/write/search/patch | -| web | Web search and extraction | -| browser | Browser automation (needs Browserbase) | -| image_gen | AI image generation | -| mcp | MCP server connections | -| voice | Text-to-speech output | -| cronjob | Scheduled tasks | - -## Installing Dependencies - -Some tools need extra packages: - -```bash -cd ~/.hermes/hermes-agent && source venv/bin/activate - -pip install faster-whisper # Local STT (voice transcription) -pip install browserbase # Browser automation -pip install mcp # MCP server connections -``` - -## Config File Reference - -The main config file is `~/.hermes/config.yaml`. Key sections: - -```yaml -# Model and provider -model: - default: anthropic/claude-opus-4.6 - provider: openrouter - -# Agent behavior -agent: - max_turns: 90 - reasoning_effort: high # xhigh, high, medium, low, minimal, none - -# Voice -stt: - enabled: true - provider: local # local, groq, openai -tts: - provider: elevenlabs # elevenlabs, openai, kokoro, fish - -# Display -display: - skin: default # default, ares, mono, slate - tool_progress: full # full, compact, off - background_process_notifications: all # all, result, error, off -``` - -Edit with `hermes config edit` or `hermes config set KEY VALUE`. 
- -## Gateway Commands (Messaging Platforms) - -| Command | What it does | -|---------|-------------| -| /reset or /new | Fresh session (picks up new tool config) | -| /help | Show all commands | -| /model [name] | Show or change model | -| /compact | Compress conversation to save context | -| /voice [mode] | Configure voice replies | -| /reasoning [effort] | Set reasoning level | -| /sethome | Set home channel for cron/notifications | -| /restart | Restart the gateway (picks up config changes) | -| /status | Show session info | -| /retry | Retry last message | -| /undo | Remove last exchange | -| /personality [name] | Set agent personality | -| /skill [name] | Load a skill | - -## Troubleshooting - -### Voice messages not working -1. Check stt.enabled is true in config.yaml -2. Check a provider is available (faster-whisper installed, or API key set) -3. Restart gateway after config changes (/restart) - -### Tool not available -1. Run `hermes tools` to check if the toolset is enabled for your platform -2. Some tools need env vars — check the env file -3. Use /reset after enabling tools - -### Model/provider issues -1. Run `hermes doctor` to check configuration -2. Run `hermes login` to re-authenticate -3. Check the env file has the right API key - -### Changes not taking effect -- Gateway: /reset for tool changes, /restart for config changes -- CLI: start a new session - -### Skills not showing up -1. Check `hermes skills list` shows the skill -2. Check `hermes skills config` has it enabled for your platform -3. 
Load explicitly with `/skill name` or `hermes -s name` diff --git a/skills/hermes-agent/SKILL.md b/skills/hermes-agent/SKILL.md new file mode 100644 index 000000000..8d93e3fb7 --- /dev/null +++ b/skills/hermes-agent/SKILL.md @@ -0,0 +1,655 @@ +--- +name: hermes-agent +description: Complete guide to using and extending Hermes Agent — CLI usage, setup, configuration, spawning additional agents, gateway platforms, skills, voice, tools, profiles, and a concise contributor reference. Load this skill when helping users configure Hermes, troubleshoot issues, spawn agent instances, or make code contributions. +version: 2.0.0 +author: Hermes Agent + Teknium +license: MIT +metadata: + hermes: + tags: [hermes, setup, configuration, multi-agent, spawning, cli, gateway, development] + homepage: https://github.com/NousResearch/hermes-agent + related_skills: [claude-code, codex, opencode] +--- + +# Hermes Agent + +Hermes Agent is an open-source AI agent framework by Nous Research that runs in your terminal, messaging platforms, and IDEs. It belongs to the same category as Claude Code (Anthropic), Codex (OpenAI), and OpenClaw — autonomous coding and task-execution agents that use tool calling to interact with your system. Hermes works with any LLM provider (OpenRouter, Anthropic, OpenAI, DeepSeek, local models, and 15+ others) and runs on Linux, macOS, and WSL. + +What makes Hermes different: + +- **Self-improving through skills** — Hermes learns from experience by saving reusable procedures as skills. When it solves a complex problem, discovers a workflow, or gets corrected, it can persist that knowledge as a skill document that loads into future sessions. Skills accumulate over time, making the agent better at your specific tasks and environment. +- **Persistent memory across sessions** — remembers who you are, your preferences, environment details, and lessons learned. Pluggable memory backends (built-in, Honcho, Mem0, and more) let you choose how memory works. 
+- **Multi-platform gateway** — the same agent runs on Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, and 8+ other platforms with full tool access, not just chat. +- **Provider-agnostic** — swap models and providers mid-workflow without changing anything else. Credential pools rotate across multiple API keys automatically. +- **Profiles** — run multiple independent Hermes instances with isolated configs, sessions, skills, and memory. +- **Extensible** — plugins, MCP servers, custom tools, webhook triggers, cron scheduling, and the full Python ecosystem. + +People use Hermes for software development, research, system administration, data analysis, content creation, home automation, and anything else that benefits from an AI agent with persistent context and full system access. + +**This skill helps you work with Hermes Agent effectively** — setting it up, configuring features, spawning additional agent instances, troubleshooting issues, finding the right commands and settings, and understanding how the system works when you need to extend or contribute to it. + +**Docs:** https://hermes-agent.nousresearch.com/docs/ + +## Quick Start + +```bash +# Install +curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash + +# Interactive chat (default) +hermes + +# Single query +hermes chat -q "What is the capital of France?" 
+ +# Setup wizard +hermes setup + +# Change model/provider +hermes model + +# Check health +hermes doctor +``` + +--- + +## CLI Reference + +### Global Flags + +``` +hermes [flags] [command] + + --version, -V Show version + --resume, -r SESSION Resume session by ID or title + --continue, -c [NAME] Resume by name, or most recent session + --worktree, -w Isolated git worktree mode (parallel agents) + --skills, -s SKILL Preload skills (comma-separate or repeat) + --profile, -p NAME Use a named profile + --yolo Skip dangerous command approval + --pass-session-id Include session ID in system prompt +``` + +No subcommand defaults to `chat`. + +### Chat + +``` +hermes chat [flags] + -q, --query TEXT Single query, non-interactive + -m, --model MODEL Model (e.g. anthropic/claude-sonnet-4) + -t, --toolsets LIST Comma-separated toolsets + --provider PROVIDER Force provider (openrouter, anthropic, nous, etc.) + -v, --verbose Verbose output + -Q, --quiet Suppress banner, spinner, tool previews + --checkpoints Enable filesystem checkpoints (/rollback) + --source TAG Session source tag (default: cli) +``` + +### Configuration + +``` +hermes setup [section] Interactive wizard (model|terminal|gateway|tools|agent) +hermes model Interactive model/provider picker +hermes config View current config +hermes config edit Open config.yaml in $EDITOR +hermes config set KEY VAL Set a config value +hermes config path Print config.yaml path +hermes config env-path Print .env path +hermes config check Check for missing/outdated config +hermes config migrate Update config with new options +hermes login [--provider P] OAuth login (nous, openai-codex) +hermes logout Clear stored auth +hermes doctor [--fix] Check dependencies and config +hermes status [--all] Show component status +``` + +### Tools & Skills + +``` +hermes tools Interactive tool enable/disable (curses UI) +hermes tools list Show all tools and status +hermes tools enable NAME Enable a toolset +hermes tools disable NAME Disable a 
toolset + +hermes skills list List installed skills +hermes skills search QUERY Search the skills hub +hermes skills install ID Install a skill +hermes skills inspect ID Preview without installing +hermes skills config Enable/disable skills per platform +hermes skills check Check for updates +hermes skills update Update outdated skills +hermes skills uninstall N Remove a hub skill +hermes skills publish PATH Publish to registry +hermes skills browse Browse all available skills +hermes skills tap add REPO Add a GitHub repo as skill source +``` + +### MCP Servers + +``` +hermes mcp serve Run Hermes as an MCP server +hermes mcp add NAME Add an MCP server (--url or --command) +hermes mcp remove NAME Remove an MCP server +hermes mcp list List configured servers +hermes mcp test NAME Test connection +hermes mcp configure NAME Toggle tool selection +``` + +### Gateway (Messaging Platforms) + +``` +hermes gateway run Start gateway foreground +hermes gateway install Install as background service +hermes gateway start/stop Control the service +hermes gateway restart Restart the service +hermes gateway status Check status +hermes gateway setup Configure platforms +``` + +Supported platforms: Telegram, Discord, Slack, WhatsApp, Signal, Email, SMS, Matrix, Mattermost, Home Assistant, DingTalk, Feishu, WeCom, API Server, Webhooks, Open WebUI. 
+ +Platform docs: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/ + +### Sessions + +``` +hermes sessions list List recent sessions +hermes sessions browse Interactive picker +hermes sessions export OUT Export to JSONL +hermes sessions rename ID T Rename a session +hermes sessions delete ID Delete a session +hermes sessions prune Clean up old sessions (--older-than N days) +hermes sessions stats Session store statistics +``` + +### Cron Jobs + +``` +hermes cron list List jobs (--all for disabled) +hermes cron create SCHED Create: '30m', 'every 2h', '0 9 * * *' +hermes cron edit ID Edit schedule, prompt, delivery +hermes cron pause/resume ID Control job state +hermes cron run ID Trigger on next tick +hermes cron remove ID Delete a job +hermes cron status Scheduler status +``` + +### Webhooks + +``` +hermes webhook subscribe N Create route at /webhooks/ +hermes webhook list List subscriptions +hermes webhook remove NAME Remove a subscription +hermes webhook test NAME Send a test POST +``` + +### Profiles + +``` +hermes profile list List all profiles +hermes profile create NAME Create (--clone, --clone-all, --clone-from) +hermes profile use NAME Set sticky default +hermes profile delete NAME Delete a profile +hermes profile show NAME Show details +hermes profile alias NAME Manage wrapper scripts +hermes profile rename A B Rename a profile +hermes profile export NAME Export to tar.gz +hermes profile import FILE Import from archive +``` + +### Credential Pools + +``` +hermes auth add Interactive credential wizard +hermes auth list [PROVIDER] List pooled credentials +hermes auth remove P INDEX Remove by provider + index +hermes auth reset PROVIDER Clear exhaustion status +``` + +### Other + +``` +hermes insights [--days N] Usage analytics +hermes update Update to latest version +hermes pairing list/approve/revoke DM authorization +hermes plugins list/install/remove Plugin management +hermes honcho setup/status Honcho memory integration +hermes memory 
setup/status/off Memory provider config +hermes completion bash|zsh Shell completions +hermes acp ACP server (IDE integration) +hermes claw migrate Migrate from OpenClaw +hermes uninstall Uninstall Hermes +``` + +--- + +## Slash Commands (In-Session) + +Type these during an interactive chat session. + +### Session Control +``` +/new (/reset) Fresh session +/clear Clear screen + new session (CLI) +/retry Resend last message +/undo Remove last exchange +/title [name] Name the session +/compress Manually compress context +/stop Kill background processes +/rollback [N] Restore filesystem checkpoint +/background Run prompt in background +/queue Queue for next turn +/resume [name] Resume a named session +``` + +### Configuration +``` +/config Show config (CLI) +/model [name] Show or change model +/provider Show provider info +/prompt [text] View/set system prompt (CLI) +/personality [name] Set personality +/reasoning [level] Set reasoning (none|low|medium|high|xhigh|show|hide) +/verbose Cycle: off → new → all → verbose +/voice [on|off|tts] Voice mode +/yolo Toggle approval bypass +/skin [name] Change theme (CLI) +/statusbar Toggle status bar (CLI) +``` + +### Tools & Skills +``` +/tools Manage tools (CLI) +/toolsets List toolsets (CLI) +/skills Search/install skills (CLI) +/skill Load a skill into session +/cron Manage cron jobs (CLI) +/reload-mcp Reload MCP servers +/plugins List plugins (CLI) +``` + +### Info +``` +/help Show commands +/commands [page] Browse all commands (gateway) +/usage Token usage +/insights [days] Usage analytics +/status Session info (gateway) +/profile Active profile info +``` + +### Exit +``` +/quit (/exit, /q) Exit CLI +``` + +--- + +## Key Paths & Config + +``` +~/.hermes/config.yaml Main configuration +~/.hermes/.env API keys and secrets +~/.hermes/skills/ Installed skills +~/.hermes/sessions/ Session transcripts +~/.hermes/logs/ Gateway and error logs +~/.hermes/auth.json OAuth tokens and credential pools +~/.hermes/hermes-agent/ Source 
code (if git-installed) +``` + +Profiles use `~/.hermes/profiles//` with the same layout. + +### Config Sections + +Edit with `hermes config edit` or `hermes config set section.key value`. + +| Section | Key options | +|---------|-------------| +| `model` | `default`, `provider`, `base_url`, `api_key`, `context_length` | +| `agent` | `max_turns` (90), `tool_use_enforcement` | +| `terminal` | `backend` (local/docker/ssh/modal), `cwd`, `timeout` (180) | +| `compression` | `enabled`, `threshold` (0.50), `target_ratio` (0.20) | +| `display` | `skin`, `tool_progress`, `show_reasoning`, `show_cost` | +| `stt` | `enabled`, `provider` (local/groq/openai) | +| `tts` | `provider` (edge/elevenlabs/openai/kokoro/fish) | +| `memory` | `memory_enabled`, `user_profile_enabled`, `provider` | +| `security` | `tirith_enabled`, `website_blocklist` | +| `delegation` | `model`, `provider`, `max_iterations` (50) | +| `smart_model_routing` | `enabled`, `cheap_model` | +| `checkpoints` | `enabled`, `max_snapshots` (50) | + +Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/configuration + +### Providers + +18 providers supported. Set via `hermes model` or `hermes setup`. 
+ +| Provider | Auth | Key env var | +|----------|------|-------------| +| OpenRouter | API key | `OPENROUTER_API_KEY` | +| Anthropic | API key | `ANTHROPIC_API_KEY` | +| Nous Portal | OAuth | `hermes login --provider nous` | +| OpenAI Codex | OAuth | `hermes login --provider openai-codex` | +| GitHub Copilot | Token | `COPILOT_GITHUB_TOKEN` | +| DeepSeek | API key | `DEEPSEEK_API_KEY` | +| Hugging Face | Token | `HF_TOKEN` | +| Z.AI / GLM | API key | `GLM_API_KEY` | +| MiniMax | API key | `MINIMAX_API_KEY` | +| Kimi / Moonshot | API key | `KIMI_API_KEY` | +| Alibaba / DashScope | API key | `DASHSCOPE_API_KEY` | +| Kilo Code | API key | `KILOCODE_API_KEY` | +| Custom endpoint | Config | `model.base_url` + `model.api_key` in config.yaml | + +Plus: AI Gateway, OpenCode Zen, OpenCode Go, MiniMax CN, GitHub Copilot ACP. + +Full provider docs: https://hermes-agent.nousresearch.com/docs/integrations/providers + +### Toolsets + +Enable/disable via `hermes tools` (interactive) or `hermes tools enable/disable NAME`. + +| Toolset | What it provides | +|---------|-----------------| +| `web` | Web search and content extraction | +| `browser` | Browser automation (Browserbase, Camofox, or local Chromium) | +| `terminal` | Shell commands and process management | +| `file` | File read/write/search/patch | +| `code_execution` | Sandboxed Python execution | +| `vision` | Image analysis | +| `image_gen` | AI image generation | +| `tts` | Text-to-speech | +| `skills` | Skill browsing and management | +| `memory` | Persistent cross-session memory | +| `session_search` | Search past conversations | +| `delegation` | Subagent task delegation | +| `cronjob` | Scheduled task management | +| `clarify` | Ask user clarifying questions | +| `moa` | Mixture of Agents (off by default) | +| `homeassistant` | Smart home control (off by default) | + +Tool changes take effect on `/reset` (new session). They do NOT apply mid-conversation to preserve prompt caching. 
+ +--- + +## Voice & Transcription + +### STT (Voice → Text) + +Voice messages from messaging platforms are auto-transcribed. + +Provider priority (auto-detected): +1. **Local faster-whisper** — free, no API key: `pip install faster-whisper` +2. **Groq Whisper** — free tier: set `GROQ_API_KEY` +3. **OpenAI Whisper** — paid: set `VOICE_TOOLS_OPENAI_KEY` + +Config: +```yaml +stt: + enabled: true + provider: local # local, groq, openai + local: + model: base # tiny, base, small, medium, large-v3 +``` + +### TTS (Text → Voice) + +| Provider | Env var | Free? | +|----------|---------|-------| +| Edge TTS | None | Yes (default) | +| ElevenLabs | `ELEVENLABS_API_KEY` | Free tier | +| OpenAI | `VOICE_TOOLS_OPENAI_KEY` | Paid | +| Kokoro (local) | None | Free | +| Fish Audio | `FISH_AUDIO_API_KEY` | Free tier | + +Voice commands: `/voice on` (voice-to-voice), `/voice tts` (always voice), `/voice off`. + +--- + +## Spawning Additional Hermes Instances + +Run additional Hermes processes as fully independent subprocesses — separate sessions, tools, and environments. + +### When to Use This vs delegate_task + +| | `delegate_task` | Spawning `hermes` process | +|-|-----------------|--------------------------| +| Isolation | Separate conversation, shared process | Fully independent process | +| Duration | Minutes (bounded by parent loop) | Hours/days | +| Tool access | Subset of parent's tools | Full tool access | +| Interactive | No | Yes (PTY mode) | +| Use case | Quick parallel subtasks | Long autonomous missions | + +### One-Shot Mode + +``` +terminal(command="hermes chat -q 'Research GRPO papers and write summary to ~/research/grpo.md'", timeout=300) + +# Background for long tasks: +terminal(command="hermes chat -q 'Set up CI/CD for ~/myapp'", background=true) +``` + +### Interactive PTY Mode (via tmux) + +Hermes uses prompt_toolkit, which requires a real terminal. 
Use tmux for interactive spawning: + +``` +# Start +terminal(command="tmux new-session -d -s agent1 -x 120 -y 40 'hermes'", timeout=10) + +# Wait for startup, then send a message +terminal(command="sleep 8 && tmux send-keys -t agent1 'Build a FastAPI auth service' Enter", timeout=15) + +# Read output +terminal(command="sleep 20 && tmux capture-pane -t agent1 -p", timeout=5) + +# Send follow-up +terminal(command="tmux send-keys -t agent1 'Add rate limiting middleware' Enter", timeout=5) + +# Exit +terminal(command="tmux send-keys -t agent1 '/exit' Enter && sleep 2 && tmux kill-session -t agent1", timeout=10) +``` + +### Multi-Agent Coordination + +``` +# Agent A: backend +terminal(command="tmux new-session -d -s backend -x 120 -y 40 'hermes -w'", timeout=10) +terminal(command="sleep 8 && tmux send-keys -t backend 'Build REST API for user management' Enter", timeout=15) + +# Agent B: frontend +terminal(command="tmux new-session -d -s frontend -x 120 -y 40 'hermes -w'", timeout=10) +terminal(command="sleep 8 && tmux send-keys -t frontend 'Build React dashboard for user management' Enter", timeout=15) + +# Check progress, relay context between them +terminal(command="tmux capture-pane -t backend -p | tail -30", timeout=5) +terminal(command="tmux send-keys -t frontend 'Here is the API schema from the backend agent: ...' 
Enter", timeout=5) +``` + +### Session Resume + +``` +# Resume most recent session +terminal(command="tmux new-session -d -s resumed 'hermes --continue'", timeout=10) + +# Resume specific session +terminal(command="tmux new-session -d -s resumed 'hermes --resume 20260225_143052_a1b2c3'", timeout=10) +``` + +### Tips + +- **Prefer `delegate_task` for quick subtasks** — less overhead than spawning a full process +- **Use `-w` (worktree mode)** when spawning agents that edit code — prevents git conflicts +- **Set timeouts** for one-shot mode — complex tasks can take 5-10 minutes +- **Use `hermes chat -q` for fire-and-forget** — no PTY needed +- **Use tmux for interactive sessions** — raw PTY mode has `\r` vs `\n` issues with prompt_toolkit +- **For scheduled tasks**, use the `cronjob` tool instead of spawning — handles delivery and retry + +--- + +## Troubleshooting + +### Voice not working +1. Check `stt.enabled: true` in config.yaml +2. Verify provider: `pip install faster-whisper` or set API key +3. Restart gateway: `/restart` + +### Tool not available +1. `hermes tools` — check if toolset is enabled for your platform +2. Some tools need env vars (check `.env`) +3. `/reset` after enabling tools + +### Model/provider issues +1. `hermes doctor` — check config and dependencies +2. `hermes login` — re-authenticate OAuth providers +3. Check `.env` has the right API key + +### Changes not taking effect +- **Tools/skills:** `/reset` starts a new session with updated toolset +- **Config changes:** `/restart` reloads gateway config +- **Code changes:** Restart the CLI or gateway process + +### Skills not showing +1. `hermes skills list` — verify installed +2. `hermes skills config` — check platform enablement +3. Load explicitly: `/skill name` or `hermes -s name` + +### Gateway issues +Check logs first: +```bash +grep -i "failed to send\|error" ~/.hermes/logs/gateway.log | tail -20 +``` + +--- + +## Where to Find Things + +| Looking for... 
| Location | +|----------------|----------| +| Config options | `hermes config edit` or [Configuration docs](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | +| Available tools | `hermes tools list` or [Tools reference](https://hermes-agent.nousresearch.com/docs/reference/tools-reference) | +| Slash commands | `/help` in session or [Slash commands reference](https://hermes-agent.nousresearch.com/docs/reference/slash-commands) | +| Skills catalog | `hermes skills browse` or [Skills catalog](https://hermes-agent.nousresearch.com/docs/reference/skills-catalog) | +| Provider setup | `hermes model` or [Providers guide](https://hermes-agent.nousresearch.com/docs/integrations/providers) | +| Platform setup | `hermes gateway setup` or [Messaging docs](https://hermes-agent.nousresearch.com/docs/user-guide/messaging/) | +| MCP servers | `hermes mcp list` or [MCP guide](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) | +| Profiles | `hermes profile list` or [Profiles docs](https://hermes-agent.nousresearch.com/docs/user-guide/profiles) | +| Cron jobs | `hermes cron list` or [Cron docs](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron) | +| Memory | `hermes memory status` or [Memory docs](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory) | +| Env variables | `hermes config env-path` or [Env vars reference](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) | +| CLI commands | `hermes --help` or [CLI reference](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) | +| Gateway logs | `~/.hermes/logs/gateway.log` | +| Session files | `~/.hermes/sessions/` or `hermes sessions browse` | +| Source code | `~/.hermes/hermes-agent/` | + +--- + +## Contributor Quick Reference + +For occasional contributors and PR authors. 
Full developer docs: https://hermes-agent.nousresearch.com/docs/developer-guide/ + +### Project Layout + +``` +hermes-agent/ +├── run_agent.py # AIAgent — core conversation loop +├── model_tools.py # Tool discovery and dispatch +├── toolsets.py # Toolset definitions +├── cli.py # Interactive CLI (HermesCLI) +├── hermes_state.py # SQLite session store +├── agent/ # Prompt builder, compression, display, adapters +├── hermes_cli/ # CLI subcommands, config, setup, commands +│ ├── commands.py # Slash command registry (CommandDef) +│ ├── config.py # DEFAULT_CONFIG, env var definitions +│ └── main.py # CLI entry point and argparse +├── tools/ # One file per tool +│ └── registry.py # Central tool registry +├── gateway/ # Messaging gateway +│ └── platforms/ # Platform adapters (telegram, discord, etc.) +├── cron/ # Job scheduler +├── tests/ # ~3000 pytest tests +└── website/ # Docusaurus docs site +``` + +Config: `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys). + +### Adding a Tool (3 files) + +**1. Create `tools/your_tool.py`:** +```python +import json, os +from tools.registry import registry + +def check_requirements() -> bool: + return bool(os.getenv("EXAMPLE_API_KEY")) + +def example_tool(param: str, task_id: str = None) -> str: + return json.dumps({"success": True, "data": "..."}) + +registry.register( + name="example_tool", + toolset="example", + schema={"name": "example_tool", "description": "...", "parameters": {...}}, + handler=lambda args, **kw: example_tool( + param=args.get("param", ""), task_id=kw.get("task_id")), + check_fn=check_requirements, + requires_env=["EXAMPLE_API_KEY"], +) +``` + +**2. Add import** in `model_tools.py` → `_discover_tools()` list. + +**3. Add to `toolsets.py`** → `_HERMES_CORE_TOOLS` list. + +All handlers must return JSON strings. Use `get_hermes_home()` for paths, never hardcode `~/.hermes`. + +### Adding a Slash Command + +1. Add `CommandDef` to `COMMAND_REGISTRY` in `hermes_cli/commands.py` +2. 
Add handler in `cli.py` → `process_command()` +3. (Optional) Add gateway handler in `gateway/run.py` + +All consumers (help text, autocomplete, Telegram menu, Slack mapping) derive from the central registry automatically. + +### Agent Loop (High Level) + +``` +run_conversation(): + 1. Build system prompt + 2. Loop while iterations < max: + a. Call LLM (OpenAI-format messages + tool schemas) + b. If tool_calls → dispatch each via handle_function_call() → append results → continue + c. If text response → return + 3. Context compression triggers automatically near token limit +``` + +### Testing + +```bash +source venv/bin/activate # or .venv/bin/activate +python -m pytest tests/ -o 'addopts=' -q # Full suite +python -m pytest tests/tools/ -q # Specific area +``` + +- Tests auto-redirect `HERMES_HOME` to temp dirs — never touch real `~/.hermes/` +- Run full suite before pushing any change +- Use `-o 'addopts='` to clear any baked-in pytest flags + +### Commit Conventions + +``` +type: concise subject line + +Optional body. +``` + +Types: `fix:`, `feat:`, `refactor:`, `docs:`, `chore:` + +### Key Rules + +- **Never break prompt caching** — don't change context, tools, or system prompt mid-conversation +- **Message role alternation** — never two assistant or two user messages in a row +- Use `get_hermes_home()` from `hermes_constants` for all paths (profile-safe) +- Config values go in `config.yaml`, secrets go in `.env` +- New tools need a `check_fn` so they only appear when requirements are met -- 2.43.0 From f8cb54ba0421ceac8518c6df90b7043fd15f00c5 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 14:56:35 -0700 Subject: [PATCH 102/385] fix(cli): anchor input prompt near bottom of terminal after responses (#4359) After short agent responses, the prompt_toolkit input area sat mid-screen with empty terminal space below it. 
Now prints padding newlines (half terminal height) after each response to push the prompt toward the bottom. patch_stdout renders the padding above the input area. --- cli.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cli.py b/cli.py index 2f6214989..b18e53077 100644 --- a/cli.py +++ b/cli.py @@ -7568,6 +7568,19 @@ class HermesCLI: finally: self._agent_running = False self._spinner_text = "" + + # Push the input prompt toward the bottom of the + # terminal so it doesn't sit mid-screen after short + # responses. patch_stdout renders these newlines + # above the input area, creating visual separation + # and anchoring the prompt near the bottom. + try: + _pad = shutil.get_terminal_size().lines // 2 + if _pad > 2: + _cprint("\n" * _pad) + except Exception: + pass + app.invalidate() # Refresh status line # Continuous voice: auto-restart recording after agent responds. -- 2.43.0 From 3604665e44817e735beeab6e9261a785059420bf Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 18:05:40 -0700 Subject: [PATCH 103/385] feat: add qwen/qwen3.6-plus-preview:free to OpenRouter and Nous model lists (#4376) --- hermes_cli/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index c8bd106b6..df58df02f 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -28,6 +28,7 @@ GITHUB_MODELS_CATALOG_URL = COPILOT_MODELS_URL OPENROUTER_MODELS: list[tuple[str, str]] = [ ("anthropic/claude-opus-4.6", "recommended"), ("anthropic/claude-sonnet-4.6", ""), + ("qwen/qwen3.6-plus-preview:free", "free"), ("anthropic/claude-sonnet-4.5", ""), ("anthropic/claude-haiku-4.5", ""), ("openai/gpt-5.4", ""), @@ -58,6 +59,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "nous": [ "anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", + "qwen/qwen3.6-plus-preview:free", "anthropic/claude-sonnet-4.5", "anthropic/claude-haiku-4.5", "openai/gpt-5.4", -- 2.43.0 From 
0a6d366327432f9ac3c3463839af7238a2d3fe9a Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 31 Mar 2026 18:52:11 -0700 Subject: [PATCH 104/385] fix(security): redact secrets from execute_code sandbox output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: root-level provider in config.yaml no longer overrides model.provider load_cli_config() had a priority inversion: a stale root-level 'provider' key in config.yaml would OVERRIDE the canonical 'model.provider' set by 'hermes model'. The gateway reads model.provider directly from YAML and worked correctly, but 'hermes chat -q' and the interactive CLI went through the merge logic and picked up the stale root-level key. Fix: root-level provider/base_url are now only used as a fallback when model.provider/model.base_url is not set (never as an override). Also added _normalize_root_model_keys() to config.py load_config() and save_config() — migrates root-level provider/base_url into the model section and removes the root-level keys permanently. Reported by (≧▽≦) in Discord: opencode-go provider persisted as a root-level key and overrode the correct model.provider=openrouter, causing 401 errors. * fix(security): redact secrets from execute_code sandbox output The execute_code sandbox stripped env vars with secret-like names from the child process (preventing os.environ access), but scripts could still read secrets from disk (e.g. open('~/.hermes/.env')) and print them to stdout. The raw values entered the model context unredacted. terminal_tool and file_tools already applied redact_sensitive_text() to their output — execute_code was the only tool that skipped this step. Now the same redaction runs on both stdout and stderr after ANSI stripping. Reported via Discord (not filed on GitHub to avoid public disclosure of the reproduction steps). 
--- tools/code_execution_tool.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 19270c6fe..ce78c9061 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -596,6 +596,14 @@ def execute_code( stdout_text = strip_ansi(stdout_text) stderr_text = strip_ansi(stderr_text) + # Redact secrets (API keys, tokens, etc.) from sandbox output. + # The sandbox env-var filter (lines 434-454) blocks os.environ access, + # but scripts can still read secrets from disk (e.g. open('~/.hermes/.env')). + # This ensures leaked secrets never enter the model context. + from agent.redact import redact_sensitive_text + stdout_text = redact_sensitive_text(stdout_text) + stderr_text = redact_sensitive_text(stderr_text) + # Build response result: Dict[str, Any] = { "status": status, -- 2.43.0 From f4d44c777b0661b4e254be4d1081fe56be893b31 Mon Sep 17 00:00:00 2001 From: Laura Batalha <5883822+lbatalha@users.noreply.github.com> Date: Tue, 31 Mar 2026 23:39:40 +0100 Subject: [PATCH 105/385] feat(discord): only create threads and reactions for authorized users --- gateway/platforms/discord.py | 104 ++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 50 deletions(-) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 168919b09..6146bb2bc 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -408,7 +408,7 @@ class VoiceReceiver: class DiscordAdapter(BasePlatformAdapter): """ Discord bot adapter. 
- + Handles: - Receiving messages from servers and DMs - Sending responses with Discord markdown @@ -418,10 +418,10 @@ class DiscordAdapter(BasePlatformAdapter): - Auto-threading for long conversations - Reaction-based feedback """ - + # Discord message limits MAX_MESSAGE_LENGTH = 2000 - + # Auto-disconnect from voice channel after this many seconds of inactivity VOICE_TIMEOUT = 300 @@ -449,7 +449,7 @@ class DiscordAdapter(BasePlatformAdapter): self._bot_task: Optional[asyncio.Task] = None # Cap to prevent unbounded growth (Discord threads get archived). self._MAX_TRACKED_THREADS = 500 - + async def connect(self) -> bool: """Connect to Discord and start receiving events.""" if not DISCORD_AVAILABLE: @@ -480,11 +480,11 @@ class DiscordAdapter(BasePlatformAdapter): logger.warning("Opus codec found at %s but failed to load", opus_path) if not discord.opus.is_loaded(): logger.warning("Opus codec not found — voice channel playback disabled") - + if not self.config.token: logger.error("[%s] No bot token configured", self.name) return False - + try: # Acquire scoped lock to prevent duplicate bot token usage from gateway.status import acquire_scoped_lock @@ -504,13 +504,13 @@ class DiscordAdapter(BasePlatformAdapter): intents.guild_messages = True intents.members = True intents.voice_states = True - + # Create bot self._client = commands.Bot( command_prefix="!", # Not really used, we handle raw messages intents=intents, ) - + # Parse allowed user entries (may contain usernames or IDs) allowed_env = os.getenv("DISCORD_ALLOWED_USERS", "") if allowed_env: @@ -518,17 +518,17 @@ class DiscordAdapter(BasePlatformAdapter): _clean_discord_id(uid) for uid in allowed_env.split(",") if uid.strip() } - + adapter_self = self # capture for closure - + # Register event handlers @self._client.event async def on_ready(): logger.info("[%s] Connected as %s", adapter_self.name, adapter_self._client.user) - + # Resolve any usernames in the allowed list to numeric IDs await 
adapter_self._resolve_allowed_usernames() - + # Sync slash commands with Discord try: synced = await adapter_self._client.tree.sync() @@ -536,18 +536,22 @@ class DiscordAdapter(BasePlatformAdapter): except Exception as e: # pragma: no cover - defensive logging logger.warning("[%s] Slash command sync failed: %s", adapter_self.name, e, exc_info=True) adapter_self._ready_event.set() - + @self._client.event async def on_message(message: DiscordMessage): # Always ignore our own messages if message.author == self._client.user: return - + # Ignore Discord system messages (thread renames, pins, member joins, etc.) # Allow both default and reply types — replies have a distinct MessageType. if message.type not in (discord.MessageType.default, discord.MessageType.reply): return - + + # Check if the message author is in the allowed user list + if not self._is_allowed_user(str(message.author.id)): + return + # Bot message filtering (DISCORD_ALLOW_BOTS): # "none" — ignore all other bots (default) # "mentions" — accept bot messages only when they @mention us @@ -560,7 +564,7 @@ class DiscordAdapter(BasePlatformAdapter): if not self._client.user or self._client.user not in message.mentions: return # "all" falls through to handle_message - + # If the message @mentions other users but NOT the bot, the # sender is talking to someone else — stay silent. 
Only # applies in server channels; in DMs the user is always @@ -614,23 +618,23 @@ class DiscordAdapter(BasePlatformAdapter): # Register slash commands self._register_slash_commands() - + # Start the bot in background self._bot_task = asyncio.create_task(self._client.start(self.config.token)) - + # Wait for ready await asyncio.wait_for(self._ready_event.wait(), timeout=30) - + self._running = True return True - + except asyncio.TimeoutError: logger.error("[%s] Timeout waiting for connection to Discord", self.name, exc_info=True) return False except Exception as e: # pragma: no cover - defensive logging logger.error("[%s] Failed to connect to Discord: %s", self.name, e, exc_info=True) return False - + async def disconnect(self) -> None: """Disconnect from Discord.""" # Clean up all active voice connections before closing the client @@ -703,7 +707,7 @@ class DiscordAdapter(BasePlatformAdapter): if hasattr(message, "add_reaction"): await self._remove_reaction(message, "👀") await self._add_reaction(message, "✅" if success else "❌") - + async def send( self, chat_id: str, @@ -720,24 +724,24 @@ class DiscordAdapter(BasePlatformAdapter): channel = self._client.get_channel(int(chat_id)) if not channel: channel = await self._client.fetch_channel(int(chat_id)) - + if not channel: return SendResult(success=False, error=f"Channel {chat_id} not found") - + # Format and split message if needed formatted = self.format_message(content) chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH) - + message_ids = [] reference = None - + if reply_to: try: ref_msg = await channel.fetch_message(int(reply_to)) reference = ref_msg except Exception as e: logger.debug("Could not fetch reply-to message: %s", e) - + for i, chunk in enumerate(chunks): chunk_reference = reference if i == 0 else None try: @@ -764,13 +768,13 @@ class DiscordAdapter(BasePlatformAdapter): else: raise message_ids.append(str(msg.id)) - + return SendResult( success=True, message_id=message_ids[0] if 
message_ids else None, raw_response={"message_ids": message_ids} ) - + except Exception as e: # pragma: no cover - defensive logging logger.error("[%s] Failed to send Discord message: %s", self.name, e, exc_info=True) return SendResult(success=False, error=str(e)) @@ -1242,25 +1246,25 @@ class DiscordAdapter(BasePlatformAdapter): """Send an image natively as a Discord file attachment.""" if not self._client: return SendResult(success=False, error="Not connected") - + try: import aiohttp - + channel = self._client.get_channel(int(chat_id)) if not channel: channel = await self._client.fetch_channel(int(chat_id)) if not channel: return SendResult(success=False, error=f"Channel {chat_id} not found") - + # Download the image and send as a Discord file attachment # (Discord renders attachments inline, unlike plain URLs) async with aiohttp.ClientSession() as session: async with session.get(image_url, timeout=aiohttp.ClientTimeout(total=30)) as resp: if resp.status != 200: raise Exception(f"Failed to download image: HTTP {resp.status}") - + image_data = await resp.read() - + # Determine filename from URL or content type content_type = resp.headers.get("content-type", "image/png") ext = "png" @@ -1270,16 +1274,16 @@ class DiscordAdapter(BasePlatformAdapter): ext = "gif" elif "webp" in content_type: ext = "webp" - + import io file = discord.File(io.BytesIO(image_data), filename=f"image.{ext}") - + msg = await channel.send( content=caption if caption else None, file=file, ) return SendResult(success=True, message_id=str(msg.id)) - + except ImportError: logger.warning( "[%s] aiohttp not installed, falling back to URL. 
Run: pip install aiohttp", @@ -1330,7 +1334,7 @@ class DiscordAdapter(BasePlatformAdapter): except Exception as e: # pragma: no cover - defensive logging logger.error("[%s] Failed to send document, falling back to base adapter: %s", self.name, e, exc_info=True) return await super().send_document(chat_id, file_path, caption, file_name, reply_to, metadata=metadata) - + async def send_typing(self, chat_id: str, metadata=None) -> None: """Start a persistent typing indicator for a channel. @@ -1374,20 +1378,20 @@ class DiscordAdapter(BasePlatformAdapter): await task except (asyncio.CancelledError, Exception): pass - + async def get_chat_info(self, chat_id: str) -> Dict[str, Any]: """Get information about a Discord channel.""" if not self._client: return {"name": "Unknown", "type": "dm"} - + try: channel = self._client.get_channel(int(chat_id)) if not channel: channel = await self._client.fetch_channel(int(chat_id)) - + if not channel: return {"name": str(chat_id), "type": "dm"} - + # Determine channel type if isinstance(channel, discord.DMChannel): chat_type = "dm" @@ -1403,7 +1407,7 @@ class DiscordAdapter(BasePlatformAdapter): else: chat_type = "channel" name = getattr(channel, "name", str(chat_id)) - + return { "name": name, "type": chat_type, @@ -1413,7 +1417,7 @@ class DiscordAdapter(BasePlatformAdapter): except Exception as e: # pragma: no cover - defensive logging logger.error("[%s] Failed to get chat info for %s: %s", self.name, chat_id, e, exc_info=True) return {"name": str(chat_id), "type": "dm", "error": str(e)} - + async def _resolve_allowed_usernames(self) -> None: """ Resolve non-numeric entries in DISCORD_ALLOWED_USERS to Discord user IDs. @@ -1481,7 +1485,7 @@ class DiscordAdapter(BasePlatformAdapter): def format_message(self, content: str) -> str: """ Format message for Discord. - + Discord uses its own markdown variant. 
""" # Discord markdown is fairly standard, no special escaping needed @@ -1647,7 +1651,7 @@ class DiscordAdapter(BasePlatformAdapter): chat_name = interaction.channel.name if hasattr(interaction.channel, "guild") and interaction.channel.guild: chat_name = f"{interaction.channel.guild.name} / #{chat_name}" - + # Get channel topic (if available) chat_topic = getattr(interaction.channel, "topic", None) @@ -2051,7 +2055,7 @@ class DiscordAdapter(BasePlatformAdapter): if doc_ext in SUPPORTED_DOCUMENT_TYPES: msg_type = MessageType.DOCUMENT break - + # When auto-threading kicked in, route responses to the new thread effective_channel = auto_threaded_channel or message.channel @@ -2070,7 +2074,7 @@ class DiscordAdapter(BasePlatformAdapter): # Get channel topic (if available - TextChannels have topics, DMs/threads don't) chat_topic = getattr(message.channel, "topic", None) - + # Build source source = self.build_source( chat_id=str(effective_channel.id), @@ -2081,7 +2085,7 @@ class DiscordAdapter(BasePlatformAdapter): thread_id=thread_id, chat_topic=chat_topic, ) - + # Build media URLs -- download image attachments to local cache so the # vision tool can access them reliably (Discord CDN URLs can expire). media_urls = [] @@ -2175,7 +2179,7 @@ class DiscordAdapter(BasePlatformAdapter): "[Discord] Failed to cache document %s: %s", att.filename, e, exc_info=True, ) - + event_text = message.content if pending_text_injection: event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection -- 2.43.0 From 83dec2b3ec0f6d0ddc5750f9a9e811a6a355a49f Mon Sep 17 00:00:00 2001 From: SHL0MS Date: Tue, 31 Mar 2026 12:07:28 -0400 Subject: [PATCH 106/385] fix: skip empty/whitespace text in Telegram send to prevent 400 errors Telegram API returns HTTP 400 when sent whitespace-only or empty text. Add a guard at the top of send() to silently succeed on blank content instead of crashing. Equivalent to OpenClaw #56620. 
--- gateway/platforms/telegram.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index db1b19431..e5e2885c7 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -742,6 +742,10 @@ class TelegramAdapter(BasePlatformAdapter): if not self._bot: return SendResult(success=False, error="Not connected") + # Skip whitespace-only text to prevent Telegram 400 empty-text errors. + if not content or not content.strip(): + return SendResult(success=True, message_id=None) + try: # Format and split message if needed formatted = self.format_message(content) -- 2.43.0 From ef2ae3e48fe08a59f377f03b402826763b1d26ab Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 1 Apr 2026 00:50:08 -0700 Subject: [PATCH 107/385] fix(file_tools): refresh staleness timestamp after writes (#4390) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a successful write_file or patch, update the stored read timestamp to match the file's new modification time. Without this, consecutive edits by the same task (read → write → write) would false-warn on the second write because the stored timestamp still reflected the original read, not the first write. Also renames the internal tracker key from 'file_mtimes' to 'read_timestamps' for clarity. 
--- tests/tools/test_file_staleness.py | 4 +-- tools/file_tools.py | 39 ++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/tests/tools/test_file_staleness.py b/tests/tools/test_file_staleness.py index 46e7aac9f..230493e33 100644 --- a/tests/tools/test_file_staleness.py +++ b/tests/tools/test_file_staleness.py @@ -221,7 +221,7 @@ class TestCheckFileStalenessHelper(unittest.TestCase): _read_tracker["t1"] = { "last_key": None, "consecutive": 0, "read_history": set(), "dedup": {}, - "file_mtimes": {"/tmp/other.py": 12345.0}, + "read_timestamps": {"/tmp/other.py": 12345.0}, } self.assertIsNone(_check_file_staleness("/tmp/x.py", "t1")) @@ -231,7 +231,7 @@ class TestCheckFileStalenessHelper(unittest.TestCase): _read_tracker["t1"] = { "last_key": None, "consecutive": 0, "read_history": set(), "dedup": {}, - "file_mtimes": {"/nonexistent/path": 99999.0}, + "read_timestamps": {"/nonexistent/path": 99999.0}, } # File doesn't exist → stat fails → returns None (let write handle it) self.assertIsNone(_check_file_staleness("/nonexistent/path", "t1")) diff --git a/tools/file_tools.py b/tools/file_tools.py index 07fb86d1a..79a111cb7 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -136,9 +136,12 @@ _file_ops_cache: dict = {} # Used to skip re-reads of unchanged files. Reset on # context compression (the original content is summarised # away so the model needs the full content again). -# "file_mtimes": dict mapping resolved_path → mtime float at last read. -# Used by write_file and patch to detect when a file was -# modified externally between the agent's read and write. +# "read_timestamps": dict mapping resolved_path → modification-time float +# recorded when the file was last read (or written) by +# this task. Used by write_file and patch to detect +# external changes between the agent's read and write. +# Updated after successful writes so consecutive edits +# by the same task don't trigger false warnings. 
_read_tracker_lock = threading.Lock() _read_tracker: dict = {} @@ -401,7 +404,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = try: _mtime_now = os.path.getmtime(resolved_str) task_data["dedup"][dedup_key] = _mtime_now - task_data.setdefault("file_mtimes", {})[resolved_str] = _mtime_now + task_data.setdefault("read_timestamps", {})[resolved_str] = _mtime_now except OSError: pass # Can't stat — skip tracking for this entry @@ -500,6 +503,24 @@ def notify_other_tool_call(task_id: str = "default"): task_data["consecutive"] = 0 +def _update_read_timestamp(filepath: str, task_id: str) -> None: + """Record the file's current modification time after a successful write. + + Called after write_file and patch so that consecutive edits by the + same task don't trigger false staleness warnings — each write + refreshes the stored timestamp to match the file's new state. + """ + try: + resolved = str(Path(filepath).expanduser().resolve()) + current_mtime = os.path.getmtime(resolved) + except (OSError, ValueError): + return + with _read_tracker_lock: + task_data = _read_tracker.get(task_id) + if task_data is not None: + task_data.setdefault("read_timestamps", {})[resolved] = current_mtime + + def _check_file_staleness(filepath: str, task_id: str) -> str | None: """Check whether a file was modified since the agent last read it. 
@@ -515,7 +536,7 @@ def _check_file_staleness(filepath: str, task_id: str) -> str | None: task_data = _read_tracker.get(task_id) if not task_data: return None - read_mtime = task_data.get("file_mtimes", {}).get(resolved) + read_mtime = task_data.get("read_timestamps", {}).get(resolved) if read_mtime is None: return None # File was never read — nothing to compare against try: @@ -543,6 +564,9 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str: result_dict = result.to_dict() if stale_warning: result_dict["_warning"] = stale_warning + # Refresh the stored timestamp so consecutive writes by this + # task don't trigger false staleness warnings. + _update_read_timestamp(path, task_id) return json.dumps(result_dict, ensure_ascii=False) except Exception as e: if _is_expected_write_exception(e): @@ -594,6 +618,11 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, result_dict = result.to_dict() if stale_warnings: result_dict["_warning"] = stale_warnings[0] if len(stale_warnings) == 1 else " | ".join(stale_warnings) + # Refresh stored timestamps for all successfully-patched paths so + # consecutive edits by this task don't trigger false warnings. + if not result_dict.get("error"): + for _p in _paths_to_check: + _update_read_timestamp(_p, task_id) result_json = json.dumps(result_dict, ensure_ascii=False) # Hint when old_string not found — saves iterations where the agent # retries with stale content instead of re-reading the file. 
-- 2.43.0 From a7f7e870705eb4eba8c47805094afdab102ee36d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 1 Apr 2026 01:02:34 -0700 Subject: [PATCH 108/385] fix: preserve credential_pool through smart routing and defer eager fallback on 429 (#4361) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three bugs prevented credential pool rotation from working when multiple Codex OAuth tokens were configured: 1. credential_pool was dropped during smart model turn routing. resolve_turn_route() constructed runtime dicts without it, so the AIAgent was created without pool access. Fixed in smart_model_routing.py (no-route and fallback paths), cli.py, and gateway/run.py. 2. Eager fallback fired before pool rotation on 429. The rate-limit handler at line ~7180 switched to a fallback provider immediately, before _recover_with_credential_pool got a chance to rotate to the next credential. Now deferred when the pool still has credentials. 3. (Non-issue) Retry budget was reported as too small, but successful pool rotations already skip retry_count increment — no change needed. Reported by community member Schinsly who identified all three root causes and verified the fix locally with multiple Codex accounts. 
--- agent/credential_pool.py | 4 + agent/smart_model_routing.py | 2 + cli.py | 1 + gateway/run.py | 1 + run_agent.py | 15 +- tests/test_credential_pool_routing.py | 350 ++++++++++++++++++++++++++ 6 files changed, 369 insertions(+), 4 deletions(-) create mode 100644 tests/test_credential_pool_routing.py diff --git a/agent/credential_pool.py b/agent/credential_pool.py index ad4dbcfc1..003a5a8e7 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -267,6 +267,10 @@ class CredentialPool: def has_credentials(self) -> bool: return bool(self._entries) + def has_available(self) -> bool: + """True if at least one entry is not currently in exhaustion cooldown.""" + return bool(self._available_entries()) + def entries(self) -> List[PooledCredential]: return list(self._entries) diff --git a/agent/smart_model_routing.py b/agent/smart_model_routing.py index d57cd1b83..ada865af0 100644 --- a/agent/smart_model_routing.py +++ b/agent/smart_model_routing.py @@ -127,6 +127,7 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any "api_mode": primary.get("api_mode"), "command": primary.get("command"), "args": list(primary.get("args") or []), + "credential_pool": primary.get("credential_pool"), }, "label": None, "signature": ( @@ -162,6 +163,7 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any "api_mode": primary.get("api_mode"), "command": primary.get("command"), "args": list(primary.get("args") or []), + "credential_pool": primary.get("credential_pool"), }, "label": None, "signature": ( diff --git a/cli.py b/cli.py index b18e53077..151ae4615 100644 --- a/cli.py +++ b/cli.py @@ -2024,6 +2024,7 @@ class HermesCLI: "api_mode": self.api_mode, "command": self.acp_command, "args": list(self.acp_args or []), + "credential_pool": getattr(self, "_credential_pool", None), }, ) diff --git a/gateway/run.py b/gateway/run.py index cc1a6666f..49135ce5a 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -788,6 +788,7 @@ 
class GatewayRunner: "api_mode": runtime_kwargs.get("api_mode"), "command": runtime_kwargs.get("command"), "args": list(runtime_kwargs.get("args") or []), + "credential_pool": runtime_kwargs.get("credential_pool"), } return resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary) diff --git a/run_agent.py b/run_agent.py index 5ed40500b..558a89457 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7178,10 +7178,17 @@ class AIAgent: or "quota" in error_msg ) if is_rate_limited and self._fallback_index < len(self._fallback_chain): - self._emit_status("⚠️ Rate limited — switching to fallback provider...") - if self._try_activate_fallback(): - retry_count = 0 - continue + # Don't eagerly fallback if credential pool rotation may + # still recover. The pool's retry-then-rotate cycle needs + # at least one more attempt to fire — jumping to a fallback + # provider here short-circuits it. + pool = self._credential_pool + pool_may_recover = pool is not None and pool.has_available() + if not pool_may_recover: + self._emit_status("⚠️ Rate limited — switching to fallback provider...") + if self._try_activate_fallback(): + retry_count = 0 + continue is_payload_too_large = ( status_code == 413 diff --git a/tests/test_credential_pool_routing.py b/tests/test_credential_pool_routing.py new file mode 100644 index 000000000..f4006a236 --- /dev/null +++ b/tests/test_credential_pool_routing.py @@ -0,0 +1,350 @@ +"""Tests for credential pool preservation through smart routing and 429 recovery. + +Covers: +1. credential_pool flows through resolve_turn_route (no-route and fallback paths) +2. CLI _resolve_turn_agent_config passes credential_pool to primary dict +3. Gateway _resolve_turn_agent_config passes credential_pool to primary dict +4. Eager fallback deferred when credential pool has credentials +5. Eager fallback fires when no credential pool exists +6. 
Full 429 rotation cycle: retry-same → rotate → exhaust → fallback +""" + +import os +import time +from types import SimpleNamespace +from unittest.mock import MagicMock, patch, PropertyMock + +import pytest + + +# --------------------------------------------------------------------------- +# 1. smart_model_routing: credential_pool preserved in no-route path +# --------------------------------------------------------------------------- + +class TestSmartRoutingPoolPreservation: + def test_no_route_preserves_credential_pool(self): + from agent.smart_model_routing import resolve_turn_route + + fake_pool = MagicMock(name="CredentialPool") + primary = { + "model": "gpt-5.4", + "api_key": "sk-test", + "base_url": None, + "provider": "openai-codex", + "api_mode": "codex_responses", + "command": None, + "args": [], + "credential_pool": fake_pool, + } + # routing disabled + result = resolve_turn_route("hello", None, primary) + assert result["runtime"]["credential_pool"] is fake_pool + + def test_no_route_none_pool(self): + from agent.smart_model_routing import resolve_turn_route + + primary = { + "model": "gpt-5.4", + "api_key": "sk-test", + "base_url": None, + "provider": "openai-codex", + "api_mode": "codex_responses", + "command": None, + "args": [], + } + result = resolve_turn_route("hello", None, primary) + assert result["runtime"]["credential_pool"] is None + + def test_routing_disabled_preserves_pool(self): + from agent.smart_model_routing import resolve_turn_route + + fake_pool = MagicMock(name="CredentialPool") + primary = { + "model": "gpt-5.4", + "api_key": "sk-test", + "base_url": None, + "provider": "openai-codex", + "api_mode": "codex_responses", + "command": None, + "args": [], + "credential_pool": fake_pool, + } + # routing explicitly disabled + result = resolve_turn_route("hello", {"enabled": False}, primary) + assert result["runtime"]["credential_pool"] is fake_pool + + def test_route_fallback_on_resolve_error_preserves_pool(self, monkeypatch): + """When 
smart routing picks a cheap model but resolve_runtime_provider + fails, the fallback to primary must still include credential_pool.""" + from agent.smart_model_routing import resolve_turn_route + + fake_pool = MagicMock(name="CredentialPool") + primary = { + "model": "gpt-5.4", + "api_key": "sk-test", + "base_url": None, + "provider": "openai-codex", + "api_mode": "codex_responses", + "command": None, + "args": [], + "credential_pool": fake_pool, + } + routing_config = { + "enabled": True, + "cheap_model": "openai/gpt-4.1-mini", + "cheap_provider": "openrouter", + "max_tokens": 200, + "patterns": ["^(hi|hello|hey)"], + } + # Force resolve_runtime_provider to fail so it falls back to primary + monkeypatch.setattr( + "hermes_cli.runtime_provider.resolve_runtime_provider", + MagicMock(side_effect=RuntimeError("no credentials")), + ) + result = resolve_turn_route("hi", routing_config, primary) + assert result["runtime"]["credential_pool"] is fake_pool + + +# --------------------------------------------------------------------------- +# 2 & 3. 
CLI and Gateway _resolve_turn_agent_config include credential_pool +# --------------------------------------------------------------------------- + +class TestCliTurnRoutePool: + def test_resolve_turn_includes_pool(self, monkeypatch, tmp_path): + """CLI's _resolve_turn_agent_config must pass credential_pool to primary.""" + from agent.smart_model_routing import resolve_turn_route + captured = {} + + def spy_resolve(user_message, routing_config, primary): + captured["primary"] = primary + return resolve_turn_route(user_message, routing_config, primary) + + monkeypatch.setattr( + "agent.smart_model_routing.resolve_turn_route", spy_resolve + ) + + # Build a minimal HermesCLI-like object with the method + shell = SimpleNamespace( + model="gpt-5.4", + api_key="sk-test", + base_url=None, + provider="openai-codex", + api_mode="codex_responses", + acp_command=None, + acp_args=[], + _credential_pool=MagicMock(name="FakePool"), + _smart_model_routing={"enabled": False}, + ) + + # Import and bind the real method + from cli import HermesCLI + bound = HermesCLI._resolve_turn_agent_config.__get__(shell) + bound("test message") + + assert "credential_pool" in captured["primary"] + assert captured["primary"]["credential_pool"] is shell._credential_pool + + +class TestGatewayTurnRoutePool: + def test_resolve_turn_includes_pool(self, monkeypatch): + """Gateway's _resolve_turn_agent_config must pass credential_pool.""" + from agent.smart_model_routing import resolve_turn_route + captured = {} + + def spy_resolve(user_message, routing_config, primary): + captured["primary"] = primary + return resolve_turn_route(user_message, routing_config, primary) + + monkeypatch.setattr( + "agent.smart_model_routing.resolve_turn_route", spy_resolve + ) + + from gateway.run import GatewayRunner + + runner = SimpleNamespace( + _smart_model_routing={"enabled": False}, + ) + + runtime_kwargs = { + "api_key": "sk-test", + "base_url": None, + "provider": "openai-codex", + "api_mode": "codex_responses", + 
"command": None, + "args": [], + "credential_pool": MagicMock(name="FakePool"), + } + + bound = GatewayRunner._resolve_turn_agent_config.__get__(runner) + bound("test message", "gpt-5.4", runtime_kwargs) + + assert "credential_pool" in captured["primary"] + assert captured["primary"]["credential_pool"] is runtime_kwargs["credential_pool"] + + +# --------------------------------------------------------------------------- +# 4 & 5. Eager fallback deferred/fires based on credential pool +# --------------------------------------------------------------------------- + +class TestEagerFallbackWithPool: + """Test the eager fallback guard in run_agent.py's error handling loop.""" + + def _make_agent(self, has_pool=True, pool_has_creds=True, has_fallback=True): + """Create a minimal AIAgent mock with the fields needed.""" + from run_agent import AIAgent + + with patch.object(AIAgent, "__init__", lambda self, **kw: None): + agent = AIAgent() + + agent._credential_pool = None + if has_pool: + pool = MagicMock() + pool.has_available.return_value = pool_has_creds + agent._credential_pool = pool + + agent._fallback_chain = [{"model": "fallback/model"}] if has_fallback else [] + agent._fallback_index = 0 + agent._try_activate_fallback = MagicMock(return_value=True) + agent._emit_status = MagicMock() + + return agent + + def test_eager_fallback_deferred_when_pool_has_credentials(self): + """429 with active pool should NOT trigger eager fallback.""" + agent = self._make_agent(has_pool=True, pool_has_creds=True, has_fallback=True) + + # Simulate the check from run_agent.py lines 7180-7191 + is_rate_limited = True + if is_rate_limited and agent._fallback_index < len(agent._fallback_chain): + pool = agent._credential_pool + pool_may_recover = pool is not None and pool.has_available() + if not pool_may_recover: + agent._try_activate_fallback() + + agent._try_activate_fallback.assert_not_called() + + def test_eager_fallback_fires_when_no_pool(self): + """429 without pool should trigger 
eager fallback.""" + agent = self._make_agent(has_pool=False, has_fallback=True) + + is_rate_limited = True + if is_rate_limited and agent._fallback_index < len(agent._fallback_chain): + pool = agent._credential_pool + pool_may_recover = pool is not None and pool.has_available() + if not pool_may_recover: + agent._try_activate_fallback() + + agent._try_activate_fallback.assert_called_once() + + def test_eager_fallback_fires_when_pool_exhausted(self): + """429 with exhausted pool should trigger eager fallback.""" + agent = self._make_agent(has_pool=True, pool_has_creds=False, has_fallback=True) + + is_rate_limited = True + if is_rate_limited and agent._fallback_index < len(agent._fallback_chain): + pool = agent._credential_pool + pool_may_recover = pool is not None and pool.has_available() + if not pool_may_recover: + agent._try_activate_fallback() + + agent._try_activate_fallback.assert_called_once() + + +# --------------------------------------------------------------------------- +# 6. 
Full 429 rotation cycle via _recover_with_credential_pool +# --------------------------------------------------------------------------- + +class TestPoolRotationCycle: + """Verify the retry-same → rotate → exhaust flow in _recover_with_credential_pool.""" + + def _make_agent_with_pool(self, pool_entries=3): + from run_agent import AIAgent + + with patch.object(AIAgent, "__init__", lambda self, **kw: None): + agent = AIAgent() + + entries = [] + for i in range(pool_entries): + e = MagicMock(name=f"entry_{i}") + e.id = f"cred-{i}" + entries.append(e) + + pool = MagicMock() + pool.has_credentials.return_value = True + + # mark_exhausted_and_rotate returns next entry until exhausted + self._rotation_index = 0 + + def rotate(status_code=None): + self._rotation_index += 1 + if self._rotation_index < pool_entries: + return entries[self._rotation_index] + pool.has_credentials.return_value = False + return None + + pool.mark_exhausted_and_rotate = MagicMock(side_effect=rotate) + agent._credential_pool = pool + agent._swap_credential = MagicMock() + agent.log_prefix = "" + + return agent, pool, entries + + def test_first_429_sets_retry_flag_no_rotation(self): + """First 429 should just set has_retried_429=True, no rotation.""" + agent, pool, _ = self._make_agent_with_pool(3) + recovered, has_retried = agent._recover_with_credential_pool( + status_code=429, has_retried_429=False + ) + assert recovered is False + assert has_retried is True + pool.mark_exhausted_and_rotate.assert_not_called() + + def test_second_429_rotates_to_next(self): + """Second consecutive 429 should rotate to next credential.""" + agent, pool, entries = self._make_agent_with_pool(3) + recovered, has_retried = agent._recover_with_credential_pool( + status_code=429, has_retried_429=True + ) + assert recovered is True + assert has_retried is False # reset after rotation + pool.mark_exhausted_and_rotate.assert_called_once_with(status_code=429) + agent._swap_credential.assert_called_once_with(entries[1]) + + 
def test_pool_exhaustion_returns_false(self): + """When all credentials exhausted, recovery should return False.""" + agent, pool, _ = self._make_agent_with_pool(1) + # First 429 sets flag + _, has_retried = agent._recover_with_credential_pool( + status_code=429, has_retried_429=False + ) + assert has_retried is True + + # Second 429 tries to rotate but pool is exhausted (only 1 entry) + recovered, _ = agent._recover_with_credential_pool( + status_code=429, has_retried_429=True + ) + assert recovered is False + + def test_402_immediate_rotation(self): + """402 (billing) should immediately rotate, no retry-first.""" + agent, pool, entries = self._make_agent_with_pool(3) + recovered, has_retried = agent._recover_with_credential_pool( + status_code=402, has_retried_429=False + ) + assert recovered is True + assert has_retried is False + pool.mark_exhausted_and_rotate.assert_called_once_with(status_code=402) + + def test_no_pool_returns_false(self): + """No pool should return (False, unchanged).""" + from run_agent import AIAgent + + with patch.object(AIAgent, "__init__", lambda self, **kw: None): + agent = AIAgent() + agent._credential_pool = None + + recovered, has_retried = agent._recover_with_credential_pool( + status_code=429, has_retried_429=False + ) + assert recovered is False + assert has_retried is False -- 2.43.0 From 9b99ea176e52c5daf319d1fe4e81689b29834807 Mon Sep 17 00:00:00 2001 From: Johannnnn506 Date: Tue, 31 Mar 2026 16:08:29 -0400 Subject: [PATCH 109/385] fix(cli): initialize ctx_len before compact banner path --- cli.py | 11 ++++++----- tests/test_cli_context_warning.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/cli.py b/cli.py index 151ae4615..0469d09b4 100644 --- a/cli.py +++ b/cli.py @@ -2163,6 +2163,12 @@ class HermesCLI: def show_banner(self): """Display the welcome banner in Claude Code style.""" self.console.clear() + + # Get context length for display before branching so it remains + # available to the 
low-context warning logic in compact mode too. + ctx_len = None + if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'): + ctx_len = self.agent.context_compressor.context_length # Auto-compact for narrow terminals — the full banner with caduceus # + tool list needs ~80 columns minimum to render without wrapping. @@ -2179,11 +2185,6 @@ class HermesCLI: # Get terminal working directory (where commands will execute) cwd = os.getenv("TERMINAL_CWD", os.getcwd()) - # Get context length for display - ctx_len = None - if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'): - ctx_len = self.agent.context_compressor.context_length - # Build and display the banner build_welcome_banner( console=self.console, diff --git a/tests/test_cli_context_warning.py b/tests/test_cli_context_warning.py index fa0305a27..abf9c1349 100644 --- a/tests/test_cli_context_warning.py +++ b/tests/test_cli_context_warning.py @@ -145,3 +145,15 @@ class TestLowContextWarning: calls = [str(c) for c in cli_obj.console.print.call_args_list] warning_calls = [c for c in calls if "too low" in c] assert len(warning_calls) == 0 + + def test_compact_banner_does_not_crash_on_narrow_terminal(self, cli_obj): + """Compact mode should still have ctx_len defined for warning logic.""" + cli_obj.agent.context_compressor.context_length = 4096 + + with patch("shutil.get_terminal_size", return_value=os.terminal_size((70, 40))), \ + patch("cli._build_compact_banner", return_value="compact banner"): + cli_obj.show_banner() + + calls = [str(c) for c in cli_obj.console.print.call_args_list] + warning_calls = [c for c in calls if "too low" in c] + assert len(warning_calls) == 1 -- 2.43.0 From efa327a99806c6857660ea511721ab9cf3226cef Mon Sep 17 00:00:00 2001 From: Teknium Date: Wed, 1 Apr 2026 01:06:21 -0700 Subject: [PATCH 110/385] fix: add missing provider attrs to cli_obj test fixture _show_status() now references self.provider and self._provider_source, added 
after the original PR was submitted. --- tests/test_cli_context_warning.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_cli_context_warning.py b/tests/test_cli_context_warning.py index abf9c1349..bf0c5aac4 100644 --- a/tests/test_cli_context_warning.py +++ b/tests/test_cli_context_warning.py @@ -32,6 +32,8 @@ def cli_obj(_isolate): obj.session_id = None obj.api_key = "test" obj.base_url = "" + obj.provider = "test" + obj._provider_source = None # Mock agent with context compressor obj.agent = SimpleNamespace( context_compressor=SimpleNamespace(context_length=None) -- 2.43.0 From 7baee0b023394d38360c4518f2ce70bc71aee8c3 Mon Sep 17 00:00:00 2001 From: Smyile <84925446+davidetacchini@users.noreply.github.com> Date: Tue, 31 Mar 2026 10:52:57 +0200 Subject: [PATCH 111/385] fix(docs): restrict backdrop-filter to desktop to fix mobile sidebar backdrop-filter on .navbar creates a new CSS stacking context that hides .navbar-sidebar menu content on mobile (only the close button is visible). Scope the blur effect to min-width: 997px so it only applies on desktop where the sidebar is not rendered inside the navbar. Ref: facebook/docusaurus#6996, facebook/docusaurus#6853 --- website/src/css/custom.css | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/website/src/css/custom.css b/website/src/css/custom.css index 7c7000391..469c6792e 100644 --- a/website/src/css/custom.css +++ b/website/src/css/custom.css @@ -63,10 +63,18 @@ /* Navbar styling */ .navbar { - backdrop-filter: blur(12px); border-bottom: 1px solid rgba(255, 215, 0, 0.08); } +/* Frosted-glass blur — desktop only. + On mobile, backdrop-filter creates a stacking context that hides + the navbar-sidebar menu content (Docusaurus #6996). 
*/ +@media (min-width: 997px) { + .navbar { + backdrop-filter: blur(12px); + } +} + .navbar__title { font-weight: 600; letter-spacing: -0.02em; -- 2.43.0 From 8327f7cc611a874d7a009275766ac0335bc66403 Mon Sep 17 00:00:00 2001 From: Smyile <84925446+davidetacchini@users.noreply.github.com> Date: Tue, 31 Mar 2026 10:56:23 +0200 Subject: [PATCH 112/385] fix(docs): use compound selector instead of media query Target the exact state that breaks: when .navbar-sidebar--show is active on the same