diff --git a/agent/smart_model_routing.py b/agent/smart_model_routing.py new file mode 100644 index 000000000..249548701 --- /dev/null +++ b/agent/smart_model_routing.py @@ -0,0 +1,184 @@ +"""Helpers for optional cheap-vs-strong model routing.""" + +from __future__ import annotations + +import os +import re +from typing import Any, Dict, Optional + +_COMPLEX_KEYWORDS = { + "debug", + "debugging", + "implement", + "implementation", + "refactor", + "patch", + "traceback", + "stacktrace", + "exception", + "error", + "analyze", + "analysis", + "investigate", + "architecture", + "design", + "compare", + "benchmark", + "optimize", + "optimise", + "review", + "terminal", + "shell", + "tool", + "tools", + "pytest", + "test", + "tests", + "plan", + "planning", + "delegate", + "subagent", + "cron", + "docker", + "kubernetes", +} + +_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE) + + +def _coerce_bool(value: Any, default: bool = False) -> bool: + if value is None: + return default + if isinstance(value, bool): + return value + if isinstance(value, str): + return value.strip().lower() in {"1", "true", "yes", "on"} + return bool(value) + + +def _coerce_int(value: Any, default: int) -> int: + try: + return int(value) + except (TypeError, ValueError): + return default + + +def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + """Return the configured cheap-model route when a message looks simple. + + Conservative by design: if the message has signs of code/tool/debugging/ + long-form work, keep the primary model. + """ + cfg = routing_config or {} + if not _coerce_bool(cfg.get("enabled"), False): + return None + + cheap_model = cfg.get("cheap_model") or {} + if not isinstance(cheap_model, dict): + return None + provider = str(cheap_model.get("provider") or "").strip().lower() + model = str(cheap_model.get("model") or "").strip() + if not provider or not model: + return None + + text = (user_message or "").strip() + if not text: + return None + + max_chars = _coerce_int(cfg.get("max_simple_chars"), 160) + max_words = _coerce_int(cfg.get("max_simple_words"), 28) + + if len(text) > max_chars: + return None + if len(text.split()) > max_words: + return None + if text.count("\n") > 1: + return None + if "```" in text or "`" in text: + return None + if _URL_RE.search(text): + return None + + lowered = text.lower() + words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()} + if words & _COMPLEX_KEYWORDS: + return None + + route = dict(cheap_model) + route["provider"] = provider + route["model"] = model + route["routing_reason"] = "simple_turn" + return route + + +def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]: + """Resolve the effective model/runtime for one turn. + + Returns a dict with model/runtime/signature/label fields. + """ + route = choose_cheap_model_route(user_message, routing_config) + if not route: + return { + "model": primary.get("model"), + "runtime": { + "api_key": primary.get("api_key"), + "base_url": primary.get("base_url"), + "provider": primary.get("provider"), + "api_mode": primary.get("api_mode"), + }, + "label": None, + "signature": ( + primary.get("model"), + primary.get("provider"), + primary.get("base_url"), + primary.get("api_mode"), + ), + } + + from hermes_cli.runtime_provider import resolve_runtime_provider + + explicit_api_key = None + api_key_env = str(route.get("api_key_env") or "").strip() + if api_key_env: + explicit_api_key = os.getenv(api_key_env) or None + + try: + runtime = resolve_runtime_provider( + requested=route.get("provider"), + explicit_api_key=explicit_api_key, + explicit_base_url=route.get("base_url"), + ) + except Exception: + return { + "model": primary.get("model"), + "runtime": { + "api_key": primary.get("api_key"), + "base_url": primary.get("base_url"), + "provider": primary.get("provider"), + "api_mode": primary.get("api_mode"), + }, + "label": None, + "signature": ( + primary.get("model"), + primary.get("provider"), + primary.get("base_url"), + primary.get("api_mode"), + ), + } + + return { + "model": route.get("model"), + "runtime": { + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + }, + "label": f"smart route → {route.get('model')} ({runtime.get('provider')})", + "signature": ( + route.get("model"), + runtime.get("provider"), + runtime.get("base_url"), + runtime.get("api_mode"), + ), + } diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 0c947e479..681405c44 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -51,6 +51,20 @@ model: # # Data policy: "allow" (default) or "deny" to exclude providers that may store data # # data_collection: "deny" +# ============================================================================= +# Smart Model Routing (optional) +# ============================================================================= +# Use a cheaper model for short/simple turns while keeping your main model for +# more complex requests. Disabled by default. +# +# smart_model_routing: +# enabled: true +# max_simple_chars: 160 +# max_simple_words: 28 +# cheap_model: +# provider: openrouter +# model: google/gemini-2.5-flash + # ============================================================================= # Git Worktree Isolation # ============================================================================= diff --git a/cli.py b/cli.py index 7716d6f57..eb0d7b101 100755 --- a/cli.py +++ b/cli.py @@ -176,6 +176,12 @@ def load_cli_config() -> Dict[str, Any]: "threshold": 0.50, # Compress at 50% of model's context limit "summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries }, + "smart_model_routing": { + "enabled": False, + "max_simple_chars": 160, + "max_simple_words": 28, + "cheap_model": {}, + }, "agent": { "max_turns": 90, # Default max tool-calling iterations (shared with subagents) "verbose": False, @@ -1126,6 +1132,10 @@ class HermesCLI: fb = CLI_CONFIG.get("fallback_model") or {} self._fallback_model = fb if fb.get("provider") and fb.get("model") else None + # Optional cheap-vs-strong routing for simple turns + self._smart_model_routing = CLI_CONFIG.get("smart_model_routing", {}) or {} + self._active_agent_route_signature = None + # Agent will be initialized on first use self.agent: Optional[AIAgent] = None self._app = None # prompt_toolkit Application (set in run()) @@ -1537,10 +1547,27 @@ class HermesCLI: # routing, or the effective model changed. if (credentials_changed or routing_changed or model_changed) and self.agent is not None: self.agent = None + self._active_agent_route_signature = None return True - def _init_agent(self) -> bool: + def _resolve_turn_agent_config(self, user_message: str) -> dict: + """Resolve model/runtime overrides for a single user turn.""" + from agent.smart_model_routing import resolve_turn_route + + return resolve_turn_route( + user_message, + self._smart_model_routing, + { + "model": self.model, + "api_key": self.api_key, + "base_url": self.base_url, + "provider": self.provider, + "api_mode": self.api_mode, + }, + ) + + def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None) -> bool: """ Initialize the agent on first use. When resuming a session, restores conversation history from SQLite. @@ -1600,12 +1627,19 @@ class HermesCLI: pass try: + runtime = runtime_override or { + "api_key": self.api_key, + "base_url": self.base_url, + "provider": self.provider, + "api_mode": self.api_mode, + } + effective_model = model_override or self.model self.agent = AIAgent( - model=self.model, - api_key=self.api_key, - base_url=self.base_url, - provider=self.provider, - api_mode=self.api_mode, + model=effective_model, + api_key=runtime.get("api_key"), + base_url=runtime.get("base_url"), + provider=runtime.get("provider"), + api_mode=runtime.get("api_mode"), max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, verbose_logging=self.verbose, @@ -1632,7 +1666,13 @@ class HermesCLI: pass_session_id=self.pass_session_id, tool_progress_callback=self._on_tool_progress, ) - # Apply any pending title now that the session exists in the DB + self._active_agent_route_signature = ( + effective_model, + runtime.get("provider"), + runtime.get("base_url"), + runtime.get("api_mode"), + ) + if self._pending_title and self._session_db: try: self._session_db.set_session_title(self.session_id, self._pending_title) @@ -3455,14 +3495,16 @@ class HermesCLI: _cprint(f" Task ID: {task_id}") _cprint(f" You can continue chatting — results will appear when done.\n") + turn_route = self._resolve_turn_agent_config(prompt) + def run_background(): try: bg_agent = AIAgent( - model=self.model, - api_key=self.api_key, - base_url=self.base_url, - provider=self.provider, - api_mode=self.api_mode, + model=turn_route["model"], + api_key=turn_route["runtime"].get("api_key"), + base_url=turn_route["runtime"].get("base_url"), + provider=turn_route["runtime"].get("provider"), + api_mode=turn_route["runtime"].get("api_mode"), max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, quiet_mode=True, @@ -4886,8 +4928,16 @@ class HermesCLI: if not self._ensure_runtime_credentials(): return None + turn_route = self._resolve_turn_agent_config(message) + if turn_route["signature"] != self._active_agent_route_signature: + self.agent = None + # Initialize agent if needed - if not self._init_agent(): + if not self._init_agent( + model_override=turn_route["model"], + runtime_override=turn_route["runtime"], + route_label=turn_route["label"], + ): return None # Pre-process images through the vision tool (Gemini Flash) so the @@ -6616,13 +6666,21 @@ def main( # Quiet mode: suppress banner, spinner, tool previews. # Only print the final response and parseable session info. cli.tool_progress_mode = "off" - if cli._init_agent(): - cli.agent.quiet_mode = True - result = cli.agent.run_conversation(query) - response = result.get("final_response", "") if isinstance(result, dict) else str(result) - if response: - print(response) - print(f"\nsession_id: {cli.session_id}") + if cli._ensure_runtime_credentials(): + turn_route = cli._resolve_turn_agent_config(query) + if turn_route["signature"] != cli._active_agent_route_signature: + cli.agent = None + if cli._init_agent( + model_override=turn_route["model"], + runtime_override=turn_route["runtime"], + route_label=turn_route["label"], + ): + cli.agent.quiet_mode = True + result = cli.agent.run_conversation(query) + response = result.get("final_response", "") if isinstance(result, dict) else str(result) + if response: + print(response) + print(f"\nsession_id: {cli.session_id}") else: cli.show_banner() cli.console.print(f"[bold blue]Query:[/] {query}") diff --git a/cron/scheduler.py b/cron/scheduler.py index 8d75e1a95..ded88ef53 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -315,6 +315,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: # Provider routing pr = _cfg.get("provider_routing", {}) + smart_routing = _cfg.get("smart_model_routing", {}) or {} from hermes_cli.runtime_provider import ( resolve_runtime_provider, @@ -331,12 +332,25 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: message = format_runtime_provider_error(exc) raise RuntimeError(message) from exc + from agent.smart_model_routing import resolve_turn_route + turn_route = resolve_turn_route( + prompt, + smart_routing, + { + "model": model, + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + }, + ) + agent = AIAgent( - model=model, - api_key=runtime.get("api_key"), - base_url=runtime.get("base_url"), - provider=runtime.get("provider"), - api_mode=runtime.get("api_mode"), + model=turn_route["model"], + api_key=turn_route["runtime"].get("api_key"), + base_url=turn_route["runtime"].get("base_url"), + provider=turn_route["runtime"].get("provider"), + api_mode=turn_route["runtime"].get("api_mode"), max_iterations=max_iterations, reasoning_config=reasoning_config, prefill_messages=prefill_messages, diff --git a/gateway/run.py b/gateway/run.py index 50e68eaad..5c0b8b48c 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -324,6 +324,7 @@ class GatewayRunner: self._show_reasoning = self._load_show_reasoning() self._provider_routing = self._load_provider_routing() self._fallback_model = self._load_fallback_model() + self._smart_model_routing = self._load_smart_model_routing() # Wire process registry into session store for reset protection from tools.process_registry import process_registry @@ -593,6 +594,18 @@ class GatewayRunner: group_sessions_per_user=getattr(config, "group_sessions_per_user", True), ) + def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict: + from agent.smart_model_routing import resolve_turn_route + + primary = { + "model": model, + "api_key": runtime_kwargs.get("api_key"), + "base_url": runtime_kwargs.get("base_url"), + "provider": runtime_kwargs.get("provider"), + "api_mode": runtime_kwargs.get("api_mode"), + } + return resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary) + async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None: """React to a non-retryable adapter failure after startup.""" logger.error( @@ -795,6 +808,20 @@ class GatewayRunner: pass return None + @staticmethod + def _load_smart_model_routing() -> dict: + """Load optional smart cheap-vs-strong model routing config.""" + try: + import yaml as _y + cfg_path = _hermes_home / "config.yaml" + if cfg_path.exists(): + with open(cfg_path, encoding="utf-8") as _f: + cfg = _y.safe_load(_f) or {} + return cfg.get("smart_model_routing", {}) or {} + except Exception: + pass + return {} + async def start(self) -> bool: """ Start the gateway and all configured platform adapters. @@ -2931,11 +2958,12 @@ class GatewayRunner: max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90")) reasoning_config = self._load_reasoning_config() self._reasoning_config = reasoning_config + turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs) def run_sync(): agent = AIAgent( - model=model, - **runtime_kwargs, + model=turn_route["model"], + **turn_route["runtime"], max_iterations=max_iterations, quiet_mode=True, verbose_logging=False, @@ -4169,9 +4197,10 @@ class GatewayRunner: honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key) reasoning_config = self._load_reasoning_config() self._reasoning_config = reasoning_config + turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs) agent = AIAgent( - model=model, - **runtime_kwargs, + model=turn_route["model"], + **turn_route["runtime"], max_iterations=max_iterations, quiet_mode=True, verbose_logging=False, diff --git a/hermes_cli/config.py b/hermes_cli/config.py index ee5e54470..1a46a80bb 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -147,6 +147,12 @@ DEFAULT_CONFIG = { "summary_model": "google/gemini-3-flash-preview", "summary_provider": "auto", }, + "smart_model_routing": { + "enabled": False, + "max_simple_chars": 160, + "max_simple_words": 28, + "cheap_model": {}, + }, # Auxiliary model config — provider:model for each side task. # Format: provider is the provider name, model is the model slug. @@ -1010,6 +1016,19 @@ _FALLBACK_COMMENT = """ # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 +# +# ── Smart Model Routing ──────────────────────────────────────────────── +# Optional cheap-vs-strong routing for simple turns. +# Keeps the primary model for complex work, but can route short/simple +# messages to a cheaper model across providers. +# +# smart_model_routing: +# enabled: true +# max_simple_chars: 160 +# max_simple_words: 28 +# cheap_model: +# provider: openrouter +# model: google/gemini-2.5-flash """ @@ -1040,6 +1059,19 @@ _COMMENTED_SECTIONS = """ # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 +# +# ── Smart Model Routing ──────────────────────────────────────────────── +# Optional cheap-vs-strong routing for simple turns. +# Keeps the primary model for complex work, but can route short/simple +# messages to a cheaper model across providers. +# +# smart_model_routing: +# enabled: true +# max_simple_chars: 160 +# max_simple_words: 28 +# cheap_model: +# provider: openrouter +# model: google/gemini-2.5-flash """ diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 2399436c4..4bc068c1e 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -705,6 +705,7 @@ def generate_launchd_plist() -> str: hermes_cli.main gateway run + --replace WorkingDirectory @@ -728,6 +729,36 @@ def generate_launchd_plist() -> str: """ +def launchd_plist_is_current() -> bool: + """Check if the installed launchd plist matches the currently generated one.""" + plist_path = get_launchd_plist_path() + if not plist_path.exists(): + return False + + installed = plist_path.read_text(encoding="utf-8") + expected = generate_launchd_plist() + return _normalize_service_definition(installed) == _normalize_service_definition(expected) + + +def refresh_launchd_plist_if_needed() -> bool: + """Rewrite the installed launchd plist when the generated definition has changed. + + Unlike systemd, launchd picks up plist changes on the next ``launchctl stop``/ + ``launchctl start`` cycle — no daemon-reload is needed. We still unload/reload + to make launchd re-read the updated plist immediately. + """ + plist_path = get_launchd_plist_path() + if not plist_path.exists() or launchd_plist_is_current(): + return False + + plist_path.write_text(generate_launchd_plist(), encoding="utf-8") + # Unload/reload so launchd picks up the new definition + subprocess.run(["launchctl", "unload", str(plist_path)], check=False) + subprocess.run(["launchctl", "load", str(plist_path)], check=False) + print("↻ Updated gateway launchd service definition to match the current Hermes install") + return True + + def launchd_install(force: bool = False): plist_path = get_launchd_plist_path() @@ -760,6 +791,7 @@ def launchd_uninstall(): print("✓ Service uninstalled") def launchd_start(): + refresh_launchd_plist_if_needed() subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True) print("✓ Service started") @@ -768,6 +800,7 @@ def launchd_stop(): print("✓ Service stopped") def launchd_restart(): + refresh_launchd_plist_if_needed() launchd_stop() launchd_start() diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 2cb220df6..845ae207e 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2306,12 +2306,16 @@ def cmd_update(args): # installation's gateway — safe with multiple installations. try: from gateway.status import get_running_pid, remove_pid_file - from hermes_cli.gateway import get_service_name + from hermes_cli.gateway import ( + get_service_name, get_launchd_plist_path, is_macos, + refresh_launchd_plist_if_needed, + ) import signal as _signal _gw_service_name = get_service_name() existing_pid = get_running_pid() has_systemd_service = False + has_launchd_service = False try: check = subprocess.run( @@ -2322,23 +2326,36 @@ def cmd_update(args): except (FileNotFoundError, subprocess.TimeoutExpired): pass - if existing_pid or has_systemd_service: + # Check for macOS launchd service + if is_macos(): + try: + plist_path = get_launchd_plist_path() + if plist_path.exists(): + check = subprocess.run( + ["launchctl", "list", "ai.hermes.gateway"], + capture_output=True, text=True, timeout=5, + ) + has_launchd_service = check.returncode == 0 + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + if existing_pid or has_systemd_service or has_launchd_service: print() - # Kill the PID-file-tracked process (may be manual or systemd) - if existing_pid: - try: - os.kill(existing_pid, _signal.SIGTERM) - print(f"→ Stopped gateway process (PID {existing_pid})") - except ProcessLookupError: - pass # Already gone - except PermissionError: - print(f"⚠ Permission denied killing gateway PID {existing_pid}") - remove_pid_file() - - # Restart the systemd service (starts a fresh process) + # When a service manager is handling the gateway, let it + # manage the lifecycle — don't manually SIGTERM the PID + # (launchd KeepAlive would respawn immediately, causing races). if has_systemd_service: import time as _time + if existing_pid: + try: + os.kill(existing_pid, _signal.SIGTERM) + print(f"→ Stopped gateway process (PID {existing_pid})") + except ProcessLookupError: + pass + except PermissionError: + print(f"⚠ Permission denied killing gateway PID {existing_pid}") + remove_pid_file() _time.sleep(1) # Brief pause for port/socket release print("→ Restarting gateway service...") restart = subprocess.run( @@ -2350,7 +2367,36 @@ def cmd_update(args): else: print(f"⚠ Gateway restart failed: {restart.stderr.strip()}") print(" Try manually: hermes gateway restart") + elif has_launchd_service: + # Refresh the plist first (picks up --replace and other + # changes from the update we just pulled). + refresh_launchd_plist_if_needed() + # Explicit stop+start — don't rely on KeepAlive respawn + # after a manual SIGTERM, which would race with the + # PID file cleanup. + print("→ Restarting gateway service...") + stop = subprocess.run( + ["launchctl", "stop", "ai.hermes.gateway"], + capture_output=True, text=True, timeout=10, + ) + start = subprocess.run( + ["launchctl", "start", "ai.hermes.gateway"], + capture_output=True, text=True, timeout=10, + ) + if start.returncode == 0: + print("✓ Gateway restarted via launchd.") + else: + print(f"⚠ Gateway restart failed: {start.stderr.strip()}") + print(" Try manually: hermes gateway restart") elif existing_pid: + try: + os.kill(existing_pid, _signal.SIGTERM) + print(f"→ Stopped gateway process (PID {existing_pid})") + except ProcessLookupError: + pass # Already gone + except PermissionError: + print(f"⚠ Permission denied killing gateway PID {existing_pid}") + remove_pid_file() print(" ℹ️ Gateway was running manually (not as a service).") print(" Restart it with: hermes gateway run") except Exception as e: diff --git a/tests/agent/test_smart_model_routing.py b/tests/agent/test_smart_model_routing.py new file mode 100644 index 000000000..7e9025609 --- /dev/null +++ b/tests/agent/test_smart_model_routing.py @@ -0,0 +1,61 @@ +from agent.smart_model_routing import choose_cheap_model_route + + +_BASE_CONFIG = { + "enabled": True, + "cheap_model": { + "provider": "openrouter", + "model": "google/gemini-2.5-flash", + }, +} + + +def test_returns_none_when_disabled(): + cfg = {**_BASE_CONFIG, "enabled": False} + assert choose_cheap_model_route("what time is it in tokyo?", cfg) is None + + +def test_routes_short_simple_prompt(): + result = choose_cheap_model_route("what time is it in tokyo?", _BASE_CONFIG) + assert result is not None + assert result["provider"] == "openrouter" + assert result["model"] == "google/gemini-2.5-flash" + assert result["routing_reason"] == "simple_turn" + + +def test_skips_long_prompt(): + prompt = "please summarize this carefully " * 20 + assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None + + +def test_skips_code_like_prompt(): + prompt = "debug this traceback: ```python\nraise ValueError('bad')\n```" + assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None + + +def test_skips_tool_heavy_prompt_keywords(): + prompt = "implement a patch for this docker error" + assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None + + +def test_resolve_turn_route_falls_back_to_primary_when_route_runtime_cannot_be_resolved(monkeypatch): + from agent.smart_model_routing import resolve_turn_route + + monkeypatch.setattr( + "hermes_cli.runtime_provider.resolve_runtime_provider", + lambda **kwargs: (_ for _ in ()).throw(RuntimeError("bad route")), + ) + result = resolve_turn_route( + "what time is it in tokyo?", + _BASE_CONFIG, + { + "model": "anthropic/claude-sonnet-4", + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_mode": "chat_completions", + "api_key": "sk-primary", + }, + ) + assert result["model"] == "anthropic/claude-sonnet-4" + assert result["runtime"]["provider"] == "openrouter" + assert result["label"] is None diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py new file mode 100644 index 000000000..b9cdecaa0 --- /dev/null +++ b/tests/hermes_cli/test_update_gateway_restart.py @@ -0,0 +1,305 @@ +"""Tests for cmd_update gateway auto-restart — systemd + launchd coverage. + +Ensures ``hermes update`` correctly detects running gateways managed by +systemd (Linux) or launchd (macOS) and restarts/informs the user properly, +rather than leaving zombie processes or telling users to manually restart +when launchd will auto-respawn. +""" + +import subprocess +from types import SimpleNamespace +from unittest.mock import patch, MagicMock + +import pytest + +import hermes_cli.gateway as gateway_cli +from hermes_cli.main import cmd_update + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_run_side_effect( + branch="main", + verify_ok=True, + commit_count="3", + systemd_active=False, + launchctl_loaded=False, +): + """Build a subprocess.run side_effect that simulates git + service commands.""" + + def side_effect(cmd, **kwargs): + joined = " ".join(str(c) for c in cmd) + + # git rev-parse --abbrev-ref HEAD + if "rev-parse" in joined and "--abbrev-ref" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout=f"{branch}\n", stderr="") + + # git rev-parse --verify origin/{branch} + if "rev-parse" in joined and "--verify" in joined: + rc = 0 if verify_ok else 128 + return subprocess.CompletedProcess(cmd, rc, stdout="", stderr="") + + # git rev-list HEAD..origin/{branch} --count + if "rev-list" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout=f"{commit_count}\n", stderr="") + + # systemctl --user is-active + if "systemctl" in joined and "is-active" in joined: + if systemd_active: + return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="") + return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="") + + # systemctl --user restart + if "systemctl" in joined and "restart" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + # launchctl list ai.hermes.gateway + if "launchctl" in joined and "list" in joined: + if launchctl_loaded: + return subprocess.CompletedProcess(cmd, 0, stdout="PID\tStatus\tLabel\n123\t0\tai.hermes.gateway\n", stderr="") + return subprocess.CompletedProcess(cmd, 113, stdout="", stderr="Could not find service") + + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + return side_effect + + +@pytest.fixture +def mock_args(): + return SimpleNamespace() + + +# --------------------------------------------------------------------------- +# Launchd plist includes --replace +# --------------------------------------------------------------------------- + + +class TestLaunchdPlistReplace: + """The generated launchd plist must include --replace so respawned + gateways kill stale instances.""" + + def test_plist_contains_replace_flag(self): + plist = gateway_cli.generate_launchd_plist() + assert "--replace" in plist + + def test_plist_program_arguments_order(self): + """--replace comes after 'run' in the ProgramArguments.""" + plist = gateway_cli.generate_launchd_plist() + lines = [line.strip() for line in plist.splitlines()] + # Find 'run' and '--replace' in the string entries + string_values = [ + line.replace("", "").replace("", "") + for line in lines + if "" in line and "" in line + ] + assert "run" in string_values + assert "--replace" in string_values + run_idx = string_values.index("run") + replace_idx = string_values.index("--replace") + assert replace_idx == run_idx + 1 + + +# --------------------------------------------------------------------------- +# cmd_update — macOS launchd detection +# --------------------------------------------------------------------------- + + +class TestLaunchdPlistRefresh: + """refresh_launchd_plist_if_needed rewrites stale plists (like systemd's + refresh_systemd_unit_if_needed).""" + + def test_refresh_rewrites_stale_plist(self, tmp_path, monkeypatch): + plist_path = tmp_path / "ai.hermes.gateway.plist" + plist_path.write_text("old content") + + monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + calls = [] + def fake_run(cmd, check=False, **kwargs): + calls.append(cmd) + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + + result = gateway_cli.refresh_launchd_plist_if_needed() + + assert result is True + # Plist should now contain the generated content (which includes --replace) + assert "--replace" in plist_path.read_text() + # Should have unloaded then reloaded + assert any("unload" in str(c) for c in calls) + assert any("load" in str(c) for c in calls) + + def test_refresh_skips_when_current(self, tmp_path, monkeypatch): + plist_path = tmp_path / "ai.hermes.gateway.plist" + monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + # Write the current expected content + plist_path.write_text(gateway_cli.generate_launchd_plist()) + + calls = [] + monkeypatch.setattr( + gateway_cli.subprocess, "run", + lambda cmd, **kw: calls.append(cmd) or SimpleNamespace(returncode=0), + ) + + result = gateway_cli.refresh_launchd_plist_if_needed() + + assert result is False + assert len(calls) == 0 # No launchctl calls needed + + def test_refresh_skips_when_no_plist(self, tmp_path, monkeypatch): + plist_path = tmp_path / "nonexistent.plist" + monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + result = gateway_cli.refresh_launchd_plist_if_needed() + assert result is False + + def test_launchd_start_calls_refresh(self, tmp_path, monkeypatch): + """launchd_start refreshes the plist before starting.""" + plist_path = tmp_path / "ai.hermes.gateway.plist" + plist_path.write_text("old") + monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + calls = [] + def fake_run(cmd, check=False, **kwargs): + calls.append(cmd) + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + + gateway_cli.launchd_start() + + # First calls should be refresh (unload/load), then start + cmd_strs = [" ".join(c) for c in calls] + assert any("unload" in s for s in cmd_strs) + assert any("start" in s for s in cmd_strs) + + +class TestCmdUpdateLaunchdRestart: + """cmd_update correctly detects and handles launchd on macOS.""" + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_detects_launchd_and_skips_manual_restart_message( + self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch, + ): + """When launchd is running the gateway, update should print + 'auto-restart via launchd' instead of 'Restart it with: hermes gateway run'.""" + # Create a fake launchd plist so is_macos + plist.exists() passes + plist_path = tmp_path / "ai.hermes.gateway.plist" + plist_path.write_text("") + + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: True, + ) + monkeypatch.setattr( + gateway_cli, "get_launchd_plist_path", lambda: plist_path, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + launchctl_loaded=True, + ) + + # Mock get_running_pid to return a PID + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Gateway restarted via launchd" in captured + assert "Restart it with: hermes gateway run" not in captured + # Verify launchctl stop + start were called (not manual SIGTERM) + launchctl_calls = [ + c for c in mock_run.call_args_list + if len(c.args[0]) > 0 and c.args[0][0] == "launchctl" + ] + stop_calls = [c for c in launchctl_calls if "stop" in c.args[0]] + start_calls = [c for c in launchctl_calls if "start" in c.args[0]] + assert len(stop_calls) >= 1 + assert len(start_calls) >= 1 + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_without_launchd_shows_manual_restart( + self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch, + ): + """When no service manager is running, update should show the manual restart hint.""" + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: True, + ) + plist_path = tmp_path / "ai.hermes.gateway.plist" + # plist does NOT exist — no launchd service + monkeypatch.setattr( + gateway_cli, "get_launchd_plist_path", lambda: plist_path, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + launchctl_loaded=False, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"), \ + patch("os.kill"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Restart it with: hermes gateway run" in captured + assert "Gateway restarted via launchd" not in captured + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_with_systemd_still_restarts_via_systemd( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """On Linux with systemd active, update should restart via systemctl.""" + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: False, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=True, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"), \ + patch("os.kill"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Gateway restarted" in captured + # Verify systemctl restart was called + restart_calls = [ + c for c in mock_run.call_args_list + if "restart" in " ".join(str(a) for a in c.args[0]) + and "systemctl" in " ".join(str(a) for a in c.args[0]) + ] + assert len(restart_calls) == 1 + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_no_gateway_running_skips_restart( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """When no gateway is running, update should skip the restart section entirely.""" + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: False, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=False, + ) + + with patch("gateway.status.get_running_pid", return_value=None): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Stopped gateway" not in captured + assert "Gateway restarted" not in captured + assert "Gateway restarted via launchd" not in captured diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 3144bed80..99d8830fa 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -162,6 +162,57 @@ def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch): assert shell.api_mode == "codex_responses" +def test_cli_turn_routing_uses_primary_when_disabled(monkeypatch): + cli = _import_cli() + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + shell.provider = "openrouter" + shell.api_mode = "chat_completions" + shell.base_url = "https://openrouter.ai/api/v1" + shell.api_key = "sk-primary" + shell._smart_model_routing = {"enabled": False} + + result = shell._resolve_turn_agent_config("what time is it in tokyo?") + + assert result["model"] == "gpt-5" + assert result["runtime"]["provider"] == "openrouter" + assert result["label"] is None + + +def test_cli_turn_routing_uses_cheap_model_when_simple(monkeypatch): + cli = _import_cli() + + def _runtime_resolve(**kwargs): + assert kwargs["requested"] == "zai" + return { + "provider": "zai", + "api_mode": "chat_completions", + "base_url": "https://open.z.ai/api/v1", + "api_key": "cheap-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + + shell = cli.HermesCLI(model="anthropic/claude-sonnet-4", compact=True, max_turns=1) + shell.provider = "openrouter" + shell.api_mode = "chat_completions" + shell.base_url = "https://openrouter.ai/api/v1" + shell.api_key = "primary-key" + shell._smart_model_routing = { + "enabled": True, + "cheap_model": {"provider": "zai", "model": "glm-5-air"}, + "max_simple_chars": 160, + "max_simple_words": 28, + } + + result = shell._resolve_turn_agent_config("what time is it in tokyo?") + + assert result["model"] == "glm-5-air" + assert result["runtime"]["provider"] == "zai" + assert result["runtime"]["api_key"] == "cheap-key" + assert result["label"] is not None + + def test_cli_prefers_config_provider_over_stale_env_override(monkeypatch): cli = _import_cli() diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 1df9c491f..56a2ab125 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -441,6 +441,39 @@ Supported providers: `openrouter`, `nous`, `openai-codex`, `anthropic`, `zai`, ` Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers). ::: +## Smart Model Routing + +Optional cheap-vs-strong routing lets Hermes keep your main model for complex work while sending very short/simple turns to a cheaper model. + +```yaml +smart_model_routing: + enabled: true + max_simple_chars: 160 + max_simple_words: 28 + cheap_model: + provider: openrouter + model: google/gemini-2.5-flash + # base_url: http://localhost:8000/v1 # optional custom endpoint + # api_key_env: MY_CUSTOM_KEY # optional env var name for that endpoint's API key +``` + +How it works: +- If a turn is short, single-line, and does not look code/tool/debug heavy, Hermes may route it to `cheap_model` +- If the turn looks complex, Hermes stays on your primary model/provider +- If the cheap route cannot be resolved cleanly, Hermes falls back to the primary model automatically + +This is intentionally conservative. It is meant for quick, low-stakes turns like: +- short factual questions +- quick rewrites +- lightweight summaries + +It will avoid routing prompts that look like: +- coding/debugging work +- tool-heavy requests +- long or multi-line analysis asks + +Use this when you want lower latency or cost without fully changing your default model. + ## Terminal Backend Configuration Configure which environment the agent uses for terminal commands: