diff --git a/agent/smart_model_routing.py b/agent/smart_model_routing.py
new file mode 100644
index 000000000..249548701
--- /dev/null
+++ b/agent/smart_model_routing.py
@@ -0,0 +1,184 @@
+"""Helpers for optional cheap-vs-strong model routing."""
+
+from __future__ import annotations
+
+import os
+import re
+from typing import Any, Dict, Optional
+
+_COMPLEX_KEYWORDS = {
+    "debug",
+    "debugging",
+    "implement",
+    "implementation",
+    "refactor",
+    "patch",
+    "traceback",
+    "stacktrace",
+    "exception",
+    "error",
+    "analyze",
+    "analysis",
+    "investigate",
+    "architecture",
+    "design",
+    "compare",
+    "benchmark",
+    "optimize",
+    "optimise",
+    "review",
+    "terminal",
+    "shell",
+    "tool",
+    "tools",
+    "pytest",
+    "test",
+    "tests",
+    "plan",
+    "planning",
+    "delegate",
+    "subagent",
+    "cron",
+    "docker",
+    "kubernetes",
+}
+
+_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)
+
+
+def _coerce_bool(value: Any, default: bool = False) -> bool:
+    if value is None:
+        return default
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, str):
+        return value.strip().lower() in {"1", "true", "yes", "on"}
+    return bool(value)
+
+
+def _coerce_int(value: Any, default: int) -> int:
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        return default
+
+
+def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
+    """Return the configured cheap-model route when a message looks simple.
+
+    Conservative by design: if the message has signs of code/tool/debugging/
+    long-form work, keep the primary model.
+    """
+    cfg = routing_config or {}
+    if not _coerce_bool(cfg.get("enabled"), False):
+        return None
+
+    cheap_model = cfg.get("cheap_model") or {}
+    if not isinstance(cheap_model, dict):
+        return None
+    provider = str(cheap_model.get("provider") or "").strip().lower()
+    model = str(cheap_model.get("model") or "").strip()
+    if not provider or not model:
+        return None
+
+    text = (user_message or "").strip()
+    if not text:
+        return None
+
+    max_chars = _coerce_int(cfg.get("max_simple_chars"), 160)
+    max_words = _coerce_int(cfg.get("max_simple_words"), 28)
+
+    if len(text) > max_chars:
+        return None
+    if len(text.split()) > max_words:
+        return None
+    if text.count("\n") > 1:
+        return None
+    if "```" in text or "`" in text:
+        return None
+    if _URL_RE.search(text):
+        return None
+
+    lowered = text.lower()
+    words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()}
+    if words & _COMPLEX_KEYWORDS:
+        return None
+
+    route = dict(cheap_model)
+    route["provider"] = provider
+    route["model"] = model
+    route["routing_reason"] = "simple_turn"
+    return route
+
+
+def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]:
+    """Resolve the effective model/runtime for one turn.
+
+    Returns a dict with model/runtime/signature/label fields.
+    """
+    route = choose_cheap_model_route(user_message, routing_config)
+    if not route:
+        return {
+            "model": primary.get("model"),
+            "runtime": {
+                "api_key": primary.get("api_key"),
+                "base_url": primary.get("base_url"),
+                "provider": primary.get("provider"),
+                "api_mode": primary.get("api_mode"),
+            },
+            "label": None,
+            "signature": (
+                primary.get("model"),
+                primary.get("provider"),
+                primary.get("base_url"),
+                primary.get("api_mode"),
+            ),
+        }
+
+    from hermes_cli.runtime_provider import resolve_runtime_provider
+
+    explicit_api_key = None
+    api_key_env = str(route.get("api_key_env") or "").strip()
+    if api_key_env:
+        explicit_api_key = os.getenv(api_key_env) or None
+
+    try:
+        runtime = resolve_runtime_provider(
+            requested=route.get("provider"),
+            explicit_api_key=explicit_api_key,
+            explicit_base_url=route.get("base_url"),
+        )
+    except Exception:
+        return {
+            "model": primary.get("model"),
+            "runtime": {
+                "api_key": primary.get("api_key"),
+                "base_url": primary.get("base_url"),
+                "provider": primary.get("provider"),
+                "api_mode": primary.get("api_mode"),
+            },
+            "label": None,
+            "signature": (
+                primary.get("model"),
+                primary.get("provider"),
+                primary.get("base_url"),
+                primary.get("api_mode"),
+            ),
+        }
+
+    return {
+        "model": route.get("model"),
+        "runtime": {
+            "api_key": runtime.get("api_key"),
+            "base_url": runtime.get("base_url"),
+            "provider": runtime.get("provider"),
+            "api_mode": runtime.get("api_mode"),
+        },
+        "label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
+        "signature": (
+            route.get("model"),
+            runtime.get("provider"),
+            runtime.get("base_url"),
+            runtime.get("api_mode"),
+        ),
+    }
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 0c947e479..681405c44 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -51,6 +51,20 @@ model:
 #   # Data policy: "allow" (default) or "deny" to exclude providers that may store data
 #   # data_collection: "deny"
 
+# =============================================================================
+# Smart Model Routing (optional)
+# =============================================================================
+# Use a cheaper model for short/simple turns while keeping your main model for
+# more complex requests. Disabled by default.
+#
+# smart_model_routing:
+#   enabled: true
+#   max_simple_chars: 160
+#   max_simple_words: 28
+#   cheap_model:
+#     provider: openrouter
+#     model: google/gemini-2.5-flash
+
 # =============================================================================
 # Git Worktree Isolation
 # =============================================================================
diff --git a/cli.py b/cli.py
index 7716d6f57..eb0d7b101 100755
--- a/cli.py
+++ b/cli.py
@@ -176,6 +176,12 @@ def load_cli_config() -> Dict[str, Any]:
             "threshold": 0.50,    # Compress at 50% of model's context limit
             "summary_model": "google/gemini-3-flash-preview",  # Fast/cheap model for summaries
         },
+        "smart_model_routing": {
+            "enabled": False,
+            "max_simple_chars": 160,
+            "max_simple_words": 28,
+            "cheap_model": {},
+        },
         "agent": {
             "max_turns": 90,  # Default max tool-calling iterations (shared with subagents)
             "verbose": False,
@@ -1126,6 +1132,10 @@ class HermesCLI:
         fb = CLI_CONFIG.get("fallback_model") or {}
         self._fallback_model = fb if fb.get("provider") and fb.get("model") else None
 
+        # Optional cheap-vs-strong routing for simple turns
+        self._smart_model_routing = CLI_CONFIG.get("smart_model_routing", {}) or {}
+        self._active_agent_route_signature = None
+
         # Agent will be initialized on first use
         self.agent: Optional[AIAgent] = None
         self._app = None  # prompt_toolkit Application (set in run())
@@ -1537,10 +1547,27 @@ class HermesCLI:
         # routing, or the effective model changed.
         if (credentials_changed or routing_changed or model_changed) and self.agent is not None:
             self.agent = None
+            self._active_agent_route_signature = None
 
         return True
 
-    def _init_agent(self) -> bool:
+    def _resolve_turn_agent_config(self, user_message: str) -> dict:
+        """Resolve model/runtime overrides for a single user turn."""
+        from agent.smart_model_routing import resolve_turn_route
+
+        return resolve_turn_route(
+            user_message,
+            self._smart_model_routing,
+            {
+                "model": self.model,
+                "api_key": self.api_key,
+                "base_url": self.base_url,
+                "provider": self.provider,
+                "api_mode": self.api_mode,
+            },
+        )
+
+    def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None) -> bool:
         """
         Initialize the agent on first use.
         When resuming a session, restores conversation history from SQLite.
@@ -1600,12 +1627,19 @@ class HermesCLI:
                 pass
         
         try:
+            runtime = runtime_override or {
+                "api_key": self.api_key,
+                "base_url": self.base_url,
+                "provider": self.provider,
+                "api_mode": self.api_mode,
+            }
+            effective_model = model_override or self.model
             self.agent = AIAgent(
-                model=self.model,
-                api_key=self.api_key,
-                base_url=self.base_url,
-                provider=self.provider,
-                api_mode=self.api_mode,
+                model=effective_model,
+                api_key=runtime.get("api_key"),
+                base_url=runtime.get("base_url"),
+                provider=runtime.get("provider"),
+                api_mode=runtime.get("api_mode"),
                 max_iterations=self.max_turns,
                 enabled_toolsets=self.enabled_toolsets,
                 verbose_logging=self.verbose,
@@ -1632,7 +1666,13 @@ class HermesCLI:
                 pass_session_id=self.pass_session_id,
                 tool_progress_callback=self._on_tool_progress,
             )
-            # Apply any pending title now that the session exists in the DB
+            self._active_agent_route_signature = (
+                effective_model,
+                runtime.get("provider"),
+                runtime.get("base_url"),
+                runtime.get("api_mode"),
+            )
+
             if self._pending_title and self._session_db:
                 try:
                     self._session_db.set_session_title(self.session_id, self._pending_title)
@@ -3455,14 +3495,16 @@ class HermesCLI:
         _cprint(f"  Task ID: {task_id}")
         _cprint(f"  You can continue chatting — results will appear when done.\n")
 
+        turn_route = self._resolve_turn_agent_config(prompt)
+
         def run_background():
             try:
                 bg_agent = AIAgent(
-                    model=self.model,
-                    api_key=self.api_key,
-                    base_url=self.base_url,
-                    provider=self.provider,
-                    api_mode=self.api_mode,
+                    model=turn_route["model"],
+                    api_key=turn_route["runtime"].get("api_key"),
+                    base_url=turn_route["runtime"].get("base_url"),
+                    provider=turn_route["runtime"].get("provider"),
+                    api_mode=turn_route["runtime"].get("api_mode"),
                     max_iterations=self.max_turns,
                     enabled_toolsets=self.enabled_toolsets,
                     quiet_mode=True,
@@ -4886,8 +4928,16 @@ class HermesCLI:
         if not self._ensure_runtime_credentials():
             return None
 
+        turn_route = self._resolve_turn_agent_config(message)
+        if turn_route["signature"] != self._active_agent_route_signature:
+            self.agent = None
+
         # Initialize agent if needed
-        if not self._init_agent():
+        if not self._init_agent(
+            model_override=turn_route["model"],
+            runtime_override=turn_route["runtime"],
+            route_label=turn_route["label"],
+        ):
             return None
         
         # Pre-process images through the vision tool (Gemini Flash) so the
@@ -6616,13 +6666,21 @@ def main(
             # Quiet mode: suppress banner, spinner, tool previews.
             # Only print the final response and parseable session info.
             cli.tool_progress_mode = "off"
-            if cli._init_agent():
-                cli.agent.quiet_mode = True
-                result = cli.agent.run_conversation(query)
-                response = result.get("final_response", "") if isinstance(result, dict) else str(result)
-                if response:
-                    print(response)
-                print(f"\nsession_id: {cli.session_id}")
+            if cli._ensure_runtime_credentials():
+                turn_route = cli._resolve_turn_agent_config(query)
+                if turn_route["signature"] != cli._active_agent_route_signature:
+                    cli.agent = None
+                if cli._init_agent(
+                    model_override=turn_route["model"],
+                    runtime_override=turn_route["runtime"],
+                    route_label=turn_route["label"],
+                ):
+                    cli.agent.quiet_mode = True
+                    result = cli.agent.run_conversation(query)
+                    response = result.get("final_response", "") if isinstance(result, dict) else str(result)
+                    if response:
+                        print(response)
+                    print(f"\nsession_id: {cli.session_id}")
         else:
             cli.show_banner()
             cli.console.print(f"[bold blue]Query:[/] {query}")
diff --git a/cron/scheduler.py b/cron/scheduler.py
index 8d75e1a95..ded88ef53 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -315,6 +315,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
 
         # Provider routing
         pr = _cfg.get("provider_routing", {})
+        smart_routing = _cfg.get("smart_model_routing", {}) or {}
 
         from hermes_cli.runtime_provider import (
             resolve_runtime_provider,
@@ -331,12 +332,25 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
             message = format_runtime_provider_error(exc)
             raise RuntimeError(message) from exc
 
+        from agent.smart_model_routing import resolve_turn_route
+        turn_route = resolve_turn_route(
+            prompt,
+            smart_routing,
+            {
+                "model": model,
+                "api_key": runtime.get("api_key"),
+                "base_url": runtime.get("base_url"),
+                "provider": runtime.get("provider"),
+                "api_mode": runtime.get("api_mode"),
+            },
+        )
+
         agent = AIAgent(
-            model=model,
-            api_key=runtime.get("api_key"),
-            base_url=runtime.get("base_url"),
-            provider=runtime.get("provider"),
-            api_mode=runtime.get("api_mode"),
+            model=turn_route["model"],
+            api_key=turn_route["runtime"].get("api_key"),
+            base_url=turn_route["runtime"].get("base_url"),
+            provider=turn_route["runtime"].get("provider"),
+            api_mode=turn_route["runtime"].get("api_mode"),
             max_iterations=max_iterations,
             reasoning_config=reasoning_config,
             prefill_messages=prefill_messages,
diff --git a/gateway/run.py b/gateway/run.py
index 50e68eaad..5c0b8b48c 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -324,6 +324,7 @@ class GatewayRunner:
         self._show_reasoning = self._load_show_reasoning()
         self._provider_routing = self._load_provider_routing()
         self._fallback_model = self._load_fallback_model()
+        self._smart_model_routing = self._load_smart_model_routing()
 
         # Wire process registry into session store for reset protection
         from tools.process_registry import process_registry
@@ -593,6 +594,18 @@ class GatewayRunner:
             group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
         )
 
+    def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
+        from agent.smart_model_routing import resolve_turn_route
+
+        primary = {
+            "model": model,
+            "api_key": runtime_kwargs.get("api_key"),
+            "base_url": runtime_kwargs.get("base_url"),
+            "provider": runtime_kwargs.get("provider"),
+            "api_mode": runtime_kwargs.get("api_mode"),
+        }
+        return resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary)
+
     async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None:
         """React to a non-retryable adapter failure after startup."""
         logger.error(
@@ -795,6 +808,20 @@ class GatewayRunner:
             pass
         return None
 
+    @staticmethod
+    def _load_smart_model_routing() -> dict:
+        """Load optional smart cheap-vs-strong model routing config."""
+        try:
+            import yaml as _y
+            cfg_path = _hermes_home / "config.yaml"
+            if cfg_path.exists():
+                with open(cfg_path, encoding="utf-8") as _f:
+                    cfg = _y.safe_load(_f) or {}
+                return cfg.get("smart_model_routing", {}) or {}
+        except Exception:
+            pass
+        return {}
+
     async def start(self) -> bool:
         """
         Start the gateway and all configured platform adapters.
@@ -2931,11 +2958,12 @@ class GatewayRunner:
             max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
             reasoning_config = self._load_reasoning_config()
             self._reasoning_config = reasoning_config
+            turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs)
 
             def run_sync():
                 agent = AIAgent(
-                    model=model,
-                    **runtime_kwargs,
+                    model=turn_route["model"],
+                    **turn_route["runtime"],
                     max_iterations=max_iterations,
                     quiet_mode=True,
                     verbose_logging=False,
@@ -4169,9 +4197,10 @@ class GatewayRunner:
             honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
             reasoning_config = self._load_reasoning_config()
             self._reasoning_config = reasoning_config
+            turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)
             agent = AIAgent(
-                model=model,
-                **runtime_kwargs,
+                model=turn_route["model"],
+                **turn_route["runtime"],
                 max_iterations=max_iterations,
                 quiet_mode=True,
                 verbose_logging=False,
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index ee5e54470..1a46a80bb 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -147,6 +147,12 @@ DEFAULT_CONFIG = {
         "summary_model": "google/gemini-3-flash-preview",
         "summary_provider": "auto",
     },
+    "smart_model_routing": {
+        "enabled": False,
+        "max_simple_chars": 160,
+        "max_simple_words": 28,
+        "cheap_model": {},
+    },
     
     # Auxiliary model config — provider:model for each side task.
     # Format: provider is the provider name, model is the model slug.
@@ -1010,6 +1016,19 @@ _FALLBACK_COMMENT = """
 # fallback_model:
 #   provider: openrouter
 #   model: anthropic/claude-sonnet-4
+#
+# ── Smart Model Routing ────────────────────────────────────────────────
+# Optional cheap-vs-strong routing for simple turns.
+# Keeps the primary model for complex work, but can route short/simple
+# messages to a cheaper model across providers.
+#
+# smart_model_routing:
+#   enabled: true
+#   max_simple_chars: 160
+#   max_simple_words: 28
+#   cheap_model:
+#     provider: openrouter
+#     model: google/gemini-2.5-flash
 """
 
 
@@ -1040,6 +1059,19 @@ _COMMENTED_SECTIONS = """
 # fallback_model:
 #   provider: openrouter
 #   model: anthropic/claude-sonnet-4
+#
+# ── Smart Model Routing ────────────────────────────────────────────────
+# Optional cheap-vs-strong routing for simple turns.
+# Keeps the primary model for complex work, but can route short/simple
+# messages to a cheaper model across providers.
+#
+# smart_model_routing:
+#   enabled: true
+#   max_simple_chars: 160
+#   max_simple_words: 28
+#   cheap_model:
+#     provider: openrouter
+#     model: google/gemini-2.5-flash
 """
 
 
diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index 2399436c4..4bc068c1e 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -705,6 +705,7 @@ def generate_launchd_plist() -> str:
         <string>hermes_cli.main</string>
         <string>gateway</string>
         <string>run</string>
+        <string>--replace</string>
     </array>
     
     <key>WorkingDirectory</key>
@@ -728,6 +729,36 @@ def generate_launchd_plist() -> str:
 </plist>
 """
 
+def launchd_plist_is_current() -> bool:
+    """Check if the installed launchd plist matches the currently generated one."""
+    plist_path = get_launchd_plist_path()
+    if not plist_path.exists():
+        return False
+
+    installed = plist_path.read_text(encoding="utf-8")
+    expected = generate_launchd_plist()
+    return _normalize_service_definition(installed) == _normalize_service_definition(expected)
+
+
+def refresh_launchd_plist_if_needed() -> bool:
+    """Rewrite the installed launchd plist when the generated definition has changed.
+
+    Unlike systemd, launchd picks up plist changes on the next ``launchctl stop``/
+    ``launchctl start`` cycle — no daemon-reload is needed.  We still unload/reload
+    to make launchd re-read the updated plist immediately.
+    """
+    plist_path = get_launchd_plist_path()
+    if not plist_path.exists() or launchd_plist_is_current():
+        return False
+
+    plist_path.write_text(generate_launchd_plist(), encoding="utf-8")
+    # Unload/reload so launchd picks up the new definition
+    subprocess.run(["launchctl", "unload", str(plist_path)], check=False)
+    subprocess.run(["launchctl", "load", str(plist_path)], check=False)
+    print("↻ Updated gateway launchd service definition to match the current Hermes install")
+    return True
+
+
 def launchd_install(force: bool = False):
     plist_path = get_launchd_plist_path()
     
@@ -760,6 +791,7 @@ def launchd_uninstall():
     print("✓ Service uninstalled")
 
 def launchd_start():
+    refresh_launchd_plist_if_needed()
     subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
     print("✓ Service started")
 
@@ -768,6 +800,7 @@ def launchd_stop():
     print("✓ Service stopped")
 
 def launchd_restart():
+    refresh_launchd_plist_if_needed()
     launchd_stop()
     launchd_start()
 
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 2cb220df6..845ae207e 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -2306,12 +2306,16 @@ def cmd_update(args):
         # installation's gateway — safe with multiple installations.
         try:
             from gateway.status import get_running_pid, remove_pid_file
-            from hermes_cli.gateway import get_service_name
+            from hermes_cli.gateway import (
+                get_service_name, get_launchd_plist_path, is_macos,
+                refresh_launchd_plist_if_needed,
+            )
             import signal as _signal
 
             _gw_service_name = get_service_name()
             existing_pid = get_running_pid()
             has_systemd_service = False
+            has_launchd_service = False
 
             try:
                 check = subprocess.run(
@@ -2322,23 +2326,36 @@ def cmd_update(args):
             except (FileNotFoundError, subprocess.TimeoutExpired):
                 pass
 
-            if existing_pid or has_systemd_service:
+            # Check for macOS launchd service
+            if is_macos():
+                try:
+                    plist_path = get_launchd_plist_path()
+                    if plist_path.exists():
+                        check = subprocess.run(
+                            ["launchctl", "list", "ai.hermes.gateway"],
+                            capture_output=True, text=True, timeout=5,
+                        )
+                        has_launchd_service = check.returncode == 0
+                except (FileNotFoundError, subprocess.TimeoutExpired):
+                    pass
+
+            if existing_pid or has_systemd_service or has_launchd_service:
                 print()
 
-                # Kill the PID-file-tracked process (may be manual or systemd)
-                if existing_pid:
-                    try:
-                        os.kill(existing_pid, _signal.SIGTERM)
-                        print(f"→ Stopped gateway process (PID {existing_pid})")
-                    except ProcessLookupError:
-                        pass  # Already gone
-                    except PermissionError:
-                        print(f"⚠ Permission denied killing gateway PID {existing_pid}")
-                    remove_pid_file()
-
-                # Restart the systemd service (starts a fresh process)
+                # When a service manager is handling the gateway, let it
+                # manage the lifecycle — don't manually SIGTERM the PID
+                # (launchd KeepAlive would respawn immediately, causing races).
                 if has_systemd_service:
                     import time as _time
+                    if existing_pid:
+                        try:
+                            os.kill(existing_pid, _signal.SIGTERM)
+                            print(f"→ Stopped gateway process (PID {existing_pid})")
+                        except ProcessLookupError:
+                            pass
+                        except PermissionError:
+                            print(f"⚠ Permission denied killing gateway PID {existing_pid}")
+                        remove_pid_file()
                     _time.sleep(1)  # Brief pause for port/socket release
                     print("→ Restarting gateway service...")
                     restart = subprocess.run(
@@ -2350,7 +2367,36 @@ def cmd_update(args):
                     else:
                         print(f"⚠ Gateway restart failed: {restart.stderr.strip()}")
                         print("  Try manually: hermes gateway restart")
+                elif has_launchd_service:
+                    # Refresh the plist first (picks up --replace and other
+                    # changes from the update we just pulled).
+                    refresh_launchd_plist_if_needed()
+                    # Explicit stop+start — don't rely on KeepAlive respawn
+                    # after a manual SIGTERM, which would race with the
+                    # PID file cleanup.
+                    print("→ Restarting gateway service...")
+                    stop = subprocess.run(
+                        ["launchctl", "stop", "ai.hermes.gateway"],
+                        capture_output=True, text=True, timeout=10,
+                    )
+                    start = subprocess.run(
+                        ["launchctl", "start", "ai.hermes.gateway"],
+                        capture_output=True, text=True, timeout=10,
+                    )
+                    if start.returncode == 0:
+                        print("✓ Gateway restarted via launchd.")
+                    else:
+                        print(f"⚠ Gateway restart failed: {start.stderr.strip()}")
+                        print("  Try manually: hermes gateway restart")
                 elif existing_pid:
+                    try:
+                        os.kill(existing_pid, _signal.SIGTERM)
+                        print(f"→ Stopped gateway process (PID {existing_pid})")
+                    except ProcessLookupError:
+                        pass  # Already gone
+                    except PermissionError:
+                        print(f"⚠ Permission denied killing gateway PID {existing_pid}")
+                    remove_pid_file()
                     print("  ℹ️  Gateway was running manually (not as a service).")
                     print("  Restart it with: hermes gateway run")
         except Exception as e:
diff --git a/tests/agent/test_smart_model_routing.py b/tests/agent/test_smart_model_routing.py
new file mode 100644
index 000000000..7e9025609
--- /dev/null
+++ b/tests/agent/test_smart_model_routing.py
@@ -0,0 +1,61 @@
+from agent.smart_model_routing import choose_cheap_model_route
+
+
+_BASE_CONFIG = {
+    "enabled": True,
+    "cheap_model": {
+        "provider": "openrouter",
+        "model": "google/gemini-2.5-flash",
+    },
+}
+
+
+def test_returns_none_when_disabled():
+    cfg = {**_BASE_CONFIG, "enabled": False}
+    assert choose_cheap_model_route("what time is it in tokyo?", cfg) is None
+
+
+def test_routes_short_simple_prompt():
+    result = choose_cheap_model_route("what time is it in tokyo?", _BASE_CONFIG)
+    assert result is not None
+    assert result["provider"] == "openrouter"
+    assert result["model"] == "google/gemini-2.5-flash"
+    assert result["routing_reason"] == "simple_turn"
+
+
+def test_skips_long_prompt():
+    prompt = "please summarize this carefully " * 20
+    assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
+
+
+def test_skips_code_like_prompt():
+    prompt = "debug this traceback: ```python\nraise ValueError('bad')\n```"
+    assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
+
+
+def test_skips_tool_heavy_prompt_keywords():
+    prompt = "implement a patch for this docker error"
+    assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
+
+
+def test_resolve_turn_route_falls_back_to_primary_when_route_runtime_cannot_be_resolved(monkeypatch):
+    from agent.smart_model_routing import resolve_turn_route
+
+    monkeypatch.setattr(
+        "hermes_cli.runtime_provider.resolve_runtime_provider",
+        lambda **kwargs: (_ for _ in ()).throw(RuntimeError("bad route")),
+    )
+    result = resolve_turn_route(
+        "what time is it in tokyo?",
+        _BASE_CONFIG,
+        {
+            "model": "anthropic/claude-sonnet-4",
+            "provider": "openrouter",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_mode": "chat_completions",
+            "api_key": "sk-primary",
+        },
+    )
+    assert result["model"] == "anthropic/claude-sonnet-4"
+    assert result["runtime"]["provider"] == "openrouter"
+    assert result["label"] is None
diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py
new file mode 100644
index 000000000..b9cdecaa0
--- /dev/null
+++ b/tests/hermes_cli/test_update_gateway_restart.py
@@ -0,0 +1,305 @@
+"""Tests for cmd_update gateway auto-restart — systemd + launchd coverage.
+
+Ensures ``hermes update`` correctly detects running gateways managed by
+systemd (Linux) or launchd (macOS) and restarts/informs the user properly,
+rather than leaving zombie processes or telling users to manually restart
+when launchd will auto-respawn.
+"""
+
+import subprocess
+from types import SimpleNamespace
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+import hermes_cli.gateway as gateway_cli
+from hermes_cli.main import cmd_update
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_run_side_effect(
+    branch="main",
+    verify_ok=True,
+    commit_count="3",
+    systemd_active=False,
+    launchctl_loaded=False,
+):
+    """Build a subprocess.run side_effect that simulates git + service commands."""
+
+    def side_effect(cmd, **kwargs):
+        joined = " ".join(str(c) for c in cmd)
+
+        # git rev-parse --abbrev-ref HEAD
+        if "rev-parse" in joined and "--abbrev-ref" in joined:
+            return subprocess.CompletedProcess(cmd, 0, stdout=f"{branch}\n", stderr="")
+
+        # git rev-parse --verify origin/{branch}
+        if "rev-parse" in joined and "--verify" in joined:
+            rc = 0 if verify_ok else 128
+            return subprocess.CompletedProcess(cmd, rc, stdout="", stderr="")
+
+        # git rev-list HEAD..origin/{branch} --count
+        if "rev-list" in joined:
+            return subprocess.CompletedProcess(cmd, 0, stdout=f"{commit_count}\n", stderr="")
+
+        # systemctl --user is-active
+        if "systemctl" in joined and "is-active" in joined:
+            if systemd_active:
+                return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
+            return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
+
+        # systemctl --user restart
+        if "systemctl" in joined and "restart" in joined:
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+        # launchctl list ai.hermes.gateway
+        if "launchctl" in joined and "list" in joined:
+            if launchctl_loaded:
+                return subprocess.CompletedProcess(cmd, 0, stdout="PID\tStatus\tLabel\n123\t0\tai.hermes.gateway\n", stderr="")
+            return subprocess.CompletedProcess(cmd, 113, stdout="", stderr="Could not find service")
+
+        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+    return side_effect
+
+
+@pytest.fixture
+def mock_args():
+    return SimpleNamespace()
+
+
+# ---------------------------------------------------------------------------
+# Launchd plist includes --replace
+# ---------------------------------------------------------------------------
+
+
+class TestLaunchdPlistReplace:
+    """The generated launchd plist must include --replace so respawned
+    gateways kill stale instances."""
+
+    def test_plist_contains_replace_flag(self):
+        plist = gateway_cli.generate_launchd_plist()
+        assert "--replace" in plist
+
+    def test_plist_program_arguments_order(self):
+        """--replace comes after 'run' in the ProgramArguments."""
+        plist = gateway_cli.generate_launchd_plist()
+        lines = [line.strip() for line in plist.splitlines()]
+        # Find 'run' and '--replace' in the string entries
+        string_values = [
+            line.replace("<string>", "").replace("</string>", "")
+            for line in lines
+            if "<string>" in line and "</string>" in line
+        ]
+        assert "run" in string_values
+        assert "--replace" in string_values
+        run_idx = string_values.index("run")
+        replace_idx = string_values.index("--replace")
+        assert replace_idx == run_idx + 1
+
+
+# ---------------------------------------------------------------------------
+# cmd_update — macOS launchd detection
+# ---------------------------------------------------------------------------
+
+
+class TestLaunchdPlistRefresh:
+    """refresh_launchd_plist_if_needed rewrites stale plists (like systemd's
+    refresh_systemd_unit_if_needed)."""
+
+    def test_refresh_rewrites_stale_plist(self, tmp_path, monkeypatch):
+        plist_path = tmp_path / "ai.hermes.gateway.plist"
+        plist_path.write_text("<plist>old content</plist>")
+
+        monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
+
+        calls = []
+        def fake_run(cmd, check=False, **kwargs):
+            calls.append(cmd)
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        result = gateway_cli.refresh_launchd_plist_if_needed()
+
+        assert result is True
+        # Plist should now contain the generated content (which includes --replace)
+        assert "--replace" in plist_path.read_text()
+        # Should have unloaded then reloaded
+        assert any("unload" in str(c) for c in calls)
+        assert any("load" in str(c) for c in calls)
+
+    def test_refresh_skips_when_current(self, tmp_path, monkeypatch):
+        plist_path = tmp_path / "ai.hermes.gateway.plist"
+        monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
+
+        # Write the current expected content
+        plist_path.write_text(gateway_cli.generate_launchd_plist())
+
+        calls = []
+        monkeypatch.setattr(
+            gateway_cli.subprocess, "run",
+            lambda cmd, **kw: calls.append(cmd) or SimpleNamespace(returncode=0),
+        )
+
+        result = gateway_cli.refresh_launchd_plist_if_needed()
+
+        assert result is False
+        assert len(calls) == 0  # No launchctl calls needed
+
+    def test_refresh_skips_when_no_plist(self, tmp_path, monkeypatch):
+        plist_path = tmp_path / "nonexistent.plist"
+        monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
+
+        result = gateway_cli.refresh_launchd_plist_if_needed()
+        assert result is False
+
+    def test_launchd_start_calls_refresh(self, tmp_path, monkeypatch):
+        """launchd_start refreshes the plist before starting."""
+        plist_path = tmp_path / "ai.hermes.gateway.plist"
+        plist_path.write_text("<plist>old</plist>")
+        monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
+
+        calls = []
+        def fake_run(cmd, check=False, **kwargs):
+            calls.append(cmd)
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        gateway_cli.launchd_start()
+
+        # First calls should be refresh (unload/load), then start
+        cmd_strs = [" ".join(c) for c in calls]
+        assert any("unload" in s for s in cmd_strs)
+        assert any("start" in s for s in cmd_strs)
+
+
+class TestCmdUpdateLaunchdRestart:
+    """cmd_update correctly detects and handles launchd on macOS."""
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_detects_launchd_and_skips_manual_restart_message(
+        self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
+    ):
+        """When launchd is running the gateway, update should print
+        'auto-restart via launchd' instead of 'Restart it with: hermes gateway run'."""
+        # Create a fake launchd plist so is_macos + plist.exists() passes
+        plist_path = tmp_path / "ai.hermes.gateway.plist"
+        plist_path.write_text("<plist/>")
+
+        monkeypatch.setattr(
+            gateway_cli, "is_macos", lambda: True,
+        )
+        monkeypatch.setattr(
+            gateway_cli, "get_launchd_plist_path", lambda: plist_path,
+        )
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            launchctl_loaded=True,
+        )
+
+        # Mock get_running_pid to return a PID
+        with patch("gateway.status.get_running_pid", return_value=12345), \
+             patch("gateway.status.remove_pid_file"):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        assert "Gateway restarted via launchd" in captured
+        assert "Restart it with: hermes gateway run" not in captured
+        # Verify launchctl stop + start were called (not manual SIGTERM)
+        launchctl_calls = [
+            c for c in mock_run.call_args_list
+            if len(c.args[0]) > 0 and c.args[0][0] == "launchctl"
+        ]
+        stop_calls = [c for c in launchctl_calls if "stop" in c.args[0]]
+        start_calls = [c for c in launchctl_calls if "start" in c.args[0]]
+        assert len(stop_calls) >= 1
+        assert len(start_calls) >= 1
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_without_launchd_shows_manual_restart(
+        self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
+    ):
+        """When no service manager is running, update should show the manual restart hint."""
+        monkeypatch.setattr(
+            gateway_cli, "is_macos", lambda: True,
+        )
+        plist_path = tmp_path / "ai.hermes.gateway.plist"
+        # plist does NOT exist — no launchd service
+        monkeypatch.setattr(
+            gateway_cli, "get_launchd_plist_path", lambda: plist_path,
+        )
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            launchctl_loaded=False,
+        )
+
+        with patch("gateway.status.get_running_pid", return_value=12345), \
+             patch("gateway.status.remove_pid_file"), \
+             patch("os.kill"):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        assert "Restart it with: hermes gateway run" in captured
+        assert "Gateway restarted via launchd" not in captured
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_with_systemd_still_restarts_via_systemd(
+        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
+    ):
+        """On Linux with systemd active, update should restart via systemctl."""
+        monkeypatch.setattr(
+            gateway_cli, "is_macos", lambda: False,
+        )
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            systemd_active=True,
+        )
+
+        with patch("gateway.status.get_running_pid", return_value=12345), \
+             patch("gateway.status.remove_pid_file"), \
+             patch("os.kill"):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        assert "Gateway restarted" in captured
+        # Verify systemctl restart was called
+        restart_calls = [
+            c for c in mock_run.call_args_list
+            if "restart" in " ".join(str(a) for a in c.args[0])
+            and "systemctl" in " ".join(str(a) for a in c.args[0])
+        ]
+        assert len(restart_calls) == 1
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_no_gateway_running_skips_restart(
+        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
+    ):
+        """When no gateway is running, update should skip the restart section entirely."""
+        monkeypatch.setattr(
+            gateway_cli, "is_macos", lambda: False,
+        )
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            systemd_active=False,
+        )
+
+        with patch("gateway.status.get_running_pid", return_value=None):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        assert "Stopped gateway" not in captured
+        assert "Gateway restarted" not in captured
+        assert "Gateway restarted via launchd" not in captured
diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py
index 3144bed80..99d8830fa 100644
--- a/tests/test_cli_provider_resolution.py
+++ b/tests/test_cli_provider_resolution.py
@@ -162,6 +162,57 @@ def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch):
     assert shell.api_mode == "codex_responses"
 
 
+def test_cli_turn_routing_uses_primary_when_disabled(monkeypatch):
+    cli = _import_cli()
+    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)
+    shell.provider = "openrouter"
+    shell.api_mode = "chat_completions"
+    shell.base_url = "https://openrouter.ai/api/v1"
+    shell.api_key = "sk-primary"
+    shell._smart_model_routing = {"enabled": False}
+
+    result = shell._resolve_turn_agent_config("what time is it in tokyo?")
+
+    assert result["model"] == "gpt-5"
+    assert result["runtime"]["provider"] == "openrouter"
+    assert result["label"] is None
+
+
+def test_cli_turn_routing_uses_cheap_model_when_simple(monkeypatch):
+    cli = _import_cli()
+
+    def _runtime_resolve(**kwargs):
+        assert kwargs["requested"] == "zai"
+        return {
+            "provider": "zai",
+            "api_mode": "chat_completions",
+            "base_url": "https://open.z.ai/api/v1",
+            "api_key": "cheap-key",
+            "source": "env/config",
+        }
+
+    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
+
+    shell = cli.HermesCLI(model="anthropic/claude-sonnet-4", compact=True, max_turns=1)
+    shell.provider = "openrouter"
+    shell.api_mode = "chat_completions"
+    shell.base_url = "https://openrouter.ai/api/v1"
+    shell.api_key = "primary-key"
+    shell._smart_model_routing = {
+        "enabled": True,
+        "cheap_model": {"provider": "zai", "model": "glm-5-air"},
+        "max_simple_chars": 160,
+        "max_simple_words": 28,
+    }
+
+    result = shell._resolve_turn_agent_config("what time is it in tokyo?")
+
+    assert result["model"] == "glm-5-air"
+    assert result["runtime"]["provider"] == "zai"
+    assert result["runtime"]["api_key"] == "cheap-key"
+    assert result["label"] is not None
+
+
 def test_cli_prefers_config_provider_over_stale_env_override(monkeypatch):
     cli = _import_cli()
 
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index 1df9c491f..56a2ab125 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -441,6 +441,39 @@ Supported providers: `openrouter`, `nous`, `openai-codex`, `anthropic`, `zai`, `
 Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers).
 :::
 
+## Smart Model Routing
+
+Optional cheap-vs-strong routing lets Hermes keep your main model for complex work while sending very short/simple turns to a cheaper model.
+
+```yaml
+smart_model_routing:
+  enabled: true
+  max_simple_chars: 160
+  max_simple_words: 28
+  cheap_model:
+    provider: openrouter
+    model: google/gemini-2.5-flash
+    # base_url: http://localhost:8000/v1  # optional custom endpoint
+    # api_key_env: MY_CUSTOM_KEY          # optional env var name for that endpoint's API key
+```
+
+How it works:
+- If a turn is short, single-line, and does not look code/tool/debug heavy, Hermes may route it to `cheap_model`
+- If the turn looks complex, Hermes stays on your primary model/provider
+- If the cheap route cannot be resolved cleanly, Hermes falls back to the primary model automatically
+
+This is intentionally conservative. It is meant for quick, low-stakes turns like:
+- short factual questions
+- quick rewrites
+- lightweight summaries
+
+It will avoid routing prompts that look like:
+- coding/debugging work
+- tool-heavy requests
+- long or multi-line analysis asks
+
+Use this when you want lower latency or cost without fully changing your default model.
+
 ## Terminal Backend Configuration
 
 Configure which environment the agent uses for terminal commands: