diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 1f7a515c..504b2178 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -11,14 +11,29 @@ model:
   default: "anthropic/claude-opus-4.6"
   
   # Inference provider selection:
-  #   "auto"       - Use Nous Portal if logged in, otherwise OpenRouter/env vars (default)
-  #   "nous-api"   - Use Nous Portal via API key (requires: NOUS_API_KEY)
-  #   "openrouter" - Always use OpenRouter API key from OPENROUTER_API_KEY
-  #   "nous"       - Always use Nous Portal (requires: hermes login)
-  #   "zai"        - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
-  #   "kimi-coding"- Use Kimi / Moonshot AI models (requires: KIMI_API_KEY)
-  #   "minimax"    - Use MiniMax global endpoint (requires: MINIMAX_API_KEY)
-  #   "minimax-cn" - Use MiniMax China endpoint (requires: MINIMAX_CN_API_KEY)
+  #   "auto"         - Auto-detect from credentials; errors if none are configured (default)
+  #   "openrouter"   - OpenRouter (requires: OPENROUTER_API_KEY or OPENAI_API_KEY)
+  #   "nous"         - Nous Portal OAuth (requires: hermes login)
+  #   "nous-api"     - Nous Portal API key (requires: NOUS_API_KEY)
+  #   "anthropic"    - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
+  #   "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
+  #   "copilot"      - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
+  #   "zai"          - z.ai / ZhipuAI GLM (requires: GLM_API_KEY)
+  #   "kimi-coding"  - Kimi / Moonshot AI (requires: KIMI_API_KEY)
+  #   "minimax"      - MiniMax global (requires: MINIMAX_API_KEY)
+  #   "minimax-cn"   - MiniMax China (requires: MINIMAX_CN_API_KEY)
+  #   "huggingface"  - Hugging Face Inference (requires: HF_TOKEN)
+  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
+  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
+  #
+  # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
+  #   "custom"       - Any OpenAI-compatible endpoint. Set base_url below.
+  #   Aliases: "lmstudio", "ollama", "vllm", "llamacpp" (and variants such as "lm-studio", "llama.cpp") all map to "custom".
+  #   Example for LM Studio:
+  #     provider: "lmstudio"
+  #     base_url: "http://localhost:1234/v1"
+  #   No API key needed — local servers typically ignore auth.
+  #
   # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
   provider: "auto"
   
diff --git a/cli.py b/cli.py
index 5144b5bb..a601878f 100644
--- a/cli.py
+++ b/cli.py
@@ -1087,10 +1087,10 @@ class HermesCLI:
         # env vars would stomp each other.
         _model_config = CLI_CONFIG.get("model", {})
         _config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
-        _FALLBACK_MODEL = "anthropic/claude-opus-4.6"
-        self.model = model or _config_model or _FALLBACK_MODEL
-        # Auto-detect model from local server if still on fallback
-        if self.model == _FALLBACK_MODEL:
+        _DEFAULT_CONFIG_MODEL = "anthropic/claude-opus-4.6"
+        self.model = model or _config_model or _DEFAULT_CONFIG_MODEL
+        # Auto-detect model from local server if still on default
+        if self.model == _DEFAULT_CONFIG_MODEL:
             _base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else ""
             if "localhost" in _base_url or "127.0.0.1" in _base_url:
                 from hermes_cli.runtime_provider import _auto_detect_local_model
@@ -1104,7 +1104,7 @@ class HermesCLI:
         # explicit choice — the user just never changed it.  But a config model
         # like "gpt-5.3-codex" IS explicit and must be preserved.
         self._model_is_default = not model and (
-            not _config_model or _config_model == _FALLBACK_MODEL
+            not _config_model or _config_model == _DEFAULT_CONFIG_MODEL
         )
 
         self._explicit_api_key = api_key
diff --git a/cron/scheduler.py b/cron/scheduler.py
index 0058c1c0..52ae8e31 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -321,7 +321,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
             if delivery_target.get("thread_id") is not None:
                 os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
 
-        model = job.get("model") or os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
+        model = job.get("model") or os.getenv("HERMES_MODEL") or ""
 
         # Load config.yaml for model, reasoning, prefill, toolsets, provider routing
         _cfg = {}
diff --git a/gateway/run.py b/gateway/run.py
index ff4f9328..97d85364 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -323,10 +323,10 @@ def _resolve_gateway_model(config: dict | None = None) -> str:
     """Read model from env/config — mirrors the resolution in _run_agent_sync.
 
     Without this, temporary AIAgent instances (memory flush, /compress) fall
-    back to the hardcoded default ("anthropic/claude-opus-4.6") which fails
-    when the active provider is openai-codex.
+    back to the hardcoded default which fails when the active provider is
+    openai-codex.
     """
-    model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
+    model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or ""
     cfg = config if config is not None else _load_gateway_config()
     model_cfg = cfg.get("model", {})
     if isinstance(model_cfg, str):
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 6b41ac6e..0c503701 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -696,6 +696,10 @@ def resolve_provider(
         "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
         "go": "opencode-go", "opencode-go-sub": "opencode-go",
         "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
+        # Local server aliases — route through the generic custom provider
+        "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
+        "ollama": "custom", "vllm": "custom", "llamacpp": "custom",
+        "llama.cpp": "custom", "llama-cpp": "custom",
     }
     normalized = _PROVIDER_ALIASES.get(normalized, normalized)
 
@@ -742,7 +746,12 @@ def resolve_provider(
             if has_usable_secret(os.getenv(env_var, "")):
                 return pid
 
-    return "openrouter"
+    raise AuthError(
+        "No inference provider configured. Run 'hermes model' to choose a "
+        "provider and model, or set an API key (OPENROUTER_API_KEY, "
+        "OPENAI_API_KEY, etc.) in ~/.hermes/.env.",
+        code="no_provider_configured",
+    )
 
 
 # =============================================================================
diff --git a/tests/test_api_key_providers.py b/tests/test_api_key_providers.py
index ad1676e6..0c6337d3 100644
--- a/tests/test_api_key_providers.py
+++ b/tests/test_api_key_providers.py
@@ -266,7 +266,8 @@ class TestResolveProvider:
 
     def test_auto_does_not_select_copilot_from_github_token(self, monkeypatch):
         monkeypatch.setenv("GITHUB_TOKEN", "gh-test-token")
-        assert resolve_provider("auto") == "openrouter"
+        with pytest.raises(AuthError, match="No inference provider configured"):
+            resolve_provider("auto")
 
 
 # =============================================================================