feat: config structure validation — detect malformed YAML at startup (#5426)

Add validate_config_structure() that catches common config.yaml mistakes:
- custom_providers as dict instead of list (missing '-' in YAML)
- fallback_model accidentally nested inside another section
- custom_providers entries missing required fields (name, base_url)
- Missing model section when custom_providers is configured
- Root-level keys that look like misplaced custom_providers fields

Surface these diagnostics at three levels:
1. Startup: print_config_warnings() runs at CLI and gateway module load,
   so users see issues before hitting cryptic errors
2. Error time: 'Unknown provider' errors in auth.py and model_switch.py
   now include config diagnostics with fix suggestions
3. Doctor: 'hermes doctor' shows a Config Structure section with all
   issues and fix hints

Also adds a warning log in runtime_provider.py when custom_providers
is a dict (previously returned None silently).

Motivated by a Discord user who had malformed custom_providers YAML
and got only 'Unknown Provider' with no guidance on what was wrong.

17 new tests covering all validation paths.
This commit is contained in:
Teknium
2026-04-05 23:31:20 -07:00
committed by GitHub
parent 9ca954a274
commit dce5f51c7c
8 changed files with 443 additions and 9 deletions

7
cli.py
View File

@@ -453,6 +453,13 @@ def load_cli_config() -> Dict[str, Any]:
# Load configuration at module startup
CLI_CONFIG = load_cli_config()
# Validate config structure early — print warnings before user hits cryptic errors
try:
from hermes_cli.config import print_config_warnings
print_config_warnings()
except Exception:
pass
# Initialize the skin engine from config
try:
from hermes_cli.skin_engine import init_skin_from_config

View File

@@ -200,6 +200,13 @@ if _config_path.exists():
except Exception:
pass # Non-fatal; gateway can still run with .env values
# Validate config structure early — log warnings so gateway operators see problems
try:
from hermes_cli.config import print_config_warnings
print_config_warnings()
except Exception:
pass
# Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs)
os.environ["HERMES_QUIET"] = "1"

View File

@@ -711,6 +711,32 @@ def deactivate_provider() -> None:
# Provider Resolution — picks which provider to use
# =============================================================================
def _get_config_hint_for_unknown_provider(provider_name: str) -> str:
"""Return a helpful hint string when provider resolution fails.
Checks for common config.yaml mistakes (malformed custom_providers, etc.)
and returns a human-readable diagnostic, or empty string if nothing found.
"""
try:
from hermes_cli.config import validate_config_structure
issues = validate_config_structure()
if not issues:
return ""
lines = ["Config issue detected — run 'hermes doctor' for full diagnostics:"]
for ci in issues:
prefix = "ERROR" if ci.severity == "error" else "WARNING"
lines.append(f" [{prefix}] {ci.message}")
# Show first line of hint
first_hint = ci.hint.splitlines()[0] if ci.hint else ""
if first_hint:
lines.append(f"{first_hint}")
return "\n".join(lines)
except Exception:
return ""
def resolve_provider(
requested: Optional[str] = None,
*,
@@ -757,10 +783,14 @@ def resolve_provider(
if normalized in PROVIDER_REGISTRY:
return normalized
if normalized != "auto":
raise AuthError(
f"Unknown provider '{normalized}'.",
code="invalid_provider",
)
# Check for common config.yaml issues that cause this error
_config_hint = _get_config_hint_for_unknown_provider(normalized)
msg = f"Unknown provider '{normalized}'."
if _config_hint:
msg += f"\n\n{_config_hint}"
else:
msg += " Check 'hermes model' for available providers, or run 'hermes doctor' to diagnose config issues."
raise AuthError(msg, code="invalid_provider")
# Explicit one-off CLI creds always mean openrouter/custom
if explicit_api_key or explicit_base_url:

View File

@@ -19,6 +19,7 @@ import stat
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
@@ -1243,6 +1244,182 @@ def check_config_version() -> Tuple[int, int]:
return current, latest
# =============================================================================
# Config structure validation
# =============================================================================
# Fields that are valid at root level of config.yaml
_KNOWN_ROOT_KEYS = {
"_config_version", "model", "providers", "fallback_model",
"fallback_providers", "credential_pool_strategies", "toolsets",
"agent", "terminal", "display", "compression", "delegation",
"auxiliary", "custom_providers", "memory", "gateway",
}
# Valid fields inside a custom_providers list entry
_VALID_CUSTOM_PROVIDER_FIELDS = {
"name", "base_url", "api_key", "api_mode", "models",
"context_length", "rate_limit_delay",
}
# Fields that look like they should be inside custom_providers, not at root
_CUSTOM_PROVIDER_LIKE_FIELDS = {"base_url", "api_key", "rate_limit_delay", "api_mode"}
@dataclass
class ConfigIssue:
"""A detected config structure problem."""
severity: str # "error", "warning"
message: str
hint: str
def validate_config_structure(config: Optional[Dict[str, Any]] = None) -> List["ConfigIssue"]:
"""Validate config.yaml structure and return a list of detected issues.
Catches common YAML formatting mistakes that produce confusing runtime
errors (like "Unknown provider") instead of clear diagnostics.
Can be called with a pre-loaded config dict, or will load from disk.
"""
if config is None:
try:
config = load_config()
except Exception:
return [ConfigIssue("error", "Could not load config.yaml", "Run 'hermes setup' to create a valid config")]
issues: List[ConfigIssue] = []
# ── custom_providers must be a list, not a dict ──────────────────────
cp = config.get("custom_providers")
if cp is not None:
if isinstance(cp, dict):
issues.append(ConfigIssue(
"error",
"custom_providers is a dict — it must be a YAML list (items prefixed with '-')",
"Change to:\n"
" custom_providers:\n"
" - name: my-provider\n"
" base_url: https://...\n"
" api_key: ...",
))
# Check if dict keys look like they should be list-entry fields
cp_keys = set(cp.keys()) if isinstance(cp, dict) else set()
suspicious = cp_keys & _CUSTOM_PROVIDER_LIKE_FIELDS
if suspicious:
issues.append(ConfigIssue(
"warning",
f"Root-level keys {sorted(suspicious)} look like custom_providers entry fields",
"These should be indented under a '- name: ...' list entry, not at root level",
))
elif isinstance(cp, list):
# Validate each entry in the list
for i, entry in enumerate(cp):
if not isinstance(entry, dict):
issues.append(ConfigIssue(
"warning",
f"custom_providers[{i}] is not a dict (got {type(entry).__name__})",
"Each entry should have at minimum: name, base_url",
))
continue
if not entry.get("name"):
issues.append(ConfigIssue(
"warning",
f"custom_providers[{i}] is missing 'name' field",
"Add a name, e.g.: name: my-provider",
))
if not entry.get("base_url"):
issues.append(ConfigIssue(
"warning",
f"custom_providers[{i}] is missing 'base_url' field",
"Add the API endpoint URL, e.g.: base_url: https://api.example.com/v1",
))
# ── fallback_model must be a top-level dict with provider + model ────
fb = config.get("fallback_model")
if fb is not None:
if not isinstance(fb, dict):
issues.append(ConfigIssue(
"error",
f"fallback_model should be a dict with 'provider' and 'model', got {type(fb).__name__}",
"Change to:\n"
" fallback_model:\n"
" provider: openrouter\n"
" model: anthropic/claude-sonnet-4",
))
elif fb:
if not fb.get("provider"):
issues.append(ConfigIssue(
"warning",
"fallback_model is missing 'provider' field — fallback will be disabled",
"Add: provider: openrouter (or another provider)",
))
if not fb.get("model"):
issues.append(ConfigIssue(
"warning",
"fallback_model is missing 'model' field — fallback will be disabled",
"Add: model: anthropic/claude-sonnet-4 (or another model)",
))
# ── Check for fallback_model accidentally nested inside custom_providers ──
if isinstance(cp, dict) and "fallback_model" not in config and "fallback_model" in (cp or {}):
issues.append(ConfigIssue(
"error",
"fallback_model appears inside custom_providers instead of at root level",
"Move fallback_model to the top level of config.yaml (no indentation)",
))
# ── model section: should exist when custom_providers is configured ──
model_cfg = config.get("model")
if cp and not model_cfg:
issues.append(ConfigIssue(
"warning",
"custom_providers defined but no 'model' section — Hermes won't know which provider to use",
"Add a model section:\n"
" model:\n"
" provider: custom\n"
" default: your-model-name\n"
" base_url: https://...",
))
# ── Root-level keys that look misplaced ──────────────────────────────
for key in config:
if key.startswith("_"):
continue
if key not in _KNOWN_ROOT_KEYS and key in _CUSTOM_PROVIDER_LIKE_FIELDS:
issues.append(ConfigIssue(
"warning",
f"Root-level key '{key}' looks misplaced — should it be under 'model:' or inside a 'custom_providers' entry?",
f"Move '{key}' under the appropriate section",
))
return issues
def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
"""Print config structure warnings to stderr at startup.
Called early in CLI and gateway init so users see problems before
they hit cryptic "Unknown provider" errors. Prints nothing if
config is healthy.
"""
try:
issues = validate_config_structure(config)
except Exception:
return
if not issues:
return
import sys
lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"]
for ci in issues:
marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m"
lines.append(f" {marker} {ci.message}")
lines.append(" \033[2mRun 'hermes doctor' for fix suggestions.\033[0m")
sys.stderr.write("\n".join(lines) + "\n\n")
def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, Any]:
"""
Migrate config to latest version, prompting for new required fields.

View File

@@ -318,6 +318,25 @@ def run_doctor(args):
except Exception:
pass
# Validate config structure (catches malformed custom_providers, etc.)
try:
from hermes_cli.config import validate_config_structure
config_issues = validate_config_structure()
if config_issues:
print()
print(color("◆ Config Structure", Colors.CYAN, Colors.BOLD))
for ci in config_issues:
if ci.severity == "error":
check_fail(ci.message)
else:
check_warn(ci.message)
# Show the hint indented
for hint_line in ci.hint.splitlines():
check_info(hint_line)
issues.append(ci.message)
except Exception:
pass
# =========================================================================
# Check: Auth providers
# =========================================================================

View File

@@ -419,14 +419,25 @@ def switch_model(
# Resolve the provider
pdef = resolve_provider_full(explicit_provider, user_providers)
if pdef is None:
_switch_err = (
f"Unknown provider '{explicit_provider}'. "
f"Check 'hermes model' for available providers, or define it "
f"in config.yaml under 'providers:'."
)
# Check for common config issues that cause provider resolution failures
try:
from hermes_cli.config import validate_config_structure
_cfg_issues = validate_config_structure()
if _cfg_issues:
_switch_err += "\n\nRun 'hermes doctor' — config issues detected:"
for _ci in _cfg_issues[:3]:
_switch_err += f"\n{_ci.message}"
except Exception:
pass
return ModelSwitchResult(
success=False,
is_global=is_global,
error_message=(
f"Unknown provider '{explicit_provider}'. "
f"Check 'hermes model' for available providers, or define it "
f"in config.yaml under 'providers:'."
),
error_message=_switch_err,
)
target_provider = pdef.id

View File

@@ -2,10 +2,13 @@
from __future__ import annotations
import logging
import os
import re
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
from hermes_cli import auth as auth_mod
from agent.credential_pool import CredentialPool, PooledCredential, get_custom_provider_pool_key, load_pool
from hermes_cli.auth import (
@@ -258,6 +261,12 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
config = load_config()
custom_providers = config.get("custom_providers")
if not isinstance(custom_providers, list):
if isinstance(custom_providers, dict):
logger.warning(
"custom_providers in config.yaml is a dict, not a list. "
"Each entry must be prefixed with '-' in YAML. "
"Run 'hermes doctor' for details."
)
return None
for entry in custom_providers:

View File

@@ -0,0 +1,174 @@
"""Tests for config.yaml structure validation (validate_config_structure)."""
import pytest
from hermes_cli.config import validate_config_structure, ConfigIssue
class TestCustomProvidersValidation:
"""custom_providers must be a YAML list, not a dict."""
def test_dict_instead_of_list(self):
"""The exact Discord user scenario — custom_providers as flat dict."""
issues = validate_config_structure({
"custom_providers": {
"name": "Generativelanguage.googleapis.com",
"base_url": "https://generativelanguage.googleapis.com/v1beta/openai",
"api_key": "xxx",
"model": "models/gemini-2.5-flash",
"rate_limit_delay": 2.0,
"fallback_model": {
"provider": "openrouter",
"model": "qwen/qwen3.6-plus:free",
},
},
"fallback_providers": [],
})
errors = [i for i in issues if i.severity == "error"]
assert any("dict" in i.message and "list" in i.message for i in errors), (
"Should detect custom_providers as dict instead of list"
)
def test_dict_detects_misplaced_fields(self):
"""When custom_providers is a dict, detect fields that look misplaced."""
issues = validate_config_structure({
"custom_providers": {
"name": "test",
"base_url": "https://example.com",
"api_key": "xxx",
},
})
warnings = [i for i in issues if i.severity == "warning"]
# Should flag base_url, api_key as looking like custom_providers entry fields
misplaced = [i for i in warnings if "custom_providers entry fields" in i.message]
assert len(misplaced) == 1
def test_dict_detects_nested_fallback(self):
"""When fallback_model gets swallowed into custom_providers dict."""
issues = validate_config_structure({
"custom_providers": {
"name": "test",
"fallback_model": {"provider": "openrouter", "model": "test"},
},
})
errors = [i for i in issues if i.severity == "error"]
assert any("fallback_model" in i.message and "inside" in i.message for i in errors)
def test_valid_list_no_issues(self):
"""Properly formatted custom_providers should produce no issues."""
issues = validate_config_structure({
"custom_providers": [
{"name": "gemini", "base_url": "https://example.com/v1"},
],
"model": {"provider": "custom", "default": "test"},
})
assert len(issues) == 0
def test_list_entry_missing_name(self):
"""List entry without name should warn."""
issues = validate_config_structure({
"custom_providers": [{"base_url": "https://example.com/v1"}],
"model": {"provider": "custom"},
})
assert any("missing 'name'" in i.message for i in issues)
def test_list_entry_missing_base_url(self):
"""List entry without base_url should warn."""
issues = validate_config_structure({
"custom_providers": [{"name": "test"}],
"model": {"provider": "custom"},
})
assert any("missing 'base_url'" in i.message for i in issues)
def test_list_entry_not_dict(self):
"""Non-dict list entries should warn."""
issues = validate_config_structure({
"custom_providers": ["not-a-dict"],
"model": {"provider": "custom"},
})
assert any("not a dict" in i.message for i in issues)
def test_none_custom_providers_no_issues(self):
"""No custom_providers at all should be fine."""
issues = validate_config_structure({
"model": {"provider": "openrouter"},
})
assert len(issues) == 0
class TestFallbackModelValidation:
"""fallback_model should be a top-level dict with provider + model."""
def test_missing_provider(self):
issues = validate_config_structure({
"fallback_model": {"model": "anthropic/claude-sonnet-4"},
})
assert any("missing 'provider'" in i.message for i in issues)
def test_missing_model(self):
issues = validate_config_structure({
"fallback_model": {"provider": "openrouter"},
})
assert any("missing 'model'" in i.message for i in issues)
def test_valid_fallback(self):
issues = validate_config_structure({
"fallback_model": {
"provider": "openrouter",
"model": "anthropic/claude-sonnet-4",
},
})
# Only fallback-related issues should be absent
fb_issues = [i for i in issues if "fallback" in i.message.lower()]
assert len(fb_issues) == 0
def test_non_dict_fallback(self):
issues = validate_config_structure({
"fallback_model": "openrouter:anthropic/claude-sonnet-4",
})
assert any("should be a dict" in i.message for i in issues)
def test_empty_fallback_dict_no_issues(self):
"""Empty fallback_model dict means disabled — no warnings needed."""
issues = validate_config_structure({
"fallback_model": {},
})
fb_issues = [i for i in issues if "fallback" in i.message.lower()]
assert len(fb_issues) == 0
class TestMissingModelSection:
"""Warn when custom_providers exists but model section is missing."""
def test_custom_providers_without_model(self):
issues = validate_config_structure({
"custom_providers": [
{"name": "test", "base_url": "https://example.com/v1"},
],
})
assert any("no 'model' section" in i.message for i in issues)
def test_custom_providers_with_model(self):
issues = validate_config_structure({
"custom_providers": [
{"name": "test", "base_url": "https://example.com/v1"},
],
"model": {"provider": "custom", "default": "test-model"},
})
# Should not warn about missing model section
assert not any("no 'model' section" in i.message for i in issues)
class TestConfigIssueDataclass:
"""ConfigIssue should be a proper dataclass."""
def test_fields(self):
issue = ConfigIssue(severity="error", message="test msg", hint="test hint")
assert issue.severity == "error"
assert issue.message == "test msg"
assert issue.hint == "test hint"
def test_equality(self):
a = ConfigIssue("error", "msg", "hint")
b = ConfigIssue("error", "msg", "hint")
assert a == b