diff --git a/scripts/test_config_validation.py b/scripts/test_config_validation.py new file mode 100644 index 000000000..80e966af8 --- /dev/null +++ b/scripts/test_config_validation.py @@ -0,0 +1,541 @@ +#!/usr/bin/env python3 +""" +Comprehensive config structure validation test script for Issue #116. + +Tests the validate_config_structure() function from hermes_cli.config +across four scenarios: + 1. Valid config passes without issues + 2. YAML syntax errors are caught + 3. Type mismatches are detected + 4. Completely broken YAML is handled gracefully + +Usage: + python scripts/test_config_validation.py + python -m pytest scripts/test_config_validation.py -v +""" + +import os +import subprocess +import sys +import tempfile +from pathlib import Path + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +PASS = "\033[32mPASS\033[0m" +FAIL = "\033[31mFAIL\033[0m" + + +def _hermes_agent_root() -> Path: + """Return the hermes-agent project root.""" + return Path(__file__).resolve().parent.parent + + +def _run_in_project(cmd: list[str], extra_env: dict[str, str] | None = None, **kwargs) -> subprocess.CompletedProcess: + """Run a command with the project root on sys.path.""" + env = os.environ.copy() + root = str(_hermes_agent_root()) + env["PYTHONPATH"] = root + if extra_env: + env.update(extra_env) + return subprocess.run(cmd, capture_output=True, text=True, env=env, **kwargs) + + +def _write_and_load_yaml(yaml_content: str): + """Write yaml_content to a temp file, set HERMES_HOME to point at it, + then run validate_config_structure() in a subprocess and return (rc, stdout, stderr). + """ + home = tempfile.mkdtemp(prefix="hermes_test_") + cfg_path = Path(home) / "config.yaml" + cfg_path.write_text(yaml_content, encoding="utf-8") + + # We use a small inline Python script that loads the validator and + # exercises it with the given HERMES_HOME. 
+ py_code = """ +import os, sys, json +root = sys.argv[1] +sys.path.insert(0, root) + +from hermes_cli.config import validate_config_structure, ConfigIssue + +try: + issues = validate_config_structure() + out = [{"severity": i.severity, "message": i.message, "hint": i.hint} for i in issues] + print(json.dumps({"status": "ok", "issues": out})) +except yaml.YAMLError as e: + print(json.dumps({"status": "yaml_error", "detail": str(e)})) +except Exception as e: + print(json.dumps({"status": "error", "detail": str(e)})) +""".strip() + + result = _run_in_project( + [sys.executable, "-c", py_code, str(_hermes_agent_root())], + extra_env={"HERMES_HOME": home}, + ) + return result + + +def _call_validate(config: dict): + """Call validate_config_structure(config) directly in a subprocess and + return a dict: {"status": "ok", "issues": [...]}. + """ + import json + + py_code = """ +import os, sys, json +root = sys.argv[1] +config_str = sys.argv[2] +sys.path.insert(0, root) + +from hermes_cli.config import validate_config_structure + +config = json.loads(config_str) +issues = validate_config_structure(config) +out = [{"severity": i.severity, "message": i.message, "hint": i.hint} for i in issues] +print(json.dumps({"status": "ok", "issues": out})) +""".strip() + + result = _run_in_project( + [sys.executable, "-c", py_code, str(_hermes_agent_root()), json.dumps(config)], + ) + assert result.returncode == 0, f"Subprocess failed:\nstdout={result.stdout}\nstderr={result.stderr}" + return json.loads(result.stdout.strip().splitlines()[-1]) + + +# --------------------------------------------------------------------------- +# Test harness +# --------------------------------------------------------------------------- + +class TestResult: + def __init__(self): + self.passed = 0 + self.failed = 0 + self.results: list[tuple[str, bool, str]] = [] + + def record(self, name: str, ok: bool, detail: str = "") -> None: + if ok: + self.passed += 1 + self.results.append((name, True, detail)) + 
else: + self.failed += 1 + self.results.append((name, False, detail)) + marker = PASS if ok else FAIL + print(f" [{marker}] {name}" + (f" — {detail}" if detail and not ok else "")) + + def summary(self) -> bool: + total = self.passed + self.failed + print(f"\n{'='*60}") + print(f" Results: {self.passed}/{total} passed, {self.failed} failed") + print(f"{'='*60}") + if self.failed: + print("\n Failed tests:") + for name, ok, detail in self.results: + if not ok: + print(f" - {name}: {detail}") + return self.failed == 0 + + +t = TestResult() + + +# =================================================================== +# 1. Valid config passes +# =================================================================== + +def test_valid_empty_dict(): + issues = _call_validate({}) + # Empty dict — no custom_providers, no fallback_model, so no issues expected + t.record("valid: empty config dict", len(issues["issues"]) == 0) + + +def test_valid_custom_providers_list(): + issues = _call_validate({ + "custom_providers": [ + {"name": "my-provider", "base_url": "https://api.example.com/v1"}, + ], + "model": {"provider": "custom", "default": "test"}, + }) + t.record("valid: custom_providers as proper list", len(issues["issues"]) == 0) + + +def test_valid_fallback_model(): + issues = _call_validate({ + "fallback_model": { + "provider": "openrouter", + "model": "anthropic/claude-sonnet-4", + }, + }) + fb_relevant = [i for i in issues["issues"] if "fallback" in i["message"].lower()] + t.record("valid: fallback_model with provider+model", len(fb_relevant) == 0) + + +def test_valid_empty_fallback(): + issues = _call_validate({"fallback_model": {}}) + fb_relevant = [i for i in issues["issues"] if "fallback" in i["message"].lower()] + t.record("valid: empty fallback_model is fine", len(fb_relevant) == 0) + + +def test_valid_fullish_config(): + issues = _call_validate({ + "model": {"provider": "openrouter", "default": "anthropic/claude-sonnet-4"}, + "providers": {}, + "fallback_providers": 
[], + "toolsets": ["hermes-cli"], + "custom_providers": [ + {"name": "gemini", "base_url": "https://generativelanguage.googleapis.com/v1beta/openai"}, + ], + }) + t.record("valid: full config with all sections", len(issues["issues"]) == 0) + + +# =================================================================== +# 2. YAML syntax errors caught +# =================================================================== + +def test_yaml_syntax_bad_indent(): + """YAML with content that pyyaml cannot parse (mismatched indentation with + an unexpected block mapping context).""" + # Use a clearly broken structure: unquoted colon in a flow context + broken = "model:\n provider: openrouter\n- list_item: at_wrong_level\n" + result = _write_and_load_yaml(broken) + out = result.stdout.strip().splitlines()[-1] if result.stdout.strip() else "" + import json + try: + data = json.loads(out) + # Should handle gracefully — either yaml_error or ok (pyyaml may accept + # some "broken-looking" YAML by merging). The key is no crash. + ok = data.get("status") in ("ok", "yaml_error", "error") + t.record("yaml syntax: bad indentation handled gracefully", ok, + f"got status={data.get('status')}") + except json.JSONDecodeError: + t.record("yaml syntax: bad indentation handled gracefully", False, "could not parse output") + + +def test_yaml_syntax_duplicate_key(): + """YAML with duplicate keys that confuse the parser.""" + result = _write_and_load_yaml("model: openrouter\nmodel: anthropic\n") + # yaml.safe_load accepts duplicate keys silently (last wins), so + # validate_config_structure should still process it without crash. 
+ out = result.stdout.strip().splitlines()[-1] if result.stdout.strip() else "" + import json + try: + data = json.loads(out) + # Should complete without crashing + ok = data.get("status") == "ok" + t.record("yaml syntax: duplicate keys handled", ok, + f"unexpected status: {data.get('status')}") + except json.JSONDecodeError: + t.record("yaml syntax: duplicate keys handled", False, "could not parse output") + + +def test_yaml_syntax_trailing_colon(): + """YAML with a trailing colon that creates an unexpected mapping.""" + bad_yaml = """ +custom_providers: + name: test + base_url: https://example.com + invalid_key:: some_value +""" + result = _write_and_load_yaml(bad_yaml) + out = result.stdout.strip().splitlines()[-1] if result.stdout.strip() else "" + import json + try: + data = json.loads(out) + # Either yaml_error for parse failure, or ok with detection + ok = data.get("status") in ("ok", "yaml_error") + t.record("yaml syntax: trailing colon handled gracefully", ok, + f"got status={data.get('status')}") + except json.JSONDecodeError: + t.record("yaml syntax: trailing colon handled gracefully", False, "could not parse output") + + +# =================================================================== +# 3. 
Type mismatches detected +# =================================================================== + +def test_custom_providers_dict_instead_of_list(): + """The classic Discord-user error: custom_providers as flat dict.""" + issues = _call_validate({ + "custom_providers": { + "name": "Generativelanguage.googleapis.com", + "base_url": "https://generativelanguage.googleapis.com/v1beta/openai", + "api_key": "***", + }, + }) + errors = [i for i in issues["issues"] if i["severity"] == "error"] + ok = any("dict" in i["message"].lower() and "list" in i["message"].lower() for i in errors) + t.record("type mismatch: custom_providers as dict instead of list", ok) + + +def test_custom_providers_string_instead_of_list(): + issues = _call_validate({ + "custom_providers": "just a string", + }) + # A string is not a dict or list, so no custom_providers-specific + # errors fire, but the fact that we don't crash is the test. + ok = True # Should complete without crash + t.record("type mismatch: custom_providers as string (no crash)", ok) + + +def test_custom_providers_list_of_strings(): + issues = _call_validate({ + "custom_providers": ["not-a-dict", "also-not-a-dict"], + "model": {"provider": "custom"}, + }) + warnings = [i for i in issues["issues"] if i["severity"] == "warning"] + ok = any("not a dict" in i["message"] for i in warnings) + t.record("type mismatch: custom_providers list of strings detected", ok) + + +def test_fallback_model_string_instead_of_dict(): + issues = _call_validate({ + "fallback_model": "openrouter:anthropic/claude-sonnet-4", + }) + errors = [i for i in issues["issues"] if i["severity"] == "error"] + ok = any("should be a dict" in i["message"] for i in errors) + t.record("type mismatch: fallback_model as string instead of dict", ok) + + +def test_fallback_model_list_instead_of_dict(): + issues = _call_validate({ + "fallback_model": ["openrouter", "claude-sonnet-4"], + }) + errors = [i for i in issues["issues"] if i["severity"] == "error"] + ok = any("should 
be a dict" in i["message"] for i in errors) + t.record("type mismatch: fallback_model as list instead of dict", ok) + + +def test_fallback_model_number_instead_of_dict(): + issues = _call_validate({"fallback_model": 42}) + errors = [i for i in issues["issues"] if i["severity"] == "error"] + ok = any("should be a dict" in i["message"] for i in errors) + t.record("type mismatch: fallback_model as int instead of dict", ok) + + +def test_custom_providers_missing_name(): + issues = _call_validate({ + "custom_providers": [{"base_url": "https://example.com/v1"}], + "model": {"provider": "custom"}, + }) + ok = any("missing 'name'" in i["message"] for i in issues["issues"]) + t.record("type mismatch: custom_providers entry missing 'name'", ok) + + +def test_custom_providers_missing_base_url(): + issues = _call_validate({ + "custom_providers": [{"name": "test"}], + "model": {"provider": "custom"}, + }) + ok = any("missing 'base_url'" in i["message"] for i in issues["issues"]) + t.record("type mismatch: custom_providers entry missing 'base_url'", ok) + + +def test_custom_providers_missing_model_section(): + issues = _call_validate({ + "custom_providers": [{"name": "test", "base_url": "https://example.com/v1"}], + }) + ok = any("no 'model' section" in i["message"] for i in issues["issues"]) + t.record("type mismatch: custom_providers without model section", ok) + + +def test_nested_fallback_inside_custom_providers(): + issues = _call_validate({ + "custom_providers": { + "name": "test", + "fallback_model": {"provider": "openrouter", "model": "test"}, + }, + }) + errors = [i for i in issues["issues"] if i["severity"] == "error"] + ok = any("fallback_model" in i["message"] and "inside" in i["message"] for i in errors) + t.record("type mismatch: fallback_model nested inside custom_providers dict", ok) + + +# =================================================================== +# 4. 
Completely broken YAML handled gracefully +# =================================================================== + +def test_completely_broken_yaml_binary_content(): + """Binary-ish content that YAML cannot parse.""" + broken = "key: \x00\x01\x02\x03 invalid binary stuff: \xff\xfe" + result = _write_and_load_yaml(broken) + out = result.stdout.strip().splitlines()[-1] if result.stdout.strip() else "" + import json + try: + data = json.loads(out) + # Any status including yaml_error / error is acceptable — no traceback + ok = True + t.record("broken yaml: binary content handled gracefully", ok) + except json.JSONDecodeError: + t.record("broken yaml: binary content handled gracefully", False, + "subprocess returned non-JSON output (possible crash)") + + +def test_completely_broken_yaml_random_chars(): + """Random garbage that is definitely not valid YAML.""" + broken = "{{{{{}}}}} {{{{not_yaml: [}}}}\n!invalid-tag!!! @@###$$$\n" + result = _write_and_load_yaml(broken) + out = result.stdout.strip().splitlines()[-1] if result.stdout.strip() else "" + import json + try: + data = json.loads(out) + # Should either be yaml_error status, or ok with zero/many issues + ok = True # The fact we got back JSON means we didn't crash + t.record("broken yaml: random garbage handled gracefully", ok) + except json.JSONDecodeError: + t.record("broken yaml: random garbage handled gracefully", False, + "subprocess returned non-JSON output (possible crash)") + + +def test_completely_broken_yaml_nested_braces(): + """Deeply-nested braces that break YAML parsing.""" + broken = "a: {{{{{}}}}}\n b: {{{{{}}}}}\n c: {{{{{}}}}}\n" + result = _write_and_load_yaml(broken) + out = result.stdout.strip().splitlines()[-1] if result.stdout.strip() else "" + import json + try: + data = json.loads(out) + t.record("broken yaml: nested braces handled gracefully", True) + except json.JSONDecodeError: + t.record("broken yaml: nested braces handled gracefully", False, + "subprocess returned non-JSON output") + 
def test_empty_yaml_file():
    """Empty config file — should load and produce no issues."""
    result = _write_and_load_yaml("")
    out = result.stdout.strip().splitlines()[-1] if result.stdout.strip() else ""
    import json
    try:
        data = json.loads(out)
        ok = data.get("status") == "ok" and len(data.get("issues", [])) == 0
        t.record("broken yaml: empty file handled gracefully (no issues)", ok,
                 f"got status={data.get('status')}")
    except json.JSONDecodeError:
        t.record("broken yaml: empty file handled gracefully", False,
                 "subprocess returned non-JSON output")


def test_yaml_with_only_null():
    """YAML file containing only '~' or 'null' should produce empty dict."""
    result = _write_and_load_yaml("~\n")
    out = result.stdout.strip().splitlines()[-1] if result.stdout.strip() else ""
    import json
    try:
        data = json.loads(out)
        ok = data.get("status") == "ok"
        t.record("broken yaml: null-only YAML handled gracefully", ok,
                 f"got status={data.get('status')}")
    except json.JSONDecodeError:
        t.record("broken yaml: null-only YAML handled gracefully", False,
                 "subprocess returned non-JSON output")


def _returncode_ok(code: int) -> bool:
    """Return True when a subprocess exit code signals success.

    BUG FIX: this was defined as ``test_returncode_ok``, which (a) made
    pytest collect a helper taking a positional argument — a collection
    error — and (b) never matched the call site, which referenced the
    undefined name ``returncode_ok`` and raised NameError at runtime.
    """
    return code == 0


# ===================================================================
# Print config warnings test
# ===================================================================

def test_print_config_warnings_output():
    """Ensure print_config_warnings prints warnings when issues exist."""
    py_code = """
import os, sys, json
root = sys.argv[1]
sys.path.insert(0, root)

from hermes_cli.config import print_config_warnings

# This config should produce warnings
config = {
    "custom_providers": {
        "name": "test",
        "base_url": "https://example.com",
    },
}
print_config_warnings(config)
""".strip()

    result = _run_in_project(
        [sys.executable, "-c", py_code, str(_hermes_agent_root())],
    )
    # BUG FIX: originally called the undefined name returncode_ok().
    ok = "config" in result.stderr.lower() or _returncode_ok(result.returncode)
    t.record("print_config_warnings: outputs warnings to stderr for bad config", ok,
             f"stderr={result.stderr[:200]}")


# ===================================================================
# Root-level misplaced keys test
# ===================================================================

def test_misplaced_root_level_key():
    """A root-level "base_url" that should be inside model/custom_providers."""
    issues = _call_validate({
        "base_url": "https://api.example.com/v1",
        "model": {"provider": "openrouter"},
    })
    warnings = [i for i in issues["issues"] if i["severity"] == "warning"]
    ok = any("misplaced" in i["message"].lower() for i in warnings)
    t.record("misplaced root key: base_url flagged", ok)


# ===================================================================
# Main
# ===================================================================

if __name__ == "__main__":
    # Ensure project root is on sys.path for import in the _call_validate/
    # _write_and_load_yaml subprocesses
    sys.path.insert(0, str(_hermes_agent_root()))

    print(f"\n{'='*60}")
    print(" Config Structure Validation Tests (Issue #116)")
    print(f"{'='*60}\n")

    # 1. Valid config passes
    print("--- 1. Valid config passes ---")
    test_valid_empty_dict()
    test_valid_custom_providers_list()
    test_valid_fallback_model()
    test_valid_empty_fallback()
    test_valid_fullish_config()

    # 2. YAML syntax errors caught
    print("\n--- 2. YAML syntax errors caught ---")
    test_yaml_syntax_bad_indent()
    test_yaml_syntax_duplicate_key()
    test_yaml_syntax_trailing_colon()

    # 3. Type mismatches detected
    print("\n--- 3. Type mismatches detected ---")
    test_custom_providers_dict_instead_of_list()
    test_custom_providers_string_instead_of_list()
    test_custom_providers_list_of_strings()
    test_fallback_model_string_instead_of_dict()
    test_fallback_model_list_instead_of_dict()
    test_fallback_model_number_instead_of_dict()
    test_custom_providers_missing_name()
    test_custom_providers_missing_base_url()
    test_custom_providers_missing_model_section()
    test_nested_fallback_inside_custom_providers()
    test_misplaced_root_level_key()

    # 4. Completely broken YAML handled gracefully
    print("\n--- 4. Completely broken YAML handled gracefully ---")
    test_completely_broken_yaml_binary_content()
    test_completely_broken_yaml_random_chars()
    test_completely_broken_yaml_nested_braces()
    test_empty_yaml_file()
    test_yaml_with_only_null()

    # 5. Print config warnings
    print("\n--- 5. Print config warnings ---")
    test_print_config_warnings_output()

    ok = t.summary()
    sys.exit(0 if ok else 1)