Compare commits

...

2 Commits

2 changed files with 631 additions and 0 deletions

scripts/config_validate.py (new file, 356 additions)

@@ -0,0 +1,356 @@
#!/usr/bin/env python3
"""
config_validate.py — Pre-deploy validation for timmy-config YAML files (Issue #690).
Validates YAML syntax, required keys, value types, and forbidden keys before
writing config to disk. Prevents broken deploys from bad config.
Usage:
python3 scripts/config_validate.py config.yaml # Validate only
    python3 scripts/config_validate.py config.yaml --deploy DEST   # Validate, then copy to DEST
python3 scripts/config_validate.py --schema # Print expected schema
python3 scripts/config_validate.py --check-dir config/ # Validate all YAML in dir
python3 scripts/config_validate.py --json config.yaml # Output as JSON
"""
import argparse
import json
import sys
import shutil
from pathlib import Path
from typing import List, Optional, Tuple
try:
import yaml
HAS_YAML = True
except ImportError:
HAS_YAML = False
# ─── Schema definition ───────────────────────────────────────────────
SCHEMA = {
"model": {
"type": str,
"required": True,
"description": "Default model identifier (e.g. 'nousresearch/hermes-4-14b')",
},
"provider": {
"type": str,
"required": False,
"description": "Default provider name",
},
"providers": {
"type": dict,
"required": False,
"description": "Provider configurations keyed by name",
"value_schema": {
"base_url": {"type": str, "required": False},
"api_key_env": {"type": str, "required": False},
},
},
"fallback_providers": {
"type": list,
"required": False,
"description": "Ordered fallback provider chain",
"item_type": str,
},
"toolsets": {
"type": list,
"required": False,
"description": "Enabled toolset names",
"item_type": str,
},
"agent": {
"type": dict,
"required": False,
"description": "Agent behavior configuration",
"value_schema": {
"max_iterations": {"type": int, "required": False},
"temperature": {"type": (int, float), "required": False},
"save_trajectories": {"type": bool, "required": False},
"quiet_mode": {"type": bool, "required": False},
},
},
"display": {
"type": dict,
"required": False,
"description": "CLI display settings",
"value_schema": {
"spinner": {"type": bool, "required": False},
"colors": {"type": bool, "required": False},
"skin": {"type": str, "required": False},
"tool_progress": {"type": bool, "required": False},
},
},
"gateway": {
"type": dict,
"required": False,
"description": "Gateway/messaging settings",
"value_schema": {
"enabled": {"type": bool, "required": False},
"port": {"type": int, "required": False},
"cors_origins": {"type": list, "required": False},
},
},
"cron": {
"type": dict,
"required": False,
"description": "Cron scheduler settings",
"value_schema": {
"enabled": {"type": bool, "required": False},
"interval_seconds": {"type": int, "required": False},
"max_concurrent": {"type": int, "required": False},
},
},
"logging": {
"type": dict,
"required": False,
"description": "Logging configuration",
"value_schema": {
"level": {"type": str, "required": False},
"file": {"type": (str, type(None)), "required": False},
},
},
"session": {
"type": dict,
"required": False,
"description": "Session behavior",
"value_schema": {
"save_trajectories": {"type": bool, "required": False},
"max_iterations": {"type": int, "required": False},
"context_compression": {"type": bool, "required": False},
},
},
}
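# Illustration only (not shipped with the repo): a minimal config that satisfies
# this schema. Only "model" is required; every other section is optional.
#
#   model: nousresearch/hermes-4-14b
#   provider: openrouter
#   agent:
#     max_iterations: 90
#     temperature: 0.7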
FORBIDDEN_KEYS = {
"anthropic_api_key": "Use ANTHROPIC_API_KEY env var — never store keys in config",
"openai_api_key": "Use OPENAI_API_KEY env var — never store keys in config",
"openrouter_api_key": "Use OPENROUTER_API_KEY env var — never store keys in config",
"password": "Never store passwords in config",
"secret": "Never store secrets in config",
"token": "Never store tokens in config — use env vars",
}
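# Illustration: a config containing e.g. "openrouter_api_key: sk-..." fails
# validation with the message above; the supported pattern is to reference the
# env var through the provider block instead:
#
#   providers:
#     openrouter:
#       api_key_env: OPENROUTER_API_KEY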
# ─── Validation errors ───────────────────────────────────────────────
class ValidationError:
def __init__(self, path: str, message: str, severity: str = "error"):
self.path = path
self.message = message
self.severity = severity
def to_dict(self) -> dict:
return {"path": self.path, "message": self.message, "severity": self.severity}
def __str__(self):
tag = "ERROR" if self.severity == "error" else "WARN"
return f"[{tag}] {self.path}: {self.message}"
def __repr__(self):
return f"ValidationError({self.path!r}, {self.message!r}, {self.severity!r})"
# ─── Core validators ─────────────────────────────────────────────────
def validate_yaml_syntax(content: str) -> Tuple[Optional[dict], List[ValidationError]]:
"""Parse YAML, return (data, errors)."""
errors: List[ValidationError] = []
if not HAS_YAML:
return None, [ValidationError("root", "PyYAML not installed — pip install pyyaml")]
try:
data = yaml.safe_load(content)
except yaml.YAMLError as e:
return None, [ValidationError("syntax", str(e))]
if data is None:
return {}, [ValidationError("root", "Config file is empty", "warning")]
if not isinstance(data, dict):
return None, [ValidationError("root", f"Expected mapping, got {type(data).__name__}")]
return data, errors
def validate_required_keys(data: dict) -> List[ValidationError]:
"""Check required keys exist and have correct types."""
errors: List[ValidationError] = []
for key, spec in SCHEMA.items():
if spec.get("required"):
if key not in data:
errors.append(ValidationError(key, f"Required key '{key}' is missing"))
elif not isinstance(data[key], spec["type"]):
errors.append(ValidationError(
key,
f"Expected {spec['type'].__name__}, got {type(data[key]).__name__}"
))
return errors
def validate_value_types(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
"""Check all known keys have correct types, recursively."""
if schema is None:
schema = SCHEMA
errors: List[ValidationError] = []
for key, spec in schema.items():
full_key = f"{prefix}.{key}" if prefix else key
if key not in data:
continue
value = data[key]
expected = spec["type"]
        if not isinstance(value, expected):
            # spec["type"] may be a tuple of accepted types (e.g. (int, float))
            expected_name = (expected.__name__ if hasattr(expected, "__name__")
                             else " or ".join(t.__name__ for t in expected))
            errors.append(ValidationError(
                full_key,
                f"Expected {expected_name}, got {type(value).__name__}"
            ))
continue
# Check list item types
if isinstance(value, list) and "item_type" in spec:
for i, item in enumerate(value):
if not isinstance(item, spec["item_type"]):
errors.append(ValidationError(
f"{full_key}[{i}]",
f"Expected {spec['item_type'].__name__}, got {type(item).__name__}"
))
# Recurse into nested dicts
if isinstance(value, dict) and "value_schema" in spec:
errors.extend(validate_value_types(value, spec["value_schema"], full_key))
return errors
def validate_no_forbidden_keys(data: dict, prefix: str = "") -> List[ValidationError]:
"""Check for keys that should never be in config."""
errors: List[ValidationError] = []
for key, value in data.items():
full_key = f"{prefix}.{key}" if prefix else key
if key.lower() in FORBIDDEN_KEYS:
errors.append(ValidationError(full_key, FORBIDDEN_KEYS[key.lower()]))
if isinstance(value, dict):
errors.extend(validate_no_forbidden_keys(value, full_key))
return errors
def validate_unknown_keys(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
"""Warn about keys not in schema (not an error, just a warning)."""
if schema is None:
schema = SCHEMA
warnings: List[ValidationError] = []
known = set(schema.keys())
for key in data:
full_key = f"{prefix}.{key}" if prefix else key
if key not in known:
warnings.append(ValidationError(full_key, "Unknown key — not in schema", "warning"))
elif isinstance(data[key], dict) and key in schema and "value_schema" in schema[key]:
warnings.extend(validate_unknown_keys(data[key], schema[key]["value_schema"], full_key))
return warnings
# ─── Public API ───────────────────────────────────────────────────────
def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:
"""Full validation pipeline. Returns (is_valid, errors)."""
data, errors = validate_yaml_syntax(content)
if data is None:
return False, errors
errors.extend(validate_required_keys(data))
errors.extend(validate_value_types(data))
errors.extend(validate_no_forbidden_keys(data))
errors.extend(validate_unknown_keys(data))
has_errors = any(e.severity == "error" for e in errors)
return not has_errors, errors
def validate_file(path: str) -> Tuple[bool, List[ValidationError]]:
"""Validate a YAML file on disk."""
p = Path(path)
if not p.exists():
return False, [ValidationError(str(p), "File not found")]
content = p.read_text(encoding="utf-8")
return validate_config(content)
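# Sketch of programmatic use (illustrative; the path and variable names are
# hypothetical, not part of this script):
#   ok, problems = validate_file("config.yaml")
#   if not ok:
#       for p in problems:
#           print(p)  # e.g. "[ERROR] model: Required key 'model' is missing"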
# ─── Schema dump ──────────────────────────────────────────────────────
def dump_schema(schema: Optional[dict] = None, prefix: str = "", indent: int = 0) -> List[str]:
"""Pretty-print schema as text."""
if schema is None:
schema = SCHEMA
lines: List[str] = []
for key, spec in schema.items():
full_key = f"{prefix}.{key}" if prefix else key
req = " (required)" if spec.get("required") else ""
desc = spec.get("description", "")
        type_name = (spec["type"].__name__ if hasattr(spec["type"], "__name__")
                     else " or ".join(t.__name__ for t in spec["type"]))
        suffix = f" - {desc}" if desc else ""
        lines.append(f"{' ' * indent}{full_key}: {type_name}{req}{suffix}")
if "value_schema" in spec:
lines.extend(dump_schema(spec["value_schema"], full_key, indent + 1))
return lines
# ─── CLI ──────────────────────────────────────────────────────────────
def main():
parser = argparse.ArgumentParser(description="Validate timmy-config YAML before deploy")
parser.add_argument("file", nargs="?", help="YAML file to validate")
parser.add_argument("--deploy", metavar="DEST", help="Validate then copy to DEST")
parser.add_argument("--schema", action="store_true", help="Print expected schema")
parser.add_argument("--check-dir", metavar="DIR", help="Validate all .yaml/.yml in directory")
parser.add_argument("--json", action="store_true", dest="as_json", help="Output as JSON")
args = parser.parse_args()
if args.schema:
lines = dump_schema()
print("timmy-config schema:\n")
for line in lines:
print(f" {line}")
return
if args.check_dir:
d = Path(args.check_dir)
if not d.is_dir():
print(f"ERROR: {d} is not a directory", file=sys.stderr)
sys.exit(1)
all_valid = True
results = []
for yf in sorted(d.glob("*.y*ml")):
valid, errors = validate_file(str(yf))
results.append({"file": str(yf), "valid": valid, "errors": [e.to_dict() for e in errors]})
if not valid:
all_valid = False
                if not args.as_json:
                    for e in errors:
                        if e.severity == "error":
                            print(f"  {yf.name}: {e}")
if args.as_json:
print(json.dumps(results, indent=2))
elif all_valid:
print(f"OK: All YAML in {d} valid")
sys.exit(0 if all_valid else 1)
if not args.file:
parser.error("FILE required (or use --schema / --check-dir)")
valid, errors = validate_file(args.file)
if args.as_json:
out = {"file": args.file, "valid": valid, "errors": [e.to_dict() for e in errors]}
print(json.dumps(out, indent=2))
else:
for e in errors:
print(f" {e}")
if valid:
print(f"OK: {args.file} is valid")
        else:
            n_errors = sum(1 for e in errors if e.severity == "error")
            print(f"FAIL: {args.file} has {n_errors} error(s)")
if not valid:
sys.exit(1)
# Deploy mode: copy validated file to destination
if args.deploy:
dest = Path(args.deploy)
shutil.copy2(args.file, dest)
print(f"DEPLOYED: {args.file} -> {dest}")
if __name__ == "__main__":
main()
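# Illustrative shell session (file name "bad.yaml" is hypothetical; the output
# shape follows the prints above): a config missing the required "model" key
# fails and exits 1, so a deploy pipeline can gate on the return code.
#   $ python3 scripts/config_validate.py bad.yaml
#     [ERROR] model: Required key 'model' is missing
#   FAIL: bad.yaml has 1 error(s)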


@@ -0,0 +1,275 @@
#!/usr/bin/env python3
"""Tests for config_validate.py — issue #690."""
import json
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
from config_validate import (
validate_config,
validate_file,
validate_yaml_syntax,
validate_required_keys,
validate_value_types,
validate_no_forbidden_keys,
validate_unknown_keys,
ValidationError,
SCHEMA,
)
class TestYAMLSyntax:
def test_valid_yaml(self):
data, errors = validate_yaml_syntax("model: gpt-4\nprovider: openai\n")
assert data is not None
assert errors == []
def test_empty_yaml(self):
data, errors = validate_yaml_syntax("")
assert data == {}
assert any(e.severity == "warning" for e in errors)
def test_invalid_yaml(self):
data, errors = validate_yaml_syntax("model: gpt-4\n bad: [\n")
assert data is None
assert len(errors) == 1
def test_non_mapping_yaml(self):
data, errors = validate_yaml_syntax("- item1\n- item2\n")
assert data is None
assert any("mapping" in e.message for e in errors)
class TestRequiredKeys:
def test_model_present(self):
errors = validate_required_keys({"model": "gpt-4"})
assert not any(e.path == "model" for e in errors)
def test_model_missing(self):
errors = validate_required_keys({"provider": "openai"})
assert any(e.path == "model" and "missing" in e.message.lower() for e in errors)
def test_model_wrong_type(self):
errors = validate_required_keys({"model": 123})
assert any(e.path == "model" and "str" in e.message for e in errors)
class TestValueTypes:
def test_correct_types(self):
data = {"model": "gpt-4", "agent": {"max_iterations": 90, "temperature": 0.7}}
errors = validate_value_types(data)
assert errors == []
def test_wrong_agent_type(self):
data = {"agent": {"max_iterations": "ninety"}}
errors = validate_value_types(data)
assert any("max_iterations" in e.path and "int" in e.message for e in errors)
def test_wrong_display_type(self):
data = {"display": {"spinner": "yes"}}
errors = validate_value_types(data)
assert any("spinner" in e.path and "bool" in e.message for e in errors)
def test_wrong_cron_type(self):
data = {"cron": {"interval_seconds": "5m"}}
errors = validate_value_types(data)
assert any("interval_seconds" in e.path for e in errors)
def test_list_item_types(self):
data = {"toolsets": ["web", "browser", 123]}
errors = validate_value_types(data)
assert any("toolsets[2]" in e.path for e in errors)
def test_nested_dict_depth(self):
data = {"providers": {"openrouter": {"base_url": 42}}}
errors = validate_value_types(data)
assert any("providers.openrouter.base_url" in e.path for e in errors)
class TestForbiddenKeys:
def test_no_forbidden(self):
errors = validate_no_forbidden_keys({"model": "gpt-4"})
assert errors == []
def test_password_rejected(self):
errors = validate_no_forbidden_keys({"model": "gpt-4", "password": "s3cret"})
assert any("password" in e.path for e in errors)
def test_secret_rejected(self):
errors = validate_no_forbidden_keys({"secret": "abc"})
assert any("secret" in e.path for e in errors)
    def test_nested_forbidden(self):
        # api_key itself is not a forbidden key; token is, even when nested
        errors = validate_no_forbidden_keys({"providers": {"x": {"api_key": "sk-xxx"}}})
        assert errors == []
        errors = validate_no_forbidden_keys({"providers": {"x": {"token": "tok"}}})
        assert any("token" in e.path for e in errors)
def test_api_key_env_allowed(self):
errors = validate_no_forbidden_keys({"providers": {"x": {"api_key_env": "MY_KEY"}}})
assert not any("api_key_env" in e.path for e in errors)
class TestUnknownKeys:
def test_known_keys_no_warnings(self):
warnings = validate_unknown_keys({"model": "gpt-4", "provider": "openai"})
assert warnings == []
def test_unknown_top_level_warns(self):
warnings = validate_unknown_keys({"model": "gpt-4", "custom_field": 1})
assert any("custom_field" in w.path and w.severity == "warning" for w in warnings)
class TestFullValidation:
def test_valid_config(self):
content = "model: nousresearch/hermes-4-14b\nprovider: openrouter\n"
valid, errors = validate_config(content)
assert valid
assert not any(e.severity == "error" for e in errors)
def test_missing_model(self):
content = "provider: openrouter\n"
valid, errors = validate_config(content)
assert not valid
assert any("model" in e.path for e in errors)
def test_forbidden_key(self):
content = "model: gpt-4\npassword: secret\n"
valid, errors = validate_config(content)
assert not valid
def test_invalid_yaml(self):
content = "model: [\n broken\n"
valid, errors = validate_config(content)
assert not valid
def test_full_realistic_config(self):
content = """
model: nousresearch/hermes-4-14b
provider: openrouter
providers:
openrouter:
base_url: https://openrouter.ai/api/v1
api_key_env: OPENROUTER_API_KEY
ollama:
base_url: http://localhost:11434
toolsets:
- web
- browser
agent:
max_iterations: 90
temperature: 0.7
save_trajectories: false
display:
spinner: true
colors: true
skin: default
cron:
enabled: false
interval_seconds: 300
gateway:
enabled: false
port: 8080
logging:
level: INFO
"""
valid, errors = validate_config(content)
assert valid, f"Unexpected errors: {errors}"
def test_warnings_dont_fail(self):
content = "model: gpt-4\ncustom_key: value\n"
valid, errors = validate_config(content)
assert valid # warnings don't make it invalid
assert any(e.severity == "warning" for e in errors)
class TestValidateFile:
def test_valid_file(self, tmp_path):
f = tmp_path / "config.yaml"
f.write_text("model: gpt-4\n")
valid, errors = validate_file(str(f))
assert valid
def test_missing_file(self):
valid, errors = validate_file("/nonexistent/config.yaml")
assert not valid
assert any("not found" in e.message for e in errors)
def test_roundtrip(self, tmp_path):
f = tmp_path / "config.yaml"
f.write_text("model: gpt-4\nagent:\n max_iterations: 50\n")
valid, errors = validate_file(str(f))
assert valid
class TestCLI:
def test_deploy_mode(self, tmp_path):
import subprocess
src = tmp_path / "src.yaml"
src.write_text("model: gpt-4\n")
dest = tmp_path / "deployed.yaml"
result = subprocess.run(
[sys.executable, str(Path(__file__).resolve().parent.parent / "scripts" / "config_validate.py"),
str(src), "--deploy", str(dest)],
capture_output=True, text=True
)
assert result.returncode == 0
assert dest.exists()
assert "model: gpt-4" in dest.read_text()
def test_deploy_rejects_invalid(self, tmp_path):
import subprocess
src = tmp_path / "bad.yaml"
src.write_text("provider: openai\n") # missing required model
dest = tmp_path / "should_not_exist.yaml"
result = subprocess.run(
[sys.executable, str(Path(__file__).resolve().parent.parent / "scripts" / "config_validate.py"),
str(src), "--deploy", str(dest)],
capture_output=True, text=True
)
assert result.returncode == 1
assert not dest.exists()
def test_schema_flag(self):
import subprocess
result = subprocess.run(
[sys.executable, str(Path(__file__).resolve().parent.parent / "scripts" / "config_validate.py"),
"--schema"],
capture_output=True, text=True
)
assert result.returncode == 0
assert "model:" in result.stdout
assert "required" in result.stdout
def test_json_output(self, tmp_path):
import subprocess
f = tmp_path / "config.yaml"
f.write_text("model: gpt-4\n")
result = subprocess.run(
[sys.executable, str(Path(__file__).resolve().parent.parent / "scripts" / "config_validate.py"),
str(f), "--json"],
capture_output=True, text=True
)
assert result.returncode == 0
out = json.loads(result.stdout)
assert out["valid"] is True
assert "errors" in out
def test_check_dir(self, tmp_path):
import subprocess
(tmp_path / "good.yaml").write_text("model: gpt-4\n")
(tmp_path / "bad.yaml").write_text("provider: openai\n")
result = subprocess.run(
[sys.executable, str(Path(__file__).resolve().parent.parent / "scripts" / "config_validate.py"),
"--check-dir", str(tmp_path)],
capture_output=True, text=True
)
assert result.returncode == 1 # bad.yaml fails
assert "bad.yaml" in result.stdout