feat: Add pre-deploy config validation — YAML syntax, keys, types, forbidden keys (#690)
scripts/config_validate.py (new file, 356 lines)
@@ -0,0 +1,356 @@
#!/usr/bin/env python3
"""
config_validate.py — Pre-deploy validation for timmy-config YAML files (Issue #690).

Validates YAML syntax, required keys, value types, and forbidden keys before
writing config to disk. Prevents broken deploys from bad config.

Usage:
    python3 scripts/config_validate.py config.yaml               # Validate only
    python3 scripts/config_validate.py config.yaml --deploy DEST # Validate, then copy to DEST
    python3 scripts/config_validate.py --schema                  # Print expected schema
    python3 scripts/config_validate.py --check-dir config/       # Validate all YAML in dir
    python3 scripts/config_validate.py --json config.yaml        # Output as JSON
"""

import argparse
import json
import shutil
import sys
from pathlib import Path
from typing import List, Optional, Tuple

try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False


# ─── Schema definition ───────────────────────────────────────────────
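# Each top-level entry maps a config key to a spec dict. Fields consumed by
# the validators below: "type" (expected Python type, or a tuple of types),
# "required" (bool), "description" (shown by --schema), "item_type" (expected
# element type for list values), and "value_schema" (nested spec for dict
# values, validated recursively).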

SCHEMA = {
    "model": {
        "type": str,
        "required": True,
        "description": "Default model identifier (e.g. 'nousresearch/hermes-4-14b')",
    },
    "provider": {
        "type": str,
        "required": False,
        "description": "Default provider name",
    },
    "providers": {
        "type": dict,
        "required": False,
        "description": "Provider configurations keyed by name",
        "value_schema": {
            "base_url": {"type": str, "required": False},
            "api_key_env": {"type": str, "required": False},
        },
    },
    "fallback_providers": {
        "type": list,
        "required": False,
        "description": "Ordered fallback provider chain",
        "item_type": str,
    },
    "toolsets": {
        "type": list,
        "required": False,
        "description": "Enabled toolset names",
        "item_type": str,
    },
    "agent": {
        "type": dict,
        "required": False,
        "description": "Agent behavior configuration",
        "value_schema": {
            "max_iterations": {"type": int, "required": False},
            "temperature": {"type": (int, float), "required": False},
            "save_trajectories": {"type": bool, "required": False},
            "quiet_mode": {"type": bool, "required": False},
        },
    },
    "display": {
        "type": dict,
        "required": False,
        "description": "CLI display settings",
        "value_schema": {
            "spinner": {"type": bool, "required": False},
            "colors": {"type": bool, "required": False},
            "skin": {"type": str, "required": False},
            "tool_progress": {"type": bool, "required": False},
        },
    },
    "gateway": {
        "type": dict,
        "required": False,
        "description": "Gateway/messaging settings",
        "value_schema": {
            "enabled": {"type": bool, "required": False},
            "port": {"type": int, "required": False},
            "cors_origins": {"type": list, "required": False},
        },
    },
    "cron": {
        "type": dict,
        "required": False,
        "description": "Cron scheduler settings",
        "value_schema": {
            "enabled": {"type": bool, "required": False},
            "interval_seconds": {"type": int, "required": False},
            "max_concurrent": {"type": int, "required": False},
        },
    },
    "logging": {
        "type": dict,
        "required": False,
        "description": "Logging configuration",
        "value_schema": {
            "level": {"type": str, "required": False},
            "file": {"type": (str, type(None)), "required": False},
        },
    },
    "session": {
        "type": dict,
        "required": False,
        "description": "Session behavior",
        "value_schema": {
            "save_trajectories": {"type": bool, "required": False},
            "max_iterations": {"type": int, "required": False},
            "context_compression": {"type": bool, "required": False},
        },
    },
}
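
# For illustration: "model" is the only required key, so the smallest config
# that validates is a one-line file such as
#
#     model: nousresearch/hermes-4-14b
#
# (identifier borrowed from the schema description above).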

FORBIDDEN_KEYS = {
    "anthropic_api_key": "Use ANTHROPIC_API_KEY env var — never store keys in config",
    "openai_api_key": "Use OPENAI_API_KEY env var — never store keys in config",
    "openrouter_api_key": "Use OPENROUTER_API_KEY env var — never store keys in config",
    "password": "Never store passwords in config",
    "secret": "Never store secrets in config",
    "token": "Never store tokens in config — use env vars",
}
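
# Matching in validate_no_forbidden_keys() is case-insensitive but exact:
# key.lower() is looked up in this mapping, so e.g. "API_TOKEN" passes while
# "token" is rejected at any nesting depth.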


# ─── Validation errors ───────────────────────────────────────────────

class ValidationError:
    """A single validation finding (a plain record, not a raised exception)."""

    def __init__(self, path: str, message: str, severity: str = "error"):
        self.path = path
        self.message = message
        self.severity = severity

    def to_dict(self) -> dict:
        return {"path": self.path, "message": self.message, "severity": self.severity}

    def __str__(self):
        tag = "ERROR" if self.severity == "error" else "WARN"
        return f"[{tag}] {self.path}: {self.message}"

    def __repr__(self):
        return f"ValidationError({self.path!r}, {self.message!r}, {self.severity!r})"


# ─── Core validators ─────────────────────────────────────────────────

def _type_name(t) -> str:
    """Human-readable name for a type or a tuple of types (e.g. 'int/float')."""
    if isinstance(t, tuple):
        return "/".join(x.__name__ for x in t)
    return t.__name__


def validate_yaml_syntax(content: str) -> Tuple[Optional[dict], List[ValidationError]]:
    """Parse YAML, return (data, errors)."""
    errors: List[ValidationError] = []
    if not HAS_YAML:
        return None, [ValidationError("root", "PyYAML not installed — pip install pyyaml")]
    try:
        data = yaml.safe_load(content)
    except yaml.YAMLError as e:
        return None, [ValidationError("syntax", str(e))]
    if data is None:
        return {}, [ValidationError("root", "Config file is empty", "warning")]
    if not isinstance(data, dict):
        return None, [ValidationError("root", f"Expected mapping, got {type(data).__name__}")]
    return data, errors


def validate_required_keys(data: dict) -> List[ValidationError]:
    """Check that required keys exist and have the expected type."""
    errors: List[ValidationError] = []
    for key, spec in SCHEMA.items():
        if spec.get("required"):
            if key not in data:
                errors.append(ValidationError(key, f"Required key '{key}' is missing"))
            elif not isinstance(data[key], spec["type"]):
                errors.append(ValidationError(
                    key,
                    f"Expected {_type_name(spec['type'])}, got {type(data[key]).__name__}"
                ))
    return errors
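
# Note: a required key that is present but mistyped is also reported by
# validate_value_types(), so such a value currently yields two findings.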


def validate_value_types(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
    """Check that all known keys have the expected types, recursively."""
    if schema is None:
        schema = SCHEMA
    errors: List[ValidationError] = []
    for key, spec in schema.items():
        full_key = f"{prefix}.{key}" if prefix else key
        if key not in data:
            continue
        value = data[key]
        expected = spec["type"]
        if not isinstance(value, expected):
            # _type_name handles tuple specs like (int, float), which would
            # otherwise crash on expected.__name__.
            errors.append(ValidationError(
                full_key,
                f"Expected {_type_name(expected)}, got {type(value).__name__}"
            ))
            continue
        # Check list item types
        if isinstance(value, list) and "item_type" in spec:
            for i, item in enumerate(value):
                if not isinstance(item, spec["item_type"]):
                    errors.append(ValidationError(
                        f"{full_key}[{i}]",
                        f"Expected {spec['item_type'].__name__}, got {type(item).__name__}"
                    ))
        # Recurse into nested dicts
        if isinstance(value, dict) and "value_schema" in spec:
            errors.extend(validate_value_types(value, spec["value_schema"], full_key))
    return errors


def validate_no_forbidden_keys(data: dict, prefix: str = "") -> List[ValidationError]:
    """Check for keys that should never be in config."""
    errors: List[ValidationError] = []
    for key, value in data.items():
        full_key = f"{prefix}.{key}" if prefix else key
        if key.lower() in FORBIDDEN_KEYS:
            errors.append(ValidationError(full_key, FORBIDDEN_KEYS[key.lower()]))
        if isinstance(value, dict):
            errors.extend(validate_no_forbidden_keys(value, full_key))
    return errors


def validate_unknown_keys(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
    """Warn about keys not in the schema (a warning, not an error)."""
    if schema is None:
        schema = SCHEMA
    warnings: List[ValidationError] = []
    known = set(schema.keys())
    for key in data:
        full_key = f"{prefix}.{key}" if prefix else key
        if key not in known:
            warnings.append(ValidationError(full_key, "Unknown key — not in schema", "warning"))
        elif isinstance(data[key], dict) and "value_schema" in schema[key]:
            warnings.extend(validate_unknown_keys(data[key], schema[key]["value_schema"], full_key))
    return warnings


# ─── Public API ───────────────────────────────────────────────────────

def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:
    """Full validation pipeline. Returns (is_valid, errors)."""
    data, errors = validate_yaml_syntax(content)
    if data is None:
        return False, errors

    errors.extend(validate_required_keys(data))
    errors.extend(validate_value_types(data))
    errors.extend(validate_no_forbidden_keys(data))
    errors.extend(validate_unknown_keys(data))

    has_errors = any(e.severity == "error" for e in errors)
    return not has_errors, errors
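
# Programmatic use (hypothetical one-key config; see SCHEMA for valid keys):
#
#     ok, findings = validate_config("model: nousresearch/hermes-4-14b\n")
#     assert ok and not findings
#
# A mistyped nested value ("agent: {max_iterations: many}") returns ok == False
# with an ERROR finding at path "agent.max_iterations".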


def validate_file(path: str) -> Tuple[bool, List[ValidationError]]:
    """Validate a YAML file on disk."""
    p = Path(path)
    if not p.exists():
        return False, [ValidationError(str(p), "File not found")]
    content = p.read_text(encoding="utf-8")
    return validate_config(content)


# ─── Schema dump ──────────────────────────────────────────────────────

def dump_schema(schema: Optional[dict] = None, prefix: str = "", indent: int = 0) -> List[str]:
    """Pretty-print the schema as text lines."""
    if schema is None:
        schema = SCHEMA
    lines: List[str] = []
    for key, spec in schema.items():
        full_key = f"{prefix}.{key}" if prefix else key
        req = " (required)" if spec.get("required") else ""
        desc = spec.get("description", "")
        lines.append(f"{' ' * indent}{full_key}: {_type_name(spec['type'])}{req} — {desc}")
        if "value_schema" in spec:
            lines.extend(dump_schema(spec["value_schema"], full_key, indent + 1))
    return lines


# ─── CLI ──────────────────────────────────────────────────────────────

def main():
    parser = argparse.ArgumentParser(description="Validate timmy-config YAML before deploy")
    parser.add_argument("file", nargs="?", help="YAML file to validate")
    parser.add_argument("--deploy", metavar="DEST", help="Validate, then copy to DEST")
    parser.add_argument("--schema", action="store_true", help="Print expected schema")
    parser.add_argument("--check-dir", metavar="DIR", help="Validate all .yaml/.yml in directory")
    parser.add_argument("--json", action="store_true", dest="as_json", help="Output as JSON")
    args = parser.parse_args()

    if args.schema:
        lines = dump_schema()
        print("timmy-config schema:\n")
        for line in lines:
            print(f"  {line}")
        return

    if args.check_dir:
        d = Path(args.check_dir)
        if not d.is_dir():
            print(f"ERROR: {d} is not a directory", file=sys.stderr)
            sys.exit(1)
        all_valid = True
        results = []
        for yf in sorted(d.glob("*.y*ml")):
            valid, errors = validate_file(str(yf))
            results.append({"file": str(yf), "valid": valid, "errors": [e.to_dict() for e in errors]})
            if not valid:
                all_valid = False
                # Only print per-file errors in text mode, so --json stays
                # machine-parseable.
                if not args.as_json:
                    for e in errors:
                        if e.severity == "error":
                            print(f"  {yf.name}: {e}")
        if args.as_json:
            print(json.dumps(results, indent=2))
        elif all_valid:
            print(f"OK: All YAML in {d} valid")
        sys.exit(0 if all_valid else 1)

    if not args.file:
        parser.error("FILE required (or use --schema / --check-dir)")

    valid, errors = validate_file(args.file)

    if args.as_json:
        out = {"file": args.file, "valid": valid, "errors": [e.to_dict() for e in errors]}
        print(json.dumps(out, indent=2))
    else:
        for e in errors:
            print(f"  {e}")
        if valid:
            print(f"OK: {args.file} is valid")
        else:
            print(f"FAIL: {args.file} has {sum(1 for e in errors if e.severity == 'error')} errors")

    if not valid:
        sys.exit(1)

    # Deploy mode: copy validated file to destination
    if args.deploy:
        dest = Path(args.deploy)
        shutil.copy2(args.file, dest)
        print(f"DEPLOYED: {args.file} -> {dest}")


if __name__ == "__main__":
    main()
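
# Typical pre-deploy invocation (paths illustrative):
#
#     python3 scripts/config_validate.py config/timmy.yaml --deploy ~/.timmy/config.yaml
#
# The copy only happens after validation passes; an invalid file exits 1
# before the deploy step.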