#!/usr/bin/env python3
"""
config_validate.py — Pre-deploy validation for timmy-config YAML files (Issue #690).

Validates YAML syntax, required keys, value types, and forbidden keys before
writing config to disk. Prevents broken deploys from bad config.

Usage:
    python3 scripts/config_validate.py config.yaml                 # Validate only
    python3 scripts/config_validate.py config.yaml --deploy DEST   # Validate then write
    python3 scripts/config_validate.py --schema                    # Print expected schema
    python3 scripts/config_validate.py --check-dir config/         # Validate all YAML in dir
    python3 scripts/config_validate.py --json config.yaml          # Output as JSON
"""

import argparse
import json
import shutil
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# PyYAML is optional at import time so --schema still works without it;
# validation reports a clear error if it is missing.
try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False

# ─── Schema definition ───────────────────────────────────────────────
# Each entry: type (a type or tuple of types), required flag, description,
# optional item_type (for lists) and value_schema (for nested dicts).

SCHEMA = {
    "model": {
        "type": str,
        "required": True,
        "description": "Default model identifier (e.g. 'nousresearch/hermes-4-14b')",
    },
    "provider": {
        "type": str,
        "required": False,
        "description": "Default provider name",
    },
    "providers": {
        "type": dict,
        "required": False,
        "description": "Provider configurations keyed by name",
        "value_schema": {
            "base_url": {"type": str, "required": False},
            "api_key_env": {"type": str, "required": False},
        },
    },
    "fallback_providers": {
        "type": list,
        "required": False,
        "description": "Ordered fallback provider chain",
        "item_type": str,
    },
    "toolsets": {
        "type": list,
        "required": False,
        "description": "Enabled toolset names",
        "item_type": str,
    },
    "agent": {
        "type": dict,
        "required": False,
        "description": "Agent behavior configuration",
        "value_schema": {
            "max_iterations": {"type": int, "required": False},
            "temperature": {"type": (int, float), "required": False},
            "save_trajectories": {"type": bool, "required": False},
            "quiet_mode": {"type": bool, "required": False},
        },
    },
    "display": {
        "type": dict,
        "required": False,
        "description": "CLI display settings",
        "value_schema": {
            "spinner": {"type": bool, "required": False},
            "colors": {"type": bool, "required": False},
            "skin": {"type": str, "required": False},
            "tool_progress": {"type": bool, "required": False},
        },
    },
    "gateway": {
        "type": dict,
        "required": False,
        "description": "Gateway/messaging settings",
        "value_schema": {
            "enabled": {"type": bool, "required": False},
            "port": {"type": int, "required": False},
            "cors_origins": {"type": list, "required": False},
        },
    },
    "cron": {
        "type": dict,
        "required": False,
        "description": "Cron scheduler settings",
        "value_schema": {
            "enabled": {"type": bool, "required": False},
            "interval_seconds": {"type": int, "required": False},
            "max_concurrent": {"type": int, "required": False},
        },
    },
    "logging": {
        "type": dict,
        "required": False,
        "description": "Logging configuration",
        "value_schema": {
            "level": {"type": str, "required": False},
            "file": {"type": (str, type(None)), "required": False},
        },
    },
    "session": {
        "type": dict,
        "required": False,
        "description": "Session behavior",
        "value_schema": {
            "save_trajectories": {"type": bool, "required": False},
            "max_iterations": {"type": int, "required": False},
            "context_compression": {"type": bool, "required": False},
        },
    },
}

# Keys that must never appear in config, mapped to the message shown on match.
FORBIDDEN_KEYS = {
    "anthropic_api_key": "Use ANTHROPIC_API_KEY env var — never store keys in config",
    "openai_api_key": "Use OPENAI_API_KEY env var — never store keys in config",
    "openrouter_api_key": "Use OPENROUTER_API_KEY env var — never store keys in config",
    "password": "Never store passwords in config",
    "secret": "Never store secrets in config",
    "token": "Never store tokens in config — use env vars",
}


# ─── Type helpers ────────────────────────────────────────────────────


def _type_name(expected: Any) -> str:
    """Human-readable name for a type or a tuple of types.

    FIX: schema entries like ``(int, float)`` previously crashed error
    formatting with AttributeError because tuples have no ``__name__``.
    """
    if isinstance(expected, tuple):
        return " | ".join(t.__name__ for t in expected)
    return expected.__name__


def _matches_type(value: Any, expected: Any) -> bool:
    """isinstance() check that does NOT let bool pass as int/float.

    bool is a subclass of int, so plain ``isinstance(True, int)`` is True;
    config values like ``port: true`` must be rejected unless the schema
    explicitly allows bool.
    """
    types = expected if isinstance(expected, tuple) else (expected,)
    if isinstance(value, bool):
        return bool in types
    return isinstance(value, types)


# ─── Validation errors ───────────────────────────────────────────────


class ValidationError:
    """A single validation finding at a dotted config path.

    severity is "error" (blocks deploy) or "warning" (informational).
    """

    def __init__(self, path: str, message: str, severity: str = "error"):
        self.path = path
        self.message = message
        self.severity = severity

    def to_dict(self) -> dict:
        """JSON-serializable representation for --json output."""
        return {"path": self.path, "message": self.message, "severity": self.severity}

    def __str__(self):
        tag = "ERROR" if self.severity == "error" else "WARN"
        return f"[{tag}] {self.path}: {self.message}"

    def __repr__(self):
        return f"ValidationError({self.path!r}, {self.message!r}, {self.severity!r})"


# ─── Core validators ─────────────────────────────────────────────────


def validate_yaml_syntax(content: str) -> Tuple[Optional[dict], List[ValidationError]]:
    """Parse YAML, return (data, errors).

    Returns (None, errors) when parsing is impossible (missing PyYAML, bad
    syntax, non-mapping root); returns ({}, warning) for an empty file so
    downstream validators can still report missing required keys.
    """
    errors: List[ValidationError] = []
    if not HAS_YAML:
        return None, [ValidationError("root", "PyYAML not installed — pip install pyyaml")]
    try:
        data = yaml.safe_load(content)
    except yaml.YAMLError as e:
        return None, [ValidationError("syntax", str(e))]
    if data is None:
        return {}, [ValidationError("root", "Config file is empty", "warning")]
    if not isinstance(data, dict):
        return None, [ValidationError("root", f"Expected mapping, got {type(data).__name__}")]
    return data, errors


def validate_required_keys(data: dict) -> List[ValidationError]:
    """Check required keys exist and have correct types."""
    errors: List[ValidationError] = []
    for key, spec in SCHEMA.items():
        if not spec.get("required"):
            continue
        if key not in data:
            errors.append(ValidationError(key, f"Required key '{key}' is missing"))
        elif not _matches_type(data[key], spec["type"]):
            errors.append(ValidationError(
                key,
                f"Expected {_type_name(spec['type'])}, got {type(data[key]).__name__}"
            ))
    return errors


def validate_value_types(data: dict, schema: dict = None, prefix: str = "") -> List[ValidationError]:
    """Check all known keys have correct types, recursively.

    Recurses into nested dicts via each spec's "value_schema" and checks
    list element types via "item_type". Unknown keys are ignored here
    (see validate_unknown_keys).
    """
    if schema is None:
        schema = SCHEMA
    errors: List[ValidationError] = []
    for key, spec in schema.items():
        full_key = f"{prefix}.{key}" if prefix else key
        if key not in data:
            continue
        value = data[key]
        expected = spec["type"]
        if not _matches_type(value, expected):
            errors.append(ValidationError(
                full_key,
                f"Expected {_type_name(expected)}, got {type(value).__name__}"
            ))
            continue
        # Check list item types
        if isinstance(value, list) and "item_type" in spec:
            for i, item in enumerate(value):
                if not _matches_type(item, spec["item_type"]):
                    errors.append(ValidationError(
                        f"{full_key}[{i}]",
                        f"Expected {_type_name(spec['item_type'])}, got {type(item).__name__}"
                    ))
        # Recurse into nested dicts
        if isinstance(value, dict) and "value_schema" in spec:
            errors.extend(validate_value_types(value, spec["value_schema"], full_key))
    return errors


def validate_no_forbidden_keys(data: dict, prefix: str = "") -> List[ValidationError]:
    """Check for keys that should never be in config (secrets, credentials)."""
    errors: List[ValidationError] = []
    for key, value in data.items():
        full_key = f"{prefix}.{key}" if prefix else key
        if key.lower() in FORBIDDEN_KEYS:
            errors.append(ValidationError(full_key, FORBIDDEN_KEYS[key.lower()]))
        if isinstance(value, dict):
            errors.extend(validate_no_forbidden_keys(value, full_key))
    return errors


def validate_unknown_keys(data: dict, schema: dict = None, prefix: str = "") -> List[ValidationError]:
    """Warn about keys not in schema (not an error, just a warning)."""
    if schema is None:
        schema = SCHEMA
    warnings: List[ValidationError] = []
    for key in data:
        full_key = f"{prefix}.{key}" if prefix else key
        if key not in schema:
            warnings.append(ValidationError(full_key, "Unknown key — not in schema", "warning"))
        elif isinstance(data[key], dict) and "value_schema" in schema[key]:
            warnings.extend(validate_unknown_keys(data[key], schema[key]["value_schema"], full_key))
    return warnings


# ─── Public API ──────────────────────────────────────────────────────


def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:
    """Full validation pipeline. Returns (is_valid, errors).

    is_valid is True when no finding has severity "error" (warnings alone
    do not invalidate the config).
    """
    data, errors = validate_yaml_syntax(content)
    if data is None:
        return False, errors
    errors.extend(validate_required_keys(data))
    errors.extend(validate_value_types(data))
    errors.extend(validate_no_forbidden_keys(data))
    errors.extend(validate_unknown_keys(data))
    has_errors = any(e.severity == "error" for e in errors)
    return not has_errors, errors


def validate_file(path: str) -> Tuple[bool, List[ValidationError]]:
    """Validate a YAML file on disk."""
    p = Path(path)
    if not p.exists():
        return False, [ValidationError(str(p), "File not found")]
    content = p.read_text(encoding="utf-8")
    return validate_config(content)


# ─── Schema dump ─────────────────────────────────────────────────────


def dump_schema(schema: dict = None, prefix: str = "", indent: int = 0) -> List[str]:
    """Pretty-print schema as text, one line per key, nested keys indented."""
    if schema is None:
        schema = SCHEMA
    lines: List[str] = []
    for key, spec in schema.items():
        full_key = f"{prefix}.{key}" if prefix else key
        req = " (required)" if spec.get("required") else ""
        desc = spec.get("description", "")
        type_name = _type_name(spec["type"])
        lines.append(f"{'  ' * indent}{full_key}: {type_name}{req} — {desc}")
        if "value_schema" in spec:
            lines.extend(dump_schema(spec["value_schema"], full_key, indent + 1))
    return lines


# ─── CLI ─────────────────────────────────────────────────────────────


def main():
    parser = argparse.ArgumentParser(description="Validate timmy-config YAML before deploy")
    parser.add_argument("file", nargs="?", help="YAML file to validate")
    parser.add_argument("--deploy", metavar="DEST", help="Validate then copy to DEST")
    parser.add_argument("--schema", action="store_true", help="Print expected schema")
    parser.add_argument("--check-dir", metavar="DIR", help="Validate all .yaml/.yml in directory")
    parser.add_argument("--json", action="store_true", dest="as_json", help="Output as JSON")
    args = parser.parse_args()

    if args.schema:
        lines = dump_schema()
        print("timmy-config schema:\n")
        for line in lines:
            print(f"  {line}")
        return

    if args.check_dir:
        d = Path(args.check_dir)
        if not d.is_dir():
            print(f"ERROR: {d} is not a directory", file=sys.stderr)
            sys.exit(1)
        all_valid = True
        results = []
        # FIX: explicit extensions — the old "*.y*ml" glob also matched
        # unrelated names like "foo.yxml".
        yaml_files = sorted(set(d.glob("*.yaml")) | set(d.glob("*.yml")))
        for yf in yaml_files:
            valid, errors = validate_file(str(yf))
            results.append({"file": str(yf), "valid": valid, "errors": [e.to_dict() for e in errors]})
            if not valid:
                all_valid = False
                # FIX: keep --json output machine-readable — no human lines.
                if not args.as_json:
                    for e in errors:
                        if e.severity == "error":
                            print(f"  {yf.name}: {e}")
        if args.as_json:
            print(json.dumps(results, indent=2))
        elif all_valid:
            print(f"OK: All YAML in {d} valid")
        sys.exit(0 if all_valid else 1)

    if not args.file:
        parser.error("FILE required (or use --schema / --check-dir)")

    valid, errors = validate_file(args.file)
    if args.as_json:
        out = {"file": args.file, "valid": valid, "errors": [e.to_dict() for e in errors]}
        print(json.dumps(out, indent=2))
    else:
        for e in errors:
            print(f"  {e}")
        if valid:
            print(f"OK: {args.file} is valid")
        else:
            print(f"FAIL: {args.file} has {sum(1 for e in errors if e.severity == 'error')} errors")
    if not valid:
        sys.exit(1)

    # Deploy mode: copy validated file to destination
    if args.deploy:
        dest = Path(args.deploy)
        dest.parent.mkdir(parents=True, exist_ok=True)  # robustness: ensure target dir exists
        shutil.copy2(args.file, dest)
        print(f"DEPLOYED: {args.file} -> {dest}")


if __name__ == "__main__":
    main()