#!/usr/bin/env python3
"""
config_validate.py — Pre-deploy validation for timmy-config YAML files (Issue #690).

Validates YAML syntax, required keys, value types, and forbidden keys before
writing config to disk. Prevents broken deploys from bad config.

Usage:
    python3 scripts/config_validate.py config.yaml               # Validate only
    python3 scripts/config_validate.py config.yaml --deploy DEST # Validate then write
    python3 scripts/config_validate.py --schema                  # Print expected schema
    python3 scripts/config_validate.py --check-dir config/       # Validate all YAML in dir
    python3 scripts/config_validate.py --json config.yaml        # Output as JSON
"""
import argparse
import json
import sys
import shutil
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False


# ─── Schema definition ───────────────────────────────────────────────
# Each top-level entry: type (a type or tuple of types), required flag,
# human-readable description, and optionally:
#   item_type    — required element type for list values
#   value_schema — nested schema for dict values (recursively validated)

SCHEMA = {
    "model": {
        "type": str,
        "required": True,
        "description": "Default model identifier (e.g. 'nousresearch/hermes-4-14b')",
    },
    "provider": {
        "type": str,
        "required": False,
        "description": "Default provider name",
    },
    "providers": {
        "type": dict,
        "required": False,
        "description": "Provider configurations keyed by name",
        "value_schema": {
            "base_url": {"type": str, "required": False},
            "api_key_env": {"type": str, "required": False},
        },
    },
    "fallback_providers": {
        "type": list,
        "required": False,
        "description": "Ordered fallback provider chain",
        "item_type": str,
    },
    "toolsets": {
        "type": list,
        "required": False,
        "description": "Enabled toolset names",
        "item_type": str,
    },
    "agent": {
        "type": dict,
        "required": False,
        "description": "Agent behavior configuration",
        "value_schema": {
            "max_iterations": {"type": int, "required": False},
            "temperature": {"type": (int, float), "required": False},
            "save_trajectories": {"type": bool, "required": False},
            "quiet_mode": {"type": bool, "required": False},
        },
    },
    "display": {
        "type": dict,
        "required": False,
        "description": "CLI display settings",
        "value_schema": {
            "spinner": {"type": bool, "required": False},
            "colors": {"type": bool, "required": False},
            "skin": {"type": str, "required": False},
            "tool_progress": {"type": bool, "required": False},
        },
    },
    "gateway": {
        "type": dict,
        "required": False,
        "description": "Gateway/messaging settings",
        "value_schema": {
            "enabled": {"type": bool, "required": False},
            "port": {"type": int, "required": False},
            "cors_origins": {"type": list, "required": False},
        },
    },
    "cron": {
        "type": dict,
        "required": False,
        "description": "Cron scheduler settings",
        "value_schema": {
            "enabled": {"type": bool, "required": False},
            "interval_seconds": {"type": int, "required": False},
            "max_concurrent": {"type": int, "required": False},
        },
    },
    "logging": {
        "type": dict,
        "required": False,
        "description": "Logging configuration",
        "value_schema": {
            "level": {"type": str, "required": False},
            "file": {"type": (str, type(None)), "required": False},
        },
    },
    "session": {
        "type": dict,
        "required": False,
        "description": "Session behavior",
        "value_schema": {
            "save_trajectories": {"type": bool, "required": False},
            "max_iterations": {"type": int, "required": False},
            "context_compression": {"type": bool, "required": False},
        },
    },
}

# Keys that must never appear anywhere in a config file (checked recursively,
# case-insensitively). Maps key -> explanation shown in the error message.
FORBIDDEN_KEYS = {
    "anthropic_api_key": "Use ANTHROPIC_API_KEY env var — never store keys in config",
    "openai_api_key": "Use OPENAI_API_KEY env var — never store keys in config",
    "openrouter_api_key": "Use OPENROUTER_API_KEY env var — never store keys in config",
    "password": "Never store passwords in config",
    "secret": "Never store secrets in config",
    "token": "Never store tokens in config — use env vars",
}


def _type_name(t: Any) -> str:
    """Human-readable name for a type or tuple of types (e.g. 'int | float').

    SCHEMA uses plain types and tuples of types interchangeably (isinstance
    accepts both), so error messages must handle both — a bare tuple has no
    __name__ attribute.
    """
    if isinstance(t, tuple):
        return " | ".join(_type_name(x) for x in t)
    return getattr(t, "__name__", str(t))


# ─── Validation errors ───────────────────────────────────────────────

class ValidationError:
    """A single validation finding: dotted key path, message, and severity.

    severity is "error" (blocks deploy) or "warning" (informational only).
    """

    def __init__(self, path: str, message: str, severity: str = "error"):
        self.path = path
        self.message = message
        self.severity = severity

    def to_dict(self) -> dict:
        """Serialize for JSON output."""
        return {"path": self.path, "message": self.message, "severity": self.severity}

    def __str__(self):
        tag = "ERROR" if self.severity == "error" else "WARN"
        return f"[{tag}] {self.path}: {self.message}"

    def __repr__(self):
        return f"ValidationError({self.path!r}, {self.message!r}, {self.severity!r})"


# ─── Core validators ─────────────────────────────────────────────────

def validate_yaml_syntax(content: str) -> Tuple[Optional[dict], List[ValidationError]]:
    """Parse YAML, return (data, errors).

    Returns (None, errors) when parsing is impossible (missing PyYAML, syntax
    error, or non-mapping root); returns ({}, [warning]) for an empty file so
    downstream required-key checks still run.
    """
    errors: List[ValidationError] = []
    if not HAS_YAML:
        return None, [ValidationError("root", "PyYAML not installed — pip install pyyaml")]
    try:
        data = yaml.safe_load(content)
    except yaml.YAMLError as e:
        return None, [ValidationError("syntax", str(e))]
    if data is None:
        return {}, [ValidationError("root", "Config file is empty", "warning")]
    if not isinstance(data, dict):
        return None, [ValidationError("root", f"Expected mapping, got {type(data).__name__}")]
    return data, errors


def validate_required_keys(data: dict) -> List[ValidationError]:
    """Check required keys exist and have correct types."""
    errors: List[ValidationError] = []
    for key, spec in SCHEMA.items():
        if spec.get("required"):
            if key not in data:
                errors.append(ValidationError(key, f"Required key '{key}' is missing"))
            elif not isinstance(data[key], spec["type"]):
                # _type_name handles tuple-of-types specs safely.
                errors.append(ValidationError(
                    key,
                    f"Expected {_type_name(spec['type'])}, got {type(data[key]).__name__}"
                ))
    return errors


def validate_value_types(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
    """Check all known keys have correct types, recursively.

    Unknown keys are ignored here (validate_unknown_keys warns about them).
    """
    if schema is None:
        schema = SCHEMA
    errors: List[ValidationError] = []
    for key, spec in schema.items():
        full_key = f"{prefix}.{key}" if prefix else key
        if key not in data:
            continue
        value = data[key]
        expected = spec["type"]
        if not isinstance(value, expected):
            # BUG FIX: expected may be a tuple of types (e.g. agent.temperature
            # allows (int, float)); tuples have no __name__, so the previous
            # f"{expected.__name__}" raised AttributeError on any mismatch.
            errors.append(ValidationError(
                full_key,
                f"Expected {_type_name(expected)}, got {type(value).__name__}"
            ))
            continue
        # Check list item types
        if isinstance(value, list) and "item_type" in spec:
            for i, item in enumerate(value):
                if not isinstance(item, spec["item_type"]):
                    errors.append(ValidationError(
                        f"{full_key}[{i}]",
                        f"Expected {_type_name(spec['item_type'])}, got {type(item).__name__}"
                    ))
        # Recurse into nested dicts
        if isinstance(value, dict) and "value_schema" in spec:
            errors.extend(validate_value_types(value, spec["value_schema"], full_key))
    return errors


def validate_no_forbidden_keys(data: dict, prefix: str = "") -> List[ValidationError]:
    """Check for keys that should never be in config (recursive, case-insensitive)."""
    errors: List[ValidationError] = []
    for key, value in data.items():
        full_key = f"{prefix}.{key}" if prefix else key
        if key.lower() in FORBIDDEN_KEYS:
            errors.append(ValidationError(full_key, FORBIDDEN_KEYS[key.lower()]))
        if isinstance(value, dict):
            errors.extend(validate_no_forbidden_keys(value, full_key))
    return errors


def validate_unknown_keys(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
    """Warn about keys not in schema (not an error, just a warning)."""
    if schema is None:
        schema = SCHEMA
    warnings: List[ValidationError] = []
    known = set(schema.keys())
    for key in data:
        full_key = f"{prefix}.{key}" if prefix else key
        if key not in known:
            warnings.append(ValidationError(full_key, "Unknown key — not in schema", "warning"))
        elif isinstance(data[key], dict) and key in schema and "value_schema" in schema[key]:
            warnings.extend(validate_unknown_keys(data[key], schema[key]["value_schema"], full_key))
    return warnings


# ─── Public API ───────────────────────────────────────────────────────

def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:
    """Full validation pipeline. Returns (is_valid, errors).

    is_valid is True when no finding has severity "error"; warnings alone
    do not fail validation.
    """
    data, errors = validate_yaml_syntax(content)
    if data is None:
        return False, errors

    errors.extend(validate_required_keys(data))
    errors.extend(validate_value_types(data))
    errors.extend(validate_no_forbidden_keys(data))
    errors.extend(validate_unknown_keys(data))

    has_errors = any(e.severity == "error" for e in errors)
    return not has_errors, errors


def validate_file(path: str) -> Tuple[bool, List[ValidationError]]:
    """Validate a YAML file on disk."""
    p = Path(path)
    if not p.exists():
        return False, [ValidationError(str(p), "File not found")]
    content = p.read_text(encoding="utf-8")
    return validate_config(content)


# ─── Schema dump ──────────────────────────────────────────────────────

def dump_schema(schema: Optional[dict] = None, prefix: str = "", indent: int = 0) -> List[str]:
    """Pretty-print schema as text, one dotted key per line."""
    if schema is None:
        schema = SCHEMA
    lines: List[str] = []
    for key, spec in schema.items():
        full_key = f"{prefix}.{key}" if prefix else key
        req = " (required)" if spec.get("required") else ""
        desc = spec.get("description", "")
        type_name = _type_name(spec["type"])
        lines.append(f"{'  ' * indent}{full_key}: {type_name}{req} — {desc}")
        if "value_schema" in spec:
            lines.extend(dump_schema(spec["value_schema"], full_key, indent + 1))
    return lines


# ─── CLI ──────────────────────────────────────────────────────────────

def main():
    parser = argparse.ArgumentParser(description="Validate timmy-config YAML before deploy")
    parser.add_argument("file", nargs="?", help="YAML file to validate")
    parser.add_argument("--deploy", metavar="DEST", help="Validate then copy to DEST")
    parser.add_argument("--schema", action="store_true", help="Print expected schema")
    parser.add_argument("--check-dir", metavar="DIR", help="Validate all .yaml/.yml in directory")
    parser.add_argument("--json", action="store_true", dest="as_json", help="Output as JSON")
    args = parser.parse_args()

    if args.schema:
        lines = dump_schema()
        print("timmy-config schema:\n")
        for line in lines:
            print(f"  {line}")
        return

    if args.check_dir:
        d = Path(args.check_dir)
        if not d.is_dir():
            print(f"ERROR: {d} is not a directory", file=sys.stderr)
            sys.exit(1)
        all_valid = True
        results = []
        # "*.y*ml" matches both .yaml and .yml extensions.
        for yf in sorted(d.glob("*.y*ml")):
            valid, errors = validate_file(str(yf))
            results.append({"file": str(yf), "valid": valid, "errors": [e.to_dict() for e in errors]})
            if not valid:
                all_valid = False
                for e in errors:
                    if e.severity == "error":
                        print(f"  {yf.name}: {e}")
        if args.as_json:
            print(json.dumps(results, indent=2))
        elif all_valid:
            print(f"OK: All YAML in {d} valid")
        sys.exit(0 if all_valid else 1)

    if not args.file:
        parser.error("FILE required (or use --schema / --check-dir)")

    valid, errors = validate_file(args.file)

    if args.as_json:
        out = {"file": args.file, "valid": valid, "errors": [e.to_dict() for e in errors]}
        print(json.dumps(out, indent=2))
    else:
        for e in errors:
            print(f"  {e}")
        if valid:
            print(f"OK: {args.file} is valid")
        else:
            print(f"FAIL: {args.file} has {sum(1 for e in errors if e.severity == 'error')} errors")

    if not valid:
        sys.exit(1)

    # Deploy mode: copy validated file to destination (only reached when valid)
    if args.deploy:
        dest = Path(args.deploy)
        shutil.copy2(args.file, dest)
        print(f"DEPLOYED: {args.file} -> {dest}")


if __name__ == "__main__":
    main()