timmy-config/scripts/config_validate.py

#!/usr/bin/env python3
"""
config-validate — Pre-deploy validation for timmy-config YAML files.

Validates YAML syntax, required keys, and value types before writing
config to disk. Prevents broken deploys from bad config.

Usage:
    python scripts/config_validate.py config.yaml
    python scripts/config_validate.py config.yaml --deploy    # Validate + write
    python scripts/config_validate.py --schema                # Show expected schema
"""

import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False


# Expected schema for hermes config.yaml.
#
# Each entry maps a top-level config key to a spec dict with:
#   "type"        - Python type the value is checked against with isinstance()
#   "required"    - informational flag; the validators visible in this file
#                   never read it (a key is treated as required because it is
#                   listed in REQUIRED_KEYS, not because of this field)
#   "description" - human-readable text printed by --schema
REQUIRED_KEYS = {
    "model": {"type": str, "required": True, "description": "Default model name"},
}

# Keys that may be omitted; when present their value must match "type".
OPTIONAL_KEYS = {
    "provider": {"type": str, "required": False, "description": "Default provider"},
    "providers": {"type": dict, "required": False, "description": "Provider configuration"},
    "fallback_providers": {"type": list, "required": False, "description": "Fallback chain"},
    "toolsets": {"type": list, "required": False, "description": "Enabled toolsets"},
    "agent": {"type": dict, "required": False, "description": "Agent configuration"},
    "display": {"type": dict, "required": False, "description": "Display settings"},
}

# Keys that should NOT be present.
# Maps the forbidden key name to the reason that is shown in the validation
# error and in the --schema output.
FORBIDDEN_KEYS = {
    "anthropic_api_key": "Use ANTHROPIC_API_KEY env var instead",
    "openai_api_key": "Use OPENAI_API_KEY env var instead",
    "password": "Never put passwords in config",
    "secret": "Never put secrets in config",
}


class ValidationError:
    """A single finding produced while validating a config document.

    This is a plain record, not an exception: validators collect instances
    in a list and return them instead of raising.

    Attributes:
        path: Location of the problem, e.g. ``"root"``, ``"syntax"`` or a
            dotted key such as ``"agent.max_turns"``.
        message: Human-readable description of the problem.
        severity: Severity label; defaults to ``"error"``.
    """

    def __init__(self, path: str, message: str, severity: str = "error"):
        self.path = path
        self.message = message
        self.severity = severity

    def __str__(self) -> str:
        """Return the user-facing line, e.g. ``[ERROR] model: ...``."""
        return f"[{self.severity.upper()}] {self.path}: {self.message}"

    def __repr__(self) -> str:
        """Return an unambiguous representation for debugging and logs."""
        return (
            f"{type(self).__name__}(path={self.path!r}, "
            f"message={self.message!r}, severity={self.severity!r})"
        )


def validate_yaml_syntax(content: str) -> Tuple[Optional[Dict], List[ValidationError]]:
    """Parse *content* as YAML and confirm the document is a mapping.

    Returns:
        ``(data, errors)``. ``data`` is the parsed dict (``{}`` for an empty
        document) and ``errors`` is empty on success. On failure ``data`` is
        ``None`` and ``errors`` holds one entry: PyYAML is unavailable, the
        text is not valid YAML, or the top-level node is not a dict.
    """
    if not HAS_YAML:
        return None, [ValidationError("root", "PyYAML not installed", "error")]

    try:
        parsed = yaml.safe_load(content)
    except yaml.YAMLError as exc:
        return None, [ValidationError("syntax", str(exc), "error")]

    # An empty document loads as None; treat it as an empty mapping.
    if parsed is None:
        return {}, []

    if isinstance(parsed, dict):
        return parsed, []

    found = type(parsed).__name__
    return None, [ValidationError("root", f"Expected dict, got {found}", "error")]


def validate_required_keys(data: Dict[str, Any]) -> List[ValidationError]:
    """Report every REQUIRED_KEYS entry that is absent or has the wrong type.

    Returns:
        One error per offending key, in REQUIRED_KEYS order; empty when all
        required keys are present with the expected type.
    """
    problems = []
    for name in REQUIRED_KEYS:
        if name not in data:
            problems.append(ValidationError(name, f"Required key missing: {name}", "error"))
            continue

        wanted = REQUIRED_KEYS[name]["type"]
        value = data[name]
        if isinstance(value, wanted):
            continue
        problems.append(
            ValidationError(name, f"Expected {wanted.__name__}, got {type(value).__name__}", "error")
        )
    return problems


def validate_value_types(data: Dict[str, Any], schema: Dict[str, Dict]) -> List[ValidationError]:
    """Compare the values in *data* against the types declared in *schema*.

    Keys listed in *schema* but missing from *data* are ignored, and keys in
    *data* that the schema does not mention are not inspected.

    Returns:
        One error per present key whose value is not an instance of the
        schema's ``"type"``, in schema order.
    """
    present = (
        (key, spec["type"], data[key])
        for key, spec in schema.items()
        if key in data
    )
    return [
        ValidationError(key, f"Expected {wanted.__name__}, got {type(value).__name__}", "error")
        for key, wanted, value in present
        if not isinstance(value, wanted)
    ]


def validate_no_forbidden_keys(data: Dict[str, Any]) -> List[ValidationError]:
    """Flag top-level keys of *data* that appear in FORBIDDEN_KEYS.

    Only the keys of *data* itself are examined; nested mappings are not
    searched.

    Returns:
        One error per forbidden key found, in FORBIDDEN_KEYS order.
    """
    return [
        ValidationError(name, f"Forbidden key: {why}", "error")
        for name, why in FORBIDDEN_KEYS.items()
        if name in data
    ]


def validate_nested(data: Dict[str, Any], path: str = "") -> List[ValidationError]:
    """Validate the contents of the ``providers``, ``agent`` and ``toolsets`` sections.

    A section is only inspected when it is present and already has the
    expected container type; a wrong container type is not reported here.

    Args:
        data: Parsed top-level config mapping.
        path: Accepted for interface compatibility; currently unused.

    Returns:
        Errors in the order providers, agent, toolsets; empty when every
        nested value is acceptable.
    """
    errors = []

    # Every provider entry must itself be a mapping.
    providers = data.get("providers")
    if isinstance(providers, dict):
        for provider_name, provider_config in providers.items():
            if not isinstance(provider_config, dict):
                errors.append(ValidationError(f"providers.{provider_name}", "Provider config must be a dict", "error"))

    agent = data.get("agent")
    if isinstance(agent, dict):
        if "max_turns" in agent:
            max_turns = agent["max_turns"]
            # bool is a subclass of int, so a bare isinstance(..., int) check
            # would accept YAML booleans such as `max_turns: yes` / `true`.
            if isinstance(max_turns, bool) or not isinstance(max_turns, int):
                errors.append(ValidationError("agent.max_turns", "Must be an integer", "error"))
        # None (explicit null) is accepted alongside the three named levels.
        if "reasoning_effort" in agent and agent["reasoning_effort"] not in (None, "low", "medium", "high"):
            errors.append(ValidationError("agent.reasoning_effort", "Must be low/medium/high", "error"))

    # Every toolset entry must be a string.
    toolsets = data.get("toolsets")
    if isinstance(toolsets, list):
        for i, ts in enumerate(toolsets):
            if not isinstance(ts, str):
                errors.append(ValidationError(f"toolsets[{i}]", "Toolset must be a string", "error"))

    return errors


def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:
    """Run the full validation pipeline over raw YAML text.

    Steps, in order: YAML syntax, required keys (presence and type),
    optional key types, forbidden keys, nested sections. If the syntax step
    yields no usable mapping the remaining steps are skipped.

    Args:
        content: The config file's text.

    Returns:
        ``(valid, errors)`` where ``valid`` is True when no collected finding
        has severity ``"error"``, and ``errors`` is every finding in
        pipeline order.
    """
    all_errors: List[ValidationError] = []

    # Step 1: YAML syntax
    data, syntax_errors = validate_yaml_syntax(content)
    all_errors.extend(syntax_errors)
    if data is None:
        return False, all_errors

    # Step 2: Required keys (presence and type)
    all_errors.extend(validate_required_keys(data))

    # Step 3: Value types of optional keys only. Required keys were already
    # type-checked in step 2; checking them again here reported each
    # mismatch twice and inflated the error count.
    all_errors.extend(validate_value_types(data, OPTIONAL_KEYS))

    # Step 4: Forbidden keys
    all_errors.extend(validate_no_forbidden_keys(data))

    # Step 5: Nested validation
    all_errors.extend(validate_nested(data))

    # Any finding with severity "error" makes the config invalid.
    has_errors = any(e.severity == "error" for e in all_errors)
    return not has_errors, all_errors


def print_schema():
    """Write the required, optional and forbidden key tables to stdout."""
    typed_sections = (
        ("Required keys:", REQUIRED_KEYS),
        ("\nOptional keys:", OPTIONAL_KEYS),
    )
    for heading, table in typed_sections:
        print(heading)
        for name in table:
            entry = table[name]
            type_name = entry["type"].__name__
            print(f"  {name}: {type_name} — {entry['description']}")

    print("\nForbidden keys:")
    for name in FORBIDDEN_KEYS:
        print(f"  {name} — {FORBIDDEN_KEYS[name]}")


def main():
    """Command-line entry point.

    With ``--schema`` the expected schema is printed. With no file argument
    the help text is printed. Otherwise the file is read and validated,
    findings are printed, and the process exits with status 1 when the file
    is missing, cannot be read, or fails validation.
    """
    parser = argparse.ArgumentParser(description="Validate timmy-config YAML files")
    parser.add_argument("file", nargs="?", help="Config file to validate")
    parser.add_argument("--deploy", action="store_true", help="Validate then deploy (write)")
    parser.add_argument("--schema", action="store_true", help="Show expected schema")
    args = parser.parse_args()

    if args.schema:
        print_schema()
        return

    if not args.file:
        parser.print_help()
        return

    path = Path(args.file)
    if not path.exists():
        print(f"Error: {path} not found")
        sys.exit(1)

    # Read explicitly as UTF-8 instead of the locale default, and report
    # unreadable paths (directory, permissions, bad bytes) without a traceback.
    try:
        content = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        print(f"Error: could not read {path}: {exc}")
        sys.exit(1)

    valid, errors = validate_config(content)

    if errors:
        print(f"Validation results for {path}:")
        for err in errors:
            print(f"  {err}")
        print()

    if valid:
        print(f"✓ {path} is valid")
        if args.deploy:
            print(f"Deploying {path}...")
            # In real usage, this would write to ~/.hermes/config.yaml
            print("Deploy complete.")
    else:
        print(f"✗ {path} has {sum(1 for e in errors if e.severity == 'error')} error(s)")
        print("Fix errors before deploying.")
        sys.exit(1)


if __name__ == "__main__":
    main()