scripts/config_validate.py

#!/usr/bin/env python3
"""
config_validate.py — Pre-deploy validation for timmy-config YAML files (Issue #690).

Validates YAML syntax, required keys, value types, and forbidden keys before
writing config to disk. Prevents broken deploys from bad config.

Usage:
    python3 scripts/config_validate.py config.yaml              # Validate only
    python3 scripts/config_validate.py config.yaml --deploy DEST # Validate, then copy to DEST
    python3 scripts/config_validate.py --schema                 # Print expected schema
    python3 scripts/config_validate.py --check-dir config/      # Validate all YAML in dir
    python3 scripts/config_validate.py --json config.yaml       # Output as JSON
"""
import argparse
import json
import sys
import shutil
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False


# ─── Schema definition ───────────────────────────────────────────────

def _optional(expected_type):
    """Return the spec of an optional leaf field of ``expected_type``."""
    return {"type": expected_type, "required": False}


def _optional_str_list(description):
    """Return the spec of an optional top-level list whose items are strings."""
    return {
        "type": list,
        "required": False,
        "description": description,
        "item_type": str,
    }


def _optional_section(description, **fields):
    """Return the spec of an optional mapping with the given known sub-keys.

    ``fields`` maps each sub-key name to its expected type (a class or a
    tuple of classes); every sub-key is optional.
    """
    return {
        "type": dict,
        "required": False,
        "description": description,
        "value_schema": {name: _optional(kind) for name, kind in fields.items()},
    }


# Expected shape of a config file. Each entry maps a key to a spec with:
#   type         - class (or tuple of classes) the value must be an instance of
#   required     - whether the key must be present
#   description  - human-readable text shown by dump_schema()
#   item_type    - (lists only) class every list item must be an instance of
#   value_schema - (dicts only) nested specs for the keys inside the mapping
# Insertion order is the order in which dump_schema() lists the keys.
SCHEMA = {
    "model": {
        "type": str,
        "required": True,
        "description": "Default model identifier (e.g. 'nousresearch/hermes-4-14b')",
    },
    "provider": {
        "type": str,
        "required": False,
        "description": "Default provider name",
    },
    # NOTE(review): the description says entries are keyed by provider name,
    # but the validators apply value_schema to this mapping's own keys, not to
    # each named provider's mapping — confirm the intended shape.
    "providers": _optional_section(
        "Provider configurations keyed by name",
        base_url=str,
        api_key_env=str,
    ),
    "fallback_providers": _optional_str_list("Ordered fallback provider chain"),
    "toolsets": _optional_str_list("Enabled toolset names"),
    "agent": _optional_section(
        "Agent behavior configuration",
        max_iterations=int,
        temperature=(int, float),
        save_trajectories=bool,
        quiet_mode=bool,
    ),
    "display": _optional_section(
        "CLI display settings",
        spinner=bool,
        colors=bool,
        skin=str,
        tool_progress=bool,
    ),
    "gateway": _optional_section(
        "Gateway/messaging settings",
        enabled=bool,
        port=int,
        cors_origins=list,
    ),
    "cron": _optional_section(
        "Cron scheduler settings",
        enabled=bool,
        interval_seconds=int,
        max_concurrent=int,
    ),
    "logging": _optional_section(
        "Logging configuration",
        level=str,
        file=(str, type(None)),  # an explicit null is accepted for the log file
    ),
    "session": _optional_section(
        "Session behavior",
        save_trajectories=bool,
        max_iterations=int,
        context_compression=bool,
    ),
}

# Lower-case key names that must never appear anywhere in a config file,
# mapped to the message reported when one is found.
FORBIDDEN_KEYS = dict(
    anthropic_api_key="Use ANTHROPIC_API_KEY env var — never store keys in config",
    openai_api_key="Use OPENAI_API_KEY env var — never store keys in config",
    openrouter_api_key="Use OPENROUTER_API_KEY env var — never store keys in config",
    password="Never store passwords in config",
    secret="Never store secrets in config",
    token="Never store tokens in config — use env vars",
)


# ─── Validation errors ───────────────────────────────────────────────

class ValidationError:
    """One finding produced while validating a config.

    This is a plain record, not an exception: it is collected into lists and
    never raised. ``path`` locates the offending key, ``message`` describes
    the problem, and ``severity`` is ``"error"`` by default (any other value
    is displayed as a warning).
    """

    def __init__(self, path: str, message: str, severity: str = "error"):
        self.path, self.message, self.severity = path, message, severity

    def to_dict(self) -> dict:
        """Return the finding as a JSON-serialisable mapping."""
        return dict(path=self.path, message=self.message, severity=self.severity)

    def __str__(self):
        # Anything that is not exactly "error" is shown with the WARN tag.
        if self.severity == "error":
            tag = "ERROR"
        else:
            tag = "WARN"
        return f"[{tag}] {self.path}: {self.message}"

    def __repr__(self):
        path, message, severity = self.path, self.message, self.severity
        return f"ValidationError({path!r}, {message!r}, {severity!r})"


# ─── Core validators ─────────────────────────────────────────────────

def validate_yaml_syntax(content: str) -> Tuple[Optional[dict], List[ValidationError]]:
    """Parse ``content`` as YAML and check that the document is a mapping.

    Returns:
        ``(data, findings)``. ``data`` is the parsed mapping, ``{}`` for an
        empty document (with a warning), or ``None`` when PyYAML is missing,
        the text does not parse, or the top-level value is not a mapping
        (each with a single error finding).
    """
    if not HAS_YAML:
        return None, [ValidationError("root", "PyYAML not installed — pip install pyyaml")]

    try:
        parsed = yaml.safe_load(content)
    except yaml.YAMLError as exc:
        return None, [ValidationError("syntax", str(exc))]

    if isinstance(parsed, dict):
        return parsed, []
    if parsed is None:
        # safe_load yields None for an empty document; treat it as an empty config.
        return {}, [ValidationError("root", "Config file is empty", "warning")]
    return None, [ValidationError("root", f"Expected mapping, got {type(parsed).__name__}")]


def validate_required_keys(data: dict) -> List[ValidationError]:
    """Check that every required top-level key is present with the right type.

    Args:
        data: Parsed top-level config mapping.

    Returns:
        One error finding per required key in ``SCHEMA`` that is missing or
        whose value is not an instance of the spec's type. Empty when all
        required keys are satisfied.
    """
    errors: List[ValidationError] = []
    for key, spec in SCHEMA.items():
        if not spec.get("required"):
            continue
        if key not in data:
            errors.append(ValidationError(key, f"Required key '{key}' is missing"))
            continue
        expected = spec["type"]
        if isinstance(data[key], expected):
            continue
        # A spec type may be a tuple of alternatives such as (int, float);
        # tuples have no __name__, so name each alternative individually.
        alternatives = expected if isinstance(expected, tuple) else (expected,)
        expected_name = " or ".join(t.__name__ for t in alternatives)
        errors.append(ValidationError(
            key,
            f"Expected {expected_name}, got {type(data[key]).__name__}"
        ))
    return errors


def _expected_label(expected) -> str:
    """Render a spec type (a class or a tuple of classes) for an error message."""
    alternatives = expected if isinstance(expected, tuple) else (expected,)
    return " or ".join(t.__name__ for t in alternatives)


def _type_matches(value, expected) -> bool:
    """Return True if ``value`` satisfies ``expected`` without bool passing as int."""
    if isinstance(value, bool):
        # bool subclasses int, so a plain isinstance() would accept `port: yes`.
        # Accept a bool only where the spec allows bool (or a non-int base of it).
        alternatives = expected if isinstance(expected, tuple) else (expected,)
        return any(t is not int and issubclass(bool, t) for t in alternatives)
    return isinstance(value, expected)


def validate_value_types(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
    """Check that every known key present in ``data`` has the expected type.

    Keys absent from ``data`` are skipped (presence is not checked here), and
    keys absent from ``schema`` are ignored.

    Args:
        data: Mapping to check.
        schema: Specs to check against; defaults to the module-level ``SCHEMA``.
        prefix: Dotted path of ``data`` within the whole config, used to build
            the path of each finding.

    Returns:
        One error finding per mistyped value. List items are checked against
        ``item_type`` and nested mappings against ``value_schema`` when the
        spec provides them.
    """
    if schema is None:
        schema = SCHEMA
    errors: List[ValidationError] = []
    for key, spec in schema.items():
        if key not in data:
            continue
        full_key = f"{prefix}.{key}" if prefix else key
        value = data[key]
        expected = spec["type"]
        if not _type_matches(value, expected):
            errors.append(ValidationError(
                full_key,
                f"Expected {_expected_label(expected)}, got {type(value).__name__}"
            ))
            # The container itself is wrong, so there is nothing sensible to descend into.
            continue
        # Check list item types
        if isinstance(value, list) and "item_type" in spec:
            item_type = spec["item_type"]
            for i, item in enumerate(value):
                if not _type_matches(item, item_type):
                    errors.append(ValidationError(
                        f"{full_key}[{i}]",
                        f"Expected {_expected_label(item_type)}, got {type(item).__name__}"
                    ))
        # Recurse into nested dicts
        if isinstance(value, dict) and "value_schema" in spec:
            errors.extend(validate_value_types(value, spec["value_schema"], full_key))
    return errors


def validate_no_forbidden_keys(data: dict, prefix: str = "") -> List[ValidationError]:
    """Report every key, at any depth, whose name is listed in ``FORBIDDEN_KEYS``.

    Key names are compared case-insensitively. Nested mappings are searched,
    including mappings that appear as items of a list.

    Args:
        data: Mapping to scan.
        prefix: Dotted path of ``data`` within the whole config, used to build
            the path of each finding.

    Returns:
        One error finding per forbidden key occurrence.
    """
    errors: List[ValidationError] = []
    for key, value in data.items():
        full_key = f"{prefix}.{key}" if prefix else f"{key}"
        # YAML keys are not always strings (e.g. `1:` or `on:`); only strings
        # can match a forbidden name, and calling .lower() on others would crash.
        if isinstance(key, str):
            reason = FORBIDDEN_KEYS.get(key.lower())
            if reason is not None:
                errors.append(ValidationError(full_key, reason))
        if isinstance(value, dict):
            errors.extend(validate_no_forbidden_keys(value, full_key))
        elif isinstance(value, list):
            # Secrets can also hide in a list of mappings.
            for i, item in enumerate(value):
                if isinstance(item, dict):
                    errors.extend(validate_no_forbidden_keys(item, f"{full_key}[{i}]"))
    return errors


def validate_unknown_keys(data: dict, schema: dict = None, prefix: str = "") -> List[ValidationError]:
    """Produce a warning for each key in ``data`` that the schema does not list.

    Known keys holding a mapping are descended into when their spec carries a
    ``value_schema``; unknown keys are reported once and not descended into.
    ``schema`` defaults to the module-level ``SCHEMA`` and ``prefix`` is the
    dotted path of ``data`` used to build each finding's path.
    """
    active_schema = SCHEMA if schema is None else schema
    findings: List[ValidationError] = []
    for key, value in data.items():
        dotted = f"{prefix}.{key}" if prefix else key
        if key not in active_schema:
            findings.append(ValidationError(dotted, "Unknown key — not in schema", "warning"))
            continue
        spec = active_schema[key]
        if isinstance(value, dict) and "value_schema" in spec:
            findings.extend(validate_unknown_keys(value, spec["value_schema"], dotted))
    return findings


# ─── Public API ───────────────────────────────────────────────────────

def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:
    """Run the full validation pipeline on YAML text.

    Args:
        content: Raw YAML text of the config.

    Returns:
        ``(is_valid, findings)``. ``is_valid`` is False when any finding has
        severity ``"error"``; warnings alone do not fail validation. When the
        text cannot be parsed into a mapping, only the syntax findings are
        returned and the remaining checks are skipped.
    """
    data, findings = validate_yaml_syntax(content)
    if data is None:
        return False, findings

    for check in (
        validate_required_keys,
        validate_value_types,
        validate_no_forbidden_keys,
        validate_unknown_keys,
    ):
        findings.extend(check(data))

    # A mistyped required key is reported by both the required-key check and
    # the type check; drop exact repeats so each problem is listed and counted
    # once. First occurrence wins, so the original ordering is kept.
    seen = set()
    unique: List[ValidationError] = []
    for finding in findings:
        fingerprint = (finding.path, finding.message, finding.severity)
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        unique.append(finding)

    has_errors = any(f.severity == "error" for f in unique)
    return not has_errors, unique


def validate_file(path: str) -> Tuple[bool, List[ValidationError]]:
    """Validate a YAML file on disk.

    Args:
        path: Location of the file to validate.

    Returns:
        ``(is_valid, findings)`` as produced by ``validate_config``. A path
        that is missing, is not a regular file, cannot be read, or is not
        valid UTF-8 yields ``(False, [finding])`` instead of raising, so one
        bad entry cannot abort a directory scan.
    """
    p = Path(path)
    if not p.exists():
        return False, [ValidationError(str(p), "File not found")]
    if not p.is_file():
        # exists() is also true for directories, which read_text() cannot open.
        return False, [ValidationError(str(p), "Not a regular file")]
    try:
        content = p.read_text(encoding="utf-8")
    except UnicodeDecodeError as exc:
        return False, [ValidationError(str(p), f"File is not valid UTF-8: {exc}")]
    except OSError as exc:
        return False, [ValidationError(str(p), f"Cannot read file: {exc}")]
    return validate_config(content)


# ─── Schema dump ──────────────────────────────────────────────────────

def _schema_type_name(expected) -> str:
    """Render a spec type (a class or a tuple of classes) as readable text."""
    alternatives = expected if isinstance(expected, tuple) else (expected,)
    return " | ".join(
        "None" if t is type(None) else getattr(t, "__name__", str(t))
        for t in alternatives
    )


def dump_schema(schema: Optional[dict] = None, prefix: str = "", indent: int = 0) -> List[str]:
    """Render a schema as one human-readable line per key.

    Args:
        schema: Specs to render; defaults to the module-level ``SCHEMA``.
        prefix: Dotted path prepended to each key name.
        indent: Nesting depth; each level indents the line by two spaces.

    Returns:
        Lines of the form ``key: type (required) — description``, each key
        followed by the lines of its ``value_schema`` one level deeper. Types
        given as a tuple of alternatives are shown as ``int | float``.
    """
    if schema is None:
        schema = SCHEMA
    lines: List[str] = []
    for key, spec in schema.items():
        full_key = f"{prefix}.{key}" if prefix else key
        req = " (required)" if spec.get("required") else ""
        desc = spec.get("description", "")
        type_name = _schema_type_name(spec["type"])
        lines.append(f"{'  ' * indent}{full_key}: {type_name}{req} — {desc}")
        if "value_schema" in spec:
            lines.extend(dump_schema(spec["value_schema"], full_key, indent + 1))
    return lines


# ─── CLI ──────────────────────────────────────────────────────────────

def main():
    """Command-line entry point.

    Modes, checked in this order:
      --schema         print the expected schema and return
      --check-dir DIR  validate every .yaml/.yml file directly inside DIR
      FILE             validate one file; with --deploy DEST, copy it to DEST
                       once it has validated

    Exits with status 1 when validation fails, when DIR is not a directory,
    or when the deploy copy fails. With --json, stdout carries only the JSON
    document so it can be piped to a parser; other messages go to stderr.
    """
    parser = argparse.ArgumentParser(description="Validate timmy-config YAML before deploy")
    parser.add_argument("file", nargs="?", help="YAML file to validate")
    parser.add_argument("--deploy", metavar="DEST", help="Validate then copy to DEST")
    parser.add_argument("--schema", action="store_true", help="Print expected schema")
    parser.add_argument("--check-dir", metavar="DIR", help="Validate all .yaml/.yml in directory")
    parser.add_argument("--json", action="store_true", dest="as_json", help="Output as JSON")
    args = parser.parse_args()

    if args.schema:
        print("timmy-config schema:\n")
        for line in dump_schema():
            print(f"  {line}")
        return

    if args.check_dir:
        d = Path(args.check_dir)
        if not d.is_dir():
            print(f"ERROR: {d} is not a directory", file=sys.stderr)
            sys.exit(1)
        # Match on the exact suffix: a glob such as "*.y*ml" would also pick
        # up names like "x.yhtml", and would include directories.
        yaml_files = sorted(
            entry for entry in d.iterdir()
            if entry.suffix in (".yaml", ".yml") and entry.is_file()
        )
        all_valid = True
        results = []
        for yf in yaml_files:
            valid, errors = validate_file(str(yf))
            results.append({"file": str(yf), "valid": valid, "errors": [e.to_dict() for e in errors]})
            if valid:
                continue
            all_valid = False
            if args.as_json:
                # The findings are already in `results`; extra lines on stdout
                # would make the JSON output unparseable.
                continue
            for e in errors:
                if e.severity == "error":
                    print(f"  {yf.name}: {e}")
        if args.as_json:
            print(json.dumps(results, indent=2))
        elif all_valid:
            print(f"OK: All YAML in {d} valid")
        sys.exit(0 if all_valid else 1)

    if not args.file:
        parser.error("FILE required (or use --schema / --check-dir)")

    valid, errors = validate_file(args.file)

    if args.as_json:
        out = {"file": args.file, "valid": valid, "errors": [e.to_dict() for e in errors]}
        print(json.dumps(out, indent=2))
    else:
        for e in errors:
            print(f"  {e}")
        if valid:
            print(f"OK: {args.file} is valid")
        else:
            print(f"FAIL: {args.file} has {sum(1 for e in errors if e.severity == 'error')} errors")

    if not valid:
        sys.exit(1)

    # Deploy mode: copy validated file to destination
    if args.deploy:
        dest = Path(args.deploy)
        try:
            shutil.copy2(args.file, dest)
        except OSError as exc:
            # Covers a missing destination directory, permission problems and
            # shutil.SameFileError (source and destination are the same file).
            print(f"ERROR: deploy to {dest} failed: {exc}", file=sys.stderr)
            sys.exit(1)
        # In JSON mode keep stdout a single JSON document.
        print(f"DEPLOYED: {args.file} -> {dest}", file=sys.stderr if args.as_json else sys.stdout)


if __name__ == "__main__":
    main()
feat: Add pre-deploy config validation — YAML syntax, keys, types, forbidden keys (#690) 2026-04-17 05:19:29 +00:00			`#!/usr/bin/env python3`
			`"""`
			`config_validate.py — Pre-deploy validation for timmy-config YAML files (Issue #690).`

			`Validates YAML syntax, required keys, value types, and forbidden keys before`
			`writing config to disk. Prevents broken deploys from bad config.`

			`Usage:`
			`python3 scripts/config_validate.py config.yaml # Validate only`
			`python3 scripts/config_validate.py config.yaml --deploy # Validate then write`
			`python3 scripts/config_validate.py --schema # Print expected schema`
			`python3 scripts/config_validate.py --check-dir config/ # Validate all YAML in dir`
			`python3 scripts/config_validate.py --json config.yaml # Output as JSON`
			`"""`
			`import argparse`
			`import json`
			`import sys`
			`import shutil`
			`from pathlib import Path`
			`from typing import Any, Dict, List, Optional, Tuple`

			`try:`
			`import yaml`
			`HAS_YAML = True`
			`except ImportError:`
			`HAS_YAML = False`


			`# ─── Schema definition ───────────────────────────────────────────────`

			`SCHEMA = {`
			`"model": {`
			`"type": str,`
			`"required": True,`
			`"description": "Default model identifier (e.g. 'nousresearch/hermes-4-14b')",`
			`},`
			`"provider": {`
			`"type": str,`
			`"required": False,`
			`"description": "Default provider name",`
			`},`
			`"providers": {`
			`"type": dict,`
			`"required": False,`
			`"description": "Provider configurations keyed by name",`
			`"value_schema": {`
			`"base_url": {"type": str, "required": False},`
			`"api_key_env": {"type": str, "required": False},`
			`},`
			`},`
			`"fallback_providers": {`
			`"type": list,`
			`"required": False,`
			`"description": "Ordered fallback provider chain",`
			`"item_type": str,`
			`},`
			`"toolsets": {`
			`"type": list,`
			`"required": False,`
			`"description": "Enabled toolset names",`
			`"item_type": str,`
			`},`
			`"agent": {`
			`"type": dict,`
			`"required": False,`
			`"description": "Agent behavior configuration",`
			`"value_schema": {`
			`"max_iterations": {"type": int, "required": False},`
			`"temperature": {"type": (int, float), "required": False},`
			`"save_trajectories": {"type": bool, "required": False},`
			`"quiet_mode": {"type": bool, "required": False},`
			`},`
			`},`
			`"display": {`
			`"type": dict,`
			`"required": False,`
			`"description": "CLI display settings",`
			`"value_schema": {`
			`"spinner": {"type": bool, "required": False},`
			`"colors": {"type": bool, "required": False},`
			`"skin": {"type": str, "required": False},`
			`"tool_progress": {"type": bool, "required": False},`
			`},`
			`},`
			`"gateway": {`
			`"type": dict,`
			`"required": False,`
			`"description": "Gateway/messaging settings",`
			`"value_schema": {`
			`"enabled": {"type": bool, "required": False},`
			`"port": {"type": int, "required": False},`
			`"cors_origins": {"type": list, "required": False},`
			`},`
			`},`
			`"cron": {`
			`"type": dict,`
			`"required": False,`
			`"description": "Cron scheduler settings",`
			`"value_schema": {`
			`"enabled": {"type": bool, "required": False},`
			`"interval_seconds": {"type": int, "required": False},`
			`"max_concurrent": {"type": int, "required": False},`
			`},`
			`},`
			`"logging": {`
			`"type": dict,`
			`"required": False,`
			`"description": "Logging configuration",`
			`"value_schema": {`
			`"level": {"type": str, "required": False},`
			`"file": {"type": (str, type(None)), "required": False},`
			`},`
			`},`
			`"session": {`
			`"type": dict,`
			`"required": False,`
			`"description": "Session behavior",`
			`"value_schema": {`
			`"save_trajectories": {"type": bool, "required": False},`
			`"max_iterations": {"type": int, "required": False},`
			`"context_compression": {"type": bool, "required": False},`
			`},`
			`},`
			`}`

			`FORBIDDEN_KEYS = {`
			`"anthropic_api_key": "Use ANTHROPIC_API_KEY env var — never store keys in config",`
			`"openai_api_key": "Use OPENAI_API_KEY env var — never store keys in config",`
			`"openrouter_api_key": "Use OPENROUTER_API_KEY env var — never store keys in config",`
			`"password": "Never store passwords in config",`
			`"secret": "Never store secrets in config",`
			`"token": "Never store tokens in config — use env vars",`
			`}`


			`# ─── Validation errors ───────────────────────────────────────────────`

			`class ValidationError:`
			`def __init__(self, path: str, message: str, severity: str = "error"):`
			`self.path = path`
			`self.message = message`
			`self.severity = severity`

			`def to_dict(self) -> dict:`
			`return {"path": self.path, "message": self.message, "severity": self.severity}`

			`def __str__(self):`
			`tag = "ERROR" if self.severity == "error" else "WARN"`
			`return f"[{tag}] {self.path}: {self.message}"`

			`def __repr__(self):`
			`return f"ValidationError({self.path!r}, {self.message!r}, {self.severity!r})"`


			`# ─── Core validators ─────────────────────────────────────────────────`

			`def validate_yaml_syntax(content: str) -> Tuple[Optional[dict], List[ValidationError]]:`
			`"""Parse YAML, return (data, errors)."""`
			`errors: List[ValidationError] = []`
			`if not HAS_YAML:`
			`return None, [ValidationError("root", "PyYAML not installed — pip install pyyaml")]`
			`try:`
			`data = yaml.safe_load(content)`
			`except yaml.YAMLError as e:`
			`return None, [ValidationError("syntax", str(e))]`
			`if data is None:`
			`return {}, [ValidationError("root", "Config file is empty", "warning")]`
			`if not isinstance(data, dict):`
			`return None, [ValidationError("root", f"Expected mapping, got {type(data).__name__}")]`
			`return data, errors`


			`def validate_required_keys(data: dict) -> List[ValidationError]:`
			`"""Check required keys exist and have correct types."""`
			`errors: List[ValidationError] = []`
			`for key, spec in SCHEMA.items():`
			`if spec.get("required"):`
			`if key not in data:`
			`errors.append(ValidationError(key, f"Required key '{key}' is missing"))`
			`elif not isinstance(data[key], spec["type"]):`
			`errors.append(ValidationError(`
			`key,`
			`f"Expected {spec['type'].__name__}, got {type(data[key]).__name__}"`
			`))`
			`return errors`


			`def validate_value_types(data: dict, schema: dict = None, prefix: str = "") -> List[ValidationError]:`
			`"""Check all known keys have correct types, recursively."""`
			`if schema is None:`
			`schema = SCHEMA`
			`errors: List[ValidationError] = []`
			`for key, spec in schema.items():`
			`full_key = f"{prefix}.{key}" if prefix else key`
			`if key not in data:`
			`continue`
			`value = data[key]`
			`expected = spec["type"]`
			`if not isinstance(value, expected):`
			`errors.append(ValidationError(`
			`full_key,`
			`f"Expected {expected.__name__}, got {type(value).__name__}"`
			`))`
			`continue`
			`# Check list item types`
			`if isinstance(value, list) and "item_type" in spec:`
			`for i, item in enumerate(value):`
			`if not isinstance(item, spec["item_type"]):`
			`errors.append(ValidationError(`
			`f"{full_key}[{i}]",`
			`f"Expected {spec['item_type'].__name__}, got {type(item).__name__}"`
			`))`
			`# Recurse into nested dicts`
			`if isinstance(value, dict) and "value_schema" in spec:`
			`errors.extend(validate_value_types(value, spec["value_schema"], full_key))`
			`return errors`


			`def validate_no_forbidden_keys(data: dict, prefix: str = "") -> List[ValidationError]:`
			`"""Check for keys that should never be in config."""`
			`errors: List[ValidationError] = []`
			`for key, value in data.items():`
			`full_key = f"{prefix}.{key}" if prefix else key`
			`if key.lower() in FORBIDDEN_KEYS:`
			`errors.append(ValidationError(full_key, FORBIDDEN_KEYS[key.lower()]))`
			`if isinstance(value, dict):`
			`errors.extend(validate_no_forbidden_keys(value, full_key))`
			`return errors`


			`def validate_unknown_keys(data: dict, schema: dict = None, prefix: str = "") -> List[ValidationError]:`
			`"""Warn about keys not in schema (not an error, just a warning)."""`
			`if schema is None:`
			`schema = SCHEMA`
			`warnings: List[ValidationError] = []`
			`known = set(schema.keys())`
			`for key in data:`
			`full_key = f"{prefix}.{key}" if prefix else key`
			`if key not in known:`
			`warnings.append(ValidationError(full_key, "Unknown key — not in schema", "warning"))`
			`elif isinstance(data[key], dict) and key in schema and "value_schema" in schema[key]:`
			`warnings.extend(validate_unknown_keys(data[key], schema[key]["value_schema"], full_key))`
			`return warnings`


			`# ─── Public API ───────────────────────────────────────────────────────`

			`def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:`
			`"""Full validation pipeline. Returns (is_valid, errors)."""`
			`data, errors = validate_yaml_syntax(content)`
			`if data is None:`
			`return False, errors`

			`errors.extend(validate_required_keys(data))`
			`errors.extend(validate_value_types(data))`
			`errors.extend(validate_no_forbidden_keys(data))`
			`errors.extend(validate_unknown_keys(data))`

			`has_errors = any(e.severity == "error" for e in errors)`
			`return not has_errors, errors`


			`def validate_file(path: str) -> Tuple[bool, List[ValidationError]]:`
			`"""Validate a YAML file on disk."""`
			`p = Path(path)`
			`if not p.exists():`
			`return False, [ValidationError(str(p), "File not found")]`
			`content = p.read_text(encoding="utf-8")`
			`return validate_config(content)`


			`# ─── Schema dump ──────────────────────────────────────────────────────`

			`def dump_schema(schema: dict = None, prefix: str = "", indent: int = 0) -> List[str]:`
			`"""Pretty-print schema as text."""`
			`if schema is None:`
			`schema = SCHEMA`
			`lines: List[str] = []`
			`for key, spec in schema.items():`
			`full_key = f"{prefix}.{key}" if prefix else key`
			`req = " (required)" if spec.get("required") else ""`
			`desc = spec.get("description", "")`
			`type_name = spec["type"].__name__ if hasattr(spec["type"], "__name__") else str(spec["type"])`
			`lines.append(f"{' ' * indent}{full_key}: {type_name}{req} — {desc}")`
			`if "value_schema" in spec:`
			`lines.extend(dump_schema(spec["value_schema"], full_key, indent + 1))`
			`return lines`


			`# ─── CLI ──────────────────────────────────────────────────────────────`

			`def main():`
			`parser = argparse.ArgumentParser(description="Validate timmy-config YAML before deploy")`
			`parser.add_argument("file", nargs="?", help="YAML file to validate")`
			`parser.add_argument("--deploy", metavar="DEST", help="Validate then copy to DEST")`
			`parser.add_argument("--schema", action="store_true", help="Print expected schema")`
			`parser.add_argument("--check-dir", metavar="DIR", help="Validate all .yaml/.yml in directory")`
			`parser.add_argument("--json", action="store_true", dest="as_json", help="Output as JSON")`
			`args = parser.parse_args()`

			`if args.schema:`
			`lines = dump_schema()`
			`print("timmy-config schema:\n")`
			`for line in lines:`
			`print(f" {line}")`
			`return`

			`if args.check_dir:`
			`d = Path(args.check_dir)`
			`if not d.is_dir():`
			`print(f"ERROR: {d} is not a directory", file=sys.stderr)`
			`sys.exit(1)`
			`all_valid = True`
			`results = []`
			`for yf in sorted(d.glob(".yml")):`
			`valid, errors = validate_file(str(yf))`
			`results.append({"file": str(yf), "valid": valid, "errors": [e.to_dict() for e in errors]})`
			`if not valid:`
			`all_valid = False`
			`for e in errors:`
			`if e.severity == "error":`
			`print(f" {yf.name}: {e}")`
			`if args.as_json:`
			`print(json.dumps(results, indent=2))`
			`elif all_valid:`
			`print(f"OK: All YAML in {d} valid")`
			`sys.exit(0 if all_valid else 1)`

			`if not args.file:`
			`parser.error("FILE required (or use --schema / --check-dir)")`

			`valid, errors = validate_file(args.file)`

			`if args.as_json:`
			`out = {"file": args.file, "valid": valid, "errors": [e.to_dict() for e in errors]}`
			`print(json.dumps(out, indent=2))`
			`else:`
			`for e in errors:`
			`print(f" {e}")`
			`if valid:`
			`print(f"OK: {args.file} is valid")`
			`else:`
			`print(f"FAIL: {args.file} has {sum(1 for e in errors if e.severity == 'error')} errors")`

			`if not valid:`
			`sys.exit(1)`

			`# Deploy mode: copy validated file to destination`
			`if args.deploy:`
			`dest = Path(args.deploy)`
			`shutil.copy2(args.file, dest)`
			`print(f"DEPLOYED: {args.file} -> {dest}")`


			`if __name__ == "__main__":`
			`main()`