#!/usr/bin/env python3
"""
config-validate — Pre-deploy validation for timmy-config YAML files.

Validates YAML syntax, required keys, and value types before writing
config to disk. Prevents broken deploys from bad config.

Usage:
    python scripts/config_validate.py config.yaml
    python scripts/config_validate.py config.yaml --deploy   # Validate + deploy
    python scripts/config_validate.py --schema               # Show expected schema

Note: the --deploy step is currently a placeholder; it reports success but
does not write any file.
"""

import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False


# Expected schema for hermes config.yaml
REQUIRED_KEYS = {
    "model": {"type": str, "required": True, "description": "Default model name"},
}

OPTIONAL_KEYS = {
    "provider": {"type": str, "required": False, "description": "Default provider"},
    "providers": {"type": dict, "required": False, "description": "Provider configuration"},
    "fallback_providers": {"type": list, "required": False, "description": "Fallback chain"},
    "toolsets": {"type": list, "required": False, "description": "Enabled toolsets"},
    "agent": {"type": dict, "required": False, "description": "Agent configuration"},
    "display": {"type": dict, "required": False, "description": "Display settings"},
}

# Keys that should NOT be present
FORBIDDEN_KEYS = {
    "anthropic_api_key": "Use ANTHROPIC_API_KEY env var instead",
    "openai_api_key": "Use OPENAI_API_KEY env var instead",
    "password": "Never put passwords in config",
    "secret": "Never put secrets in config",
}


class ValidationError:
    """A single validation finding (a plain record, not an exception).

    Attributes:
        path: Location of the problem, e.g. ``"agent.max_turns"``.
        message: Human-readable description of the problem.
        severity: Severity label; only ``"error"`` makes a config invalid.
    """

    def __init__(self, path: str, message: str, severity: str = "error"):
        self.path = path
        self.message = message
        self.severity = severity

    def __str__(self):
        return f"[{self.severity.upper()}] {self.path}: {self.message}"

    def __repr__(self):
        return (
            f"{type(self).__name__}({self.path!r}, {self.message!r}, "
            f"{self.severity!r})"
        )


def validate_yaml_syntax(content: str) -> Tuple[Optional[Dict], List[ValidationError]]:
    """Validate YAML can be parsed.

    Returns:
        ``(data, errors)``. ``data`` is the parsed mapping, ``{}`` for an
        empty document, or ``None`` when parsing failed, PyYAML is missing,
        or the top-level value is not a mapping.
    """
    if not HAS_YAML:
        return None, [ValidationError("root", "PyYAML not installed", "error")]

    # Keep the try body to the one call that can raise YAMLError.
    try:
        data = yaml.safe_load(content)
    except yaml.YAMLError as exc:
        return None, [ValidationError("syntax", str(exc), "error")]

    if data is None:
        # An empty document parses to None; treat it as an empty config.
        return {}, []
    if not isinstance(data, dict):
        return None, [
            ValidationError("root", f"Expected dict, got {type(data).__name__}", "error")
        ]
    return data, []


def validate_required_keys(data: Dict[str, Any]) -> List[ValidationError]:
    """Check required keys exist and have the type declared in REQUIRED_KEYS."""
    errors = []
    for key, spec in REQUIRED_KEYS.items():
        if key not in data:
            errors.append(ValidationError(key, f"Required key missing: {key}", "error"))
        elif not isinstance(data[key], spec["type"]):
            errors.append(ValidationError(
                key,
                f"Expected {spec['type'].__name__}, got {type(data[key]).__name__}",
                "error",
            ))
    return errors


def validate_value_types(data: Dict[str, Any], schema: Dict[str, Dict]) -> List[ValidationError]:
    """Check value types match schema.

    Only keys present in both ``data`` and ``schema`` are checked; missing
    keys are not reported here.
    """
    errors = []
    for key, spec in schema.items():
        if key not in data:
            continue
        expected_type = spec["type"]
        actual = data[key]
        if not isinstance(actual, expected_type):
            errors.append(ValidationError(
                key,
                f"Expected {expected_type.__name__}, got {type(actual).__name__}",
                "error",
            ))
    return errors


def validate_no_forbidden_keys(data: Dict[str, Any]) -> List[ValidationError]:
    """Check for top-level keys that should not be in config."""
    return [
        ValidationError(key, f"Forbidden key: {reason}", "error")
        for key, reason in FORBIDDEN_KEYS.items()
        if key in data
    ]


def validate_nested(data: Dict[str, Any], path: str = "") -> List[ValidationError]:
    """Validate the contents of the providers, agent and toolsets sections.

    Sections whose top-level type is wrong are skipped here; that mismatch
    is reported by ``validate_value_types``. ``path`` is accepted for
    interface compatibility and is currently unused.
    """
    errors = []

    # Check providers dict
    providers = data.get("providers")
    if isinstance(providers, dict):
        for provider_name, provider_config in providers.items():
            if not isinstance(provider_config, dict):
                errors.append(ValidationError(
                    f"providers.{provider_name}", "Provider config must be a dict", "error"
                ))

    # Check agent dict
    agent = data.get("agent")
    if isinstance(agent, dict):
        if "max_turns" in agent:
            max_turns = agent["max_turns"]
            # bool is a subclass of int, so True/False must be rejected explicitly.
            if isinstance(max_turns, bool) or not isinstance(max_turns, int):
                errors.append(ValidationError("agent.max_turns", "Must be an integer", "error"))
        if "reasoning_effort" in agent and agent["reasoning_effort"] not in (
            None, "low", "medium", "high"
        ):
            errors.append(ValidationError(
                "agent.reasoning_effort", "Must be low/medium/high", "error"
            ))

    # Check toolsets is list of strings
    toolsets = data.get("toolsets")
    if isinstance(toolsets, list):
        for i, ts in enumerate(toolsets):
            if not isinstance(ts, str):
                errors.append(ValidationError(
                    f"toolsets[{i}]", "Toolset must be a string", "error"
                ))

    return errors


def validate_data(data: Dict[str, Any]) -> List[ValidationError]:
    """Run every post-parse check on an already-parsed config mapping.

    Required keys are presence- and type-checked by ``validate_required_keys``,
    so the generic type check covers only OPTIONAL_KEYS; checking the merged
    schema would report a wrong-typed required key twice.
    """
    errors = []
    errors.extend(validate_required_keys(data))
    errors.extend(validate_value_types(data, OPTIONAL_KEYS))
    errors.extend(validate_no_forbidden_keys(data))
    errors.extend(validate_nested(data))
    return errors


def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:
    """Full validation pipeline. Returns (valid, errors).

    ``valid`` is False when the YAML cannot be parsed into a mapping or when
    any finding has severity ``"error"``.
    """
    # Step 1: YAML syntax; without parsed data no further check can run.
    data, syntax_errors = validate_yaml_syntax(content)
    if data is None:
        return False, list(syntax_errors)

    # Steps 2-5: required keys, optional value types, forbidden keys, nested.
    all_errors = list(syntax_errors) + validate_data(data)

    # Any errors = invalid
    has_errors = any(e.severity == "error" for e in all_errors)
    return not has_errors, all_errors


def print_schema():
    """Print expected config schema."""
    print("Required keys:")
    for key, spec in REQUIRED_KEYS.items():
        print(f"  {key}: {spec['type'].__name__} — {spec['description']}")

    print("\nOptional keys:")
    for key, spec in OPTIONAL_KEYS.items():
        print(f"  {key}: {spec['type'].__name__} — {spec['description']}")

    print("\nForbidden keys:")
    for key, reason in FORBIDDEN_KEYS.items():
        print(f"  {key} — {reason}")


def main():
    """Command-line entry point; exits with status 1 on any failure."""
    parser = argparse.ArgumentParser(description="Validate timmy-config YAML files")
    parser.add_argument("file", nargs="?", help="Config file to validate")
    parser.add_argument("--deploy", action="store_true", help="Validate then deploy (write)")
    parser.add_argument("--schema", action="store_true", help="Show expected schema")
    args = parser.parse_args()

    if args.schema:
        print_schema()
        return

    if not args.file:
        parser.print_help()
        return

    path = Path(args.file)
    if not path.exists():
        print(f"Error: {path} not found")
        sys.exit(1)

    # Read explicitly as UTF-8 rather than the locale default, and report an
    # unreadable path (directory, permissions, bad bytes) without a traceback.
    try:
        content = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        print(f"Error: cannot read {path}: {exc}")
        sys.exit(1)

    valid, errors = validate_config(content)

    if errors:
        print(f"Validation results for {path}:")
        for err in errors:
            print(f"  {err}")
        print()

    if valid:
        print(f"✓ {path} is valid")
        if args.deploy:
            print(f"Deploying {path}...")
            # Placeholder: nothing is written yet. In real usage, this would
            # write to ~/.hermes/config.yaml
            print("Deploy complete.")
    else:
        print(f"✗ {path} has {sum(1 for e in errors if e.severity == 'error')} error(s)")
        print("Fix errors before deploying.")
        sys.exit(1)


if __name__ == "__main__":
    main()