357 lines
13 KiB
Python
357 lines
13 KiB
Python
|
|
#!/usr/bin/env python3
"""
config_validate.py — Pre-deploy validation for timmy-config YAML files (Issue #690).

Validates YAML syntax, required keys, value types, and forbidden keys before
writing config to disk. Prevents broken deploys from bad config.

Usage:
    python3 scripts/config_validate.py config.yaml                 # Validate only
    python3 scripts/config_validate.py config.yaml --deploy DEST   # Validate, then copy to DEST
    python3 scripts/config_validate.py --schema                    # Print expected schema
    python3 scripts/config_validate.py --check-dir config/         # Validate all YAML in dir
    python3 scripts/config_validate.py --json config.yaml          # Output as JSON
"""
|
||
|
|
import argparse
|
||
|
|
import json
|
||
|
|
import sys
|
||
|
|
import shutil
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import Any, Dict, List, Optional, Tuple
|
||
|
|
|
||
|
|
try:
|
||
|
|
import yaml
|
||
|
|
HAS_YAML = True
|
||
|
|
except ImportError:
|
||
|
|
HAS_YAML = False
|
||
|
|
|
||
|
|
|
||
|
|
# ─── Schema definition ───────────────────────────────────────────────
|
||
|
|
|
||
|
|
def _optional(kind, **extra):
    """Return the spec of a non-required key of type *kind*, plus any extra spec fields."""
    return {"type": kind, "required": False, **extra}


# Expected layout of a config file, keyed by top-level name. Each spec holds:
#   "type"         - class (or tuple of classes) the value is isinstance-checked against
#   "required"     - whether the key must be present
#   "description"  - text shown by the schema dump
#   "item_type"    - for lists: class every element is checked against
#   "value_schema" - for dicts: nested specs, keyed by the dict's own keys
#
# NOTE(review): "providers" is described as "keyed by name", yet its
# value_schema is matched against the direct keys of the section rather than
# against each provider's settings — confirm that this is intended.
SCHEMA = {
    "model": {
        "type": str,
        "required": True,
        "description": "Default model identifier (e.g. 'nousresearch/hermes-4-14b')",
    },
    "provider": _optional(str, description="Default provider name"),
    "providers": _optional(
        dict,
        description="Provider configurations keyed by name",
        value_schema={
            "base_url": _optional(str),
            "api_key_env": _optional(str),
        },
    ),
    "fallback_providers": _optional(
        list,
        description="Ordered fallback provider chain",
        item_type=str,
    ),
    "toolsets": _optional(
        list,
        description="Enabled toolset names",
        item_type=str,
    ),
    "agent": _optional(
        dict,
        description="Agent behavior configuration",
        value_schema={
            "max_iterations": _optional(int),
            "temperature": _optional((int, float)),
            "save_trajectories": _optional(bool),
            "quiet_mode": _optional(bool),
        },
    ),
    "display": _optional(
        dict,
        description="CLI display settings",
        value_schema={
            "spinner": _optional(bool),
            "colors": _optional(bool),
            "skin": _optional(str),
            "tool_progress": _optional(bool),
        },
    ),
    "gateway": _optional(
        dict,
        description="Gateway/messaging settings",
        value_schema={
            "enabled": _optional(bool),
            "port": _optional(int),
            "cors_origins": _optional(list),
        },
    ),
    "cron": _optional(
        dict,
        description="Cron scheduler settings",
        value_schema={
            "enabled": _optional(bool),
            "interval_seconds": _optional(int),
            "max_concurrent": _optional(int),
        },
    ),
    "logging": _optional(
        dict,
        description="Logging configuration",
        value_schema={
            "level": _optional(str),
            # None is accepted so that "file:" may be left empty in YAML.
            "file": _optional((str, type(None))),
        },
    ),
    "session": _optional(
        dict,
        description="Session behavior",
        value_schema={
            "save_trajectories": _optional(bool),
            "max_iterations": _optional(int),
            "context_compression": _optional(bool),
        },
    ),
}
|
||
|
|
|
||
|
|
# Key names that must never appear in a config file, mapped to the message
# reported when one is found. Lookups use the lower-cased key name.
FORBIDDEN_KEYS = dict(
    anthropic_api_key="Use ANTHROPIC_API_KEY env var — never store keys in config",
    openai_api_key="Use OPENAI_API_KEY env var — never store keys in config",
    openrouter_api_key="Use OPENROUTER_API_KEY env var — never store keys in config",
    password="Never store passwords in config",
    secret="Never store secrets in config",
    token="Never store tokens in config — use env vars",
)
|
||
|
|
|
||
|
|
|
||
|
|
# ─── Validation errors ───────────────────────────────────────────────
|
||
|
|
|
||
|
|
class ValidationError:
    """A single validation finding attached to a config path.

    Despite the name this is a plain record, not an exception: it does not
    derive from ``Exception`` and is collected in lists rather than raised.

    Attributes:
        path: Dotted location of the offending key (or a label such as "root").
        message: Human-readable description of the problem.
        severity: "error" for blocking findings; anything else is rendered
            as a warning.
    """

    def __init__(self, path: str, message: str, severity: str = "error"):
        self.path, self.message, self.severity = path, message, severity

    def to_dict(self) -> dict:
        """Return the finding as a plain dict, suitable for JSON output."""
        return dict(path=self.path, message=self.message, severity=self.severity)

    def __str__(self):
        if self.severity == "error":
            label = "ERROR"
        else:
            label = "WARN"
        return "[{}] {}: {}".format(label, self.path, self.message)

    def __repr__(self):
        return "ValidationError({!r}, {!r}, {!r})".format(
            self.path, self.message, self.severity
        )
|
||
|
|
|
||
|
|
|
||
|
|
# ─── Core validators ─────────────────────────────────────────────────
|
||
|
|
|
||
|
|
def validate_yaml_syntax(content: str) -> Tuple[Optional[dict], List[ValidationError]]:
    """Parse *content* as YAML and make sure the document is a mapping.

    Returns:
        ``(data, findings)``. ``data`` is the parsed mapping, an empty dict
        (with a warning) when the document is empty, or ``None`` when PyYAML
        is unavailable, the text does not parse, or the top level is not a
        mapping.
    """
    if not HAS_YAML:
        return None, [ValidationError("root", "PyYAML not installed — pip install pyyaml")]

    try:
        parsed = yaml.safe_load(content)
    except yaml.YAMLError as exc:
        return None, [ValidationError("syntax", str(exc))]

    if isinstance(parsed, dict):
        return parsed, []
    if parsed is None:
        # safe_load yields None for an empty document.
        return {}, [ValidationError("root", "Config file is empty", "warning")]
    return None, [ValidationError("root", f"Expected mapping, got {type(parsed).__name__}")]
|
||
|
|
|
||
|
|
|
||
|
|
def validate_required_keys(data: dict) -> List[ValidationError]:
    """Check that every required top-level key exists and has the right type.

    Args:
        data: Parsed top-level config mapping.

    Returns:
        One error per required key that is missing or has the wrong type;
        an empty list when all required keys are fine.
    """
    errors: List[ValidationError] = []
    for key, spec in SCHEMA.items():
        if not spec.get("required"):
            continue
        if key not in data:
            errors.append(ValidationError(key, f"Required key '{key}' is missing"))
            continue
        expected = spec["type"]
        if not isinstance(data[key], expected):
            # A spec may give a tuple of accepted types; a tuple has no
            # __name__, so build the label from its members.
            kinds = expected if isinstance(expected, tuple) else (expected,)
            expected_name = " or ".join(kind.__name__ for kind in kinds)
            errors.append(ValidationError(
                key,
                f"Expected {expected_name}, got {type(data[key]).__name__}",
            ))
    return errors
|
||
|
|
|
||
|
|
|
||
|
|
def _matches_type(value, expected) -> bool:
    """Return True if *value* is an acceptable instance of *expected*.

    ``bool`` is a subclass of ``int``, so a plain isinstance() check would
    accept ``true`` where a number is required; booleans only match when
    ``bool`` is listed explicitly.
    """
    if isinstance(value, bool):
        kinds = expected if isinstance(expected, tuple) else (expected,)
        return bool in kinds
    return isinstance(value, expected)


def _type_label(expected) -> str:
    """Return a readable name for a type or a tuple of types."""
    kinds = expected if isinstance(expected, tuple) else (expected,)
    return " or ".join(kind.__name__ for kind in kinds)


def validate_value_types(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
    """Check that every known key present in *data* has the expected type.

    Keys absent from *data* are skipped (presence is checked elsewhere), and
    keys absent from *schema* are ignored. List elements are checked against
    ``item_type`` and nested dicts are checked recursively against
    ``value_schema`` when the spec provides them.

    Args:
        data: Mapping to check.
        schema: Specs keyed by name; defaults to the module-level SCHEMA.
        prefix: Dotted path of *data* inside the whole config, used to build
            the path reported with each error.

    Returns:
        A list of errors, empty when all present values have valid types.
    """
    if schema is None:
        schema = SCHEMA
    errors: List[ValidationError] = []
    for key, spec in schema.items():
        if key not in data:
            continue
        full_key = f"{prefix}.{key}" if prefix else key
        value = data[key]
        expected = spec["type"]
        if not _matches_type(value, expected):
            errors.append(ValidationError(
                full_key,
                f"Expected {_type_label(expected)}, got {type(value).__name__}",
            ))
            # The container itself is wrong, so there is nothing to descend into.
            continue
        # Check list item types
        if isinstance(value, list) and "item_type" in spec:
            item_type = spec["item_type"]
            for i, item in enumerate(value):
                if not _matches_type(item, item_type):
                    errors.append(ValidationError(
                        f"{full_key}[{i}]",
                        f"Expected {_type_label(item_type)}, got {type(item).__name__}",
                    ))
        # Recurse into nested dicts
        if isinstance(value, dict) and "value_schema" in spec:
            errors.extend(validate_value_types(value, spec["value_schema"], full_key))
    return errors
|
||
|
|
|
||
|
|
|
||
|
|
def validate_no_forbidden_keys(data: dict, prefix: str = "") -> List[ValidationError]:
    """Report keys that must never be stored in a config file.

    Key names are compared case-insensitively against FORBIDDEN_KEYS. Nested
    mappings are scanned recursively, including mappings that sit inside
    lists.

    Args:
        data: Mapping to scan.
        prefix: Dotted path of *data* inside the whole config, used to build
            the path reported with each error.

    Returns:
        One error per forbidden key found, empty when there are none.
    """
    errors: List[ValidationError] = []
    for key, value in data.items():
        full_key = f"{prefix}.{key}" if prefix else str(key)
        # YAML allows non-string keys (e.g. "1:" or "true:"); those have no
        # .lower() and cannot be forbidden names.
        lowered = key.lower() if isinstance(key, str) else None
        if lowered in FORBIDDEN_KEYS:
            errors.append(ValidationError(full_key, FORBIDDEN_KEYS[lowered]))
        if isinstance(value, dict):
            errors.extend(validate_no_forbidden_keys(value, full_key))
        elif isinstance(value, list):
            # Secrets can also hide in a list of mappings.
            for index, item in enumerate(value):
                if isinstance(item, dict):
                    errors.extend(
                        validate_no_forbidden_keys(item, f"{full_key}[{index}]")
                    )
    return errors
|
||
|
|
|
||
|
|
|
||
|
|
def validate_unknown_keys(data: dict, schema: dict = None, prefix: str = "") -> List[ValidationError]:
    """Produce a warning for every key in *data* that the schema does not list.

    Known keys holding a dict are descended into when their spec carries a
    ``value_schema``. Findings have severity "warning", never "error".
    """
    active = SCHEMA if schema is None else schema
    findings: List[ValidationError] = []
    for name, value in data.items():
        path = f"{prefix}.{name}" if prefix else name
        if name not in active:
            findings.append(ValidationError(path, "Unknown key — not in schema", "warning"))
            continue
        spec = active[name]
        if "value_schema" in spec and isinstance(value, dict):
            findings.extend(validate_unknown_keys(value, spec["value_schema"], path))
    return findings
|
||
|
|
|
||
|
|
|
||
|
|
# ─── Public API ───────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:
    """Run the full validation pipeline over YAML text.

    Args:
        content: Raw YAML text of the config.

    Returns:
        ``(is_valid, findings)``. ``is_valid`` is False when the text cannot
        be parsed into a mapping or when any finding has severity "error";
        warnings alone do not make a config invalid.
    """
    data, errors = validate_yaml_syntax(content)
    if data is None:
        return False, errors

    for check in (
        validate_required_keys,
        validate_value_types,
        validate_no_forbidden_keys,
        validate_unknown_keys,
    ):
        errors.extend(check(data))

    # The required-key check and the type check both flag a required key with
    # the wrong type; keep only the first copy of identical findings so the
    # report and the error count are not inflated.
    seen = set()
    unique: List[ValidationError] = []
    for err in errors:
        fingerprint = (str(err.path), err.message, err.severity)
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        unique.append(err)

    has_errors = any(e.severity == "error" for e in unique)
    return not has_errors, unique
|
||
|
|
|
||
|
|
|
||
|
|
def validate_file(path: str) -> Tuple[bool, List[ValidationError]]:
    """Validate a YAML file on disk.

    Problems with the file itself (missing, not a regular file, unreadable,
    not UTF-8) are returned as findings instead of being raised, so that a
    directory scan can continue with the remaining files.

    Args:
        path: Location of the YAML file.

    Returns:
        ``(is_valid, findings)`` as produced by ``validate_config``.
    """
    p = Path(path)
    if not p.exists():
        return False, [ValidationError(str(p), "File not found")]
    if not p.is_file():
        # exists() is also true for directories, which cannot be read as text.
        return False, [ValidationError(str(p), "Not a regular file")]
    try:
        content = p.read_text(encoding="utf-8")
    except UnicodeDecodeError as exc:
        return False, [ValidationError(str(p), f"File is not valid UTF-8: {exc}")]
    except OSError as exc:
        return False, [ValidationError(str(p), f"Cannot read file: {exc}")]
    return validate_config(content)
|
||
|
|
|
||
|
|
|
||
|
|
# ─── Schema dump ──────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
def dump_schema(schema: Optional[dict] = None, prefix: str = "", indent: int = 0) -> List[str]:
    """Render a schema as human-readable text lines.

    Each entry becomes ``<path>: <type>[ (required)][ — <description>]``.
    Specs that accept several types are shown as ``a | b``. Entries of a
    nested ``value_schema`` follow their parent, indented one extra space
    per level.

    Args:
        schema: Specs keyed by name; defaults to the module-level SCHEMA.
        prefix: Dotted path prepended to every key name.
        indent: Number of leading spaces for entries at this level.

    Returns:
        The rendered lines, in schema order.
    """
    if schema is None:
        schema = SCHEMA
    lines: List[str] = []
    pad = " " * indent
    for key, spec in schema.items():
        full_key = f"{prefix}.{key}" if prefix else key
        req = " (required)" if spec.get("required") else ""
        desc = spec.get("description", "")
        expected = spec["type"]
        # A tuple of accepted types has no __name__ of its own; name each member.
        kinds = expected if isinstance(expected, tuple) else (expected,)
        type_name = " | ".join(getattr(kind, "__name__", str(kind)) for kind in kinds)
        # Nested entries usually have no description; skip the dangling dash.
        suffix = f" — {desc}" if desc else ""
        lines.append(f"{pad}{full_key}: {type_name}{req}{suffix}")
        if "value_schema" in spec:
            lines.extend(dump_schema(spec["value_schema"], full_key, indent + 1))
    return lines
|
||
|
|
|
||
|
|
|
||
|
|
# ─── CLI ──────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
def main():
    """Command-line entry point.

    Modes, checked in this order:
      --schema         print the expected schema and return.
      --check-dir DIR  validate every .yaml/.yml file directly inside DIR.
      FILE             validate one file; with --deploy DEST, copy it to DEST
                       once it has passed validation.

    Exits with status 1 when validation fails, DIR is not a directory, or
    the deploy copy fails. With --json, stdout carries only the JSON
    document.
    """
    parser = argparse.ArgumentParser(description="Validate timmy-config YAML before deploy")
    parser.add_argument("file", nargs="?", help="YAML file to validate")
    parser.add_argument("--deploy", metavar="DEST", help="Validate then copy to DEST")
    parser.add_argument("--schema", action="store_true", help="Print expected schema")
    parser.add_argument("--check-dir", metavar="DIR", help="Validate all .yaml/.yml in directory")
    parser.add_argument("--json", action="store_true", dest="as_json", help="Output as JSON")
    args = parser.parse_args()

    if args.schema:
        print("timmy-config schema:\n")
        for line in dump_schema():
            print(f" {line}")
        return

    if args.check_dir:
        d = Path(args.check_dir)
        if not d.is_dir():
            print(f"ERROR: {d} is not a directory", file=sys.stderr)
            sys.exit(1)
        # Match exactly the advertised extensions and skip directories that
        # happen to carry a YAML-looking name.
        yaml_files = sorted(
            entry for entry in d.iterdir()
            if entry.suffix in (".yaml", ".yml") and entry.is_file()
        )
        all_valid = True
        results = []
        for yf in yaml_files:
            valid, errors = validate_file(str(yf))
            results.append({"file": str(yf), "valid": valid, "errors": [e.to_dict() for e in errors]})
            if not valid:
                all_valid = False
            if not args.as_json:
                # Plain-text lines would corrupt the JSON document on stdout.
                for e in errors:
                    if e.severity == "error":
                        print(f" {yf.name}: {e}")
        if args.as_json:
            print(json.dumps(results, indent=2))
        elif all_valid:
            print(f"OK: All YAML in {d} valid")
        sys.exit(0 if all_valid else 1)

    if not args.file:
        parser.error("FILE required (or use --schema / --check-dir)")

    valid, errors = validate_file(args.file)

    if args.as_json:
        out = {"file": args.file, "valid": valid, "errors": [e.to_dict() for e in errors]}
        print(json.dumps(out, indent=2))
    else:
        for e in errors:
            print(f" {e}")
        if valid:
            print(f"OK: {args.file} is valid")
        else:
            print(f"FAIL: {args.file} has {sum(1 for e in errors if e.severity == 'error')} errors")

    if not valid:
        sys.exit(1)

    # Deploy mode: copy validated file to destination
    if args.deploy:
        dest = Path(args.deploy)
        try:
            shutil.copy2(args.file, dest)
        except OSError as exc:
            # shutil.Error (including SameFileError) derives from OSError.
            print(f"ERROR: deploy to {dest} failed: {exc}", file=sys.stderr)
            sys.exit(1)
        # Keep stdout limited to the JSON document when --json is active.
        print(
            f"DEPLOYED: {args.file} -> {dest}",
            file=sys.stderr if args.as_json else sys.stdout,
        )
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # main() returns None when it finishes normally, which gives exit status
    # 0; failure paths leave earlier through sys.exit() inside main().
    raise SystemExit(main())
|