feat: Add pre-deploy config validation — YAML syntax, keys, types, forbidden keys (#690)
scripts/config_validate.py (new file, 356 lines)
@@ -0,0 +1,356 @@
#!/usr/bin/env python3
"""
config_validate.py — Pre-deploy validation for timmy-config YAML files (Issue #690).

Validates YAML syntax, required keys, value types, and forbidden keys before
writing config to disk. Prevents broken deploys from bad config.

Usage:
    python3 scripts/config_validate.py config.yaml               # Validate only
    python3 scripts/config_validate.py config.yaml --deploy DEST # Validate, then copy to DEST
    python3 scripts/config_validate.py --schema                  # Print expected schema
    python3 scripts/config_validate.py --check-dir config/       # Validate all YAML in dir
    python3 scripts/config_validate.py --json config.yaml        # Output as JSON
"""

import argparse
import json
import shutil
import sys
from pathlib import Path
from typing import List, Optional, Tuple

try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False


# ─── Schema definition ───────────────────────────────────────────────
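# Each top-level entry maps a config key to a spec dict. Fields consumed by
# the validators below: "type" (expected Python type, or a tuple of types),
# "required" (bool), "description" (shown by --schema), "item_type" (expected
# element type for list values), and "value_schema" (nested spec for dict
# values, validated recursively).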

SCHEMA = {
    "model": {
        "type": str,
        "required": True,
        "description": "Default model identifier (e.g. 'nousresearch/hermes-4-14b')",
    },
    "provider": {
        "type": str,
        "required": False,
        "description": "Default provider name",
    },
    "providers": {
        "type": dict,
        "required": False,
        "description": "Provider configurations keyed by name",
        "value_schema": {
            "base_url": {"type": str, "required": False},
            "api_key_env": {"type": str, "required": False},
        },
    },
    "fallback_providers": {
        "type": list,
        "required": False,
        "description": "Ordered fallback provider chain",
        "item_type": str,
    },
    "toolsets": {
        "type": list,
        "required": False,
        "description": "Enabled toolset names",
        "item_type": str,
    },
    "agent": {
        "type": dict,
        "required": False,
        "description": "Agent behavior configuration",
        "value_schema": {
            "max_iterations": {"type": int, "required": False},
            "temperature": {"type": (int, float), "required": False},
            "save_trajectories": {"type": bool, "required": False},
            "quiet_mode": {"type": bool, "required": False},
        },
    },
    "display": {
        "type": dict,
        "required": False,
        "description": "CLI display settings",
        "value_schema": {
            "spinner": {"type": bool, "required": False},
            "colors": {"type": bool, "required": False},
            "skin": {"type": str, "required": False},
            "tool_progress": {"type": bool, "required": False},
        },
    },
    "gateway": {
        "type": dict,
        "required": False,
        "description": "Gateway/messaging settings",
        "value_schema": {
            "enabled": {"type": bool, "required": False},
            "port": {"type": int, "required": False},
            "cors_origins": {"type": list, "required": False},
        },
    },
    "cron": {
        "type": dict,
        "required": False,
        "description": "Cron scheduler settings",
        "value_schema": {
            "enabled": {"type": bool, "required": False},
            "interval_seconds": {"type": int, "required": False},
            "max_concurrent": {"type": int, "required": False},
        },
    },
    "logging": {
        "type": dict,
        "required": False,
        "description": "Logging configuration",
        "value_schema": {
            "level": {"type": str, "required": False},
            "file": {"type": (str, type(None)), "required": False},
        },
    },
    "session": {
        "type": dict,
        "required": False,
        "description": "Session behavior",
        "value_schema": {
            "save_trajectories": {"type": bool, "required": False},
            "max_iterations": {"type": int, "required": False},
            "context_compression": {"type": bool, "required": False},
        },
    },
}
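
# For illustration: "model" is the only required key, so the smallest config
# that validates is a one-line file such as
#
#     model: nousresearch/hermes-4-14b
#
# (identifier borrowed from the schema description above).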

FORBIDDEN_KEYS = {
    "anthropic_api_key": "Use ANTHROPIC_API_KEY env var — never store keys in config",
    "openai_api_key": "Use OPENAI_API_KEY env var — never store keys in config",
    "openrouter_api_key": "Use OPENROUTER_API_KEY env var — never store keys in config",
    "password": "Never store passwords in config",
    "secret": "Never store secrets in config",
    "token": "Never store tokens in config — use env vars",
}
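
# Matching in validate_no_forbidden_keys() is case-insensitive but exact:
# key.lower() is looked up in this mapping, so e.g. "API_TOKEN" passes while
# "token" is rejected at any nesting depth.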


# ─── Validation errors ───────────────────────────────────────────────

class ValidationError:
    """A single validation finding (a plain record, not a raised exception)."""

    def __init__(self, path: str, message: str, severity: str = "error"):
        self.path = path
        self.message = message
        self.severity = severity

    def to_dict(self) -> dict:
        return {"path": self.path, "message": self.message, "severity": self.severity}

    def __str__(self):
        tag = "ERROR" if self.severity == "error" else "WARN"
        return f"[{tag}] {self.path}: {self.message}"

    def __repr__(self):
        return f"ValidationError({self.path!r}, {self.message!r}, {self.severity!r})"


# ─── Core validators ─────────────────────────────────────────────────

def _type_name(t) -> str:
    """Human-readable name for a type or a tuple of types (e.g. 'int/float')."""
    if isinstance(t, tuple):
        return "/".join(x.__name__ for x in t)
    return t.__name__


def validate_yaml_syntax(content: str) -> Tuple[Optional[dict], List[ValidationError]]:
    """Parse YAML, return (data, errors)."""
    errors: List[ValidationError] = []
    if not HAS_YAML:
        return None, [ValidationError("root", "PyYAML not installed — pip install pyyaml")]
    try:
        data = yaml.safe_load(content)
    except yaml.YAMLError as e:
        return None, [ValidationError("syntax", str(e))]
    if data is None:
        return {}, [ValidationError("root", "Config file is empty", "warning")]
    if not isinstance(data, dict):
        return None, [ValidationError("root", f"Expected mapping, got {type(data).__name__}")]
    return data, errors


def validate_required_keys(data: dict) -> List[ValidationError]:
    """Check that required keys exist and have the expected type."""
    errors: List[ValidationError] = []
    for key, spec in SCHEMA.items():
        if spec.get("required"):
            if key not in data:
                errors.append(ValidationError(key, f"Required key '{key}' is missing"))
            elif not isinstance(data[key], spec["type"]):
                errors.append(ValidationError(
                    key,
                    f"Expected {_type_name(spec['type'])}, got {type(data[key]).__name__}"
                ))
    return errors
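
# Note: a required key that is present but mistyped is also reported by
# validate_value_types(), so such a value currently yields two findings.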


def validate_value_types(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
    """Check that all known keys have the expected types, recursively."""
    if schema is None:
        schema = SCHEMA
    errors: List[ValidationError] = []
    for key, spec in schema.items():
        full_key = f"{prefix}.{key}" if prefix else key
        if key not in data:
            continue
        value = data[key]
        expected = spec["type"]
        if not isinstance(value, expected):
            # _type_name handles tuple specs like (int, float), which would
            # otherwise crash on expected.__name__.
            errors.append(ValidationError(
                full_key,
                f"Expected {_type_name(expected)}, got {type(value).__name__}"
            ))
            continue
        # Check list item types
        if isinstance(value, list) and "item_type" in spec:
            for i, item in enumerate(value):
                if not isinstance(item, spec["item_type"]):
                    errors.append(ValidationError(
                        f"{full_key}[{i}]",
                        f"Expected {spec['item_type'].__name__}, got {type(item).__name__}"
                    ))
        # Recurse into nested dicts
        if isinstance(value, dict) and "value_schema" in spec:
            errors.extend(validate_value_types(value, spec["value_schema"], full_key))
    return errors


def validate_no_forbidden_keys(data: dict, prefix: str = "") -> List[ValidationError]:
    """Check for keys that should never be in config."""
    errors: List[ValidationError] = []
    for key, value in data.items():
        full_key = f"{prefix}.{key}" if prefix else key
        if key.lower() in FORBIDDEN_KEYS:
            errors.append(ValidationError(full_key, FORBIDDEN_KEYS[key.lower()]))
        if isinstance(value, dict):
            errors.extend(validate_no_forbidden_keys(value, full_key))
    return errors


def validate_unknown_keys(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
    """Warn about keys not in the schema (a warning, not an error)."""
    if schema is None:
        schema = SCHEMA
    warnings: List[ValidationError] = []
    known = set(schema.keys())
    for key in data:
        full_key = f"{prefix}.{key}" if prefix else key
        if key not in known:
            warnings.append(ValidationError(full_key, "Unknown key — not in schema", "warning"))
        elif isinstance(data[key], dict) and "value_schema" in schema[key]:
            warnings.extend(validate_unknown_keys(data[key], schema[key]["value_schema"], full_key))
    return warnings


# ─── Public API ───────────────────────────────────────────────────────

def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:
    """Full validation pipeline. Returns (is_valid, errors)."""
    data, errors = validate_yaml_syntax(content)
    if data is None:
        return False, errors

    errors.extend(validate_required_keys(data))
    errors.extend(validate_value_types(data))
    errors.extend(validate_no_forbidden_keys(data))
    errors.extend(validate_unknown_keys(data))

    has_errors = any(e.severity == "error" for e in errors)
    return not has_errors, errors
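
# Programmatic use (hypothetical one-key config; see SCHEMA for valid keys):
#
#     ok, findings = validate_config("model: nousresearch/hermes-4-14b\n")
#     assert ok and not findings
#
# A mistyped nested value ("agent: {max_iterations: many}") returns ok == False
# with an ERROR finding at path "agent.max_iterations".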


def validate_file(path: str) -> Tuple[bool, List[ValidationError]]:
    """Validate a YAML file on disk."""
    p = Path(path)
    if not p.exists():
        return False, [ValidationError(str(p), "File not found")]
    content = p.read_text(encoding="utf-8")
    return validate_config(content)


# ─── Schema dump ──────────────────────────────────────────────────────

def dump_schema(schema: Optional[dict] = None, prefix: str = "", indent: int = 0) -> List[str]:
    """Pretty-print the schema as text lines."""
    if schema is None:
        schema = SCHEMA
    lines: List[str] = []
    for key, spec in schema.items():
        full_key = f"{prefix}.{key}" if prefix else key
        req = " (required)" if spec.get("required") else ""
        desc = spec.get("description", "")
        lines.append(f"{' ' * indent}{full_key}: {_type_name(spec['type'])}{req} — {desc}")
        if "value_schema" in spec:
            lines.extend(dump_schema(spec["value_schema"], full_key, indent + 1))
    return lines


# ─── CLI ──────────────────────────────────────────────────────────────

def main():
    parser = argparse.ArgumentParser(description="Validate timmy-config YAML before deploy")
    parser.add_argument("file", nargs="?", help="YAML file to validate")
    parser.add_argument("--deploy", metavar="DEST", help="Validate, then copy to DEST")
    parser.add_argument("--schema", action="store_true", help="Print expected schema")
    parser.add_argument("--check-dir", metavar="DIR", help="Validate all .yaml/.yml in directory")
    parser.add_argument("--json", action="store_true", dest="as_json", help="Output as JSON")
    args = parser.parse_args()

    if args.schema:
        lines = dump_schema()
        print("timmy-config schema:\n")
        for line in lines:
            print(f"  {line}")
        return

    if args.check_dir:
        d = Path(args.check_dir)
        if not d.is_dir():
            print(f"ERROR: {d} is not a directory", file=sys.stderr)
            sys.exit(1)
        all_valid = True
        results = []
        for yf in sorted(d.glob("*.y*ml")):
            valid, errors = validate_file(str(yf))
            results.append({"file": str(yf), "valid": valid, "errors": [e.to_dict() for e in errors]})
            if not valid:
                all_valid = False
                # Only print per-file errors in text mode, so --json stays
                # machine-parseable.
                if not args.as_json:
                    for e in errors:
                        if e.severity == "error":
                            print(f"  {yf.name}: {e}")
        if args.as_json:
            print(json.dumps(results, indent=2))
        elif all_valid:
            print(f"OK: All YAML in {d} valid")
        sys.exit(0 if all_valid else 1)

    if not args.file:
        parser.error("FILE required (or use --schema / --check-dir)")

    valid, errors = validate_file(args.file)

    if args.as_json:
        out = {"file": args.file, "valid": valid, "errors": [e.to_dict() for e in errors]}
        print(json.dumps(out, indent=2))
    else:
        for e in errors:
            print(f"  {e}")
        if valid:
            print(f"OK: {args.file} is valid")
        else:
            print(f"FAIL: {args.file} has {sum(1 for e in errors if e.severity == 'error')} errors")

    if not valid:
        sys.exit(1)

    # Deploy mode: copy validated file to destination
    if args.deploy:
        dest = Path(args.deploy)
        shutil.copy2(args.file, dest)
        print(f"DEPLOYED: {args.file} -> {dest}")


if __name__ == "__main__":
    main()
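
# Typical pre-deploy invocation (paths illustrative):
#
#     python3 scripts/config_validate.py config/timmy.yaml --deploy ~/.timmy/config.yaml
#
# The copy only happens after validation passes; an invalid file exits 1
# before the deploy step.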