Compare commits

...

2 Commits

2 changed files with 631 additions and 0 deletions

scripts/config_validate.py (new file, 356 additions)

@@ -0,0 +1,356 @@
#!/usr/bin/env python3
"""
config_validate.py — Pre-deploy validation for timmy-config YAML files (Issue #690).
Validates YAML syntax, required keys, value types, and forbidden keys before
writing config to disk. Prevents broken deploys from bad config.
Usage:
python3 scripts/config_validate.py config.yaml # Validate only
    python3 scripts/config_validate.py config.yaml --deploy DEST   # Validate, then copy to DEST
python3 scripts/config_validate.py --schema # Print expected schema
python3 scripts/config_validate.py --check-dir config/ # Validate all YAML in dir
python3 scripts/config_validate.py --json config.yaml # Output as JSON
"""
import argparse
import json
import sys
import shutil
from pathlib import Path
from typing import List, Optional, Tuple
try:
import yaml
HAS_YAML = True
except ImportError:
HAS_YAML = False
# ─── Schema definition ───────────────────────────────────────────────
SCHEMA = {
"model": {
"type": str,
"required": True,
"description": "Default model identifier (e.g. 'nousresearch/hermes-4-14b')",
},
"provider": {
"type": str,
"required": False,
"description": "Default provider name",
},
"providers": {
"type": dict,
"required": False,
"description": "Provider configurations keyed by name",
"value_schema": {
"base_url": {"type": str, "required": False},
"api_key_env": {"type": str, "required": False},
},
},
"fallback_providers": {
"type": list,
"required": False,
"description": "Ordered fallback provider chain",
"item_type": str,
},
"toolsets": {
"type": list,
"required": False,
"description": "Enabled toolset names",
"item_type": str,
},
"agent": {
"type": dict,
"required": False,
"description": "Agent behavior configuration",
"value_schema": {
"max_iterations": {"type": int, "required": False},
"temperature": {"type": (int, float), "required": False},
"save_trajectories": {"type": bool, "required": False},
"quiet_mode": {"type": bool, "required": False},
},
},
"display": {
"type": dict,
"required": False,
"description": "CLI display settings",
"value_schema": {
"spinner": {"type": bool, "required": False},
"colors": {"type": bool, "required": False},
"skin": {"type": str, "required": False},
"tool_progress": {"type": bool, "required": False},
},
},
"gateway": {
"type": dict,
"required": False,
"description": "Gateway/messaging settings",
"value_schema": {
"enabled": {"type": bool, "required": False},
"port": {"type": int, "required": False},
"cors_origins": {"type": list, "required": False},
},
},
"cron": {
"type": dict,
"required": False,
"description": "Cron scheduler settings",
"value_schema": {
"enabled": {"type": bool, "required": False},
"interval_seconds": {"type": int, "required": False},
"max_concurrent": {"type": int, "required": False},
},
},
"logging": {
"type": dict,
"required": False,
"description": "Logging configuration",
"value_schema": {
"level": {"type": str, "required": False},
"file": {"type": (str, type(None)), "required": False},
},
},
"session": {
"type": dict,
"required": False,
"description": "Session behavior",
"value_schema": {
"save_trajectories": {"type": bool, "required": False},
"max_iterations": {"type": int, "required": False},
"context_compression": {"type": bool, "required": False},
},
},
}
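# Illustration only (not shipped with the repo): a minimal config that satisfies
# this schema. Only "model" is required; every other section is optional.
#
#   model: nousresearch/hermes-4-14b
#   provider: openrouter
#   agent:
#     max_iterations: 90
#     temperature: 0.7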
FORBIDDEN_KEYS = {
"anthropic_api_key": "Use ANTHROPIC_API_KEY env var — never store keys in config",
"openai_api_key": "Use OPENAI_API_KEY env var — never store keys in config",
"openrouter_api_key": "Use OPENROUTER_API_KEY env var — never store keys in config",
"password": "Never store passwords in config",
"secret": "Never store secrets in config",
"token": "Never store tokens in config — use env vars",
}
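# Illustration: a config containing e.g. "openrouter_api_key: sk-..." fails
# validation with the message above; the supported pattern is to reference the
# env var through the provider block instead:
#
#   providers:
#     openrouter:
#       api_key_env: OPENROUTER_API_KEY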
# ─── Validation errors ───────────────────────────────────────────────
class ValidationError:
def __init__(self, path: str, message: str, severity: str = "error"):
self.path = path
self.message = message
self.severity = severity
def to_dict(self) -> dict:
return {"path": self.path, "message": self.message, "severity": self.severity}
def __str__(self):
tag = "ERROR" if self.severity == "error" else "WARN"
return f"[{tag}] {self.path}: {self.message}"
def __repr__(self):
return f"ValidationError({self.path!r}, {self.message!r}, {self.severity!r})"
# ─── Core validators ─────────────────────────────────────────────────
def validate_yaml_syntax(content: str) -> Tuple[Optional[dict], List[ValidationError]]:
"""Parse YAML, return (data, errors)."""
errors: List[ValidationError] = []
if not HAS_YAML:
return None, [ValidationError("root", "PyYAML not installed — pip install pyyaml")]
try:
data = yaml.safe_load(content)
except yaml.YAMLError as e:
return None, [ValidationError("syntax", str(e))]
if data is None:
return {}, [ValidationError("root", "Config file is empty", "warning")]
if not isinstance(data, dict):
return None, [ValidationError("root", f"Expected mapping, got {type(data).__name__}")]
return data, errors
def validate_required_keys(data: dict) -> List[ValidationError]:
"""Check required keys exist and have correct types."""
errors: List[ValidationError] = []
for key, spec in SCHEMA.items():
if spec.get("required"):
if key not in data:
errors.append(ValidationError(key, f"Required key '{key}' is missing"))
elif not isinstance(data[key], spec["type"]):
errors.append(ValidationError(
key,
f"Expected {spec['type'].__name__}, got {type(data[key]).__name__}"
))
return errors
def validate_value_types(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
"""Check all known keys have correct types, recursively."""
if schema is None:
schema = SCHEMA
errors: List[ValidationError] = []
for key, spec in schema.items():
full_key = f"{prefix}.{key}" if prefix else key
if key not in data:
continue
value = data[key]
expected = spec["type"]
        if not isinstance(value, expected):
            # spec["type"] may be a tuple of accepted types (e.g. (int, float))
            expected_name = (expected.__name__ if hasattr(expected, "__name__")
                             else " or ".join(t.__name__ for t in expected))
            errors.append(ValidationError(
                full_key,
                f"Expected {expected_name}, got {type(value).__name__}"
            ))
continue
# Check list item types
if isinstance(value, list) and "item_type" in spec:
for i, item in enumerate(value):
if not isinstance(item, spec["item_type"]):
errors.append(ValidationError(
f"{full_key}[{i}]",
f"Expected {spec['item_type'].__name__}, got {type(item).__name__}"
))
# Recurse into nested dicts
if isinstance(value, dict) and "value_schema" in spec:
errors.extend(validate_value_types(value, spec["value_schema"], full_key))
return errors
def validate_no_forbidden_keys(data: dict, prefix: str = "") -> List[ValidationError]:
"""Check for keys that should never be in config."""
errors: List[ValidationError] = []
for key, value in data.items():
full_key = f"{prefix}.{key}" if prefix else key
if key.lower() in FORBIDDEN_KEYS:
errors.append(ValidationError(full_key, FORBIDDEN_KEYS[key.lower()]))
if isinstance(value, dict):
errors.extend(validate_no_forbidden_keys(value, full_key))
return errors
def validate_unknown_keys(data: dict, schema: Optional[dict] = None, prefix: str = "") -> List[ValidationError]:
"""Warn about keys not in schema (not an error, just a warning)."""
if schema is None:
schema = SCHEMA
warnings: List[ValidationError] = []
known = set(schema.keys())
for key in data:
full_key = f"{prefix}.{key}" if prefix else key
if key not in known:
warnings.append(ValidationError(full_key, "Unknown key — not in schema", "warning"))
elif isinstance(data[key], dict) and key in schema and "value_schema" in schema[key]:
warnings.extend(validate_unknown_keys(data[key], schema[key]["value_schema"], full_key))
return warnings
# ─── Public API ───────────────────────────────────────────────────────
def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:
"""Full validation pipeline. Returns (is_valid, errors)."""
data, errors = validate_yaml_syntax(content)
if data is None:
return False, errors
errors.extend(validate_required_keys(data))
errors.extend(validate_value_types(data))
errors.extend(validate_no_forbidden_keys(data))
errors.extend(validate_unknown_keys(data))
has_errors = any(e.severity == "error" for e in errors)
return not has_errors, errors
def validate_file(path: str) -> Tuple[bool, List[ValidationError]]:
"""Validate a YAML file on disk."""
p = Path(path)
if not p.exists():
return False, [ValidationError(str(p), "File not found")]
content = p.read_text(encoding="utf-8")
return validate_config(content)
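# Sketch of programmatic use (illustrative; the path and variable names are
# hypothetical, not part of this script):
#   ok, problems = validate_file("config.yaml")
#   if not ok:
#       for p in problems:
#           print(p)  # e.g. "[ERROR] model: Required key 'model' is missing"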
# ─── Schema dump ──────────────────────────────────────────────────────
def dump_schema(schema: Optional[dict] = None, prefix: str = "", indent: int = 0) -> List[str]:
"""Pretty-print schema as text."""
if schema is None:
schema = SCHEMA
lines: List[str] = []
for key, spec in schema.items():
full_key = f"{prefix}.{key}" if prefix else key
req = " (required)" if spec.get("required") else ""
desc = spec.get("description", "")
        type_name = (spec["type"].__name__ if hasattr(spec["type"], "__name__")
                     else " or ".join(t.__name__ for t in spec["type"]))
        suffix = f" - {desc}" if desc else ""
        lines.append(f"{' ' * indent}{full_key}: {type_name}{req}{suffix}")
if "value_schema" in spec:
lines.extend(dump_schema(spec["value_schema"], full_key, indent + 1))
return lines
# ─── CLI ──────────────────────────────────────────────────────────────
def main():
parser = argparse.ArgumentParser(description="Validate timmy-config YAML before deploy")
parser.add_argument("file", nargs="?", help="YAML file to validate")
parser.add_argument("--deploy", metavar="DEST", help="Validate then copy to DEST")
parser.add_argument("--schema", action="store_true", help="Print expected schema")
parser.add_argument("--check-dir", metavar="DIR", help="Validate all .yaml/.yml in directory")
parser.add_argument("--json", action="store_true", dest="as_json", help="Output as JSON")
args = parser.parse_args()
if args.schema:
lines = dump_schema()
print("timmy-config schema:\n")
for line in lines:
print(f" {line}")
return
if args.check_dir:
d = Path(args.check_dir)
if not d.is_dir():
print(f"ERROR: {d} is not a directory", file=sys.stderr)
sys.exit(1)
all_valid = True
results = []
for yf in sorted(d.glob("*.y*ml")):
valid, errors = validate_file(str(yf))
results.append({"file": str(yf), "valid": valid, "errors": [e.to_dict() for e in errors]})
if not valid:
all_valid = False
                if not args.as_json:
                    for e in errors:
                        if e.severity == "error":
                            print(f"  {yf.name}: {e}")
if args.as_json:
print(json.dumps(results, indent=2))
elif all_valid:
print(f"OK: All YAML in {d} valid")
sys.exit(0 if all_valid else 1)
if not args.file:
parser.error("FILE required (or use --schema / --check-dir)")
valid, errors = validate_file(args.file)
if args.as_json:
out = {"file": args.file, "valid": valid, "errors": [e.to_dict() for e in errors]}
print(json.dumps(out, indent=2))
else:
for e in errors:
print(f" {e}")
if valid:
print(f"OK: {args.file} is valid")
        else:
            n_errors = sum(1 for e in errors if e.severity == "error")
            print(f"FAIL: {args.file} has {n_errors} error(s)")
if not valid:
sys.exit(1)
# Deploy mode: copy validated file to destination
if args.deploy:
dest = Path(args.deploy)
shutil.copy2(args.file, dest)
print(f"DEPLOYED: {args.file} -> {dest}")
if __name__ == "__main__":
main()
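# Illustrative shell session (file name "bad.yaml" is hypothetical; the output
# shape follows the prints above): a config missing the required "model" key
# fails and exits 1, so a deploy pipeline can gate on the return code.
#   $ python3 scripts/config_validate.py bad.yaml
#     [ERROR] model: Required key 'model' is missing
#   FAIL: bad.yaml has 1 error(s)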


@@ -0,0 +1,275 @@
#!/usr/bin/env python3
"""Tests for config_validate.py — issue #690."""
import json
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
from config_validate import (
validate_config,
validate_file,
validate_yaml_syntax,
validate_required_keys,
validate_value_types,
validate_no_forbidden_keys,
validate_unknown_keys,
ValidationError,
SCHEMA,
)
class TestYAMLSyntax:
def test_valid_yaml(self):
data, errors = validate_yaml_syntax("model: gpt-4\nprovider: openai\n")
assert data is not None
assert errors == []
def test_empty_yaml(self):
data, errors = validate_yaml_syntax("")
assert data == {}
assert any(e.severity == "warning" for e in errors)
def test_invalid_yaml(self):
data, errors = validate_yaml_syntax("model: gpt-4\n bad: [\n")
assert data is None
assert len(errors) == 1
def test_non_mapping_yaml(self):
data, errors = validate_yaml_syntax("- item1\n- item2\n")
assert data is None
assert any("mapping" in e.message for e in errors)
class TestRequiredKeys:
def test_model_present(self):
errors = validate_required_keys({"model": "gpt-4"})
assert not any(e.path == "model" for e in errors)
def test_model_missing(self):
errors = validate_required_keys({"provider": "openai"})
assert any(e.path == "model" and "missing" in e.message.lower() for e in errors)
def test_model_wrong_type(self):
errors = validate_required_keys({"model": 123})
assert any(e.path == "model" and "str" in e.message for e in errors)
class TestValueTypes:
def test_correct_types(self):
data = {"model": "gpt-4", "agent": {"max_iterations": 90, "temperature": 0.7}}
errors = validate_value_types(data)
assert errors == []
def test_wrong_agent_type(self):
data = {"agent": {"max_iterations": "ninety"}}
errors = validate_value_types(data)
assert any("max_iterations" in e.path and "int" in e.message for e in errors)
def test_wrong_display_type(self):
data = {"display": {"spinner": "yes"}}
errors = validate_value_types(data)
assert any("spinner" in e.path and "bool" in e.message for e in errors)
def test_wrong_cron_type(self):
data = {"cron": {"interval_seconds": "5m"}}
errors = validate_value_types(data)
assert any("interval_seconds" in e.path for e in errors)
def test_list_item_types(self):
data = {"toolsets": ["web", "browser", 123]}
errors = validate_value_types(data)
assert any("toolsets[2]" in e.path for e in errors)
def test_nested_dict_depth(self):
data = {"providers": {"openrouter": {"base_url": 42}}}
errors = validate_value_types(data)
assert any("providers.openrouter.base_url" in e.path for e in errors)
class TestForbiddenKeys:
def test_no_forbidden(self):
errors = validate_no_forbidden_keys({"model": "gpt-4"})
assert errors == []
def test_password_rejected(self):
errors = validate_no_forbidden_keys({"model": "gpt-4", "password": "s3cret"})
assert any("password" in e.path for e in errors)
def test_secret_rejected(self):
errors = validate_no_forbidden_keys({"secret": "abc"})
assert any("secret" in e.path for e in errors)
    def test_nested_forbidden(self):
        # api_key itself is not a forbidden key; token is, even when nested
        errors = validate_no_forbidden_keys({"providers": {"x": {"api_key": "sk-xxx"}}})
        assert errors == []
        errors = validate_no_forbidden_keys({"providers": {"x": {"token": "tok"}}})
        assert any("token" in e.path for e in errors)
def test_api_key_env_allowed(self):
errors = validate_no_forbidden_keys({"providers": {"x": {"api_key_env": "MY_KEY"}}})
assert not any("api_key_env" in e.path for e in errors)
class TestUnknownKeys:
def test_known_keys_no_warnings(self):
warnings = validate_unknown_keys({"model": "gpt-4", "provider": "openai"})
assert warnings == []
def test_unknown_top_level_warns(self):
warnings = validate_unknown_keys({"model": "gpt-4", "custom_field": 1})
assert any("custom_field" in w.path and w.severity == "warning" for w in warnings)
class TestFullValidation:
def test_valid_config(self):
content = "model: nousresearch/hermes-4-14b\nprovider: openrouter\n"
valid, errors = validate_config(content)
assert valid
assert not any(e.severity == "error" for e in errors)
def test_missing_model(self):
content = "provider: openrouter\n"
valid, errors = validate_config(content)
assert not valid
assert any("model" in e.path for e in errors)
def test_forbidden_key(self):
content = "model: gpt-4\npassword: secret\n"
valid, errors = validate_config(content)
assert not valid
def test_invalid_yaml(self):
content = "model: [\n broken\n"
valid, errors = validate_config(content)
assert not valid
def test_full_realistic_config(self):
content = """
model: nousresearch/hermes-4-14b
provider: openrouter
providers:
openrouter:
base_url: https://openrouter.ai/api/v1
api_key_env: OPENROUTER_API_KEY
ollama:
base_url: http://localhost:11434
toolsets:
- web
- browser
agent:
max_iterations: 90
temperature: 0.7
save_trajectories: false
display:
spinner: true
colors: true
skin: default
cron:
enabled: false
interval_seconds: 300
gateway:
enabled: false
port: 8080
logging:
level: INFO
"""
valid, errors = validate_config(content)
assert valid, f"Unexpected errors: {errors}"
def test_warnings_dont_fail(self):
content = "model: gpt-4\ncustom_key: value\n"
valid, errors = validate_config(content)
assert valid # warnings don't make it invalid
assert any(e.severity == "warning" for e in errors)
class TestValidateFile:
def test_valid_file(self, tmp_path):
f = tmp_path / "config.yaml"
f.write_text("model: gpt-4\n")
valid, errors = validate_file(str(f))
assert valid
def test_missing_file(self):
valid, errors = validate_file("/nonexistent/config.yaml")
assert not valid
assert any("not found" in e.message for e in errors)
def test_roundtrip(self, tmp_path):
f = tmp_path / "config.yaml"
f.write_text("model: gpt-4\nagent:\n max_iterations: 50\n")
valid, errors = validate_file(str(f))
assert valid
class TestCLI:
def test_deploy_mode(self, tmp_path):
import subprocess
src = tmp_path / "src.yaml"
src.write_text("model: gpt-4\n")
dest = tmp_path / "deployed.yaml"
result = subprocess.run(
[sys.executable, str(Path(__file__).resolve().parent.parent / "scripts" / "config_validate.py"),
str(src), "--deploy", str(dest)],
capture_output=True, text=True
)
assert result.returncode == 0
assert dest.exists()
assert "model: gpt-4" in dest.read_text()
def test_deploy_rejects_invalid(self, tmp_path):
import subprocess
src = tmp_path / "bad.yaml"
src.write_text("provider: openai\n") # missing required model
dest = tmp_path / "should_not_exist.yaml"
result = subprocess.run(
[sys.executable, str(Path(__file__).resolve().parent.parent / "scripts" / "config_validate.py"),
str(src), "--deploy", str(dest)],
capture_output=True, text=True
)
assert result.returncode == 1
assert not dest.exists()
def test_schema_flag(self):
import subprocess
result = subprocess.run(
[sys.executable, str(Path(__file__).resolve().parent.parent / "scripts" / "config_validate.py"),
"--schema"],
capture_output=True, text=True
)
assert result.returncode == 0
assert "model:" in result.stdout
assert "required" in result.stdout
def test_json_output(self, tmp_path):
import subprocess
f = tmp_path / "config.yaml"
f.write_text("model: gpt-4\n")
result = subprocess.run(
[sys.executable, str(Path(__file__).resolve().parent.parent / "scripts" / "config_validate.py"),
str(f), "--json"],
capture_output=True, text=True
)
assert result.returncode == 0
out = json.loads(result.stdout)
assert out["valid"] is True
assert "errors" in out
def test_check_dir(self, tmp_path):
import subprocess
(tmp_path / "good.yaml").write_text("model: gpt-4\n")
(tmp_path / "bad.yaml").write_text("provider: openai\n")
result = subprocess.run(
[sys.executable, str(Path(__file__).resolve().parent.parent / "scripts" / "config_validate.py"),
"--check-dir", str(tmp_path)],
capture_output=True, text=True
)
assert result.returncode == 1 # bad.yaml fails
assert "bad.yaml" in result.stdout