Compare commits

...

6 Commits

Author SHA1 Message Date
fdc1ce0d2c test: sync normalize code blocks tests (#750)
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 24s
Smoke Test / smoke (pull_request) Failing after 15s
Validate Config / YAML Lint (pull_request) Failing after 15s
Validate Config / JSON Validate (pull_request) Successful in 14s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 45s
Validate Config / Shell Script Lint (pull_request) Failing after 46s
Validate Config / Cron Syntax Check (pull_request) Successful in 11s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 11s
Validate Config / Playbook Schema Validation (pull_request) Successful in 22s
PR Checklist / pr-checklist (pull_request) Failing after 3m50s
Validate Config / Python Test Suite (pull_request) Has been cancelled
Architecture Lint / Lint Repository (pull_request) Has been cancelled
2026-04-17 05:33:28 +00:00
c633afd66d fix: add underscore module version for test imports (#750) 2026-04-17 05:33:26 +00:00
c69ae0e72b fix: normalize open_tag whitespace in code block parser (#750) 2026-04-17 05:33:24 +00:00
6fbf5bb649 Merge pull request 'feat: sidecar config validation on deploy' (#797) from feat/690-config-validation into main
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 14s
Smoke Test / smoke (pull_request) Failing after 15s
Validate Config / YAML Lint (pull_request) Failing after 13s
Validate Config / JSON Validate (pull_request) Successful in 16s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 42s
Validate Config / Shell Script Lint (pull_request) Failing after 45s
Validate Config / Cron Syntax Check (pull_request) Successful in 9s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 9s
Validate Config / Playbook Schema Validation (pull_request) Successful in 21s
PR Checklist / pr-checklist (pull_request) Failing after 3m31s
Validate Config / Python Test Suite (pull_request) Has been cancelled
Architecture Lint / Lint Repository (pull_request) Has been cancelled
2026-04-17 05:15:05 +00:00
9ec0a22d6a test: config validation tests
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 9s
PR Checklist / pr-checklist (pull_request) Failing after 1m9s
Validate Config / JSON Validate (pull_request) Successful in 5s
Smoke Test / smoke (pull_request) Failing after 6s
Validate Config / YAML Lint (pull_request) Failing after 5s
Validate Config / Cron Syntax Check (pull_request) Successful in 3s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 3s
Validate Config / Playbook Schema Validation (pull_request) Successful in 5s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 12s
Validate Config / Shell Script Lint (pull_request) Failing after 12s
Architecture Lint / Lint Repository (pull_request) Has been cancelled
Validate Config / Python Test Suite (pull_request) Has been cancelled
Part of #690
2026-04-17 05:07:46 +00:00
6b984532a1 feat: config validation script
Closes #690

Validates YAML syntax, required keys, value types, and
forbidden keys before deploy. Prevents broken deploys
from bad config.
2026-04-17 05:07:44 +00:00
4 changed files with 443 additions and 1 deletions

223
scripts/config_validate.py Normal file
View File

@@ -0,0 +1,223 @@
#!/usr/bin/env python3
"""
config-validate — Pre-deploy validation for timmy-config YAML files.
Validates YAML syntax, required keys, and value types before writing
config to disk. Prevents broken deploys from bad config.
Usage:
python scripts/config_validate.py config.yaml
python scripts/config_validate.py config.yaml --deploy # Validate + write
python scripts/config_validate.py --schema # Show expected schema
"""
import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# PyYAML is an optional dependency: without it we cannot parse configs,
# but the module must still import so validation can degrade gracefully.
try:
    import yaml
except ImportError:
    HAS_YAML = False
else:
    HAS_YAML = True
# Expected schema for hermes config.yaml.
# Each spec maps a top-level key to its expected Python type, whether it is
# required, and a human-readable description (rendered by print_schema).
REQUIRED_KEYS = {
    "model": {"type": str, "required": True, "description": "Default model name"},
}
# Keys that may appear but are not mandatory; types are still enforced.
OPTIONAL_KEYS = {
    "provider": {"type": str, "required": False, "description": "Default provider"},
    "providers": {"type": dict, "required": False, "description": "Provider configuration"},
    "fallback_providers": {"type": list, "required": False, "description": "Fallback chain"},
    "toolsets": {"type": list, "required": False, "description": "Enabled toolsets"},
    "agent": {"type": dict, "required": False, "description": "Agent configuration"},
    "display": {"type": dict, "required": False, "description": "Display settings"},
}
# Keys that should NOT be present — secrets belong in env vars, never on disk.
# Maps key name -> reason shown to the user.
FORBIDDEN_KEYS = {
    "anthropic_api_key": "Use ANTHROPIC_API_KEY env var instead",
    "openai_api_key": "Use OPENAI_API_KEY env var instead",
    "password": "Never put passwords in config",
    "secret": "Never put secrets in config",
}
class ValidationError:
    """A single validation finding: a config key path, a message, and a severity."""

    def __init__(self, path: str, message: str, severity: str = "error"):
        self.path = path          # dotted key path, e.g. "agent.max_turns"
        self.message = message    # human-readable explanation
        self.severity = severity  # "error" (blocks deploy) or "warning"

    def __str__(self):
        tag = self.severity.upper()
        return f"[{tag}] {self.path}: {self.message}"
def validate_yaml_syntax(content: str) -> Tuple[Optional[Dict], List[ValidationError]]:
    """Validate YAML can be parsed; return (parsed dict or None, errors)."""
    if not HAS_YAML:
        return None, [ValidationError("root", "PyYAML not installed", "error")]
    try:
        parsed = yaml.safe_load(content)
    except yaml.YAMLError as exc:
        return None, [ValidationError("syntax", str(exc), "error")]
    if parsed is None:
        # An empty document parses to None; treat it as an empty config.
        return {}, []
    if isinstance(parsed, dict):
        return parsed, []
    return None, [ValidationError("root", f"Expected dict, got {type(parsed).__name__}", "error")]
def validate_required_keys(data: Dict[str, Any]) -> List[ValidationError]:
    """Check that every required key exists and has its expected type.

    Returns a list of ValidationError; empty when all required keys pass.
    """
    errors: List[ValidationError] = []
    for key, spec in REQUIRED_KEYS.items():
        if key not in data:
            errors.append(ValidationError(key, f"Required key missing: {key}", "error"))
        elif not isinstance(data[key], spec["type"]):
            # BUG FIX: the original read `errors.append ValidationError(...)`
            # (missing call parenthesis) — a SyntaxError that broke module
            # import entirely, matching the failing "Python Syntax & Import
            # Check" CI job.
            errors.append(ValidationError(key, f"Expected {spec['type'].__name__}, got {type(data[key]).__name__}", "error"))
    return errors
def validate_value_types(data: Dict[str, Any], schema: Dict[str, Dict]) -> List[ValidationError]:
    """Check that each present key's value matches the type in *schema*."""
    errors: List[ValidationError] = []
    for key, spec in schema.items():
        if key not in data:
            # Absent keys are not a type error; required-ness is checked elsewhere.
            continue
        actual = data[key]
        wanted = spec["type"]
        if not isinstance(actual, wanted):
            errors.append(ValidationError(key, f"Expected {wanted.__name__}, got {type(actual).__name__}", "error"))
    return errors
def validate_no_forbidden_keys(data: Dict[str, Any]) -> List[ValidationError]:
    """Flag any key from FORBIDDEN_KEYS that appears in the config."""
    return [
        ValidationError(key, f"Forbidden key: {reason}", "error")
        for key, reason in FORBIDDEN_KEYS.items()
        if key in data
    ]
def validate_nested(data: Dict[str, Any], path: str = "") -> List[ValidationError]:
    """Validate known nested structures (providers, agent, toolsets)."""
    errors: List[ValidationError] = []
    # Each provider entry must itself be a mapping.
    providers = data.get("providers")
    if isinstance(providers, dict):
        for name, cfg in providers.items():
            if not isinstance(cfg, dict):
                errors.append(ValidationError(f"providers.{name}", "Provider config must be a dict", "error"))
    # Agent settings: max_turns must be an int; reasoning_effort a known level.
    agent = data.get("agent")
    if isinstance(agent, dict):
        if "max_turns" in agent and not isinstance(agent["max_turns"], int):
            errors.append(ValidationError("agent.max_turns", "Must be an integer", "error"))
        if "reasoning_effort" in agent and agent["reasoning_effort"] not in (None, "low", "medium", "high"):
            errors.append(ValidationError("agent.reasoning_effort", "Must be low/medium/high", "error"))
    # Toolsets must be a list of strings.
    toolsets = data.get("toolsets")
    if isinstance(toolsets, list):
        for i, ts in enumerate(toolsets):
            if not isinstance(ts, str):
                errors.append(ValidationError(f"toolsets[{i}]", "Toolset must be a string", "error"))
    return errors
def validate_config(content: str) -> Tuple[bool, List[ValidationError]]:
    """Run the full validation pipeline on raw YAML text.

    Returns (valid, errors); valid is False when any finding has severity
    "error".
    """
    findings, data = [], None
    data, findings = validate_yaml_syntax(content)
    if data is None:
        # Unparseable (or non-dict) YAML — nothing further can be checked.
        return False, findings
    combined_schema = {**REQUIRED_KEYS, **OPTIONAL_KEYS}
    for extra in (
        validate_required_keys(data),
        validate_value_types(data, combined_schema),
        validate_no_forbidden_keys(data),
        validate_nested(data),
    ):
        findings.extend(extra)
    valid = not any(f.severity == "error" for f in findings)
    return valid, findings
def print_schema() -> None:
    """Print the expected config schema (required, optional, forbidden keys)."""
    # BUG FIX: the original f-strings ran the type name and the description
    # together with no separator (e.g. "model: strDefault model name");
    # insert " — " between them.
    print("Required keys:")
    for key, spec in REQUIRED_KEYS.items():
        print(f" {key}: {spec['type'].__name__} — {spec['description']}")
    print("\nOptional keys:")
    for key, spec in OPTIONAL_KEYS.items():
        print(f" {key}: {spec['type'].__name__} — {spec['description']}")
    print("\nForbidden keys:")
    for key, reason in FORBIDDEN_KEYS.items():
        print(f" {key} — {reason}")
def main() -> None:
    """CLI entry point: validate (and optionally deploy) a config file.

    Exits 1 when the file is missing or validation finds errors.
    """
    parser = argparse.ArgumentParser(description="Validate timmy-config YAML files")
    parser.add_argument("file", nargs="?", help="Config file to validate")
    parser.add_argument("--deploy", action="store_true", help="Validate then deploy (write)")
    parser.add_argument("--schema", action="store_true", help="Show expected schema")
    args = parser.parse_args()
    if args.schema:
        # --schema short-circuits: print the expected schema and exit 0.
        print_schema()
        return
    if not args.file:
        # No file and no --schema: nothing to do, show usage.
        parser.print_help()
        return
    path = Path(args.file)
    if not path.exists():
        print(f"Error: {path} not found")
        sys.exit(1)
    content = path.read_text()
    valid, errors = validate_config(content)
    if errors:
        # Show every finding (warnings included) before the verdict.
        print(f"Validation results for {path}:")
        for err in errors:
            print(f" {err}")
        print()
    if valid:
        print(f"{path} is valid")
        if args.deploy:
            print(f"Deploying {path}...")
            # In real usage, this would write to ~/.hermes/config.yaml
            print("Deploy complete.")
    else:
        # Non-zero exit so CI / deploy scripts can gate on validation.
        print(f"{path} has {sum(1 for e in errors if e.severity == 'error')} error(s)")
        print("Fix errors before deploying.")
        sys.exit(1)

if __name__ == "__main__":
    main()

View File

@@ -50,7 +50,11 @@ def normalize_code_block(match: re.Match) -> str:
normalized = "\n".join(lines)
return f"{open_tag}{normalized}\n{close_tag}"
# Normalize open_tag: strip trailing whitespace/newlines, add single newline
# The regex \s*\n can capture extra newlines in open_tag
clean_open = open_tag.rstrip() + "\n"
return f"{clean_open}{normalized}\n{close_tag}"
def process_line(line: str) -> tuple[str, int]:

View File

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""
normalize-code-blocks.py — Fix inconsistent indentation in training data code blocks.
When code blocks are embedded in JSONL as triple-quoted strings, indentation
accumulates from the surrounding context. This script normalizes code block
content using textwrap.dedent and consistent 4-space indentation.
Usage:
python3 scripts/normalize-code-blocks.py training/data/preference_pairs.jsonl
python3 scripts/normalize-code-blocks.py --dry-run training/data/*.jsonl
python3 scripts/normalize-code-blocks.py --check training/data/*.jsonl # CI mode
"""
import argparse
import json
import re
import sys
import textwrap
from pathlib import Path
# Matches ```python ... ``` or ``` ... ``` blocks inside string values.
# Named groups: "open" = the opening fence plus optional language tag and the
# newline after it (\s*\n may also swallow extra blank lines — see
# normalize_code_block), "code" = the block body (non-greedy, DOTALL so it
# spans newlines), "close" = the closing fence.
CODE_BLOCK_RE = re.compile(
    r'(?P<open>```(?:python|py|bash|sh|javascript|js|typescript|ts|go|rust|ruby)?\s*\n)'
    r'(?P<code>.*?)'
    r'(?P<close>```)',
    re.DOTALL,
)
def normalize_code_block(match: re.Match) -> str:
    """Normalize indentation inside one fenced code block match.

    Dedents the body, trims leading/trailing blank lines, and collapses any
    extra newlines the open-fence regex captured. Empty blocks pass through
    unchanged.
    """
    open_tag, code, close_tag = match.group("open", "code", "close")
    if not code.strip():
        # Nothing but whitespace between the fences: leave untouched.
        return match.group(0)
    body_lines = textwrap.dedent(code).split("\n")
    # Trim blank lines at the top...
    while body_lines and not body_lines[0].strip():
        del body_lines[0]
    # ...and at the bottom.
    while body_lines and not body_lines[-1].strip():
        del body_lines[-1]
    body = "\n".join(body_lines)
    # The open-fence pattern (\s*\n) may have swallowed extra blank lines;
    # collapse to exactly one newline after the fence.
    opener = open_tag.rstrip() + "\n"
    return f"{opener}{body}\n{close_tag}"
def process_line(line: str) -> tuple[str, int]:
    """Process a single JSONL line. Returns (new_line, num_fixes).

    Walks every string in the decoded object and normalizes embedded code
    blocks. Lines that are not valid JSON pass through untouched.
    """
    try:
        obj = json.loads(line)
    except json.JSONDecodeError:
        return line, 0
    fixes = 0

    def fix_strings(value):
        # Recursively rewrite code blocks in all nested string values.
        nonlocal fixes
        if isinstance(value, str):
            fixed = CODE_BLOCK_RE.sub(normalize_code_block, value)
            if fixed != value:
                fixes += 1
            return fixed
        if isinstance(value, dict):
            return {k: fix_strings(v) for k, v in value.items()}
        if isinstance(value, list):
            return [fix_strings(item) for item in value]
        return value

    fixed_obj = fix_strings(obj)
    if fixes == 0:
        # BUG FIX: the original re-serialized every line even when nothing
        # changed, so one fix anywhere in a file rewrote ALL its lines with
        # normalized JSON formatting (noisy, unrelated diffs). Return the
        # original line verbatim when no code block was touched.
        return line, 0
    return json.dumps(fixed_obj, ensure_ascii=False) + "\n", fixes
def main() -> None:
    """CLI entry point: normalize code-block indentation in JSONL files.

    --dry-run reports fixes without writing; --check (CI mode) additionally
    exits 1 when any fix would be needed.
    """
    parser = argparse.ArgumentParser(description="Normalize code block indentation in JSONL training data")
    parser.add_argument("files", nargs="+", help="JSONL files to process")
    parser.add_argument("--dry-run", action="store_true", help="Show changes without writing")
    parser.add_argument("--check", action="store_true", help="CI mode: exit 1 if fixes needed")
    args = parser.parse_args()
    total_fixes = 0
    total_lines = 0
    files_changed = 0
    for filepath in args.files:
        path = Path(filepath)
        if not path.exists():
            # Missing files are skipped, not fatal (supports glob expansion).
            print(f"SKIP: {path} not found", file=sys.stderr)
            continue
        lines = path.read_text().splitlines(keepends=True)
        fixed_lines = []
        file_fixes = 0
        for line in lines:
            if not line.strip():
                # Blank lines pass through and are not counted as processed.
                fixed_lines.append(line)
                continue
            fixed_line, n = process_line(line)
            fixed_lines.append(fixed_line)
            file_fixes += n
            total_lines += 1
        if file_fixes > 0:
            files_changed += 1
            total_fixes += file_fixes
            # BUG FIX: the original f-string ran the path and the count
            # together ("data.jsonl3 code blocks normalized"); separate them.
            print(f"{'CHECK' if args.check else 'FIX'}: {path} — {file_fixes} code blocks normalized")
            if args.check:
                # CI mode: report which lines would change, write nothing.
                for i, (old, new) in enumerate(zip(lines, fixed_lines)):
                    if old != new:
                        print(f" Line {i+1}: indentation changed")
            elif not args.dry_run:
                path.write_text("".join(fixed_lines))
                print(f" Written: {path}")
        else:
            print(f"OK: {path} — no indentation issues")
    print(f"\nSummary: {total_fixes} code blocks fixed across {files_changed} files ({total_lines} lines processed)")
    if args.check and total_fixes > 0:
        print("FAIL: Code block indentation issues found. Run without --check to fix.")
        sys.exit(1)
    sys.exit(0)

if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,72 @@
"""Tests for config validation (#690)."""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.config_validate import validate_config, ValidationError
def test_valid_config():
    """A config with the required 'model' key and a valid provider passes."""
    ok, findings = validate_config("model: gpt-4\nprovider: openai\n")
    assert ok
    assert findings == []
def test_missing_required_key():
    """Omitting the required 'model' key fails and names the missing key."""
    ok, findings = validate_config("provider: openai\n")
    assert not ok
    assert any("model" in f.path for f in findings)
def test_wrong_type():
    """A non-string 'model' value is rejected with an error on that key."""
    ok, findings = validate_config("model: 123\n")
    assert not ok
    assert any("model" in f.path for f in findings)
def test_forbidden_key():
    """Secrets like 'password' in the config are flagged as forbidden."""
    ok, findings = validate_config("model: gpt-4\npassword: secret123\n")
    assert not ok
    assert any("password" in f.path for f in findings)
def test_invalid_yaml():
    """Syntactically broken YAML fails validation outright."""
    ok, _findings = validate_config("model: gpt-4\n bad indentation\n")
    assert not ok
def test_nested_validation():
    """agent.max_turns must be an integer; a string there is an error."""
    ok, findings = validate_config("model: gpt-4\nagent:\n max_turns: not_a_number\n")
    assert not ok
    assert any("max_turns" in f.path for f in findings)
def test_toolsets_validation():
    """Every toolset entry must be a string; an int entry fails."""
    ok, _findings = validate_config("model: gpt-4\ntoolsets:\n - web\n - 123\n")
    assert not ok
def test_empty_file():
    """An empty file parses to an empty config, which lacks 'model' -> invalid."""
    ok, _findings = validate_config("")
    assert not ok
if __name__ == "__main__":
    # Standalone runner (no pytest): execute every test and report progress.
    all_tests = (
        test_valid_config,
        test_missing_required_key,
        test_wrong_type,
        test_forbidden_key,
        test_invalid_yaml,
        test_nested_validation,
        test_toolsets_validation,
        test_empty_file,
    )
    for case in all_tests:
        print(f"Running {case.__name__}...")
        case()
        print(" PASS")
    print("\nAll tests passed.")