Compare commits

..

2 Commits

Author SHA1 Message Date
60c3838c2b feat: config validator with schema checks (#690)
Some checks failed
Validate Config / Python Test Suite (pull_request) Blocked by required conditions
Architecture Lint / Lint Repository (pull_request) Blocked by required conditions
Architecture Lint / Linter Tests (pull_request) Successful in 44s
PR Checklist / pr-checklist (pull_request) Failing after 4m2s
Smoke Test / smoke (pull_request) Failing after 9s
Validate Config / YAML Lint (pull_request) Failing after 30s
Validate Config / JSON Validate (pull_request) Successful in 29s
Validate Config / Shell Script Lint (pull_request) Failing after 1m8s
Validate Config / Cron Syntax Check (pull_request) Successful in 14s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m52s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 10s
Validate Config / Playbook Schema Validation (pull_request) Successful in 17s
2026-04-15 03:35:41 +00:00
9251ffc4b5 test 2026-04-15 03:34:20 +00:00
2 changed files with 88 additions and 6 deletions

83
bin/validate_config.py Normal file
View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""
Config Validator -- pre-deploy YAML validation for timmy-config sidecar.
Validates YAML syntax, required keys (model.default, model.provider,
toolsets), and provider names before deploy.sh writes to ~/.hermes/.
Usage:
python3 bin/validate_config.py [path/to/config.yaml]
python3 bin/validate_config.py --strict (fail on warnings too)
"""
import json, os, sys, yaml
from pathlib import Path
# Schema for required top-level keys in config.yaml.  Each entry maps a key
# name to its expected container type; an optional "keys" dict lists required
# sub-keys and the expected type of each sub-value.
REQUIRED = {
    "model": {"type": dict, "keys": {"default": str, "provider": str}},
    "toolsets": {"type": list},
}
# Accepted values for model.provider; any other name is reported as an error.
ALLOWED_PROVIDERS = [
    "anthropic", "openai", "nous", "ollama", "openrouter", "openai-codex"
]
def validate(path):
    """Validate the YAML config at *path* and return a list of error strings.

    Checks, in order:
      1. The file parses as YAML and yields a mapping.
      2. Every key in REQUIRED is present with the expected container type,
         and each declared sub-key has the expected value type.
      3. model.provider, when present, is one of ALLOWED_PROVIDERS.
      4. Sibling JSON files (channel_directory.json) parse as valid JSON.

    Returns:
        list[str]: human-readable errors; an empty list means the config
        is valid.  Parse failures are reported as errors, never raised.
    """
    errors = []
    try:
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception as e:
        return [f"YAML parse error: {e}"]
    if not isinstance(data, dict):
        return [f"Expected mapping, got {type(data).__name__}"]
    for key, spec in REQUIRED.items():
        if key not in data:
            errors.append(f"Required key missing: {key}")
            continue
        # Container-type check; skip sub-key checks when the shape is wrong.
        if not isinstance(data[key], spec["type"]):
            errors.append(f"{key}: expected {spec['type'].__name__}")
            continue
        for sub, sub_type in spec.get("keys", {}).items():
            if sub not in data[key]:
                errors.append(f"{key}.{sub}: required")
            elif not isinstance(data[key][sub], sub_type):
                errors.append(f"{key}.{sub}: expected {sub_type.__name__}")
    # Guard against a non-dict "model" value (e.g. model: "foo"): the previous
    # data.get("model", {}).get("provider") chain raised AttributeError here,
    # breaking the never-raise contract of this function.
    model = data.get("model")
    provider = model.get("provider") if isinstance(model, dict) else None
    if provider and provider not in ALLOWED_PROVIDERS:
        errors.append(f"model.provider: unknown provider '{provider}'")
    # Validate sibling JSON files that ship alongside config.yaml.
    for jf in ["channel_directory.json"]:
        jp = Path(path).parent / jf
        if jp.exists():
            try:
                json.loads(jp.read_text())
            except Exception as e:
                errors.append(f"{jf}: invalid JSON: {e}")
    return errors
def main():
    """CLI entry point: validate the config and exit non-zero on failure.

    Accepts one optional positional argument, the config path; defaults to
    ../config.yaml relative to this script.  Any argument starting with
    ``--`` is treated as a flag and excluded from the positional args.

    NOTE(review): ``--strict`` is advertised in the module docstring but was
    parsed into an unused local and never acted on -- validate() emits only
    errors, no warnings for strict mode to escalate.  The flag is still
    accepted (and ignored) so existing invocations keep working; wire it up
    once validate() distinguishes warnings from errors.
    """
    args = [a for a in sys.argv[1:] if not a.startswith("--")]
    path = args[0] if args else str(Path(__file__).parent.parent / "config.yaml")
    if not os.path.exists(path):
        print(f"ERROR: {path} not found")
        sys.exit(1)
    errs = validate(path)
    if errs:
        for e in errs:
            print(f"ERROR: {e}")
        print(f"Validation FAILED: {len(errs)} issue(s)")
        sys.exit(1)
    print(f"OK: {path} is valid")


if __name__ == "__main__":
    main()

View File

@@ -15,7 +15,6 @@
MODEL ?= timmy:v0.1-q4
BASELINE ?= hermes3:latest
OLLAMA_URL ?= http://localhost:11434
PYTHON ?= python3
OUTPUT ?= output
# ── Training ──────────────────────────────────────────────────────────
@@ -24,7 +23,7 @@ train-cloud: ## QLoRA fine-tune on cloud GPU (Axolotl)
axolotl train axolotl.yaml
train-local: ## LoRA fine-tune on Apple Silicon (MLX)
$(PYTHON) -m mlx_lm.lora --config mlx-lora.yaml
python -m mlx_lm.lora --config mlx-lora.yaml
# ── Evaluation ────────────────────────────────────────────────────────
@@ -46,7 +45,7 @@ vibes: ## Run vibes check — hand-picked prompts, human review
@echo "Date: $$(date '+%Y-%m-%d %H:%M')" > $(OUTPUT)/vibes-$(MODEL).md
@echo "Model: $(MODEL)" >> $(OUTPUT)/vibes-$(MODEL).md
@echo "" >> $(OUTPUT)/vibes-$(MODEL).md
@$(PYTHON) -c "\
@python -c "\
import yaml, subprocess, sys; \
prompts = yaml.safe_load(open('data/prompts_vibes.yaml'))['prompts']; \
f = open('$(OUTPUT)/vibes-$(MODEL).md', 'a'); \
@@ -70,19 +69,19 @@ vibes: ## Run vibes check — hand-picked prompts, human review
# ── Data Pipeline ─────────────────────────────────────────────────────
ingest: ## Pull heartbeat trajectories into training data
$(PYTHON) ingest_trajectories.py \
python ingest_trajectories.py \
--trajectories ~/.nexus/trajectories/ \
--curated data/curated_dataset.jsonl \
--output data/merged_training_data.jsonl
@echo "Merged dataset ready. Convert for MLX with: make convert"
curated: ## Regenerate curated exemplar dataset
$(PYTHON) build_curated.py
python build_curated.py
@echo "Curated dataset regenerated."
convert: ## Convert merged dataset to MLX format (train/valid split)
@mkdir -p data/mlx_curated
$(PYTHON) -c "\
python -c "\
import json; \
lines = open('data/merged_training_data.jsonl').readlines(); \
sessions = [json.loads(l) for l in lines]; \