#!/usr/bin/env python3
"""Validate knowledge files and index.json against the schema."""
import json
import sys
from pathlib import Path

VALID_CATEGORIES = {"fact", "pitfall", "pattern", "tool-quirk", "question"}
REQUIRED = {"id", "fact", "category", "domain", "confidence"}


def validate_fact(fact, src=""):
    """Return a list of schema-violation messages for one fact entry.

    Args:
        fact: The decoded JSON value for a single fact (expected to be a dict).
        src: Label prefixed to every message, e.g. ``"[3]"`` for the entry's
            position in the index.

    Returns:
        A list of human-readable error strings; empty when the fact is valid.
        This function never raises on malformed input: wrong types are
        reported as errors instead.
    """
    if not isinstance(fact, dict):
        return [f"{src}: fact must be an object"]

    errs = []

    # Sorted so the order of "missing" messages is stable between runs
    # (iteration order of a set of strings varies with hash randomization).
    for field in sorted(REQUIRED):
        if field not in fact:
            errs.append(f"{src}: missing '{field}'")

    if "category" in fact:
        category = fact["category"]
        # The isinstance guard keeps unhashable values (lists, dicts) from
        # raising TypeError on the set membership test.
        if not isinstance(category, str) or category not in VALID_CATEGORIES:
            errs.append(f"{src}: invalid category '{category}'")

    if "confidence" in fact:
        confidence = fact["confidence"]
        # bool is a subclass of int, so exclude it explicitly.
        is_number = isinstance(confidence, (int, float)) and not isinstance(
            confidence, bool
        )
        if not is_number or not (0 <= confidence <= 1):
            errs.append(f"{src}: confidence must be 0.0-1.0")

    if "id" in fact:
        fact_id = fact["id"]
        # A non-string id (e.g. a JSON number) has no .split(); treat it as
        # malformed rather than crashing.
        if not isinstance(fact_id, str) or len(fact_id.split(":")) != 3:
            errs.append(f"{src}: id must be domain:category:sequence")

    return errs


def _fail(message):
    """Print a ``FAILED: ...`` line and exit with status 1."""
    print(f"FAILED: {message}")
    sys.exit(1)


def main():
    """Validate ``knowledge/index.json`` and exit non-zero on any error.

    The index is looked up at ``<parent of this script's directory>/knowledge/
    index.json``. Every entry under the top-level ``"facts"`` key is checked
    with :func:`validate_fact`, and duplicate ids are reported.
    """
    idx = Path(__file__).parent.parent / "knowledge" / "index.json"
    if not idx.exists():
        _fail(f"{idx} not found")

    try:
        # Context manager closes the handle; JSON files are UTF-8 by spec.
        with open(idx, encoding="utf-8") as handle:
            data = json.load(handle)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        _fail(f"{idx} is not valid JSON: {exc}")

    if not isinstance(data, dict):
        _fail(f"{idx} must contain a JSON object")

    facts = data.get("facts", [])
    if not isinstance(facts, list):
        _fail(f"{idx}: 'facts' must be a list")

    errs = []
    seen = set()
    for i, fact in enumerate(facts):
        errs.extend(validate_fact(fact, f"[{i}]"))
        # Only string ids are tracked: other types are already reported by
        # validate_fact and may be unhashable.
        if isinstance(fact, dict) and isinstance(fact.get("id"), str):
            fact_id = fact["id"]
            if fact_id in seen:
                errs.append(f"duplicate id '{fact_id}'")
            seen.add(fact_id)

    if errs:
        print(f"FAILED - {len(errs)} errors:")
        for err in errs:
            print(f" x {err}")
        sys.exit(1)

    print(f"PASSED - {len(facts)} facts")


if __name__ == "__main__":
    main()