Files
compounding-intelligence/scripts/validate_knowledge.py

39 lines
1.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""Validate knowledge files and index.json against the schema."""
import json, sys
from pathlib import Path
VALID_CATEGORIES = {"fact", "pitfall", "pattern", "tool-quirk", "question"}
REQUIRED = {"id", "fact", "category", "domain", "confidence"}


def validate_fact(fact, src=""):
    """Check one knowledge entry against the schema and return its problems.

    Args:
        fact: The entry to check, normally a dict decoded from JSON.
        src: Label prefixed to every message (e.g. the entry's index).

    Returns:
        A list of error strings; the list is empty when no problem was found.
        Malformed values are reported as errors rather than raising.
    """
    if not isinstance(fact, dict):
        return [f"{src}: entry must be an object, got {type(fact).__name__}"]

    # Sorted so the report order is stable from run to run; iterating the
    # set directly yields an order that depends on string hashing.
    errs = [
        f"{src}: missing '{key}'" for key in sorted(REQUIRED) if key not in fact
    ]

    if "category" in fact:
        category = fact["category"]
        # Type check first: an unhashable value (list, dict) would raise
        # TypeError on the set membership test.
        if not isinstance(category, str) or category not in VALID_CATEGORIES:
            errs.append(f"{src}: invalid category '{category}'")

    if "confidence" in fact:
        confidence = fact["confidence"]
        # bool is a subclass of int, so it has to be excluded explicitly.
        is_number = isinstance(confidence, (int, float))
        if isinstance(confidence, bool) or not is_number or not 0 <= confidence <= 1:
            errs.append(f"{src}: confidence must be 0.0-1.0")

    if "id" in fact:
        fact_id = fact["id"]
        # Only a string can be split into the three colon-separated parts.
        if not isinstance(fact_id, str) or len(fact_id.split(":")) != 3:
            errs.append(f"{src}: id must be domain:category:sequence")

    return errs
def main():
    """Validate ``knowledge/index.json`` and print the outcome.

    The index is looked up at ``knowledge/index.json`` under the parent of
    this script's directory. On success a ``PASSED`` line with the number of
    facts is printed. On any failure (file missing, unreadable, not valid
    JSON, wrong top-level shape, or schema errors) a ``FAILED`` line is
    printed and the process exits with status 1.
    """
    idx = Path(__file__).parent.parent / "knowledge" / "index.json"
    if not idx.exists():
        print(f"FAILED: {idx} not found")
        sys.exit(1)

    try:
        # Context manager closes the handle; JSON text is UTF-8 by convention.
        with open(idx, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"FAILED: cannot read {idx}: {exc}")
        sys.exit(1)

    facts = data.get("facts", []) if isinstance(data, dict) else None
    if not isinstance(facts, list):
        print(f"FAILED: {idx} must be an object with a 'facts' list")
        sys.exit(1)

    errs = []
    seen = set()
    for i, fact in enumerate(facts):
        if not isinstance(fact, dict):
            errs.append(f"[{i}]: entry must be an object, got {type(fact).__name__}")
            continue
        errs.extend(validate_fact(fact, f"[{i}]"))
        fact_id = fact.get("id")
        # Duplicate detection only considers string ids: other values may be
        # unhashable and would raise on the set lookup.
        if isinstance(fact_id, str):
            if fact_id in seen:
                errs.append(f"duplicate id '{fact_id}'")
            seen.add(fact_id)

    if errs:
        print(f"FAILED - {len(errs)} errors:")
        for err in errs:
            print(f" x {err}")
        sys.exit(1)
    print(f"PASSED - {len(facts)} facts")


if __name__ == "__main__":
    main()