#!/usr/bin/env python3
"""Validate knowledge files and index.json against the schema."""
import json, sys
from pathlib import Path


# Allowed values for a fact's "category" field.
VALID_CATEGORIES = {"fact", "pitfall", "pattern", "tool-quirk", "question"}
# Keys that every fact entry must carry.
REQUIRED = {"id", "fact", "category", "domain", "confidence"}


def validate_fact(fact, src=""):
    """Check a single fact entry and return a list of error messages.

    Args:
        fact: The decoded JSON value for one fact; expected to be a dict.
        src: Label prefixed to every message (e.g. ``"[3]"``) so the caller
            can tell which entry failed.

    Returns:
        A list of human-readable error strings; empty when the entry is valid.
        Malformed values (wrong types, unhashable values) are reported as
        errors rather than raising.
    """
    # A non-object entry cannot be inspected key by key; report it once.
    if not isinstance(fact, dict):
        return [f"{src}: fact must be a JSON object"]

    # Sort so the report order is stable across runs (set order is not).
    errs = [f"{src}: missing '{key}'" for key in sorted(REQUIRED) if key not in fact]

    if "category" in fact:
        category = fact["category"]
        # The str check also protects the set lookup from unhashable values.
        if not isinstance(category, str) or category not in VALID_CATEGORIES:
            errs.append(f"{src}: invalid category '{category}'")

    if "confidence" in fact:
        confidence = fact["confidence"]
        # bool is a subclass of int, so JSON true/false must be excluded explicitly.
        is_number = isinstance(confidence, (int, float)) and not isinstance(confidence, bool)
        if not is_number or not (0 <= confidence <= 1):
            errs.append(f"{src}: confidence must be 0.0-1.0")

    if "id" in fact:
        fact_id = fact["id"]
        # Only strings can be split; anything else is simply a malformed id.
        if not isinstance(fact_id, str) or len(fact_id.split(":")) != 3:
            errs.append(f"{src}: id must be domain:category:sequence")

    return errs
|
||
|
|
|
||
|
|
def main():
    """Validate ``knowledge/index.json`` and exit non-zero on any problem.

    The index is looked up at ``<parent of this script's directory>/knowledge/
    index.json``. Every entry under the top-level ``"facts"`` key is checked
    with ``validate_fact`` and ids are checked for duplicates. Prints a
    ``PASSED`` summary on success; prints ``FAILED`` details and calls
    ``sys.exit(1)`` when the file is missing, unreadable, malformed, or
    contains invalid facts.
    """
    idx = Path(__file__).parent.parent / "knowledge" / "index.json"
    if not idx.exists():
        print(f"FAILED: {idx} not found")
        sys.exit(1)

    # Close the file deterministically and decode as UTF-8 regardless of locale.
    # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
    try:
        with open(idx, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError) as exc:
        print(f"FAILED: could not read {idx}: {exc}")
        sys.exit(1)

    facts = data.get("facts", []) if isinstance(data, dict) else None
    if not isinstance(facts, list):
        print(f"FAILED: {idx} must be a JSON object with a 'facts' list")
        sys.exit(1)

    errs = []
    seen = set()
    for i, entry in enumerate(facts):
        # Report non-object entries here so key lookups below cannot crash.
        if not isinstance(entry, dict):
            errs.append(f"[{i}]: fact must be a JSON object")
            continue
        errs.extend(validate_fact(entry, f"[{i}]"))
        fact_id = entry.get("id")
        # Only string ids are tracked: other types may be unhashable.
        if isinstance(fact_id, str):
            if fact_id in seen:
                errs.append(f"duplicate id '{fact_id}'")
            seen.add(fact_id)

    if errs:
        print(f"FAILED - {len(errs)} errors:")
        for err in errs:
            print(f" x {err}")
        sys.exit(1)

    print(f"PASSED - {len(facts)} facts")
|
||
|
|
|
||
|
|
# Run the validator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|