Files
compounding-intelligence/scripts/validate_knowledge.py

81 lines
2.7 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Validate the knowledge index (knowledge/index.json) against the schema.

Usage:
    python scripts/validate_knowledge.py

Exits with status 0 when validation passes and 1 when it fails.
"""
import json
import sys
from pathlib import Path
# Allowed values for a fact's "category" field and for the middle segment of its id.
VALID_CATEGORIES = {"fact", "pitfall", "pattern", "tool-quirk", "question"}
# Keys that every fact entry must define.
REQUIRED_FACT_FIELDS = {"id", "fact", "category", "domain", "confidence"}
# Upper bound on the length of the "fact" text, in characters.
MAX_FACT_LENGTH = 280


def validate_fact(fact, source=""):
    """Check a single fact entry and return the problems found.

    Args:
        fact: Parsed JSON value for one entry; expected to be a dict.
        source: Label prefixed to every message (for example "facts[3]").

    Returns:
        A list of human-readable error strings; empty when the entry is valid.
        The function never raises for malformed entries: wrong types are
        reported as errors instead.
    """
    if not isinstance(fact, dict):
        # Membership tests and key lookups below only make sense on a mapping.
        return [f"{source}: fact must be an object"]

    errors = []

    # sorted() keeps the report stable; iteration order of a set of strings
    # can differ between interpreter runs.
    for field in sorted(REQUIRED_FACT_FIELDS):
        if field not in fact:
            errors.append(f"{source}: missing required field '{field}'")

    if "fact" in fact:
        text = fact["fact"]
        if not isinstance(text, str) or not text.strip():
            errors.append(f"{source}: 'fact' must be non-empty string")
        elif len(text) > MAX_FACT_LENGTH:
            errors.append(f"{source}: 'fact' exceeds {MAX_FACT_LENGTH} chars")

    if "category" in fact:
        category = fact["category"]
        # Type check first: an unhashable JSON value (list/dict) would raise
        # TypeError on the set membership test.
        if not isinstance(category, str) or category not in VALID_CATEGORIES:
            errors.append(f"{source}: invalid category '{category}'")

    if "confidence" in fact:
        confidence = fact["confidence"]
        # bool is a subclass of int, so JSON true/false must be rejected explicitly.
        if isinstance(confidence, bool) or not isinstance(confidence, (int, float)):
            errors.append(f"{source}: 'confidence' must be a number")
        elif not (0.0 <= confidence <= 1.0):
            errors.append(f"{source}: 'confidence' must be 0.0-1.0")

    if "id" in fact:
        fact_id = fact["id"]
        if not isinstance(fact_id, str):
            # Non-string ids (null, numbers, ...) have no .split().
            errors.append(f"{source}: 'id' must be a string")
        else:
            parts = fact_id.split(":")
            if len(parts) != 3:
                errors.append(f"{source}: 'id' must be domain:category:sequence")
            elif parts[1] not in VALID_CATEGORIES:
                errors.append(f"{source}: id category '{parts[1]}' invalid")

    return errors
def main():
    """Validate knowledge/index.json and exit with a status code.

    The index is looked up relative to this script's parent directory
    (``<repo_root>/knowledge/index.json``). A report is printed to stdout and
    the process exits with status 0 when no problems are found, 1 otherwise.
    Unreadable, malformed, or wrongly-shaped files are reported as validation
    failures rather than surfacing as tracebacks.
    """
    repo_root = Path(__file__).parent.parent
    index_path = repo_root / "knowledge" / "index.json"

    if not index_path.exists():
        print(f"VALIDATION FAILED: index.json not found at {index_path}")
        sys.exit(1)

    try:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(index_path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"VALIDATION FAILED: could not read {index_path}: {exc}")
        sys.exit(1)

    if not isinstance(data, dict):
        print("VALIDATION FAILED: index.json: top-level value must be an object")
        sys.exit(1)

    all_errors = []
    if "version" not in data:
        all_errors.append("index.json: missing 'version'")

    facts = data.get("facts")
    if not isinstance(facts, list):
        all_errors.append("index.json: missing or invalid 'facts'")
        # Nothing sensible to iterate when 'facts' has the wrong type.
        facts = []

    seen_ids = set()
    for i, fact in enumerate(facts):
        if not isinstance(fact, dict):
            all_errors.append(f"facts[{i}]: fact must be an object")
            continue
        all_errors.extend(validate_fact(fact, f"facts[{i}]"))
        fact_id = fact.get("id")
        # Only string ids are tracked: other JSON types are invalid ids and
        # may be unhashable (lists/objects).
        if isinstance(fact_id, str):
            if fact_id in seen_ids:
                all_errors.append(f"index.json: duplicate id '{fact_id}'")
            seen_ids.add(fact_id)

    if all_errors:
        print(f"VALIDATION FAILED - {len(all_errors)} error(s):\n")
        for e in all_errors:
            print(f" x {e}")
        sys.exit(1)

    print(f"VALIDATION PASSED - {len(facts)} facts, schema v{data.get('version', '?')}")
    sys.exit(0)


if __name__ == "__main__":
    main()