#!/usr/bin/env python3
"""
Validate knowledge files and index.json against the schema.

Usage: python scripts/validate_knowledge.py
"""
import json
import sys
from pathlib import Path

VALID_CATEGORIES = {"fact", "pitfall", "pattern", "tool-quirk", "question"}
REQUIRED_FACT_FIELDS = {"id", "fact", "category", "domain", "confidence"}
MAX_FACT_LENGTH = 280


def validate_fact(fact, source=""):
    """Return a list of schema error messages for a single fact entry.

    Args:
        fact: The decoded JSON value for one entry of the ``facts`` list.
            Anything other than a dict is reported as an error rather than
            raising.
        source: Label prefixed to every message (e.g. ``"facts[3]"``).

    Returns:
        A list of human-readable error strings; empty when the fact is valid.
        This function never raises on malformed input.
    """
    if not isinstance(fact, dict):
        return [f"{source}: fact must be an object"]

    errors = []

    # Sorted so the report order is stable across runs; iterating the set
    # directly depends on string hash randomisation.
    for field in sorted(REQUIRED_FACT_FIELDS):
        if field not in fact:
            errors.append(f"{source}: missing required field '{field}'")

    if "fact" in fact:
        text = fact["fact"]
        if not isinstance(text, str) or not text.strip():
            errors.append(f"{source}: 'fact' must be non-empty string")
        elif len(text) > MAX_FACT_LENGTH:
            errors.append(f"{source}: 'fact' exceeds {MAX_FACT_LENGTH} chars")

    if "category" in fact:
        category = fact["category"]
        # The str check comes first: a JSON array/object is unhashable and
        # would make the set membership test raise TypeError.
        if not isinstance(category, str) or category not in VALID_CATEGORIES:
            errors.append(f"{source}: invalid category '{category}'")

    if "confidence" in fact:
        confidence = fact["confidence"]
        # bool is a subclass of int, so JSON true/false must be excluded
        # explicitly.
        if isinstance(confidence, bool) or not isinstance(confidence, (int, float)):
            errors.append(f"{source}: 'confidence' must be a number")
        elif not (0.0 <= confidence <= 1.0):
            errors.append(f"{source}: 'confidence' must be 0.0-1.0")

    if "id" in fact:
        fact_id = fact["id"]
        # A non-string id cannot be split; treat it as a malformed id.
        parts = fact_id.split(":") if isinstance(fact_id, str) else []
        if len(parts) != 3:
            errors.append(f"{source}: 'id' must be domain:category:sequence")
        elif parts[1] not in VALID_CATEGORIES:
            errors.append(f"{source}: id category '{parts[1]}' invalid")

    return errors


def _validate_index(data):
    """Return schema error messages for the decoded contents of index.json."""
    if not isinstance(data, dict):
        return ["index.json: top-level value must be an object"]

    errors = []
    if "version" not in data:
        errors.append("index.json: missing 'version'")

    facts = data.get("facts")
    if not isinstance(facts, list):
        errors.append("index.json: missing or invalid 'facts'")
        # Nothing sensible to iterate over, so stop here.
        return errors

    seen_ids = set()
    for i, fact in enumerate(facts):
        errors.extend(validate_fact(fact, f"facts[{i}]"))
        fact_id = fact.get("id") if isinstance(fact, dict) else None
        # Non-string ids are already reported by validate_fact and may be
        # unhashable, so they are left out of duplicate detection.
        if not isinstance(fact_id, str):
            continue
        if fact_id in seen_ids:
            errors.append(f"index.json: duplicate id '{fact_id}'")
        seen_ids.add(fact_id)

    return errors


def main():
    """Validate knowledge/index.json and exit 0 on success, 1 on failure.

    The index is located relative to this script: two directories up from
    this file, then ``knowledge/index.json``. Results are printed to stdout.
    """
    repo_root = Path(__file__).parent.parent
    index_path = repo_root / "knowledge" / "index.json"

    if not index_path.exists():
        print(f"VALIDATION FAILED: index.json not found at {index_path}")
        sys.exit(1)

    try:
        with open(index_path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"VALIDATION FAILED: could not read index.json: {exc}")
        sys.exit(1)

    all_errors = _validate_index(data)
    if all_errors:
        print(f"VALIDATION FAILED - {len(all_errors)} error(s):\n")
        for e in all_errors:
            print(f" x {e}")
        sys.exit(1)

    print(f"VALIDATION PASSED - {len(data.get('facts', []))} facts, schema v{data.get('version', '?')}")
    sys.exit(0)


if __name__ == "__main__":
    main()