# compounding-intelligence/scripts/test_harvest_prompt.py

#!/usr/bin/env python3
"""
Test script for knowledge extraction prompt.
Validates that the prompt produces consistent, structured output.
"""

import json
import sys
from pathlib import Path

def validate_knowledge_item(item):
    """Validate a single knowledge item.

    Args:
        item: A mapping expected to carry the keys ``fact``, ``category``,
            ``repo`` and ``confidence``.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` is ``"Valid"`` on success
        and a description of the first problem found otherwise.
    """
    # Local import keeps this function self-contained.
    from collections.abc import Mapping

    # A list, string or None would otherwise crash with TypeError below (or,
    # for strings, silently do substring matching in the ``in`` checks).
    if not isinstance(item, Mapping):
        return False, "Item must be a mapping (JSON object)"

    required_fields = ("fact", "category", "repo", "confidence")
    for field in required_fields:
        if field not in item:
            return False, f"Missing field: {field}"

    fact = item["fact"]
    if not isinstance(fact, str) or not fact.strip():
        return False, "Fact must be a non-empty string"

    valid_categories = ("fact", "pitfall", "pattern", "tool-quirk", "question")
    if item["category"] not in valid_categories:
        return False, f"Invalid category: {item['category']}"

    if not isinstance(item["repo"], str):
        return False, "Repo must be a string"

    confidence = item["confidence"]
    # bool is a subclass of int, so True/False must be rejected explicitly;
    # otherwise a JSON ``true`` would be accepted as confidence 1.
    if isinstance(confidence, bool) or not isinstance(confidence, (int, float)):
        return False, "Confidence must be a number"

    # NaN fails both comparisons and is therefore rejected here as well.
    if not (0.0 <= confidence <= 1.0):
        return False, "Confidence must be between 0.0 and 1.0"

    return True, "Valid"

def test_sample_transcript():
    """Run the placeholder sample-transcript check.

    The transcript below is only a fixture: nothing consumes it yet. The
    function prints two status lines and always returns True.
    """
    # Fixture for the eventual prompt run; intentionally unused for now.
    transcript_fixture = """
User: Clone the repo at forge.alexanderwhitestone.com/Timmy_Foundation/compounding-intelligence
Agent: Trying git clone... Error: 401 Unauthorized
Agent: Checking for token... Found in ~/.config/gitea/token
Agent: Token is gitea_token format, not OAuth
Agent: Clone successful with token authentication
Agent: Now examining the repo structure...
Agent: Found knowledge/index.json with 0 facts
Agent: Creating harvest-prompt.md in templates/
"""

    # Stand-in output until the prompt is actually executed on the fixture.
    status_lines = (
        "Sample transcript processed",
        "Expected categories: fact, pitfall, pattern, tool-quirk, question",
    )
    for status in status_lines:
        print(status)
    return True

def main():
    """Run the prompt smoke tests and return a process exit code.

    The prompt path is relative, so it is resolved against the current
    working directory.

    Returns:
        0 when every check passes, 1 as soon as one check fails.
    """
    print("Testing knowledge extraction prompt...")

    # Test 1: Validate prompt file exists (a directory at that path does not count)
    prompt_path = Path("templates/harvest-prompt.md")
    if not prompt_path.is_file():
        print("ERROR: harvest-prompt.md not found")
        return 1

    print(f"OK: Prompt file exists: {prompt_path}")

    # Test 2: Check prompt size -- an empty prompt cannot drive extraction
    prompt_size = prompt_path.stat().st_size
    if prompt_size == 0:
        print("ERROR: harvest-prompt.md is empty")
        return 1

    print(f"OK: Prompt size: {prompt_size} bytes")

    # Test 3: Test sample transcript processing; a falsy result is a failure
    # and must not fall through to the success message.
    if not test_sample_transcript():
        print("ERROR: Sample transcript test failed")
        return 1

    print("OK: Sample transcript test passed")

    print("\nAll tests passed!")
    return 0


if __name__ == "__main__":
    sys.exit(main())