# timmy-config/scripts/meaning-kernels/test_extraction.py

#!/usr/bin/env python3
"""
Improved test script for meaning kernel extraction pipeline.
"""
import os
import sys
import tempfile
from pathlib import Path

# Put the directory containing this script at the front of sys.path so the
# `extract_meaning_kernels` import in test_extraction() is resolved from there
# first (presumably that module lives next to this file -- verify).
sys.path.insert(0, str(Path(__file__).parent))

def create_test_image_with_text():
    """Create a test image with text."""
    try:
        from PIL import Image, ImageDraw, ImageFont

        # Create image
        img = Image.new('RGB', (800, 600), color='white')
        draw = ImageDraw.Draw(img)

        # Draw some content
        try:
            font = ImageFont.truetype("Arial", 20)
        except:
            font = ImageFont.load_default()

        # Draw text
        text_lines = [
            "Research Diagram: Knowledge Extraction Pipeline",
            "",
            "Input → Processing → Output",
            "",
            "Key Concepts:",
            "- Data ingestion",
            "- Feature extraction",
            "- Pattern recognition",
            "- Knowledge representation",
            "",
            "Philosophical aspects:",
            "- Truth and knowledge",
            "- Meaning and purpose",
            "- Reality and existence"
        ]

        y = 50
        for line in text_lines:
            draw.text((50, y), line, fill='black', font=font)
            y += 30

        # Draw a simple flowchart
        draw.rectangle([300, 200, 500, 250], outline='blue', width=2)
        draw.text((320, 210), "Process", fill='blue', font=font)

        draw.line([500, 225, 600, 225], fill='black', width=2)
        draw.polygon([600, 225, 590, 215, 590, 235], fill='black')

        draw.rectangle([600, 200, 750, 250], outline='green', width=2)
        draw.text((620, 210), "Output", fill='green', font=font)

        # Save to temp file
        temp_dir = Path(tempfile.mkdtemp())
        image_path = temp_dir / "test_diagram_with_text.png"
        img.save(image_path)

        print(f"Created test image with text: {image_path}")
        return image_path

    except ImportError as e:
        print(f"Cannot create test image: {e}")
        return None

def test_extraction():
    """Run the extraction pipeline against a generated test image.

    Imports ``MeaningKernelExtractor``, builds a test image with
    ``create_test_image_with_text()``, extracts kernels from it and prints
    each kernel plus the extractor statistics.

    Returns:
        ``True`` when extraction completed, or when the test was skipped
        because no test image could be created; ``False`` when the extractor
        could not be imported or extraction raised an exception.
    """
    print("Testing Improved Meaning Kernel Extraction Pipeline...")

    # Check if we can import the extractor
    try:
        from extract_meaning_kernels import MeaningKernelExtractor
    except ImportError as e:
        print(f"✗ Failed to import: {e}")
        return False
    print("✓ Successfully imported MeaningKernelExtractor")

    # Create test image
    test_image = create_test_image_with_text()
    if not test_image:
        # A missing image dependency is treated as a skip, not a failure.
        print("Skipping test - cannot create test image")
        return True

    # Test extraction
    try:
        extractor = MeaningKernelExtractor()

        print("\nExtracting kernels from test image...")
        kernels = extractor.extract_from_image(test_image)

        print(f"✓ Extracted {len(kernels)} kernels")

        # Print kernel details
        for kernel in kernels:
            print(f"\nKernel: {kernel.kernel_id}")
            print(f"  Type: {kernel.kernel_type}")
            print(f"  Confidence: {kernel.confidence:.2f}")
            print(f"  Tags: {', '.join(kernel.tags)}")
            print(f"  Content: {kernel.content[:100]}...")

        # Get stats
        stats = extractor.get_stats()
        print("\nStatistics:")
        for key, value in stats.items():
            print(f"  {key}: {value}")

        # Check for philosophical kernels
        philosophical_kernels = [k for k in kernels if k.kernel_type == "philosophical"]
        if philosophical_kernels:
            print(f"\n✓ Found {len(philosophical_kernels)} philosophical kernel(s)")
        else:
            print("\n⚠ No philosophical kernels found (may need OCR dependencies)")

        return True

    except Exception as e:
        # Top-level boundary of the test: report the failure with a full
        # traceback and signal it through the return value.
        print(f"✗ Extraction test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

    finally:
        # Best-effort cleanup of the generated image and its temporary
        # directory so repeated runs do not accumulate temp files. rmdir()
        # only removes an empty directory, so anything else placed there is
        # left untouched.
        try:
            test_image.unlink()
            test_image.parent.rmdir()
        except OSError:
            pass

if __name__ == "__main__":
    # Script entry point: run the single pipeline test between two banner
    # rules and map its boolean outcome onto the process exit status.
    banner = "=" * 50

    print("Improved Meaning Kernel Extraction Pipeline Test")
    print(banner)

    passed = test_extraction()

    print("\n" + banner)
    print("✓ All tests passed!" if passed else "✗ Some tests failed")
    sys.exit(0 if passed else 1)