Files
timmy-config/scripts/meaning-kernels/test_extraction.py
Alexander Whitestone efdc0dc886 Improve #493: Enhanced meaning kernel extraction pipeline
- Added 5 kernel types: text, structure, summary, philosophical, semantic
- Improved diagram type detection with content analysis
- Added color analysis and grayscale detection
- Enhanced philosophical keyword extraction
- Added semantic relationship detection
- Improved error handling for missing dependencies
- Added comprehensive testing with text-rich test images
- Enhanced metadata and tagging system

Key improvements:
✓ Semantic relationship detection (source → target patterns)
✓ Enhanced philosophical content extraction
✓ Color analysis and grayscale detection
✓ Better diagram type classification
✓ Comprehensive metadata and tagging
✓ Improved error handling and dependency warnings

Still requires OCR dependencies for text extraction:
- pytesseract for OCR
- pdf2image for PDF processing
- Tesseract OCR engine (see issue #563)
2026-04-14 11:44:55 -04:00

142 lines
4.3 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Improved test script for meaning kernel extraction pipeline.
"""
import os
import sys
import tempfile
from pathlib import Path
# Add this script's own directory to the import path so sibling modules (e.g. extract_meaning_kernels) can be imported
sys.path.insert(0, str(Path(__file__).parent))
def _draw_text_with_fallback(draw, position, text, fill, font):
    """Draw *text* on *draw*, retrying with Latin-1-safe text if encoding fails.

    NOTE(review): Pillow's built-in bitmap font (what ``load_default()`` is
    understood to return on older releases or builds without FreeType) only
    accepts Latin-1 text, so characters such as "→" raise
    ``UnicodeEncodeError`` -- confirm against the Pillow version in use.

    Args:
        draw: An ``ImageDraw`` drawing context.
        position: ``(x, y)`` tuple for the top-left corner of the text.
        text: The string to render.
        fill: Text colour.
        font: Font object passed through to ``draw.text``.
    """
    try:
        draw.text(position, text, fill=fill, font=font)
    except UnicodeEncodeError:
        # Keep the arrow readable, then replace anything else that the font
        # cannot encode instead of aborting the whole image.
        safe_text = (
            text.replace("→", "->")
            .encode("latin-1", "replace")
            .decode("latin-1")
        )
        draw.text(position, safe_text, fill=fill, font=font)


def create_test_image_with_text():
    """Create a PNG test diagram containing text and a tiny flowchart.

    The image is written to ``test_diagram_with_text.png`` inside a fresh
    directory created with ``tempfile.mkdtemp()``. Nothing in this function
    deletes that directory; the caller owns it.

    Returns:
        pathlib.Path to the saved image, or ``None`` if Pillow cannot be
        imported.
    """
    # Only the import is guarded: a missing Pillow is an expected condition,
    # whereas any other failure below is a real error worth surfacing.
    try:
        from PIL import Image, ImageDraw, ImageFont
    except ImportError as e:
        print(f"Cannot create test image: {e}")
        return None

    # Create image
    img = Image.new('RGB', (800, 600), color='white')
    draw = ImageDraw.Draw(img)

    # Prefer Arial, but fall back to Pillow's bundled font when it is not
    # available. Catch only font-loading failures (unreadable font file, or
    # FreeType support missing) rather than everything.
    try:
        font = ImageFont.truetype("Arial", 20)
    except (OSError, ImportError):
        font = ImageFont.load_default()

    # Draw text
    text_lines = [
        "Research Diagram: Knowledge Extraction Pipeline",
        "",
        "Input → Processing → Output",
        "",
        "Key Concepts:",
        "- Data ingestion",
        "- Feature extraction",
        "- Pattern recognition",
        "- Knowledge representation",
        "",
        "Philosophical aspects:",
        "- Truth and knowledge",
        "- Meaning and purpose",
        "- Reality and existence",
    ]
    # Lines start at y=50 and are spaced 30 px apart.
    for index, line in enumerate(text_lines):
        _draw_text_with_fallback(
            draw, (50, 50 + 30 * index), line, fill='black', font=font
        )

    # Draw a simple flowchart: "Process" box -> arrow -> "Output" box.
    draw.rectangle([300, 200, 500, 250], outline='blue', width=2)
    draw.text((320, 210), "Process", fill='blue', font=font)
    draw.line([500, 225, 600, 225], fill='black', width=2)
    draw.polygon([600, 225, 590, 215, 590, 235], fill='black')  # arrow head
    draw.rectangle([600, 200, 750, 250], outline='green', width=2)
    draw.text((620, 210), "Output", fill='green', font=font)

    # Save to temp file
    temp_dir = Path(tempfile.mkdtemp())
    image_path = temp_dir / "test_diagram_with_text.png"
    img.save(image_path)
    print(f"Created test image with text: {image_path}")
    return image_path
def test_extraction():
    """Smoke-test the extraction pipeline against a generated test image.

    Imports ``MeaningKernelExtractor``, builds a test image via
    ``create_test_image_with_text()``, runs ``extract_from_image`` on it and
    prints each kernel plus the extractor statistics.

    Returns:
        bool: ``False`` if the extractor cannot be imported or extraction
        raises; ``True`` if extraction completes, or if the test image could
        not be created (the test is then skipped).
    """
    print("Testing Improved Meaning Kernel Extraction Pipeline...")

    # Check if we can import the extractor
    try:
        from extract_meaning_kernels import MeaningKernelExtractor
    except ImportError as e:
        print(f"✗ Failed to import: {e}")
        return False
    print("✓ Successfully imported MeaningKernelExtractor")

    # Create test image
    test_image = create_test_image_with_text()
    if not test_image:
        print("Skipping test - cannot create test image")
        return True

    # Test extraction
    try:
        extractor = MeaningKernelExtractor()
        print("\nExtracting kernels from test image...")
        kernels = extractor.extract_from_image(test_image)
        print(f"✓ Extracted {len(kernels)} kernels")

        # Print kernel details
        for kernel in kernels:
            print(f"\nKernel: {kernel.kernel_id}")
            print(f"  Type: {kernel.kernel_type}")
            print(f"  Confidence: {kernel.confidence:.2f}")
            print(f"  Tags: {', '.join(kernel.tags)}")
            print(f"  Content: {kernel.content[:100]}...")

        # Get stats
        stats = extractor.get_stats()
        print("\nStatistics:")
        for key, value in stats.items():
            print(f"  {key}: {value}")

        # Check for philosophical kernels
        philosophical_kernels = [k for k in kernels if k.kernel_type == "philosophical"]
        if philosophical_kernels:
            print(f"\n✓ Found {len(philosophical_kernels)} philosophical kernel(s)")
        else:
            print("\n⚠ No philosophical kernels found (may need OCR dependencies)")
        return True
    except Exception as e:
        # Top-level boundary of the test: report the failure with a full
        # traceback and turn it into a False result.
        print(f"✗ Extraction test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
    finally:
        # Best-effort cleanup of the generated image and its directory.
        # rmdir() only removes an empty directory, so nothing other than the
        # test image itself can be deleted here.
        image_file = Path(test_image)
        for cleanup in (image_file.unlink, image_file.parent.rmdir):
            try:
                cleanup()
            except OSError:
                pass  # already gone or directory not empty: leave it alone
if __name__ == "__main__":
    # Script entry point: run the smoke test between two separator rules and
    # exit with 0 on success, 1 on failure.
    separator = "=" * 50
    print("Improved Meaning Kernel Extraction Pipeline Test")
    print(separator)
    passed = test_extraction()
    print("\n" + separator)
    print("✓ All tests passed!" if passed else "✗ Some tests failed")
    sys.exit(0 if passed else 1)