diff --git a/scripts/meaning-kernels/__pycache__/extract_meaning_kernels.cpython-312.pyc b/scripts/meaning-kernels/__pycache__/extract_meaning_kernels.cpython-312.pyc
index 70dba65e..017083ea 100644
Binary files a/scripts/meaning-kernels/__pycache__/extract_meaning_kernels.cpython-312.pyc and b/scripts/meaning-kernels/__pycache__/extract_meaning_kernels.cpython-312.pyc differ
diff --git a/scripts/meaning-kernels/extract_meaning_kernels.py b/scripts/meaning-kernels/extract_meaning_kernels.py
index 2af13bca..81d78017 100755
--- a/scripts/meaning-kernels/extract_meaning_kernels.py
+++ b/scripts/meaning-kernels/extract_meaning_kernels.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-Meaning Kernel Extraction Pipeline
+Improved Meaning Kernel Extraction Pipeline
 Extract structured meaning kernels from academic PDF diagrams.
 Issue #493: [Multimodal] Extract Meaning Kernels from Research Diagrams
 """
@@ -8,9 +8,10 @@ import os
 import sys
 import json
 import argparse
+import re
 from pathlib import Path
 from datetime import datetime
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, Tuple
 import hashlib
 
 # Try to import vision libraries
@@ -40,13 +41,14 @@ class MeaningKernel:
     
     def __init__(self, kernel_id: str, content: str, source: str, 
                  kernel_type: str = "text", confidence: float = 0.0,
-                 metadata: Dict[str, Any] = None):
+                 metadata: Dict[str, Any] = None, tags: List[str] = None):
         self.kernel_id = kernel_id
         self.content = content
         self.source = source
-        self.kernel_type = kernel_type  # text, structure, summary, philosophical
+        self.kernel_type = kernel_type  # text, structure, summary, philosophical, semantic
         self.confidence = confidence
         self.metadata = metadata or {}
+        self.tags = tags or []
         self.timestamp = datetime.now().isoformat()
         self.hash = self._generate_hash()
     
@@ -64,18 +66,26 @@ class MeaningKernel:
             "kernel_type": self.kernel_type,
             "confidence": self.confidence,
             "metadata": self.metadata,
+            "tags": self.tags,
             "timestamp": self.timestamp,
             "hash": self.hash
         }
     
     def __str__(self) -> str:
-        return f"Kernel[{self.kernel_id}]: {self.content[:100]}..."
+        return f"Kernel[{self.kernel_id}] ({self.kernel_type}): {self.content[:100]}..."
 
 class DiagramAnalyzer:
     """Analyze diagrams using multiple methods."""
     
     def __init__(self, config: Dict[str, Any] = None):
         self.config = config or {}
+        self.philosophical_keywords = self.config.get("philosophical_keywords", [
+            "truth", "knowledge", "wisdom", "meaning", "purpose",
+            "existence", "reality", "consciousness", "ethics", "morality",
+            "beauty", "justice", "freedom", "responsibility", "identity",
+            "causality", "determinism", "free will", "rationality", "logic",
+            "metaphysics", "epistemology", "ontology", "phenomenology"
+        ])
     
     def analyze_image(self, image_path: str) -> Dict[str, Any]:
         """Analyze an image using multiple methods."""
@@ -90,43 +100,183 @@ class DiagramAnalyzer:
             "aspect_ratio": image.width / image.height,
             "mode": image.mode,
             "format": image.format,
-            "size_bytes": os.path.getsize(image_path)
+            "size_bytes": os.path.getsize(image_path),
+            "color_analysis": self._analyze_colors(image)
         }
         
         # OCR text extraction
         if TESSERACT_AVAILABLE:
             try:
-                ocr_text = pytesseract.image_to_string(image)
-                analysis["ocr_text"] = ocr_text.strip()
-                analysis["ocr_confidence"] = self._estimate_ocr_confidence(image)
+                ocr_data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
+                ocr_text = " ".join([text for text in ocr_data['text'] if text.strip()])
+                analysis["ocr_text"] = ocr_text
+                analysis["ocr_confidence"] = self._calculate_ocr_confidence(ocr_data)
+                analysis["ocr_word_count"] = len(ocr_text.split())
+                analysis["ocr_lines"] = self._extract_ocr_lines(ocr_data)
             except Exception as e:
                 analysis["ocr_text"] = ""
                 analysis["ocr_confidence"] = 0.0
                 analysis["ocr_error"] = str(e)
         
         # Diagram type estimation
-        analysis["diagram_type"] = self._estimate_diagram_type(image)
+        analysis["diagram_type"] = self._estimate_diagram_type(image, analysis)
+        
+        # Content analysis
+        analysis["content_analysis"] = self._analyze_content(analysis)
         
         return analysis
     
-    def _estimate_ocr_confidence(self, image: Image.Image) -> float:
-        """Estimate OCR confidence (simplified)."""
-        # In reality, would use pytesseract's confidence output
-        return 0.8  # Placeholder
+    def _analyze_colors(self, image: Image.Image) -> Dict[str, Any]:
+        """Return the top-5 dominant colors, unique-color count and grayscale flag; the fallback result omits the flag."""
+        # Work on an RGB version so _is_grayscale() can unpack each pixel as (r, g, b)
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        
+        # getcolors() returns None when the image has more than maxcolors distinct colors
+        colors = image.getcolors(maxcolors=10000)
+        if colors:
+            # Entries are (count, color) pairs; order them most frequent first
+            colors.sort(key=lambda x: x[0], reverse=True)
+            total_pixels = image.width * image.height
+            
+            # Keep the five most frequent colors with their share of all pixels
+            dominant_colors = []
+            for count, color in colors[:5]:
+                percentage = (count / total_pixels) * 100
+                dominant_colors.append({
+                    "color": color,
+                    "count": count,
+                    "percentage": round(percentage, 2)
+                })
+            
+            return {
+                "dominant_colors": dominant_colors,
+                "unique_colors": len(colors),
+                "is_grayscale": self._is_grayscale(image)
+            }
+        
+        return {"dominant_colors": [], "unique_colors": 0}
     
-    def _estimate_diagram_type(self, image: Image.Image) -> str:
+    def _is_grayscale(self, image: Image.Image) -> bool:
+        """Return True if every pixel on a coarse sample grid (about ten steps per axis) has equal R, G and B; expects an RGB image."""
+        # Steps are clamped to 1: dimension // 10 is 0 for images under 10 px and range() rejects a zero step
+        width, height = image.size
+        for x in range(0, width, max(1, width // 10)):
+            for y in range(0, height, max(1, height // 10)):
+                r, g, b = image.getpixel((x, y))
+                if not (r == g == b):
+                    return False
+        return True
+    
+    def _calculate_ocr_confidence(self, ocr_data: Dict[str, Any]) -> float:
+        """Return the mean confidence, scaled to 0.0-1.0, of entries scored above zero; conf values may be ints, floats or numeric strings."""
+        confidences = [float(conf) for conf in ocr_data['conf'] if float(conf) > 0]
+        if confidences:
+            return sum(confidences) / len(confidences) / 100.0
+        return 0.0
+    
+    def _extract_ocr_lines(self, ocr_data: Dict[str, Any]) -> List[str]:
+        """Group recognized words into text lines, starting a new line whenever (block_num, par_num, line_num) changes."""
+        lines = []
+        current_line = []
+        current_block = -1
+        current_par = -1
+        current_line_num = -1
+        
+        for i, word in enumerate(ocr_data['text']):
+            if float(ocr_data['conf'][i]) <= 0 or not word.strip():
+                continue  # skip entries scored 0 or below and whitespace-only tokens
+                
+            block_num = ocr_data['block_num'][i]
+            par_num = ocr_data['par_num'][i]
+            line_num = ocr_data['line_num'][i]
+            
+            if (block_num != current_block or 
+                par_num != current_par or 
+                line_num != current_line_num):
+                
+                if current_line:
+                    lines.append(' '.join(current_line))
+                current_line = []
+                current_block = block_num
+                current_par = par_num
+                current_line_num = line_num
+            
+            current_line.append(word)
+        
+        if current_line:
+            lines.append(' '.join(current_line))
+        
+        return lines
+    
+    def _estimate_diagram_type(self, image: Image.Image, analysis: Dict[str, Any]) -> str:
         """Estimate diagram type based on image characteristics."""
         width, height = image.size
         aspect_ratio = width / height
         
+        # Aspect-ratio buckets are checked first; each one returns immediately
         if aspect_ratio > 2:
             return "flowchart"
         elif aspect_ratio < 0.5:
             return "vertical_hierarchy"
         elif 0.8 <= aspect_ratio <= 1.2:
+            # Near-square images: _has_circular_patterns() picks circular vs. square
+            if self._has_circular_patterns(image):
+                return "circular_diagram"
             return "square_diagram"
-        else:
-            return "standard_diagram"
+        
+        # OCR keyword clues (substring tests); only reached when no aspect-ratio bucket above matched
+        ocr_text = analysis.get("ocr_text", "").lower()
+        if any(word in ocr_text for word in ["process", "flow", "step", "arrow"]):
+            return "process_diagram"
+        elif any(word in ocr_text for word in ["system", "component", "module"]):
+            return "system_diagram"
+        elif any(word in ocr_text for word in ["data", "information", "input", "output"]):
+            return "data_diagram"
+        
+        return "standard_diagram"
+    
+    def _has_circular_patterns(self, image: Image.Image) -> bool:
+        """Placeholder for circular-pattern detection; currently always returns False."""
+        # No detection is implemented here, so the image argument is unused
+        return False
+    
+    def _analyze_content(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
+        """Derive word count, capitalized-word entities and simple "A -> B" style relationships from analysis["ocr_text"]."""
+        ocr_text = analysis.get("ocr_text", "")
+        
+        content_analysis = {
+            "word_count": len(ocr_text.split()),
+            "has_text": bool(ocr_text),
+            "themes": [],
+            "entities": [],
+            "relationships": []
+        }
+        
+        if ocr_text:
+            # Entities are capitalized words; dict.fromkeys de-duplicates in first-seen order so output is stable across runs
+            words = ocr_text.split()
+            entities = [word for word in words if word[0].isupper() and len(word) > 2]
+            content_analysis["entities"] = list(dict.fromkeys(entities))[:10]
+            
+            # "to"/"from" require surrounding whitespace so they cannot match inside a word (e.g. "tomatoes")
+            relationship_patterns = [
+                r"(\w+)\s*->\s*(\w+)",
+                r"(\w+)\s*→\s*(\w+)",
+                r"(\w+)\s+to\s+(\w+)",
+                r"(\w+)\s+from\s+(\w+)"
+            ]
+            
+            for pattern in relationship_patterns:
+                matches = re.findall(pattern, ocr_text)
+                for match in matches:
+                    content_analysis["relationships"].append({
+                        "source": match[0],
+                        "target": match[1],
+                        "type": "connection"
+                    })
+        
+        return content_analysis
 
 class MeaningKernelExtractor:
     """Extract meaning kernels from diagrams."""
@@ -139,17 +289,34 @@ class MeaningKernelExtractor:
             "pages_processed": 0,
             "diagrams_analyzed": 0,
             "kernels_extracted": 0,
-            "errors": 0
+            "errors": 0,
+            "dependency_warnings": 0
         }
+        
+        # Check dependencies and update stats
+        if not PIL_AVAILABLE:
+            self.stats["dependency_warnings"] += 1
+        if not TESSERACT_AVAILABLE:
+            self.stats["dependency_warnings"] += 1
+        if not PDF2IMAGE_AVAILABLE:
+            self.stats["dependency_warnings"] += 1
     
     def extract_from_pdf(self, pdf_path: str, output_dir: str = None) -> List[MeaningKernel]:
         """Extract meaning kernels from a PDF file."""
         if not PDF2IMAGE_AVAILABLE:
-            raise ImportError("pdf2image is required for PDF processing")
+            print("Error: pdf2image is required for PDF processing")
+            print("Install with: pip install pdf2image")
+            print("System dependencies:")
+            print("  macOS: brew install poppler")
+            print("  Ubuntu: sudo apt-get install poppler-utils")
+            self.stats["errors"] += 1
+            return []
         
         pdf_path = Path(pdf_path)
         if not pdf_path.exists():
-            raise FileNotFoundError(f"PDF not found: {pdf_path}")
+            print(f"Error: PDF not found: {pdf_path}")
+            self.stats["errors"] += 1
+            return []
         
         print(f"Processing PDF: {pdf_path}")
         
@@ -229,16 +396,26 @@ class MeaningKernelExtractor:
                 kernel_type="text",
                 confidence=analysis.get("ocr_confidence", 0.0),
                 metadata={
-                    "word_count": len(analysis["ocr_text"].split()),
+                    "word_count": analysis.get("ocr_word_count", 0),
+                    "line_count": len(analysis.get("ocr_lines", [])),
                     "diagram_type": analysis.get("diagram_type", "unknown")
-                }
+                },
+                tags=["ocr", "text", "extracted"]
             )
             kernels.append(text_kernel)
         
         # 2. Structure kernel
         structure_content = f"Diagram type: {analysis.get('diagram_type', 'unknown')}. "
         structure_content += f"Dimensions: {analysis['dimensions']['width']}x{analysis['dimensions']['height']}. "
-        structure_content += f"Aspect ratio: {analysis['aspect_ratio']:.2f}."
+        structure_content += f"Aspect ratio: {analysis['aspect_ratio']:.2f}. "
+        
+        # Add color information
+        color_analysis = analysis.get("color_analysis", {})
+        if color_analysis.get("is_grayscale"):
+            structure_content += "Grayscale image. "
+        elif color_analysis.get("dominant_colors"):
+            top_color = color_analysis["dominant_colors"][0]
+            structure_content += f"Dominant color: RGB{top_color['color']} ({top_color['percentage']}%). "
         
         structure_kernel = MeaningKernel(
             kernel_id=f"{base_id}_structure",
@@ -249,8 +426,10 @@ class MeaningKernelExtractor:
             metadata={
                 "dimensions": analysis["dimensions"],
                 "aspect_ratio": analysis["aspect_ratio"],
-                "diagram_type": analysis.get("diagram_type", "unknown")
-            }
+                "diagram_type": analysis.get("diagram_type", "unknown"),
+                "color_analysis": color_analysis
+            },
+            tags=["structure", "layout", "visual"]
         )
         kernels.append(structure_kernel)
         
@@ -261,6 +440,11 @@ class MeaningKernelExtractor:
         else:
             summary += "No text detected."
         
+        # Add content analysis
+        content_analysis = analysis.get("content_analysis", {})
+        if content_analysis.get("entities"):
+            summary += f" Entities: {', '.join(content_analysis['entities'][:5])}."
+        
         summary_kernel = MeaningKernel(
             kernel_id=f"{base_id}_summary",
             content=summary,
@@ -269,14 +453,16 @@ class MeaningKernelExtractor:
             confidence=0.7,
             metadata={
                 "has_text": bool(analysis.get("ocr_text")),
-                "text_length": len(analysis.get("ocr_text", ""))
-            }
+                "text_length": len(analysis.get("ocr_text", "")),
+                "entities": content_analysis.get("entities", []),
+                "relationships": content_analysis.get("relationships", [])
+            },
+            tags=["summary", "overview", "analysis"]
         )
         kernels.append(summary_kernel)
         
         # 4. Philosophical kernel (if we have text)
         if analysis.get("ocr_text") and len(analysis["ocr_text"]) > 50:
-            # Simple philosophical extraction
             philosophical_content = self._extract_philosophical_content(analysis["ocr_text"])
             if philosophical_content:
                 philosophical_kernel = MeaningKernel(
@@ -287,33 +473,61 @@ class MeaningKernelExtractor:
                     confidence=0.6,
                     metadata={
                         "extraction_method": "keyword_analysis",
-                        "source_text_length": len(analysis["ocr_text"])
-                    }
+                        "source_text_length": len(analysis["ocr_text"]),
+                        "keywords_found": self._find_philosophical_keywords(analysis["ocr_text"])
+                    },
+                    tags=["philosophical", "meaning", "conceptual"]
                 )
                 kernels.append(philosophical_kernel)
         
+        # 5. Semantic kernel (if we have relationships)
+        content_analysis = analysis.get("content_analysis", {})
+        if content_analysis.get("relationships"):
+            relationships = content_analysis["relationships"]
+            semantic_content = f"Semantic relationships detected: {len(relationships)} connections. "
+            for rel in relationships[:3]:
+                semantic_content += f"{rel['source']} → {rel['target']}. "
+            
+            semantic_kernel = MeaningKernel(
+                kernel_id=f"{base_id}_semantic",
+                content=semantic_content,
+                source=source,
+                kernel_type="semantic",
+                confidence=0.8,
+                metadata={
+                    "relationship_count": len(relationships),
+                    "relationships": relationships
+                },
+                tags=["semantic", "relationships", "connections"]
+            )
+            kernels.append(semantic_kernel)
+        
         # Add to internal list
         self.kernels.extend(kernels)
         
         return kernels
     
     def _extract_philosophical_content(self, text: str) -> Optional[str]:
-        """Extract philosophical content from text (simplified)."""
+        """Extract philosophical content from text."""
         # Look for philosophical keywords
-        philosophical_keywords = [
-            "truth", "knowledge", "wisdom", "meaning", "purpose",
-            "existence", "reality", "consciousness", "ethics", "morality",
-            "beauty", "justice", "freedom", "responsibility", "identity"
-        ]
-        
-        text_lower = text.lower()
-        found_keywords = [kw for kw in philosophical_keywords if kw in text_lower]
+        found_keywords = self._find_philosophical_keywords(text)
         
         if found_keywords:
             return f"Philosophical themes detected: {', '.join(found_keywords)}. "                    f"Source text explores concepts of {found_keywords[0]}."
         
         return None
     
+    def _find_philosophical_keywords(self, text: str) -> List[str]:
+        """Return the analyzer's philosophical keywords that start a word in *text* (case-insensitive), in configured order."""
+        text_lower = text.lower()
+        found_keywords = []
+        # A leading \b keeps inflections ("truths") but rejects mid-word hits ("logic" in "biological")
+        for keyword in self.analyzer.philosophical_keywords:
+            if re.search(rf"\b{re.escape(keyword.lower())}", text_lower):
+                found_keywords.append(keyword)
+        
+        return found_keywords
+    
     def _save_kernels(self, kernels: List[MeaningKernel], output_path: Path):
         """Save kernels to files."""
         if not kernels:
@@ -346,6 +560,7 @@ class MeaningKernelExtractor:
                     f.write(f"- **Source**: {kernel.source}\n")
                     f.write(f"- **Confidence**: {kernel.confidence:.2f}\n")
                     f.write(f"- **Timestamp**: {kernel.timestamp}\n")
+                    f.write(f"- **Tags**: {', '.join(kernel.tags)}\n")
                     f.write(f"- **Content**: {kernel.content}\n")
                     f.write(f"- **Metadata**: {json.dumps(kernel.metadata, indent=2)}\n\n")
         
@@ -416,6 +631,7 @@ def main():
     print(f"Diagrams analyzed: {stats['diagrams_analyzed']}")
     print(f"Kernels extracted: {stats['kernels_extracted']}")
     print(f"Errors: {stats['errors']}")
+    print(f"Dependency warnings: {stats['dependency_warnings']}")
     print("="*50)
     
     # Exit with appropriate code
diff --git a/scripts/meaning-kernels/test_extraction.py b/scripts/meaning-kernels/test_extraction.py
index cd77e419..96842737 100755
--- a/scripts/meaning-kernels/test_extraction.py
+++ b/scripts/meaning-kernels/test_extraction.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-Test script for meaning kernel extraction pipeline.
+Improved test script for meaning kernel extraction pipeline.
 """
 import os
 import sys
@@ -10,8 +10,8 @@ from pathlib import Path
 # Add parent directory to path
 sys.path.insert(0, str(Path(__file__).parent))
 
-def create_test_image():
-    """Create a simple test image."""
+def create_test_image_with_text():
+    """Create a test image with text."""
     try:
         from PIL import Image, ImageDraw, ImageFont
         
@@ -35,7 +35,12 @@ def create_test_image():
             "- Data ingestion",
             "- Feature extraction",
             "- Pattern recognition",
-            "- Knowledge representation"
+            "- Knowledge representation",
+            "",
+            "Philosophical aspects:",
+            "- Truth and knowledge",
+            "- Meaning and purpose",
+            "- Reality and existence"
         ]
         
         y = 50
@@ -55,10 +60,10 @@ def create_test_image():
         
         # Save to temp file
         temp_dir = Path(tempfile.mkdtemp())
-        image_path = temp_dir / "test_diagram.png"
+        image_path = temp_dir / "test_diagram_with_text.png"
         img.save(image_path)
         
-        print(f"Created test image: {image_path}")
+        print(f"Created test image with text: {image_path}")
         return image_path
         
     except ImportError as e:
@@ -67,7 +72,7 @@ def create_test_image():
 
 def test_extraction():
     """Test the extraction pipeline."""
-    print("Testing Meaning Kernel Extraction Pipeline...")
+    print("Testing Improved Meaning Kernel Extraction Pipeline...")
     
     # Check if we can import the extractor
     try:
@@ -78,7 +83,7 @@ def test_extraction():
         return False
     
     # Create test image
-    test_image = create_test_image()
+    test_image = create_test_image_with_text()
     if not test_image:
         print("Skipping test - cannot create test image")
         return True
@@ -97,6 +102,7 @@ def test_extraction():
             print(f"\nKernel: {kernel.kernel_id}")
             print(f"  Type: {kernel.kernel_type}")
             print(f"  Confidence: {kernel.confidence:.2f}")
+            print(f"  Tags: {', '.join(kernel.tags)}")
             print(f"  Content: {kernel.content[:100]}...")
         
         # Get stats
@@ -105,6 +111,13 @@ def test_extraction():
         for key, value in stats.items():
             print(f"  {key}: {value}")
         
+        # Check for philosophical kernels
+        philosophical_kernels = [k for k in kernels if k.kernel_type == "philosophical"]
+        if philosophical_kernels:
+            print(f"\n✓ Found {len(philosophical_kernels)} philosophical kernel(s)")
+        else:
+            print("\n⚠ No philosophical kernels found (may need OCR dependencies)")
+        
         return True
         
     except Exception as e:
@@ -114,7 +127,7 @@ def test_extraction():
         return False
 
 if __name__ == "__main__":
-    print("Meaning Kernel Extraction Pipeline Test")
+    print("Improved Meaning Kernel Extraction Pipeline Test")
     print("=" * 50)
     
     success = test_extraction()