A bit of restructuring for simplicity and organization

2025-10-01 23:29:25 +00:00
parent 0411ca1880
commit a7ff4d49e9
8 changed files with 2005 additions and 1945 deletions
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -1,346 +1,346 @@
-#!/usr/bin/env python3
-"""
-Vision Tools Module
-
-This module provides vision analysis tools that work with image URLs.
-Uses Gemini Flash via Nous Research API for intelligent image understanding.
-
-Available tools:
- vision_analyze_tool: Analyze images from URLs with custom prompts
-
-Features:
- Comprehensive image description
- Context-aware analysis based on user queries
- Proper error handling and validation
- Debug logging support
-
-Usage:
-    from vision_tools import vision_analyze_tool
-    import asyncio
-    
-    # Analyze an image
-    result = await vision_analyze_tool(
-        image_url="https://example.com/image.jpg",
-        user_prompt="What architectural style is this building?"
-    )
-"""
-
-import json
-import os
-import asyncio
-import uuid
-import datetime
-from pathlib import Path
-from typing import Dict, Any, Optional
-from openai import AsyncOpenAI
-
-# Initialize Nous Research API client for vision processing
-nous_client = AsyncOpenAI(
-    api_key=os.getenv("NOUS_API_KEY"),
-    base_url="https://inference-api.nousresearch.com/v1"
-)
-
-# Configuration for vision processing
-DEFAULT_VISION_MODEL = "gemini-2.5-flash"
-
-# Debug mode configuration
-DEBUG_MODE = os.getenv("VISION_TOOLS_DEBUG", "false").lower() == "true"
-DEBUG_SESSION_ID = str(uuid.uuid4())
-DEBUG_LOG_PATH = Path("./logs")
-DEBUG_DATA = {
-    "session_id": DEBUG_SESSION_ID,
-    "start_time": datetime.datetime.now().isoformat(),
-    "debug_enabled": DEBUG_MODE,
-    "tool_calls": []
-} if DEBUG_MODE else None
-
-# Create logs directory if debug mode is enabled
-if DEBUG_MODE:
-    DEBUG_LOG_PATH.mkdir(exist_ok=True)
-    print(f"🐛 Vision debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
-
-
-def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
-    """
-    Log a debug call entry to the global debug data structure.
-    
-    Args:
-        tool_name (str): Name of the tool being called
-        call_data (Dict[str, Any]): Data about the call including parameters and results
-    """
-    if not DEBUG_MODE or not DEBUG_DATA:
-        return
-    
-    call_entry = {
-        "timestamp": datetime.datetime.now().isoformat(),
-        "tool_name": tool_name,
-        **call_data
-    }
-    
-    DEBUG_DATA["tool_calls"].append(call_entry)
-
-
-def _save_debug_log() -> None:
-    """
-    Save the current debug data to a JSON file in the logs directory.
-    """
-    if not DEBUG_MODE or not DEBUG_DATA:
-        return
-    
-    try:
-        debug_filename = f"vision_tools_debug_{DEBUG_SESSION_ID}.json"
-        debug_filepath = DEBUG_LOG_PATH / debug_filename
-        
-        # Update end time
-        DEBUG_DATA["end_time"] = datetime.datetime.now().isoformat()
-        DEBUG_DATA["total_calls"] = len(DEBUG_DATA["tool_calls"])
-        
-        with open(debug_filepath, 'w', encoding='utf-8') as f:
-            json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)
-        
-        print(f"🐛 Vision debug log saved: {debug_filepath}")
-        
-    except Exception as e:
-        print(f"❌ Error saving vision debug log: {str(e)}")
-
-
-def _validate_image_url(url: str) -> bool:
-    """
-    Basic validation of image URL format.
-    
-    Args:
-        url (str): The URL to validate
-        
-    Returns:
-        bool: True if URL appears to be valid, False otherwise
-    """
-    if not url or not isinstance(url, str):
-        return False
-    
-    # Check if it's a valid URL format
-    if not (url.startswith('http://') or url.startswith('https://')):
-        return False
-    
-    # Check for common image extensions (optional, as URLs may not have extensions)
-    image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg']
-    
-    return True  # Allow all HTTP/HTTPS URLs for flexibility
-
-
-async def vision_analyze_tool(
-    image_url: str,
-    user_prompt: str,
-    model: str = DEFAULT_VISION_MODEL
-) -> str:
-    """
-    Analyze an image from a URL using vision AI.
-    
-    This tool processes images using Gemini Flash via Nous Research API.
-    The user_prompt parameter is expected to be pre-formatted by the calling
-    function (typically model_tools.py) to include both full description
-    requests and specific questions.
-    
-    Args:
-        image_url (str): The URL of the image to analyze
-        user_prompt (str): The pre-formatted prompt for the vision model
-        model (str): The vision model to use (default: gemini-2.5-flash)
-    
-    Returns:
-        str: JSON string containing the analysis results with the following structure:
-             {
-                 "success": bool,
-                 "analysis": str (defaults to error message if None)
-             }
-    
-    Raises:
-        Exception: If analysis fails or API key is not set
-    """
-    debug_call_data = {
-        "parameters": {
-            "image_url": image_url,
-            "user_prompt": user_prompt,
-            "model": model
-        },
-        "error": None,
-        "success": False,
-        "analysis_length": 0,
-        "model_used": model
-    }
-    
-    try:
-        print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}")
-        print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")
-        
-        # Validate image URL
-        if not _validate_image_url(image_url):
-            raise ValueError("Invalid image URL format. Must start with http:// or https://")
-        
-        # Check API key availability
-        if not os.getenv("NOUS_API_KEY"):
-            raise ValueError("NOUS_API_KEY environment variable not set")
-        
-        # Use the prompt as provided (model_tools.py now handles full description formatting)
-        comprehensive_prompt = user_prompt
-        
-        # Prepare the message with image URL format
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": comprehensive_prompt
-                    },
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": image_url
-                        }
-                    }
-                ]
-            }
-        ]
-        
-        print(f"🧠 Processing image with {model}...")
-        
-        # Call the vision API
-        response = await nous_client.chat.completions.create(
-            model=model,
-            messages=messages,
-            temperature=0.1,  # Low temperature for consistent analysis
-            max_tokens=2000   # Generous limit for detailed analysis
-        )
-        
-        # Extract the analysis
-        analysis = response.choices[0].message.content.strip()
-        analysis_length = len(analysis)
-        
-        print(f"✅ Image analysis completed ({analysis_length} characters)")
-        
-        # Prepare successful response
-        result = {
-            "success": True,
-            "analysis": analysis or "There was a problem with the request and the image could not be analyzed."
-        }
-        
-        debug_call_data["success"] = True
-        debug_call_data["analysis_length"] = analysis_length
-        
-        # Log debug information
-        _log_debug_call("vision_analyze_tool", debug_call_data)
-        _save_debug_log()
-        
-        return json.dumps(result, indent=2)
-        
-    except Exception as e:
-        error_msg = f"Error analyzing image: {str(e)}"
-        print(f"❌ {error_msg}")
-        
-        # Prepare error response
-        result = {
-            "success": False,
-            "analysis": "There was a problem with the request and the image could not be analyzed."
-        }
-        
-        debug_call_data["error"] = error_msg
-        _log_debug_call("vision_analyze_tool", debug_call_data)
-        _save_debug_log()
-        
-        return json.dumps(result, indent=2)
-
-
-def check_nous_api_key() -> bool:
-    """
-    Check if the Nous Research API key is available in environment variables.
-    
-    Returns:
-        bool: True if API key is set, False otherwise
-    """
-    return bool(os.getenv("NOUS_API_KEY"))
-
-
-def check_vision_requirements() -> bool:
-    """
-    Check if all requirements for vision tools are met.
-    
-    Returns:
-        bool: True if requirements are met, False otherwise
-    """
-    return check_nous_api_key()
-
-
-def get_debug_session_info() -> Dict[str, Any]:
-    """
-    Get information about the current debug session.
-    
-    Returns:
-        Dict[str, Any]: Dictionary containing debug session information
-    """
-    if not DEBUG_MODE or not DEBUG_DATA:
-        return {
-            "enabled": False,
-            "session_id": None,
-            "log_path": None,
-            "total_calls": 0
-        }
-    
-    return {
-        "enabled": True,
-        "session_id": DEBUG_SESSION_ID,
-        "log_path": str(DEBUG_LOG_PATH / f"vision_tools_debug_{DEBUG_SESSION_ID}.json"),
-        "total_calls": len(DEBUG_DATA["tool_calls"])
-    }
-
-
-if __name__ == "__main__":
-    """
-    Simple test/demo when run directly
-    """
-    print("👁️ Vision Tools Module")
-    print("=" * 40)
-    
-    # Check if API key is available
-    api_available = check_nous_api_key()
-    
-    if not api_available:
-        print("❌ NOUS_API_KEY environment variable not set")
-        print("Please set your API key: export NOUS_API_KEY='your-key-here'")
-        print("Get API key at: https://inference-api.nousresearch.com/")
-        exit(1)
-    else:
-        print("✅ Nous Research API key found")
-    
-    print("🛠️ Vision tools ready for use!")
-    print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
-    
-    # Show debug mode status
-    if DEBUG_MODE:
-        print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
-        print(f"   Debug logs will be saved to: ./logs/vision_tools_debug_{DEBUG_SESSION_ID}.json")
-    else:
-        print("🐛 Debug mode disabled (set VISION_TOOLS_DEBUG=true to enable)")
-    
-    print("\nBasic usage:")
-    print("  from vision_tools import vision_analyze_tool")
-    print("  import asyncio")
-    print("")
-    print("  async def main():")
-    print("      result = await vision_analyze_tool(")
-    print("          image_url='https://example.com/image.jpg',")
-    print("          user_prompt='What do you see in this image?'")
-    print("      )")
-    print("      print(result)")
-    print("  asyncio.run(main())")
-    
-    print("\nExample prompts:")
-    print("  - 'What architectural style is this building?'")
-    print("  - 'Describe the emotions and mood in this image'")
-    print("  - 'What text can you read in this image?'")
-    print("  - 'Identify any safety hazards visible'")
-    print("  - 'What products or brands are shown?'")
-    
-    print("\nDebug mode:")
-    print("  # Enable debug logging")
-    print("  export VISION_TOOLS_DEBUG=true")
-    print("  # Debug logs capture all vision analysis calls and results")
-    print("  # Logs saved to: ./logs/vision_tools_debug_UUID.json")
+#!/usr/bin/env python3
+"""
+Vision Tools Module
+
+This module provides vision analysis tools that work with image URLs.
+Uses Gemini Flash via Nous Research API for intelligent image understanding.
+
+Available tools:
+- vision_analyze_tool: Analyze images from URLs with custom prompts
+
+Features:
+- Comprehensive image description
+- Context-aware analysis based on user queries
+- Proper error handling and validation
+- Debug logging support
+
+Usage:
+    from vision_tools import vision_analyze_tool
+    import asyncio
+    
+    # Analyze an image
+    result = await vision_analyze_tool(
+        image_url="https://example.com/image.jpg",
+        user_prompt="What architectural style is this building?"
+    )
+"""
+
+import json
+import os
+import asyncio
+import uuid
+import datetime
+from pathlib import Path
+from typing import Dict, Any, Optional
+from openai import AsyncOpenAI
+
+# Initialize Nous Research API client for vision processing
+nous_client = AsyncOpenAI(
+    api_key=os.getenv("NOUS_API_KEY"),
+    base_url="https://inference-api.nousresearch.com/v1"
+)
+
+# Configuration for vision processing
+DEFAULT_VISION_MODEL = "gemini-2.5-flash"
+
+# Debug mode configuration
+DEBUG_MODE = os.getenv("VISION_TOOLS_DEBUG", "false").lower() == "true"
+DEBUG_SESSION_ID = str(uuid.uuid4())
+DEBUG_LOG_PATH = Path("./logs")
+DEBUG_DATA = {
+    "session_id": DEBUG_SESSION_ID,
+    "start_time": datetime.datetime.now().isoformat(),
+    "debug_enabled": DEBUG_MODE,
+    "tool_calls": []
+} if DEBUG_MODE else None
+
+# Create logs directory if debug mode is enabled
+if DEBUG_MODE:
+    DEBUG_LOG_PATH.mkdir(exist_ok=True)
+    print(f"🐛 Vision debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
+
+
+def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
+    """
+    Log a debug call entry to the global debug data structure.
+    
+    Args:
+        tool_name (str): Name of the tool being called
+        call_data (Dict[str, Any]): Data about the call including parameters and results
+    """
+    if not DEBUG_MODE or not DEBUG_DATA:
+        return
+    
+    call_entry = {
+        "timestamp": datetime.datetime.now().isoformat(),
+        "tool_name": tool_name,
+        **call_data
+    }
+    
+    DEBUG_DATA["tool_calls"].append(call_entry)
+
+
+def _save_debug_log() -> None:
+    """
+    Save the current debug data to a JSON file in the logs directory.
+    """
+    if not DEBUG_MODE or not DEBUG_DATA:
+        return
+    
+    try:
+        debug_filename = f"vision_tools_debug_{DEBUG_SESSION_ID}.json"
+        debug_filepath = DEBUG_LOG_PATH / debug_filename
+        
+        # Update end time
+        DEBUG_DATA["end_time"] = datetime.datetime.now().isoformat()
+        DEBUG_DATA["total_calls"] = len(DEBUG_DATA["tool_calls"])
+        
+        with open(debug_filepath, 'w', encoding='utf-8') as f:
+            json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)
+        
+        print(f"🐛 Vision debug log saved: {debug_filepath}")
+        
+    except Exception as e:
+        print(f"❌ Error saving vision debug log: {str(e)}")
+
+
+def _validate_image_url(url: str) -> bool:
+    """
+    Basic validation of image URL format.
+    
+    Args:
+        url (str): The URL to validate
+        
+    Returns:
+        bool: True if URL appears to be valid, False otherwise
+    """
+    if not url or not isinstance(url, str):
+        return False
+    
+    # Check if it's a valid URL format
+    if not (url.startswith('http://') or url.startswith('https://')):
+        return False
+    
+    # Common image extensions, listed for reference only: no extension check is performed, since image URLs may not have one
+    image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg']
+    
+    return True  # Allow all HTTP/HTTPS URLs for flexibility
+
+
+async def vision_analyze_tool(
+    image_url: str,
+    user_prompt: str,
+    model: str = DEFAULT_VISION_MODEL
+) -> str:
+    """
+    Analyze an image from a URL using vision AI.
+    
+    This tool processes images using Gemini Flash via Nous Research API.
+    The user_prompt parameter is expected to be pre-formatted by the calling
+    function (typically model_tools.py) to include both full description
+    requests and specific questions.
+    
+    Args:
+        image_url (str): The URL of the image to analyze
+        user_prompt (str): The pre-formatted prompt for the vision model
+        model (str): The vision model to use (default: gemini-2.5-flash)
+    
+    Returns:
+        str: JSON string containing the analysis results with the following structure:
+             {
+                 "success": bool,
+                 "analysis": str (a generic failure message if the analysis is empty or the request fails)
+             }
+    
+    Note:
+        Failures (invalid URL, missing NOUS_API_KEY, API errors) are caught and returned as "success": false rather than raised
+    """
+    debug_call_data = {
+        "parameters": {
+            "image_url": image_url,
+            "user_prompt": user_prompt,
+            "model": model
+        },
+        "error": None,
+        "success": False,
+        "analysis_length": 0,
+        "model_used": model
+    }
+    
+    try:
+        print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}")
+        print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")
+        
+        # Validate image URL
+        if not _validate_image_url(image_url):
+            raise ValueError("Invalid image URL format. Must start with http:// or https://")
+        
+        # Check API key availability
+        if not os.getenv("NOUS_API_KEY"):
+            raise ValueError("NOUS_API_KEY environment variable not set")
+        
+        # Use the prompt as provided (model_tools.py now handles full description formatting)
+        comprehensive_prompt = user_prompt
+        
+        # Prepare the message with image URL format
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": comprehensive_prompt
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": image_url
+                        }
+                    }
+                ]
+            }
+        ]
+        
+        print(f"🧠 Processing image with {model}...")
+        
+        # Call the vision API
+        response = await nous_client.chat.completions.create(
+            model=model,
+            messages=messages,
+            temperature=0.1,  # Low temperature for consistent analysis
+            max_tokens=2000   # Generous limit for detailed analysis
+        )
+        
+        # Extract the analysis
+        analysis = response.choices[0].message.content.strip()
+        analysis_length = len(analysis)
+        
+        print(f"✅ Image analysis completed ({analysis_length} characters)")
+        
+        # Prepare successful response
+        result = {
+            "success": True,
+            "analysis": analysis or "There was a problem with the request and the image could not be analyzed."
+        }
+        
+        debug_call_data["success"] = True
+        debug_call_data["analysis_length"] = analysis_length
+        
+        # Log debug information
+        _log_debug_call("vision_analyze_tool", debug_call_data)
+        _save_debug_log()
+        
+        return json.dumps(result, indent=2)
+        
+    except Exception as e:
+        error_msg = f"Error analyzing image: {str(e)}"
+        print(f"❌ {error_msg}")
+        
+        # Prepare error response
+        result = {
+            "success": False,
+            "analysis": "There was a problem with the request and the image could not be analyzed."
+        }
+        
+        debug_call_data["error"] = error_msg
+        _log_debug_call("vision_analyze_tool", debug_call_data)
+        _save_debug_log()
+        
+        return json.dumps(result, indent=2)
+
+
+def check_nous_api_key() -> bool:
+    """
+    Check if the Nous Research API key is available in environment variables.
+    
+    Returns:
+        bool: True if API key is set, False otherwise
+    """
+    return bool(os.getenv("NOUS_API_KEY"))
+
+
+def check_vision_requirements() -> bool:
+    """
+    Check if all requirements for vision tools are met.
+    
+    Returns:
+        bool: True if requirements are met, False otherwise
+    """
+    return check_nous_api_key()
+
+
+def get_debug_session_info() -> Dict[str, Any]:
+    """
+    Get information about the current debug session.
+    
+    Returns:
+        Dict[str, Any]: Dictionary containing debug session information
+    """
+    if not DEBUG_MODE or not DEBUG_DATA:
+        return {
+            "enabled": False,
+            "session_id": None,
+            "log_path": None,
+            "total_calls": 0
+        }
+    
+    return {
+        "enabled": True,
+        "session_id": DEBUG_SESSION_ID,
+        "log_path": str(DEBUG_LOG_PATH / f"vision_tools_debug_{DEBUG_SESSION_ID}.json"),
+        "total_calls": len(DEBUG_DATA["tool_calls"])
+    }
+
+
+if __name__ == "__main__":
+    """
+    Simple test/demo when run directly
+    """
+    print("👁️ Vision Tools Module")
+    print("=" * 40)
+    
+    # Check if API key is available
+    api_available = check_nous_api_key()
+    
+    if not api_available:
+        print("❌ NOUS_API_KEY environment variable not set")
+        print("Please set your API key: export NOUS_API_KEY='your-key-here'")
+        print("Get API key at: https://inference-api.nousresearch.com/")
+        exit(1)
+    else:
+        print("✅ Nous Research API key found")
+    
+    print("🛠️ Vision tools ready for use!")
+    print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
+    
+    # Show debug mode status
+    if DEBUG_MODE:
+        print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
+        print(f"   Debug logs will be saved to: ./logs/vision_tools_debug_{DEBUG_SESSION_ID}.json")
+    else:
+        print("🐛 Debug mode disabled (set VISION_TOOLS_DEBUG=true to enable)")
+    
+    print("\nBasic usage:")
+    print("  from vision_tools import vision_analyze_tool")
+    print("  import asyncio")
+    print("")
+    print("  async def main():")
+    print("      result = await vision_analyze_tool(")
+    print("          image_url='https://example.com/image.jpg',")
+    print("          user_prompt='What do you see in this image?'")
+    print("      )")
+    print("      print(result)")
+    print("  asyncio.run(main())")
+    
+    print("\nExample prompts:")
+    print("  - 'What architectural style is this building?'")
+    print("  - 'Describe the emotions and mood in this image'")
+    print("  - 'What text can you read in this image?'")
+    print("  - 'Identify any safety hazards visible'")
+    print("  - 'What products or brands are shown?'")
+    
+    print("\nDebug mode:")
+    print("  # Enable debug logging")
+    print("  export VISION_TOOLS_DEBUG=true")
+    print("  # Debug logs capture all vision analysis calls and results")
+    print("  # Logs saved to: ./logs/vision_tools_debug_UUID.json")