tools/vision_tools.py

#!/usr/bin/env python3
"""
Vision Tools Module

This module provides vision analysis tools that work with image URLs.
Uses Gemini Flash via Nous Research API for intelligent image understanding.

Available tools:
- vision_analyze_tool: Analyze images from URLs with custom prompts

Features:
- Comprehensive image description
- Context-aware analysis based on user queries
- Proper error handling and validation
- Debug logging support

Usage:
    from vision_tools import vision_analyze_tool
    import asyncio
    
    # Analyze an image
    result = await vision_analyze_tool(
        image_url="https://example.com/image.jpg",
        user_prompt="What architectural style is this building?"
    )
"""

import json
import os
import asyncio
import uuid
import datetime
from pathlib import Path
from typing import Dict, Any, Optional
from openai import AsyncOpenAI

# Initialize Nous Research API client for vision processing
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),
    base_url="https://inference-api.nousresearch.com/v1"
)

# Configuration for vision processing
DEFAULT_VISION_MODEL = "gemini-2.5-flash"

# Debug mode configuration
DEBUG_MODE = os.getenv("VISION_TOOLS_DEBUG", "false").lower() == "true"
DEBUG_SESSION_ID = str(uuid.uuid4())
DEBUG_LOG_PATH = Path("./logs")
DEBUG_DATA = {
    "session_id": DEBUG_SESSION_ID,
    "start_time": datetime.datetime.now().isoformat(),
    "debug_enabled": DEBUG_MODE,
    "tool_calls": []
} if DEBUG_MODE else None

# Create logs directory if debug mode is enabled
if DEBUG_MODE:
    DEBUG_LOG_PATH.mkdir(exist_ok=True)
    print(f"🐛 Vision debug mode enabled - Session ID: {DEBUG_SESSION_ID}")


def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
    """
    Log a debug call entry to the global debug data structure.
    
    Args:
        tool_name (str): Name of the tool being called
        call_data (Dict[str, Any]): Data about the call including parameters and results
    """
    if not DEBUG_MODE or not DEBUG_DATA:
        return
    
    call_entry = {
        "timestamp": datetime.datetime.now().isoformat(),
        "tool_name": tool_name,
        **call_data
    }
    
    DEBUG_DATA["tool_calls"].append(call_entry)


def _save_debug_log() -> None:
    """
    Save the current debug data to a JSON file in the logs directory.
    """
    if not DEBUG_MODE or not DEBUG_DATA:
        return
    
    try:
        debug_filename = f"vision_tools_debug_{DEBUG_SESSION_ID}.json"
        debug_filepath = DEBUG_LOG_PATH / debug_filename
        
        # Update end time
        DEBUG_DATA["end_time"] = datetime.datetime.now().isoformat()
        DEBUG_DATA["total_calls"] = len(DEBUG_DATA["tool_calls"])
        
        with open(debug_filepath, 'w', encoding='utf-8') as f:
            json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)
        
        print(f"🐛 Vision debug log saved: {debug_filepath}")
        
    except Exception as e:
        print(f"❌ Error saving vision debug log: {str(e)}")


def _validate_image_url(url: str) -> bool:
    """
    Basic validation of image URL format.
    
    Args:
        url (str): The URL to validate
        
    Returns:
        bool: True if URL appears to be valid, False otherwise
    """
    if not url or not isinstance(url, str):
        return False
    
    # Check if it's a valid URL format
    if not (url.startswith('http://') or url.startswith('https://')):
        return False
    
    # Check for common image extensions (optional, as URLs may not have extensions)
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg']
    
    return True  # Allow all HTTP/HTTPS URLs for flexibility


async def vision_analyze_tool(
    image_url: str,
    user_prompt: str,
    model: str = DEFAULT_VISION_MODEL
) -> str:
    """
    Analyze an image from a URL using vision AI.
    
    This tool processes images using Gemini Flash via Nous Research API.
    The user_prompt parameter is expected to be pre-formatted by the calling
    function (typically model_tools.py) to include both full description
    requests and specific questions.
    
    Args:
        image_url (str): The URL of the image to analyze
        user_prompt (str): The pre-formatted prompt for the vision model
        model (str): The vision model to use (default: gemini-2.5-flash)
    
    Returns:
        str: JSON string containing the analysis results with the following structure:
             {
                 "success": bool,
                 "analysis": str (defaults to error message if None)
             }
    
    Raises:
        Exception: If analysis fails or API key is not set
    """
    debug_call_data = {
        "parameters": {
            "image_url": image_url,
            "user_prompt": user_prompt,
            "model": model
        },
        "error": None,
        "success": False,
        "analysis_length": 0,
        "model_used": model
    }
    
    try:
        print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}")
        print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")
        
        # Validate image URL
        if not _validate_image_url(image_url):
            raise ValueError("Invalid image URL format. Must start with http:// or https://")
        
        # Check API key availability
        if not os.getenv("NOUS_API_KEY"):
            raise ValueError("NOUS_API_KEY environment variable not set")
        
        # Use the prompt as provided (model_tools.py now handles full description formatting)
        comprehensive_prompt = user_prompt
        
        # Prepare the message with image URL format
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": comprehensive_prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_url
                        }
                    }
                ]
            }
        ]
        
        print(f"🧠 Processing image with {model}...")
        
        # Call the vision API
        response = await nous_client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.1,  # Low temperature for consistent analysis
            max_tokens=2000   # Generous limit for detailed analysis
        )
        
        # Extract the analysis
        analysis = response.choices[0].message.content.strip()
        analysis_length = len(analysis)
        
        print(f"✅ Image analysis completed ({analysis_length} characters)")
        
        # Prepare successful response
        result = {
            "success": True,
            "analysis": analysis or "There was a problem with the request and the image could not be analyzed."
        }
        
        debug_call_data["success"] = True
        debug_call_data["analysis_length"] = analysis_length
        
        # Log debug information
        _log_debug_call("vision_analyze_tool", debug_call_data)
        _save_debug_log()
        
        return json.dumps(result, indent=2)
        
    except Exception as e:
        error_msg = f"Error analyzing image: {str(e)}"
        print(f"❌ {error_msg}")
        
        # Prepare error response
        result = {
            "success": False,
            "analysis": "There was a problem with the request and the image could not be analyzed."
        }
        
        debug_call_data["error"] = error_msg
        _log_debug_call("vision_analyze_tool", debug_call_data)
        _save_debug_log()
        
        return json.dumps(result, indent=2)


def check_nous_api_key() -> bool:
    """
    Check if the Nous Research API key is available in environment variables.
    
    Returns:
        bool: True if API key is set, False otherwise
    """
    return bool(os.getenv("NOUS_API_KEY"))


def check_vision_requirements() -> bool:
    """
    Check if all requirements for vision tools are met.
    
    Returns:
        bool: True if requirements are met, False otherwise
    """
    return check_nous_api_key()


def get_debug_session_info() -> Dict[str, Any]:
    """
    Get information about the current debug session.
    
    Returns:
        Dict[str, Any]: Dictionary containing debug session information
    """
    if not DEBUG_MODE or not DEBUG_DATA:
        return {
            "enabled": False,
            "session_id": None,
            "log_path": None,
            "total_calls": 0
        }
    
    return {
        "enabled": True,
        "session_id": DEBUG_SESSION_ID,
        "log_path": str(DEBUG_LOG_PATH / f"vision_tools_debug_{DEBUG_SESSION_ID}.json"),
        "total_calls": len(DEBUG_DATA["tool_calls"])
    }


if __name__ == "__main__":
    """
    Simple test/demo when run directly
    """
    print("👁️ Vision Tools Module")
    print("=" * 40)
    
    # Check if API key is available
    api_available = check_nous_api_key()
    
    if not api_available:
        print("❌ NOUS_API_KEY environment variable not set")
        print("Please set your API key: export NOUS_API_KEY='your-key-here'")
        print("Get API key at: https://inference-api.nousresearch.com/")
        exit(1)
    else:
        print("✅ Nous Research API key found")
    
    print("🛠️ Vision tools ready for use!")
    print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
    
    # Show debug mode status
    if DEBUG_MODE:
        print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
        print(f"   Debug logs will be saved to: ./logs/vision_tools_debug_{DEBUG_SESSION_ID}.json")
    else:
        print("🐛 Debug mode disabled (set VISION_TOOLS_DEBUG=true to enable)")
    
    print("\nBasic usage:")
    print("  from vision_tools import vision_analyze_tool")
    print("  import asyncio")
    print("")
    print("  async def main():")
    print("      result = await vision_analyze_tool(")
    print("          image_url='https://example.com/image.jpg',")
    print("          user_prompt='What do you see in this image?'")
    print("      )")
    print("      print(result)")
    print("  asyncio.run(main())")
    
    print("\nExample prompts:")
    print("  - 'What architectural style is this building?'")
    print("  - 'Describe the emotions and mood in this image'")
    print("  - 'What text can you read in this image?'")
    print("  - 'Identify any safety hazards visible'")
    print("  - 'What products or brands are shown?'")
    
    print("\nDebug mode:")
    print("  # Enable debug logging")
    print("  export VISION_TOOLS_DEBUG=true")
    print("  # Debug logs capture all vision analysis calls and results")
    print("  # Logs saved to: ./logs/vision_tools_debug_UUID.json")
add vision model tool, cli updates for exclusive and inclusive toolsets 2025-08-04 00:14:16 -07:00			`#!/usr/bin/env python3`
			`"""`
			`Vision Tools Module`

			`This module provides vision analysis tools that work with image URLs.`
			`Uses Gemini Flash via Nous Research API for intelligent image understanding.`

			`Available tools:`
			`- vision_analyze_tool: Analyze images from URLs with custom prompts`

			`Features:`
			`- Comprehensive image description`
			`- Context-aware analysis based on user queries`
			`- Proper error handling and validation`
			`- Debug logging support`

			`Usage:`
			`from vision_tools import vision_analyze_tool`
			`import asyncio`

			`# Analyze an image`
			`result = await vision_analyze_tool(`
			`image_url="https://example.com/image.jpg",`
			`user_prompt="What architectural style is this building?"`
			`)`
			`"""`

			`import json`
			`import os`
			`import asyncio`
			`import uuid`
			`import datetime`
			`from pathlib import Path`
			`from typing import Dict, Any, Optional`
			`from openai import AsyncOpenAI`

			`# Initialize Nous Research API client for vision processing`
			`nous_client = AsyncOpenAI(`
			`api_key=os.getenv("NOUS_API_KEY"),`
			`base_url="https://inference-api.nousresearch.com/v1"`
			`)`

			`# Configuration for vision processing`
			`DEFAULT_VISION_MODEL = "gemini-2.5-flash"`

			`# Debug mode configuration`
			`DEBUG_MODE = os.getenv("VISION_TOOLS_DEBUG", "false").lower() == "true"`
			`DEBUG_SESSION_ID = str(uuid.uuid4())`
			`DEBUG_LOG_PATH = Path("./logs")`
			`DEBUG_DATA = {`
			`"session_id": DEBUG_SESSION_ID,`
			`"start_time": datetime.datetime.now().isoformat(),`
			`"debug_enabled": DEBUG_MODE,`
			`"tool_calls": []`
			`} if DEBUG_MODE else None`

			`# Create logs directory if debug mode is enabled`
			`if DEBUG_MODE:`
			`DEBUG_LOG_PATH.mkdir(exist_ok=True)`
			`print(f"🐛 Vision debug mode enabled - Session ID: {DEBUG_SESSION_ID}")`


			`def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:`
			`"""`
			`Log a debug call entry to the global debug data structure.`

			`Args:`
			`tool_name (str): Name of the tool being called`
			`call_data (Dict[str, Any]): Data about the call including parameters and results`
			`"""`
			`if not DEBUG_MODE or not DEBUG_DATA:`
			`return`

			`call_entry = {`
			`"timestamp": datetime.datetime.now().isoformat(),`
			`"tool_name": tool_name,`
			`**call_data`
			`}`

			`DEBUG_DATA["tool_calls"].append(call_entry)`


			`def _save_debug_log() -> None:`
			`"""`
			`Save the current debug data to a JSON file in the logs directory.`
			`"""`
			`if not DEBUG_MODE or not DEBUG_DATA:`
			`return`

			`try:`
			`debug_filename = f"vision_tools_debug_{DEBUG_SESSION_ID}.json"`
			`debug_filepath = DEBUG_LOG_PATH / debug_filename`

			`# Update end time`
			`DEBUG_DATA["end_time"] = datetime.datetime.now().isoformat()`
			`DEBUG_DATA["total_calls"] = len(DEBUG_DATA["tool_calls"])`

			`with open(debug_filepath, 'w', encoding='utf-8') as f:`
			`json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)`

			`print(f"🐛 Vision debug log saved: {debug_filepath}")`

			`except Exception as e:`
			`print(f"❌ Error saving vision debug log: {str(e)}")`


			`def _validate_image_url(url: str) -> bool:`
			`"""`
			`Basic validation of image URL format.`

			`Args:`
			`url (str): The URL to validate`

			`Returns:`
			`bool: True if URL appears to be valid, False otherwise`
			`"""`
			`if not url or not isinstance(url, str):`
			`return False`

			`# Check if it's a valid URL format`
			`if not (url.startswith('http://') or url.startswith('https://')):`
			`return False`

			`# Check for common image extensions (optional, as URLs may not have extensions)`
			`image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg']`

			`return True # Allow all HTTP/HTTPS URLs for flexibility`


			`async def vision_analyze_tool(`
			`image_url: str,`
			`user_prompt: str,`
			`model: str = DEFAULT_VISION_MODEL`
			`) -> str:`
			`"""`
			`Analyze an image from a URL using vision AI.`

			`This tool processes images using Gemini Flash via Nous Research API.`
			`The user_prompt parameter is expected to be pre-formatted by the calling`
			`function (typically model_tools.py) to include both full description`
			`requests and specific questions.`

			`Args:`
			`image_url (str): The URL of the image to analyze`
			`user_prompt (str): The pre-formatted prompt for the vision model`
			`model (str): The vision model to use (default: gemini-2.5-flash)`

			`Returns:`
			`str: JSON string containing the analysis results with the following structure:`
			`{`
			`"success": bool,`
			`"analysis": str (defaults to error message if None)`
			`}`

			`Raises:`
			`Exception: If analysis fails or API key is not set`
			`"""`
			`debug_call_data = {`
			`"parameters": {`
			`"image_url": image_url,`
			`"user_prompt": user_prompt,`
			`"model": model`
			`},`
			`"error": None,`
			`"success": False,`
			`"analysis_length": 0,`
			`"model_used": model`
			`}`

			`try:`
			`print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}")`
			`print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")`

			`# Validate image URL`
			`if not _validate_image_url(image_url):`
			`raise ValueError("Invalid image URL format. Must start with http:// or https://")`

			`# Check API key availability`
			`if not os.getenv("NOUS_API_KEY"):`
			`raise ValueError("NOUS_API_KEY environment variable not set")`

			`# Use the prompt as provided (model_tools.py now handles full description formatting)`
			`comprehensive_prompt = user_prompt`

			`# Prepare the message with image URL format`
			`messages = [`
			`{`
			`"role": "user",`
			`"content": [`
			`{`
			`"type": "text",`
			`"text": comprehensive_prompt`
			`},`
			`{`
			`"type": "image_url",`
			`"image_url": {`
			`"url": image_url`
			`}`
			`}`
			`]`
			`}`
			`]`

			`print(f"🧠 Processing image with {model}...")`

			`# Call the vision API`
			`response = await nous_client.chat.completions.create(`
			`model=model,`
			`messages=messages,`
			`temperature=0.1, # Low temperature for consistent analysis`
			`max_tokens=2000 # Generous limit for detailed analysis`
			`)`

			`# Extract the analysis`
			`analysis = response.choices[0].message.content.strip()`
			`analysis_length = len(analysis)`

			`print(f"✅ Image analysis completed ({analysis_length} characters)")`

			`# Prepare successful response`
			`result = {`
			`"success": True,`
			`"analysis": analysis or "There was a problem with the request and the image could not be analyzed."`
			`}`

			`debug_call_data["success"] = True`
			`debug_call_data["analysis_length"] = analysis_length`

			`# Log debug information`
			`_log_debug_call("vision_analyze_tool", debug_call_data)`
			`_save_debug_log()`

			`return json.dumps(result, indent=2)`

			`except Exception as e:`
			`error_msg = f"Error analyzing image: {str(e)}"`
			`print(f"❌ {error_msg}")`

			`# Prepare error response`
			`result = {`
			`"success": False,`
			`"analysis": "There was a problem with the request and the image could not be analyzed."`
			`}`

			`debug_call_data["error"] = error_msg`
			`_log_debug_call("vision_analyze_tool", debug_call_data)`
			`_save_debug_log()`

			`return json.dumps(result, indent=2)`


			`def check_nous_api_key() -> bool:`
			`"""`
			`Check if the Nous Research API key is available in environment variables.`

			`Returns:`
			`bool: True if API key is set, False otherwise`
			`"""`
			`return bool(os.getenv("NOUS_API_KEY"))`


			`def check_vision_requirements() -> bool:`
			`"""`
			`Check if all requirements for vision tools are met.`

			`Returns:`
			`bool: True if requirements are met, False otherwise`
			`"""`
			`return check_nous_api_key()`


			`def get_debug_session_info() -> Dict[str, Any]:`
			`"""`
			`Get information about the current debug session.`

			`Returns:`
			`Dict[str, Any]: Dictionary containing debug session information`
			`"""`
			`if not DEBUG_MODE or not DEBUG_DATA:`
			`return {`
			`"enabled": False,`
			`"session_id": None,`
			`"log_path": None,`
			`"total_calls": 0`
			`}`

			`return {`
			`"enabled": True,`
			`"session_id": DEBUG_SESSION_ID,`
			`"log_path": str(DEBUG_LOG_PATH / f"vision_tools_debug_{DEBUG_SESSION_ID}.json"),`
			`"total_calls": len(DEBUG_DATA["tool_calls"])`
			`}`


			`if __name__ == "__main__":`
			`"""`
			`Simple test/demo when run directly`
			`"""`
			`print("👁️ Vision Tools Module")`
			`print("=" * 40)`

			`# Check if API key is available`
			`api_available = check_nous_api_key()`

			`if not api_available:`
			`print("❌ NOUS_API_KEY environment variable not set")`
			`print("Please set your API key: export NOUS_API_KEY='your-key-here'")`
			`print("Get API key at: https://inference-api.nousresearch.com/")`
			`exit(1)`
			`else:`
			`print("✅ Nous Research API key found")`

			`print("🛠️ Vision tools ready for use!")`
			`print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")`

			`# Show debug mode status`
			`if DEBUG_MODE:`
			`print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")`
			`print(f" Debug logs will be saved to: ./logs/vision_tools_debug_{DEBUG_SESSION_ID}.json")`
			`else:`
			`print("🐛 Debug mode disabled (set VISION_TOOLS_DEBUG=true to enable)")`

			`print("\nBasic usage:")`
			`print(" from vision_tools import vision_analyze_tool")`
			`print(" import asyncio")`
			`print("")`
			`print(" async def main():")`
			`print(" result = await vision_analyze_tool(")`
			`print(" image_url='https://example.com/image.jpg',")`
			`print(" user_prompt='What do you see in this image?'")`
			`print(" )")`
			`print(" print(result)")`
			`print(" asyncio.run(main())")`

			`print("\nExample prompts:")`
			`print(" - 'What architectural style is this building?'")`
			`print(" - 'Describe the emotions and mood in this image'")`
			`print(" - 'What text can you read in this image?'")`
			`print(" - 'Identify any safety hazards visible'")`
			`print(" - 'What products or brands are shown?'")`

			`print("\nDebug mode:")`
			`print(" # Enable debug logging")`
			`print(" export VISION_TOOLS_DEBUG=true")`
			`print(" # Debug logs capture all vision analysis calls and results")`
			`print(" # Logs saved to: ./logs/vision_tools_debug_UUID.json")`