Enhance image handling and analysis capabilities across platforms

- Updated the vision tool to accept both HTTP/HTTPS URLs and local file paths for image analysis.
- Implemented caching of user-uploaded images in local directories to ensure reliable access for the vision tool, addressing issues with ephemeral URLs.
- Enhanced platform adapters (Discord, Telegram, WhatsApp) to download and cache images, allowing for immediate analysis and enriched message context.
- Added a new method to auto-analyze images attached by users, enriching the conversation with detailed descriptions.
- Improved documentation for image handling processes and updated related functions for clarity and efficiency.
This commit is contained in:
teknium1
2026-02-15 16:10:50 -08:00
parent eb49936a60
commit 5404a8fcd8
7 changed files with 303 additions and 35 deletions

View File

@@ -248,18 +248,19 @@ async def vision_analyze_tool(
model: str = DEFAULT_VISION_MODEL
) -> str:
"""
Analyze an image from a URL using vision AI.
Analyze an image from a URL or local file path using vision AI.
This tool downloads images from URLs, converts them to base64, and processes
them using Gemini 3 Flash Preview via OpenRouter API. The image is downloaded to a
temporary location and automatically cleaned up after processing.
This tool accepts either an HTTP/HTTPS URL or a local file path. For URLs,
it downloads the image first. In both cases, the image is converted to base64
and processed using Gemini 3 Flash Preview via OpenRouter API.
The user_prompt parameter is expected to be pre-formatted by the calling
function (typically model_tools.py) to include both full description
requests and specific questions.
Args:
image_url (str): The URL of the image to analyze (must be http:// or https://)
image_url (str): The URL or local file path of the image to analyze.
Accepts http:// or https:// URLs, or absolute/relative file paths.
user_prompt (str): The pre-formatted prompt for the vision model
model (str): The vision model to use (default: google/gemini-3-flash-preview)
@@ -274,8 +275,8 @@ async def vision_analyze_tool(
Exception: If download fails, analysis fails, or API key is not set
Note:
- Temporary images are stored in ./temp_vision_images/
- Images are automatically deleted after processing
- For URLs, temporary images are stored in ./temp_vision_images/ and deleted after processing
- For local file paths, the file is used directly and NOT deleted
- Supports common image formats (JPEG, PNG, GIF, WebP, etc.)
"""
debug_call_data = {
@@ -292,30 +293,41 @@ async def vision_analyze_tool(
}
temp_image_path = None
# Track whether we should clean up the file after processing.
# Local files (e.g. from the image cache) should NOT be deleted.
should_cleanup = True
try:
print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}", flush=True)
print(f"🔍 Analyzing image: {image_url[:60]}{'...' if len(image_url) > 60 else ''}", flush=True)
print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}", flush=True)
# Validate image URL
if not _validate_image_url(image_url):
raise ValueError("Invalid image URL format. Must start with http:// or https://")
# Check API key availability
if not os.getenv("OPENROUTER_API_KEY"):
raise ValueError("OPENROUTER_API_KEY environment variable not set")
# Download the image to a temporary location
print(f"⬇️ Downloading image from URL...", flush=True)
temp_dir = Path("./temp_vision_images")
temp_image_path = temp_dir / f"temp_image_{uuid.uuid4()}.jpg"
await _download_image(image_url, temp_image_path)
# Determine if this is a local file path or a remote URL
local_path = Path(image_url)
if local_path.is_file():
# Local file path (e.g. from platform image cache) -- skip download
print(f"📁 Using local image file: {image_url}", flush=True)
temp_image_path = local_path
should_cleanup = False # Don't delete cached/local files
elif _validate_image_url(image_url):
# Remote URL -- download to a temporary location
print(f"⬇️ Downloading image from URL...", flush=True)
temp_dir = Path("./temp_vision_images")
temp_image_path = temp_dir / f"temp_image_{uuid.uuid4()}.jpg"
await _download_image(image_url, temp_image_path)
should_cleanup = True
else:
raise ValueError(
"Invalid image source. Provide an HTTP/HTTPS URL or a valid local file path."
)
# Get image file size for logging
image_size_bytes = temp_image_path.stat().st_size
image_size_kb = image_size_bytes / 1024
print(f"✅ Image downloaded successfully ({image_size_kb:.1f} KB)", flush=True)
print(f"✅ Image ready ({image_size_kb:.1f} KB)", flush=True)
# Convert image to base64 data URL
print(f"🔄 Converting image to base64...", flush=True)
@@ -402,8 +414,8 @@ async def vision_analyze_tool(
return json.dumps(result, indent=2, ensure_ascii=False)
finally:
# Clean up temporary image file
if temp_image_path and temp_image_path.exists():
# Clean up temporary image file (but NOT local/cached files)
if should_cleanup and temp_image_path and temp_image_path.exists():
try:
temp_image_path.unlink()
print(f"🧹 Cleaned up temporary image file", flush=True)