Enhance tool normalization and API integration across modules
- Introduced normalization functions for tool statistics and error counts to ensure consistent schema across all trajectory entries, facilitating compatibility with HuggingFace datasets. - Updated batch processing to utilize normalized tool stats and error counts, improving data integrity. - Refactored vision tools and mixture of agents tool to integrate with OpenRouter API, replacing Nous Research API references and updating model configurations. - Enabled reasoning capabilities in API calls for enhanced response quality across various tools. - Improved error handling and API key validation for OpenRouter integration.
This commit is contained in:
@@ -44,6 +44,70 @@ from toolset_distributions import (
|
||||
# Global configuration for worker processes
|
||||
_WORKER_CONFIG = {}
|
||||
|
||||
# All possible tools - used to ensure consistent schema across all trajectory entries
|
||||
# This is required because Arrow/Parquet (used by HuggingFace datasets) needs identical schemas
|
||||
ALL_POSSIBLE_TOOLS = {
|
||||
'terminal', 'web_search', 'web_extract', 'web_crawl',
|
||||
'vision_analyze', 'image_generate', 'mixture_of_agents'
|
||||
}
|
||||
|
||||
# Default stats for tools that weren't used
|
||||
DEFAULT_TOOL_STATS = {'count': 0, 'success': 0, 'failure': 0}
|
||||
|
||||
|
||||
def _normalize_tool_stats(tool_stats: Dict[str, Dict[str, int]]) -> Dict[str, Dict[str, int]]:
|
||||
"""
|
||||
Normalize tool_stats to include all possible tools with consistent schema.
|
||||
|
||||
This ensures HuggingFace datasets can load the JSONL without schema mismatch errors.
|
||||
Tools that weren't used get zero counts.
|
||||
|
||||
Args:
|
||||
tool_stats (Dict): Raw tool statistics from extraction
|
||||
|
||||
Returns:
|
||||
Dict: Normalized tool statistics with all tools present
|
||||
"""
|
||||
normalized = {}
|
||||
|
||||
# Add all possible tools with defaults
|
||||
for tool in ALL_POSSIBLE_TOOLS:
|
||||
if tool in tool_stats:
|
||||
normalized[tool] = tool_stats[tool].copy()
|
||||
else:
|
||||
normalized[tool] = DEFAULT_TOOL_STATS.copy()
|
||||
|
||||
# Also include any unexpected tools (in case new tools are added)
|
||||
for tool, stats in tool_stats.items():
|
||||
if tool not in normalized:
|
||||
normalized[tool] = stats.copy()
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
def _normalize_tool_error_counts(tool_error_counts: Dict[str, int]) -> Dict[str, int]:
|
||||
"""
|
||||
Normalize tool_error_counts to include all possible tools.
|
||||
|
||||
Args:
|
||||
tool_error_counts (Dict): Raw error counts mapping
|
||||
|
||||
Returns:
|
||||
Dict: Normalized error counts with all tools present
|
||||
"""
|
||||
normalized = {}
|
||||
|
||||
# Add all possible tools with zero defaults
|
||||
for tool in ALL_POSSIBLE_TOOLS:
|
||||
normalized[tool] = tool_error_counts.get(tool, 0)
|
||||
|
||||
# Also include any unexpected tools
|
||||
for tool, count in tool_error_counts.items():
|
||||
if tool not in normalized:
|
||||
normalized[tool] = count
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
def _extract_tool_stats(messages: List[Dict[str, Any]]) -> Dict[str, Dict[str, int]]:
|
||||
"""
|
||||
@@ -273,12 +337,16 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
|
||||
|
||||
# Save trajectory if successful
|
||||
if result["success"] and result["trajectory"]:
|
||||
# Create tool_error_counts mapping tool names to their failure counts
|
||||
tool_stats = result.get("tool_stats", {})
|
||||
tool_error_counts = {
|
||||
# Get and normalize tool stats for consistent schema across all entries
|
||||
raw_tool_stats = result.get("tool_stats", {})
|
||||
tool_stats = _normalize_tool_stats(raw_tool_stats)
|
||||
|
||||
# Create normalized tool_error_counts mapping tool names to their failure counts
|
||||
raw_error_counts = {
|
||||
tool_name: stats.get("failure", 0)
|
||||
for tool_name, stats in tool_stats.items()
|
||||
for tool_name, stats in raw_tool_stats.items()
|
||||
}
|
||||
tool_error_counts = _normalize_tool_error_counts(raw_error_counts)
|
||||
|
||||
trajectory_entry = {
|
||||
"prompt_index": prompt_index,
|
||||
@@ -288,8 +356,8 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
|
||||
"partial": result.get("partial", False), # True if stopped due to invalid tool calls
|
||||
"api_calls": result["api_calls"],
|
||||
"toolsets_used": result["toolsets_used"],
|
||||
"tool_stats": tool_stats, # Full stats: {tool: {count, success, failure}}
|
||||
"tool_error_counts": tool_error_counts # Simple: {tool: failure_count}
|
||||
"tool_stats": tool_stats, # Full stats: {tool: {count, success, failure}} - normalized
|
||||
"tool_error_counts": tool_error_counts # Simple: {tool: failure_count} - normalized
|
||||
}
|
||||
|
||||
# Append to batch output file
|
||||
|
||||
@@ -513,7 +513,7 @@ def handle_vision_function_call(function_name: str, function_args: Dict[str, Any
|
||||
full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
|
||||
|
||||
# Run async function in event loop
|
||||
return asyncio.run(vision_analyze_tool(image_url, full_prompt, "gemini-2.5-flash"))
|
||||
return asyncio.run(vision_analyze_tool(image_url, full_prompt, "google/gemini-3-flash-preview"))
|
||||
|
||||
else:
|
||||
return json.dumps({"error": f"Unknown vision function: {function_name}"}, ensure_ascii=False)
|
||||
|
||||
17
run_agent.py
17
run_agent.py
@@ -555,9 +555,22 @@ class AIAgent:
|
||||
"timeout": 600.0 # 10 minute timeout for very long responses
|
||||
}
|
||||
|
||||
# Add provider preferences for OpenRouter via extra_body
|
||||
# Add extra_body for OpenRouter (provider preferences + reasoning)
|
||||
extra_body = {}
|
||||
|
||||
# Add provider preferences if specified
|
||||
if provider_preferences:
|
||||
api_kwargs["extra_body"] = {"provider": provider_preferences}
|
||||
extra_body["provider"] = provider_preferences
|
||||
|
||||
# Enable reasoning with xhigh effort for OpenRouter
|
||||
if "openrouter" in self.base_url.lower():
|
||||
extra_body["reasoning"] = {
|
||||
"enabled": True,
|
||||
"effort": "xhigh"
|
||||
}
|
||||
|
||||
if extra_body:
|
||||
api_kwargs["extra_body"] = extra_body
|
||||
|
||||
response = self.client.chat.completions.create(**api_kwargs)
|
||||
|
||||
|
||||
@@ -24,9 +24,9 @@ Architecture:
|
||||
2. Aggregator model synthesizes responses into a high-quality output
|
||||
3. Multiple layers can be used for iterative refinement (future enhancement)
|
||||
|
||||
Models Used:
|
||||
- Reference Models: claude-opus-4-20250514, gemini-2.5-pro, o4-mini, deepseek-r1
|
||||
- Aggregator Model: claude-opus-4-20250514 (highest capability for synthesis)
|
||||
Models Used (via OpenRouter):
|
||||
- Reference Models: claude-opus-4, gemini-2.5-pro, gpt-4.1, deepseek-r1
|
||||
- Aggregator Model: claude-opus-4 (highest capability for synthesis)
|
||||
|
||||
Configuration:
|
||||
To customize the MoA setup, modify the configuration constants at the top of this file:
|
||||
@@ -54,23 +54,23 @@ from pathlib import Path
|
||||
from typing import Dict, Any, List, Optional
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
# Initialize Nous Research API client for MoA processing
|
||||
nous_client = AsyncOpenAI(
|
||||
api_key=os.getenv("NOUS_API_KEY"),
|
||||
base_url="https://inference-api.nousresearch.com/v1"
|
||||
# Initialize OpenRouter API client for MoA processing
|
||||
openrouter_client = AsyncOpenAI(
|
||||
api_key=os.getenv("OPENROUTER_API_KEY"),
|
||||
base_url="https://openrouter.ai/api/v1"
|
||||
)
|
||||
|
||||
# Configuration for MoA processing
|
||||
# Reference models - these generate diverse initial responses in parallel
|
||||
# Reference models - these generate diverse initial responses in parallel (OpenRouter slugs)
|
||||
REFERENCE_MODELS = [
|
||||
"claude-opus-4-20250514",
|
||||
"gemini-2.5-pro",
|
||||
"gpt-5",
|
||||
"deepseek-r1"
|
||||
"anthropic/claude-opus-4.5",
|
||||
"google/gemini-3-pro-preview",
|
||||
"openai/gpt-5.2-pro",
|
||||
"deepseek/deepseek-v3.2"
|
||||
]
|
||||
|
||||
# Aggregator model - synthesizes reference responses into final output
|
||||
AGGREGATOR_MODEL = "claude-opus-4-20250514" # Use highest capability model for aggregation
|
||||
AGGREGATOR_MODEL = "anthropic/claude-opus-4.5" # Use highest capability model for aggregation
|
||||
|
||||
# Temperature settings optimized for MoA performance
|
||||
REFERENCE_TEMPERATURE = 0.6 # Balanced creativity for diverse perspectives
|
||||
@@ -187,7 +187,13 @@ async def _run_reference_model_safe(
|
||||
# Build parameters for the API call
|
||||
api_params = {
|
||||
"model": model,
|
||||
"messages": [{"role": "user", "content": user_prompt}]
|
||||
"messages": [{"role": "user", "content": user_prompt}],
|
||||
"extra_body": {
|
||||
"reasoning": {
|
||||
"enabled": True,
|
||||
"effort": "xhigh"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# GPT models (especially gpt-4o-mini) don't support custom temperature values
|
||||
@@ -195,7 +201,7 @@ async def _run_reference_model_safe(
|
||||
if not model.lower().startswith('gpt-'):
|
||||
api_params["temperature"] = temperature
|
||||
|
||||
response = await nous_client.chat.completions.create(**api_params)
|
||||
response = await openrouter_client.chat.completions.create(**api_params)
|
||||
|
||||
content = response.choices[0].message.content.strip()
|
||||
print(f"✅ {model} responded ({len(content)} characters)")
|
||||
@@ -248,7 +254,13 @@ async def _run_aggregator_model(
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt}
|
||||
]
|
||||
],
|
||||
"extra_body": {
|
||||
"reasoning": {
|
||||
"enabled": True,
|
||||
"effort": "xhigh"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# GPT models (especially gpt-4o-mini) don't support custom temperature values
|
||||
@@ -256,7 +268,7 @@ async def _run_aggregator_model(
|
||||
if not AGGREGATOR_MODEL.lower().startswith('gpt-'):
|
||||
api_params["temperature"] = temperature
|
||||
|
||||
response = await nous_client.chat.completions.create(**api_params)
|
||||
response = await openrouter_client.chat.completions.create(**api_params)
|
||||
|
||||
content = response.choices[0].message.content.strip()
|
||||
print(f"✅ Aggregation complete ({len(content)} characters)")
|
||||
@@ -330,8 +342,8 @@ async def mixture_of_agents_tool(
|
||||
print(f"📝 Query: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")
|
||||
|
||||
# Validate API key availability
|
||||
if not os.getenv("NOUS_API_KEY"):
|
||||
raise ValueError("NOUS_API_KEY environment variable not set")
|
||||
if not os.getenv("OPENROUTER_API_KEY"):
|
||||
raise ValueError("OPENROUTER_API_KEY environment variable not set")
|
||||
|
||||
# Use provided models or defaults
|
||||
ref_models = reference_models or REFERENCE_MODELS
|
||||
@@ -439,14 +451,14 @@ async def mixture_of_agents_tool(
|
||||
return json.dumps(result, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
def check_nous_api_key() -> bool:
|
||||
def check_openrouter_api_key() -> bool:
|
||||
"""
|
||||
Check if the Nous Research API key is available in environment variables.
|
||||
Check if the OpenRouter API key is available in environment variables.
|
||||
|
||||
Returns:
|
||||
bool: True if API key is set, False otherwise
|
||||
"""
|
||||
return bool(os.getenv("NOUS_API_KEY"))
|
||||
return bool(os.getenv("OPENROUTER_API_KEY"))
|
||||
|
||||
|
||||
def check_moa_requirements() -> bool:
|
||||
@@ -456,7 +468,7 @@ def check_moa_requirements() -> bool:
|
||||
Returns:
|
||||
bool: True if requirements are met, False otherwise
|
||||
"""
|
||||
return check_nous_api_key()
|
||||
return check_openrouter_api_key()
|
||||
|
||||
|
||||
def get_debug_session_info() -> Dict[str, Any]:
|
||||
@@ -522,15 +534,15 @@ if __name__ == "__main__":
|
||||
print("=" * 50)
|
||||
|
||||
# Check if API key is available
|
||||
api_available = check_nous_api_key()
|
||||
api_available = check_openrouter_api_key()
|
||||
|
||||
if not api_available:
|
||||
print("❌ NOUS_API_KEY environment variable not set")
|
||||
print("Please set your API key: export NOUS_API_KEY='your-key-here'")
|
||||
print("Get API key at: https://inference-api.nousresearch.com/")
|
||||
print("❌ OPENROUTER_API_KEY environment variable not set")
|
||||
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
|
||||
print("Get API key at: https://openrouter.ai/")
|
||||
exit(1)
|
||||
else:
|
||||
print("✅ Nous Research API key found")
|
||||
print("✅ OpenRouter API key found")
|
||||
|
||||
print("🛠️ MoA tools ready for use!")
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
Vision Tools Module
|
||||
|
||||
This module provides vision analysis tools that work with image URLs.
|
||||
Uses Gemini Flash via Nous Research API for intelligent image understanding.
|
||||
Uses Gemini 3 Flash Preview via OpenRouter API for intelligent image understanding.
|
||||
|
||||
Available tools:
|
||||
- vision_analyze_tool: Analyze images from URLs with custom prompts
|
||||
@@ -38,14 +38,14 @@ from typing import Dict, Any, Optional
|
||||
from openai import AsyncOpenAI
|
||||
import httpx # Use httpx for async HTTP requests
|
||||
|
||||
# Initialize Nous Research API client for vision processing
|
||||
nous_client = AsyncOpenAI(
|
||||
api_key=os.getenv("NOUS_API_KEY"),
|
||||
base_url="https://inference-api.nousresearch.com/v1"
|
||||
# Initialize OpenRouter API client for vision processing
|
||||
openrouter_client = AsyncOpenAI(
|
||||
api_key=os.getenv("OPENROUTER_API_KEY"),
|
||||
base_url="https://openrouter.ai/api/v1"
|
||||
)
|
||||
|
||||
# Configuration for vision processing
|
||||
DEFAULT_VISION_MODEL = "gemini-2.5-flash"
|
||||
DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview"
|
||||
|
||||
# Debug mode configuration
|
||||
DEBUG_MODE = os.getenv("VISION_TOOLS_DEBUG", "false").lower() == "true"
|
||||
@@ -220,7 +220,7 @@ async def vision_analyze_tool(
|
||||
Analyze an image from a URL using vision AI.
|
||||
|
||||
This tool downloads images from URLs, converts them to base64, and processes
|
||||
them using Gemini Flash via Nous Research API. The image is downloaded to a
|
||||
them using Gemini 3 Flash Preview via OpenRouter API. The image is downloaded to a
|
||||
temporary location and automatically cleaned up after processing.
|
||||
|
||||
The user_prompt parameter is expected to be pre-formatted by the calling
|
||||
@@ -230,7 +230,7 @@ async def vision_analyze_tool(
|
||||
Args:
|
||||
image_url (str): The URL of the image to analyze (must be http:// or https://)
|
||||
user_prompt (str): The pre-formatted prompt for the vision model
|
||||
model (str): The vision model to use (default: gemini-2.5-flash)
|
||||
model (str): The vision model to use (default: google/gemini-3-flash-preview)
|
||||
|
||||
Returns:
|
||||
str: JSON string containing the analysis results with the following structure:
|
||||
@@ -271,8 +271,8 @@ async def vision_analyze_tool(
|
||||
raise ValueError("Invalid image URL format. Must start with http:// or https://")
|
||||
|
||||
# Check API key availability
|
||||
if not os.getenv("NOUS_API_KEY"):
|
||||
raise ValueError("NOUS_API_KEY environment variable not set")
|
||||
if not os.getenv("OPENROUTER_API_KEY"):
|
||||
raise ValueError("OPENROUTER_API_KEY environment variable not set")
|
||||
|
||||
# Download the image to a temporary location
|
||||
print(f"⬇️ Downloading image from URL...", flush=True)
|
||||
@@ -319,12 +319,18 @@ async def vision_analyze_tool(
|
||||
|
||||
print(f"🧠 Processing image with {model}...", flush=True)
|
||||
|
||||
# Call the vision API
|
||||
response = await nous_client.chat.completions.create(
|
||||
# Call the vision API with reasoning enabled
|
||||
response = await openrouter_client.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages,
|
||||
temperature=0.1, # Low temperature for consistent analysis
|
||||
max_tokens=2000 # Generous limit for detailed analysis
|
||||
max_tokens=2000, # Generous limit for detailed analysis
|
||||
extra_body={
|
||||
"reasoning": {
|
||||
"enabled": True,
|
||||
"effort": "xhigh"
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
# Extract the analysis
|
||||
@@ -374,14 +380,14 @@ async def vision_analyze_tool(
|
||||
print(f"⚠️ Warning: Could not delete temporary file: {cleanup_error}", flush=True)
|
||||
|
||||
|
||||
def check_nous_api_key() -> bool:
|
||||
def check_openrouter_api_key() -> bool:
|
||||
"""
|
||||
Check if the Nous Research API key is available in environment variables.
|
||||
Check if the OpenRouter API key is available in environment variables.
|
||||
|
||||
Returns:
|
||||
bool: True if API key is set, False otherwise
|
||||
"""
|
||||
return bool(os.getenv("NOUS_API_KEY"))
|
||||
return bool(os.getenv("OPENROUTER_API_KEY"))
|
||||
|
||||
|
||||
def check_vision_requirements() -> bool:
|
||||
@@ -391,7 +397,7 @@ def check_vision_requirements() -> bool:
|
||||
Returns:
|
||||
bool: True if requirements are met, False otherwise
|
||||
"""
|
||||
return check_nous_api_key()
|
||||
return check_openrouter_api_key()
|
||||
|
||||
|
||||
def get_debug_session_info() -> Dict[str, Any]:
|
||||
@@ -425,15 +431,15 @@ if __name__ == "__main__":
|
||||
print("=" * 40)
|
||||
|
||||
# Check if API key is available
|
||||
api_available = check_nous_api_key()
|
||||
api_available = check_openrouter_api_key()
|
||||
|
||||
if not api_available:
|
||||
print("❌ NOUS_API_KEY environment variable not set")
|
||||
print("Please set your API key: export NOUS_API_KEY='your-key-here'")
|
||||
print("Get API key at: https://inference-api.nousresearch.com/")
|
||||
print("❌ OPENROUTER_API_KEY environment variable not set")
|
||||
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
|
||||
print("Get API key at: https://openrouter.ai/")
|
||||
exit(1)
|
||||
else:
|
||||
print("✅ Nous Research API key found")
|
||||
print("✅ OpenRouter API key found")
|
||||
|
||||
print("🛠️ Vision tools ready for use!")
|
||||
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
|
||||
|
||||
@@ -285,7 +285,13 @@ Create a markdown summary that captures all key information in a well-organized,
|
||||
{"role": "user", "content": user_prompt}
|
||||
],
|
||||
temperature=0.1,
|
||||
max_tokens=max_tokens
|
||||
max_tokens=max_tokens,
|
||||
extra_body={
|
||||
"reasoning": {
|
||||
"enabled": True,
|
||||
"effort": "xhigh"
|
||||
}
|
||||
}
|
||||
)
|
||||
return response.choices[0].message.content.strip()
|
||||
except Exception as api_error:
|
||||
@@ -398,7 +404,13 @@ Create a single, unified markdown summary."""
|
||||
{"role": "user", "content": synthesis_prompt}
|
||||
],
|
||||
temperature=0.1,
|
||||
max_tokens=4000
|
||||
max_tokens=4000,
|
||||
extra_body={
|
||||
"reasoning": {
|
||||
"enabled": True,
|
||||
"effort": "xhigh"
|
||||
}
|
||||
}
|
||||
)
|
||||
final_summary = response.choices[0].message.content.strip()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user