Enhance logging and tool initialization for improved performance

- Updated logging configuration in `run_agent.py` to suppress debug messages from additional third-party libraries, reducing noise in logs.
- Enhanced shell scripts for terminal tasks to utilize Singularity for containerized execution, including pre-build SIF image logic and improved logging.
- Refactored tool initialization in `mixture_of_agents_tool.py`, `vision_tools.py`, and `web_tools.py` to implement lazy loading of API clients, avoiding unnecessary client construction at import time and improving error handling when API keys are missing.
- Updated ephemeral system prompts in shell scripts to provide clearer guidance on task execution and resource usage.
This commit is contained in:
teknium
2026-01-29 19:59:59 +00:00
parent 5438b64e32
commit 4c05ef0ba8
6 changed files with 118 additions and 37 deletions

View File

@@ -28,7 +28,7 @@ python batch_runner.py \
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
--num_workers=25 \
--max_turns=60 \
--ephemeral_system_prompt="You are an AI assistant capable of both browser automation and terminal operations. Use browser tools to navigate websites, interact with web pages, fill forms, and extract information. Use terminal tools to execute commands, write and run code, install packages (use --break-system-packages with pip if needed), and perform local computations. When web search is available, use it to find URLs, documentation, or current information. If vision is available, use it to analyze images or screenshots. If image generation is available, use it when the task requires creating images. Combine browser and terminal capabilities effectively - for example, you might use the browser to fetch data from a website and terminal to process or analyze it. Always verify your work and handle errors gracefully." \
--ephemeral_system_prompt="You are an AI assistant capable of both browser automation and terminal operations. Use browser tools to navigate websites, interact with web pages, fill forms, and extract information. Use terminal tools to execute commands, write and run code, install packages (use --break-system-packages with pip if needed), and perform local computations. When web search is available, use it to find URLs, documentation, or current information. If vision is available, use it to analyze images or screenshots. If image generation is available, use it when the task requires creating images. Combine browser and terminal capabilities effectively - for example, you might use the browser to fetch data from a website and terminal to process or analyze it. Always verify your work and handle errors gracefully. Whenever you can do something in a terminal instead of a web browser, you should choose to do so, as it's much cheaper." \
2>&1 | tee "$LOG_FILE"
echo "✅ Log saved to: $LOG_FILE"

View File

@@ -13,21 +13,55 @@ LOG_FILE="logs/terminal_tasks_$(date +%Y%m%d_%H%M%S).log"
echo "📝 Logging output to: $LOG_FILE"
echo "💻 Running terminal-focused tasks with terminal_tasks distribution"
# Set terminal environment (Modal sandboxes recommended for safety)
export TERMINAL_ENV=modal
export TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
# Set terminal environment (using Singularity for containerized execution)
export TERMINAL_ENV=singularity
export TERMINAL_TIMEOUT=300
# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp)
if [ -d "/scratch" ] && [ -w "/scratch" ]; then
CACHE_BASE="/scratch/$USER/.apptainer"
else
CACHE_BASE="/tmp/$USER/.apptainer"
fi
export APPTAINER_CACHEDIR="$CACHE_BASE"
export APPTAINER_TMPDIR="$CACHE_BASE/tmp"
mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
# Pre-build SIF image if it doesn't exist (avoids all workers downloading the image simultaneously)
SIF_IMAGE="$CACHE_BASE/python-nodejs-3.11-20.sif"
DOCKER_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20"
if [ ! -f "$SIF_IMAGE" ]; then
echo "🔨 Building Singularity image (one-time setup)..."
echo " Source: $DOCKER_IMAGE"
echo " Target: $SIF_IMAGE"
apptainer build "$SIF_IMAGE" "$DOCKER_IMAGE"
if [ $? -ne 0 ]; then
echo "❌ Failed to build SIF image. Falling back to docker:// URL"
export TERMINAL_SINGULARITY_IMAGE="$DOCKER_IMAGE"
else
echo "✅ SIF image built successfully"
export TERMINAL_SINGULARITY_IMAGE="$SIF_IMAGE"
fi
else
echo "✅ Using pre-built SIF image: $SIF_IMAGE"
export TERMINAL_SINGULARITY_IMAGE="$SIF_IMAGE"
fi
echo "📁 Apptainer cache: $APPTAINER_CACHEDIR"
python batch_runner.py \
--dataset_file="nous-terminal-tasks.jsonl" \
--batch_size=20 \
--run_name="terminal_tasks" \
--batch_size=5 \
--run_name="terminal_tasks-kimi-k2.5" \
--distribution="terminal_tasks" \
--model="z-ai/glm-4.7" \
--model="moonshotai/kimi-k2.5" \
--verbose \
--base_url="https://openrouter.ai/api/v1" \
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
--num_workers=40 \
--num_workers=80 \
--max_turns=60 \
--providers_ignored="Novita" \
--resume \
--ephemeral_system_prompt="You have access to a terminal tool for executing commands and completing coding, system administration, and computing tasks. Use the terminal to write code, run scripts, install packages (use --break-system-packages with pip if needed), manipulate files, and verify your work. Always test and validate code you create. Do not use interactive tools like vim, nano, or python REPL. If git output is large, pipe to cat. When web search is available, use it to look up documentation, APIs, or best practices. If browser tools are available, use them for web interactions that require page manipulation. Do not use the terminal to communicate with the user - only your final response will be shown to them." \
2>&1 | tee "$LOG_FILE"

View File

@@ -122,14 +122,20 @@ class AIAgent:
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%H:%M:%S'
)
# Keep OpenAI and httpx at WARNING level to reduce noise
# Keep third-party libraries at WARNING level to reduce noise
# We have our own retry and error logging that's more informative
logging.getLogger('openai').setLevel(logging.WARNING)
logging.getLogger('openai._base_client').setLevel(logging.WARNING)
logging.getLogger('httpx').setLevel(logging.WARNING)
logging.getLogger('httpcore').setLevel(logging.WARNING)
logging.getLogger('asyncio').setLevel(logging.WARNING) # Suppress asyncio debug
print("🔍 Verbose logging enabled (OpenAI/httpx/asyncio internal logs suppressed)")
logging.getLogger('asyncio').setLevel(logging.WARNING)
# Suppress Modal/gRPC related debug spam
logging.getLogger('hpack').setLevel(logging.WARNING)
logging.getLogger('hpack.hpack').setLevel(logging.WARNING)
logging.getLogger('grpc').setLevel(logging.WARNING)
logging.getLogger('modal').setLevel(logging.WARNING)
logging.getLogger('rex-deploy').setLevel(logging.INFO) # Keep INFO for sandbox status
print("🔍 Verbose logging enabled (third-party library logs suppressed)")
else:
# Set logging to INFO level for important messages only
logging.basicConfig(

View File

@@ -54,11 +54,21 @@ from pathlib import Path
from typing import Dict, Any, List, Optional
from openai import AsyncOpenAI
# Initialize OpenRouter API client for MoA processing
openrouter_client = AsyncOpenAI(
api_key=os.getenv("OPENROUTER_API_KEY"),
base_url="https://openrouter.ai/api/v1"
)
# Initialize OpenRouter API client lazily (only when needed)
_openrouter_client = None


def _get_openrouter_client():
    """Return the shared OpenRouter client, creating it on first use.

    Lazy construction avoids requiring the API key at import time; the
    client is built once and cached in a module-level global.

    Returns:
        AsyncOpenAI: client configured for the OpenRouter endpoint.

    Raises:
        ValueError: if the OPENROUTER_API_KEY environment variable is not set.
    """
    global _openrouter_client
    if _openrouter_client is not None:
        return _openrouter_client
    key = os.getenv("OPENROUTER_API_KEY")
    if not key:
        raise ValueError("OPENROUTER_API_KEY environment variable not set")
    _openrouter_client = AsyncOpenAI(
        api_key=key,
        base_url="https://openrouter.ai/api/v1",
    )
    return _openrouter_client
# Configuration for MoA processing
# Reference models - these generate diverse initial responses in parallel (OpenRouter slugs)
@@ -201,7 +211,7 @@ async def _run_reference_model_safe(
if not model.lower().startswith('gpt-'):
api_params["temperature"] = temperature
response = await openrouter_client.chat.completions.create(**api_params)
response = await _get_openrouter_client().chat.completions.create(**api_params)
content = response.choices[0].message.content.strip()
print(f"{model} responded ({len(content)} characters)")
@@ -268,7 +278,7 @@ async def _run_aggregator_model(
if not AGGREGATOR_MODEL.lower().startswith('gpt-'):
api_params["temperature"] = temperature
response = await openrouter_client.chat.completions.create(**api_params)
response = await _get_openrouter_client().chat.completions.create(**api_params)
content = response.choices[0].message.content.strip()
print(f"✅ Aggregation complete ({len(content)} characters)")

View File

@@ -38,11 +38,21 @@ from typing import Dict, Any, Optional
from openai import AsyncOpenAI
import httpx # Use httpx for async HTTP requests
# Initialize OpenRouter API client for vision processing
openrouter_client = AsyncOpenAI(
api_key=os.getenv("OPENROUTER_API_KEY"),
base_url="https://openrouter.ai/api/v1"
)
# Initialize OpenRouter API client lazily (only when needed)
_openrouter_client = None


def _get_openrouter_client():
    """Get the module-wide OpenRouter client, building it on first call.

    Deferring construction means importing this module never fails just
    because the API key is absent; callers see an error only when a tool
    actually needs the client.

    Returns:
        AsyncOpenAI: client pointed at the OpenRouter API.

    Raises:
        ValueError: if OPENROUTER_API_KEY is not present in the environment.
    """
    global _openrouter_client
    if _openrouter_client is not None:
        return _openrouter_client
    key = os.getenv("OPENROUTER_API_KEY")
    if not key:
        raise ValueError("OPENROUTER_API_KEY environment variable not set")
    _openrouter_client = AsyncOpenAI(
        api_key=key,
        base_url="https://openrouter.ai/api/v1",
    )
    return _openrouter_client
# Configuration for vision processing
DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview"
@@ -341,7 +351,7 @@ async def vision_analyze_tool(
print(f"🧠 Processing image with {model}...", flush=True)
# Call the vision API with reasoning enabled
response = await openrouter_client.chat.completions.create(
response = await _get_openrouter_client().chat.completions.create(
model=model,
messages=messages,
temperature=0.1, # Low temperature for consistent analysis

View File

@@ -51,14 +51,35 @@ from typing import List, Dict, Any, Optional
from firecrawl import Firecrawl
from openai import AsyncOpenAI
# Initialize Firecrawl client once at module level
firecrawl_client = Firecrawl(api_key=os.getenv("FIRECRAWL_API_KEY"))
# Initialize Firecrawl client lazily (only when needed)
# This prevents import errors when FIRECRAWL_API_KEY is not set
_firecrawl_client = None
# Initialize OpenRouter API client for LLM processing (async)
summarizer_client = AsyncOpenAI(
api_key=os.getenv("OPENROUTER_API_KEY"),
base_url="https://openrouter.ai/api/v1"
)
def _get_firecrawl_client():
    """Return the shared Firecrawl client, constructing it on first use.

    The client is cached in the module-level ``_firecrawl_client`` global so
    repeated calls reuse one instance; building lazily avoids failures at
    import time when the key is absent.

    Returns:
        Firecrawl: client authenticated with FIRECRAWL_API_KEY.

    Raises:
        ValueError: if the FIRECRAWL_API_KEY environment variable is not set.
    """
    global _firecrawl_client
    if _firecrawl_client is not None:
        return _firecrawl_client
    key = os.getenv("FIRECRAWL_API_KEY")
    if not key:
        raise ValueError("FIRECRAWL_API_KEY environment variable not set")
    _firecrawl_client = Firecrawl(api_key=key)
    return _firecrawl_client
# Initialize OpenRouter API client lazily (only when needed)
_summarizer_client = None


def _get_summarizer_client():
    """Return the shared summarizer LLM client, creating it on demand.

    The OpenRouter-backed client is built the first time a summary is
    requested and cached for subsequent calls, so module import never
    depends on the API key being configured.

    Returns:
        AsyncOpenAI: client configured for the OpenRouter endpoint.

    Raises:
        ValueError: if the OPENROUTER_API_KEY environment variable is not set.
    """
    global _summarizer_client
    if _summarizer_client is not None:
        return _summarizer_client
    key = os.getenv("OPENROUTER_API_KEY")
    if not key:
        raise ValueError("OPENROUTER_API_KEY environment variable not set")
    _summarizer_client = AsyncOpenAI(
        api_key=key,
        base_url="https://openrouter.ai/api/v1",
    )
    return _summarizer_client
# Configuration for LLM processing
DEFAULT_SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
@@ -278,7 +299,7 @@ Create a markdown summary that captures all key information in a well-organized,
for attempt in range(max_retries):
try:
response = await summarizer_client.chat.completions.create(
response = await _get_summarizer_client().chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt},
@@ -397,7 +418,7 @@ Synthesize these into ONE cohesive, comprehensive summary that:
Create a single, unified markdown summary."""
try:
response = await summarizer_client.chat.completions.create(
response = await _get_summarizer_client().chat.completions.create(
model=model,
messages=[
{"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
@@ -518,7 +539,7 @@ def web_search_tool(query: str, limit: int = 5) -> str:
# Use Firecrawl's v2 search functionality WITHOUT scraping
# We only want search result metadata, not scraped content
# Docs: https://docs.firecrawl.dev/features/search
response = firecrawl_client.search(
response = _get_firecrawl_client().search(
query=query,
limit=limit
)
@@ -652,7 +673,7 @@ async def web_extract_tool(
for url in urls:
try:
print(f" 📄 Scraping: {url}")
scrape_result = firecrawl_client.scrape(
scrape_result = _get_firecrawl_client().scrape(
url=url,
formats=formats
)
@@ -926,7 +947,7 @@ async def web_crawl_tool(
# Use the crawl method which waits for completion automatically
try:
crawl_result = firecrawl_client.crawl(
crawl_result = _get_firecrawl_client().crawl(
url=url,
**crawl_params
)