From 4c05ef0ba8f01886daafc47ec09e7839cdac3136 Mon Sep 17 00:00:00 2001
From: teknium <teknium@nousresearch.com>
Date: Thu, 29 Jan 2026 19:59:59 +0000
Subject: [PATCH] Enhance logging and tool initialization for improved
 performance

- Updated logging configuration in `run_agent.py` to suppress debug messages from additional third-party libraries, reducing noise in logs.
- Updated `configs/run_terminal_tasks.sh` to use Singularity (Apptainer) instead of Modal for containerized execution, including logic to pre-build the SIF image once and improved status logging.
- Refactored tool initialization in `mixture_of_agents_tool.py`, `vision_tools.py`, and `web_tools.py` to implement lazy loading of API clients, optimizing resource usage and error handling.
- Updated the ephemeral system prompt in `configs/run_mixed_tasks.sh` to tell the agent to prefer the terminal over the web browser whenever possible, since terminal operations are much cheaper.
---
 configs/run_mixed_tasks.sh      |  2 +-
 configs/run_terminal_tasks.sh   | 50 +++++++++++++++++++++++++++------
 run_agent.py                    | 12 ++++++--
 tools/mixture_of_agents_tool.py | 24 +++++++++++-----
 tools/vision_tools.py           | 22 +++++++++++----
 tools/web_tools.py              | 45 +++++++++++++++++++++--------
 6 files changed, 118 insertions(+), 37 deletions(-)

diff --git a/configs/run_mixed_tasks.sh b/configs/run_mixed_tasks.sh
index b072f6541..e4db4de1d 100755
--- a/configs/run_mixed_tasks.sh
+++ b/configs/run_mixed_tasks.sh
@@ -28,7 +28,7 @@ python batch_runner.py \
   --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
   --num_workers=25 \
   --max_turns=60 \
-  --ephemeral_system_prompt="You are an AI assistant capable of both browser automation and terminal operations. Use browser tools to navigate websites, interact with web pages, fill forms, and extract information. Use terminal tools to execute commands, write and run code, install packages (use --break-system-packages with pip if needed), and perform local computations. When web search is available, use it to find URLs, documentation, or current information. If vision is available, use it to analyze images or screenshots. If image generation is available, use it when the task requires creating images. Combine browser and terminal capabilities effectively - for example, you might use the browser to fetch data from a website and terminal to process or analyze it. Always verify your work and handle errors gracefully." \
+  --ephemeral_system_prompt="You are an AI assistant capable of both browser automation and terminal operations. Use browser tools to navigate websites, interact with web pages, fill forms, and extract information. Use terminal tools to execute commands, write and run code, install packages (use --break-system-packages with pip if needed), and perform local computations. When web search is available, use it to find URLs, documentation, or current information. If vision is available, use it to analyze images or screenshots. If image generation is available, use it when the task requires creating images. Combine browser and terminal capabilities effectively - for example, you might use the browser to fetch data from a website and terminal to process or analyze it. Always verify your work and handle errors gracefully. Whenever you can do something in a terminal instead of a web browser, you should choose to do so, as it's much cheaper." \
   2>&1 | tee "$LOG_FILE"
 
 echo "✅ Log saved to: $LOG_FILE"
diff --git a/configs/run_terminal_tasks.sh b/configs/run_terminal_tasks.sh
index e26945988..6a0fd459d 100755
--- a/configs/run_terminal_tasks.sh
+++ b/configs/run_terminal_tasks.sh
@@ -13,21 +13,55 @@ LOG_FILE="logs/terminal_tasks_$(date +%Y%m%d_%H%M%S).log"
 echo "📝 Logging output to: $LOG_FILE"
 echo "💻 Running terminal-focused tasks with terminal_tasks distribution"
 
-# Set terminal environment (Modal sandboxes recommended for safety)
-export TERMINAL_ENV=modal
-export TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
+# Set terminal environment (using Singularity for containerized execution)
+export TERMINAL_ENV=singularity
 export TERMINAL_TIMEOUT=300
 
+# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp)
+if [ -d "/scratch" ] && [ -w "/scratch" ]; then
+    CACHE_BASE="/scratch/$USER/.apptainer"
+else
+    CACHE_BASE="/tmp/$USER/.apptainer"
+fi
+export APPTAINER_CACHEDIR="$CACHE_BASE"
+export APPTAINER_TMPDIR="$CACHE_BASE/tmp"
+mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
+
+# Pre-build SIF image if it doesn't exist (avoids every worker downloading it simultaneously)
+SIF_IMAGE="$CACHE_BASE/python-nodejs-3.11-20.sif"
+DOCKER_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20"
+
+if [ ! -f "$SIF_IMAGE" ]; then
+    echo "🔨 Building Singularity image (one-time setup)..."
+    echo "   Source: $DOCKER_IMAGE"
+    echo "   Target: $SIF_IMAGE"
+    apptainer build "$SIF_IMAGE" "$DOCKER_IMAGE"
+    if [ $? -ne 0 ]; then
+        echo "❌ Failed to build SIF image. Falling back to docker:// URL"
+        export TERMINAL_SINGULARITY_IMAGE="$DOCKER_IMAGE"
+    else
+        echo "✅ SIF image built successfully"
+        export TERMINAL_SINGULARITY_IMAGE="$SIF_IMAGE"
+    fi
+else
+    echo "✅ Using pre-built SIF image: $SIF_IMAGE"
+    export TERMINAL_SINGULARITY_IMAGE="$SIF_IMAGE"
+fi
+
+echo "📁 Apptainer cache: $APPTAINER_CACHEDIR"
+
 python batch_runner.py \
   --dataset_file="nous-terminal-tasks.jsonl" \
-  --batch_size=20 \
-  --run_name="terminal_tasks" \
+  --batch_size=5 \
+  --run_name="terminal_tasks-kimi-k2.5" \
   --distribution="terminal_tasks" \
-  --model="z-ai/glm-4.7" \
+  --model="moonshotai/kimi-k2.5" \
+  --verbose \
   --base_url="https://openrouter.ai/api/v1" \
-  --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
-  --num_workers=40 \
+  --num_workers=80 \
   --max_turns=60 \
+  --providers_ignored="Novita" \
+  --resume \
   --ephemeral_system_prompt="You have access to a terminal tool for executing commands and completing coding, system administration, and computing tasks. Use the terminal to write code, run scripts, install packages (use --break-system-packages with pip if needed), manipulate files, and verify your work. Always test and validate code you create. Do not use interactive tools like vim, nano, or python REPL. If git output is large, pipe to cat. When web search is available, use it to look up documentation, APIs, or best practices. If browser tools are available, use them for web interactions that require page manipulation. Do not use the terminal to communicate with the user - only your final response will be shown to them." \
   2>&1 | tee "$LOG_FILE"
 
diff --git a/run_agent.py b/run_agent.py
index 174c2bf97..0787e6724 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -122,14 +122,20 @@ class AIAgent:
                 format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                 datefmt='%H:%M:%S'
             )
-            # Keep OpenAI and httpx at WARNING level to reduce noise
+            # Keep third-party libraries at WARNING level to reduce noise
             # We have our own retry and error logging that's more informative
             logging.getLogger('openai').setLevel(logging.WARNING)
             logging.getLogger('openai._base_client').setLevel(logging.WARNING)
             logging.getLogger('httpx').setLevel(logging.WARNING)
             logging.getLogger('httpcore').setLevel(logging.WARNING)
-            logging.getLogger('asyncio').setLevel(logging.WARNING)  # Suppress asyncio debug
-            print("🔍 Verbose logging enabled (OpenAI/httpx/asyncio internal logs suppressed)")
+            logging.getLogger('asyncio').setLevel(logging.WARNING)
+            # Suppress Modal/gRPC-related debug spam
+            logging.getLogger('hpack').setLevel(logging.WARNING)
+            logging.getLogger('hpack.hpack').setLevel(logging.WARNING)
+            logging.getLogger('grpc').setLevel(logging.WARNING)
+            logging.getLogger('modal').setLevel(logging.WARNING)
+            logging.getLogger('rex-deploy').setLevel(logging.INFO)  # Keep INFO for sandbox status
+            print("🔍 Verbose logging enabled (third-party library logs suppressed)")
         else:
             # Set logging to INFO level for important messages only
             logging.basicConfig(
diff --git a/tools/mixture_of_agents_tool.py b/tools/mixture_of_agents_tool.py
index 82e9535b4..73703269b 100644
--- a/tools/mixture_of_agents_tool.py
+++ b/tools/mixture_of_agents_tool.py
@@ -54,11 +54,21 @@ from pathlib import Path
 from typing import Dict, Any, List, Optional
 from openai import AsyncOpenAI
 
-# Initialize OpenRouter API client for MoA processing
-openrouter_client = AsyncOpenAI(
-    api_key=os.getenv("OPENROUTER_API_KEY"),
-    base_url="https://openrouter.ai/api/v1"
-)
+# Initialize OpenRouter API client lazily (only when needed)
+_openrouter_client = None
+
+def _get_openrouter_client():
+    """Get or create the OpenRouter client (lazy initialization)."""
+    global _openrouter_client
+    if _openrouter_client is None:
+        api_key = os.getenv("OPENROUTER_API_KEY")
+        if not api_key:
+            raise ValueError("OPENROUTER_API_KEY environment variable not set")
+        _openrouter_client = AsyncOpenAI(
+            api_key=api_key,
+            base_url="https://openrouter.ai/api/v1"
+        )
+    return _openrouter_client
 
 # Configuration for MoA processing
 # Reference models - these generate diverse initial responses in parallel (OpenRouter slugs)
@@ -201,7 +211,7 @@ async def _run_reference_model_safe(
             if not model.lower().startswith('gpt-'):
                 api_params["temperature"] = temperature
             
-            response = await openrouter_client.chat.completions.create(**api_params)
+            response = await _get_openrouter_client().chat.completions.create(**api_params)
             
             content = response.choices[0].message.content.strip()
             print(f"✅ {model} responded ({len(content)} characters)")
@@ -268,7 +278,7 @@ async def _run_aggregator_model(
     if not AGGREGATOR_MODEL.lower().startswith('gpt-'):
         api_params["temperature"] = temperature
     
-    response = await openrouter_client.chat.completions.create(**api_params)
+    response = await _get_openrouter_client().chat.completions.create(**api_params)
     
     content = response.choices[0].message.content.strip()
     print(f"✅ Aggregation complete ({len(content)} characters)")
diff --git a/tools/vision_tools.py b/tools/vision_tools.py
index f0145b7b6..defa2d6af 100644
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -38,11 +38,21 @@ from typing import Dict, Any, Optional
 from openai import AsyncOpenAI
 import httpx  # Use httpx for async HTTP requests
 
-# Initialize OpenRouter API client for vision processing
-openrouter_client = AsyncOpenAI(
-    api_key=os.getenv("OPENROUTER_API_KEY"),
-    base_url="https://openrouter.ai/api/v1"
-)
+# Initialize OpenRouter API client lazily (only when needed)
+_openrouter_client = None
+
+def _get_openrouter_client():
+    """Get or create the OpenRouter client (lazy initialization)."""
+    global _openrouter_client
+    if _openrouter_client is None:
+        api_key = os.getenv("OPENROUTER_API_KEY")
+        if not api_key:
+            raise ValueError("OPENROUTER_API_KEY environment variable not set")
+        _openrouter_client = AsyncOpenAI(
+            api_key=api_key,
+            base_url="https://openrouter.ai/api/v1"
+        )
+    return _openrouter_client
 
 # Configuration for vision processing
 DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview"
@@ -341,7 +351,7 @@ async def vision_analyze_tool(
         print(f"🧠 Processing image with {model}...", flush=True)
         
         # Call the vision API with reasoning enabled
-        response = await openrouter_client.chat.completions.create(
+        response = await _get_openrouter_client().chat.completions.create(
             model=model,
             messages=messages,
             temperature=0.1,  # Low temperature for consistent analysis
diff --git a/tools/web_tools.py b/tools/web_tools.py
index 01efea5ba..ed89bf0e6 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -51,14 +51,35 @@ from typing import List, Dict, Any, Optional
 from firecrawl import Firecrawl
 from openai import AsyncOpenAI
 
-# Initialize Firecrawl client once at module level
-firecrawl_client = Firecrawl(api_key=os.getenv("FIRECRAWL_API_KEY"))
+# Initialize Firecrawl client lazily (only when needed)
+# This prevents import errors when FIRECRAWL_API_KEY is not set
+_firecrawl_client = None
 
-# Initialize OpenRouter API client for LLM processing (async)
-summarizer_client = AsyncOpenAI(
-    api_key=os.getenv("OPENROUTER_API_KEY"),
-    base_url="https://openrouter.ai/api/v1"
-)
+def _get_firecrawl_client():
+    """Get or create the Firecrawl client (lazy initialization)."""
+    global _firecrawl_client
+    if _firecrawl_client is None:
+        api_key = os.getenv("FIRECRAWL_API_KEY")
+        if not api_key:
+            raise ValueError("FIRECRAWL_API_KEY environment variable not set")
+        _firecrawl_client = Firecrawl(api_key=api_key)
+    return _firecrawl_client
+
+# Initialize OpenRouter API client lazily (only when needed)
+_summarizer_client = None
+
+def _get_summarizer_client():
+    """Get or create the summarizer client (lazy initialization)."""
+    global _summarizer_client
+    if _summarizer_client is None:
+        api_key = os.getenv("OPENROUTER_API_KEY")
+        if not api_key:
+            raise ValueError("OPENROUTER_API_KEY environment variable not set")
+        _summarizer_client = AsyncOpenAI(
+            api_key=api_key,
+            base_url="https://openrouter.ai/api/v1"
+        )
+    return _summarizer_client
 
 # Configuration for LLM processing
 DEFAULT_SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
@@ -278,7 +299,7 @@ Create a markdown summary that captures all key information in a well-organized,
 
     for attempt in range(max_retries):
         try:
-            response = await summarizer_client.chat.completions.create(
+            response = await _get_summarizer_client().chat.completions.create(
                 model=model,
                 messages=[
                     {"role": "system", "content": system_prompt},
@@ -397,7 +418,7 @@ Synthesize these into ONE cohesive, comprehensive summary that:
 Create a single, unified markdown summary."""
 
     try:
-        response = await summarizer_client.chat.completions.create(
+        response = await _get_summarizer_client().chat.completions.create(
             model=model,
             messages=[
                 {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
@@ -518,7 +539,7 @@ def web_search_tool(query: str, limit: int = 5) -> str:
         # Use Firecrawl's v2 search functionality WITHOUT scraping
         # We only want search result metadata, not scraped content
         # Docs: https://docs.firecrawl.dev/features/search
-        response = firecrawl_client.search(
+        response = _get_firecrawl_client().search(
             query=query,
             limit=limit
         )
@@ -652,7 +673,7 @@ async def web_extract_tool(
         for url in urls:
             try:
                 print(f"  📄 Scraping: {url}")
-                scrape_result = firecrawl_client.scrape(
+                scrape_result = _get_firecrawl_client().scrape(
                     url=url,
                     formats=formats
                 )
@@ -926,7 +947,7 @@ async def web_crawl_tool(
         
         # Use the crawl method which waits for completion automatically
         try:
-            crawl_result = firecrawl_client.crawl(
+            crawl_result = _get_firecrawl_client().crawl(
                 url=url,
                 **crawl_params
             )