diff --git a/run_agent.py b/run_agent.py
index e7abde80b..97fb37087 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -388,7 +388,7 @@ class AIAgent:
         
         while api_call_count < self.max_iterations:
             api_call_count += 1
-            print(f"\n🔄 Making API call #{api_call_count}...")
+            print(f"\n🔄 Making OpenAI-compatible API call #{api_call_count}...")
             
             # Log request details if verbose
             if self.verbose_logging:
@@ -397,8 +397,8 @@ class AIAgent:
             
             api_start_time = time.time()
             retry_count = 0
-            max_retries = 3
-            
+            max_retries = 6  # Increased to allow longer backoff periods
+
             while retry_count <= max_retries:
                 try:
                     # Prepare messages for API call
@@ -407,30 +407,30 @@ class AIAgent:
                     if active_system_prompt:
                         # Insert system message at the beginning
                         api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
-                    
+
                     # Make API call with tools
                     response = self.client.chat.completions.create(
                         model=self.model,
                         messages=api_messages,
                         tools=self.tools if self.tools else None,
-                        timeout=60.0  # Add explicit timeout
+                        timeout=300.0  # 5 minute timeout for long-running agent tasks
                     )
-                    
+
                     api_duration = time.time() - api_start_time
-                    print(f"⏱️  API call completed in {api_duration:.2f}s")
-                    
+                    print(f"⏱️  OpenAI-compatible API call completed in {api_duration:.2f}s")
+
                     if self.verbose_logging:
                         logging.debug(f"API Response received - Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
-                    
+
                     break  # Success, exit retry loop
-                    
+
                 except Exception as api_error:
                     retry_count += 1
                     if retry_count > max_retries:
                         raise api_error
-                    
-                    wait_time = min(2 ** retry_count, 10)  # Exponential backoff, max 10s
-                    print(f"⚠️  API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
+
+                    wait_time = min(2 ** retry_count, 60)  # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s
+                    print(f"⚠️  OpenAI-compatible API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
                     print(f"⏳ Retrying in {wait_time}s...")
                     logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
                     time.sleep(wait_time)
@@ -522,11 +522,11 @@ class AIAgent:
                         "content": final_response
                     })
                     
-                    print(f"🎉 Conversation completed after {api_call_count} API call(s)")
+                    print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
                     break
                 
             except Exception as e:
-                error_msg = f"Error during API call #{api_call_count}: {str(e)}"
+                error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
                 print(f"❌ {error_msg}")
                 
                 if self.verbose_logging:
diff --git a/tools/mixture_of_agents_tool.py b/tools/mixture_of_agents_tool.py
index a0e8db1ee..c94d9e1de 100644
--- a/tools/mixture_of_agents_tool.py
+++ b/tools/mixture_of_agents_tool.py
@@ -161,11 +161,11 @@ def _construct_aggregator_prompt(system_prompt: str, responses: List[str]) -> st
 
 
 async def _run_reference_model_safe(
-    model: str, 
-    user_prompt: str, 
+    model: str,
+    user_prompt: str,
     temperature: float = REFERENCE_TEMPERATURE,
     max_tokens: int = 32000,
-    max_retries: int = 3
+    max_retries: int = 6
 ) -> tuple[str, str, bool]:
     """
     Run a single reference model with retry logic and graceful failure handling.
@@ -212,8 +212,8 @@ async def _run_reference_model_safe(
                 print(f"⚠️  {model} unknown error (attempt {attempt + 1}): {error_str}")
                 
             if attempt < max_retries - 1:
-                # Exponential backoff for rate limiting
-                sleep_time = 2 ** attempt
+                # Exponential backoff for rate limiting: 2s, 4s, 8s, 16s, 32s with the default max_retries (capped at 60s)
+                sleep_time = min(2 ** (attempt + 1), 60)
                 print(f"   Retrying in {sleep_time}s...")
                 await asyncio.sleep(sleep_time)
             else:
diff --git a/tools/simple_terminal_tool.py b/tools/simple_terminal_tool.py
index 1d0b236dc..6ebfeeda7 100644
--- a/tools/simple_terminal_tool.py
+++ b/tools/simple_terminal_tool.py
@@ -34,7 +34,7 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi
 **Environment:**
 - Minimal Debian-based OS with internet access
 - Automatic VM lifecycle management (creates on-demand, reuses, cleans up)
-- No state persistence - each command runs independently
+- Filesystem is persisted between tool calls but environment variables, venvs, etc are reset.
 
 **Command Execution:**
 - Simple commands: Just provide the 'command' parameter
@@ -43,13 +43,18 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi
 
 **Examples:**
 - Run command: `{"command": "ls -la"}`
-- Background task: `{"command": "python server.py", "background": True}`
+- Background task: `{"command": "source path/to/my/venv/bin/activate && python server.py", "background": True}`
 - With timeout: `{"command": "long_task.sh", "timeout": 300}`
 
 **Best Practices:**
 - Run servers/long processes in background
 - Monitor disk usage for large tasks
-- Install tools as needed with apt-get"""
+- Install whatever tools you need with sudo apt-get
+- Do not be afraid to run pip with --break-system-packages
+
+**Things to avoid**
+- Do NOT use interactive tools such as tmux, vim, nano, python repl - you will get stuck. Even git sometimes becomes interactive if the output is large. If you're not sure pipe to cat.
+"""
 
 # Global state for VM lifecycle management
 _active_instances: Dict[str, Any] = {}
@@ -89,7 +94,12 @@ def _cleanup_inactive_vms(vm_lifetime_seconds: int = 300):
                     del _last_activity[task_id]
 
             except Exception as e:
-                print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
+                # 404 errors are benign - VM already cleaned up by TTL
+                error_str = str(e)
+                if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
+                    print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
+                else:
+                    print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
 
 
 def _cleanup_thread_worker():
@@ -150,7 +160,12 @@ def cleanup_vm(task_id: str):
                 del _last_activity[task_id]
 
         except Exception as e:
-            print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
+            # 404 errors are benign - VM already cleaned up by TTL
+            error_str = str(e)
+            if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
+                print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
+            else:
+                print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
 
 
 atexit.register(_stop_cleanup_thread)
diff --git a/tools/web_tools.py b/tools/web_tools.py
index e3a65f71e..3f7df9f43 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -184,10 +184,10 @@ Your goal is to preserve ALL important information while reducing length. Never
 Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
 
         # Call the LLM asynchronously with retry logic for flaky API
-        max_retries = 3
+        max_retries = 6
         retry_delay = 2  # Start with 2 seconds
         last_error = None
-        
+
         for attempt in range(max_retries):
             try:
                 response = await nous_client.chat.completions.create(
@@ -206,7 +206,7 @@ Create a markdown summary that captures all key information in a well-organized,
                     print(f"⚠️  LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
                     print(f"   Retrying in {retry_delay}s...")
                     await asyncio.sleep(retry_delay)
-                    retry_delay *= 2  # Exponential backoff: 2s, 4s, 8s
+                    retry_delay = min(retry_delay * 2, 60)  # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s
                 else:
                     # All retries exhausted
                     raise last_error