rate limits

2025-11-17 18:35:36 -05:00
parent f813959750
commit 1614c15bb1
4 changed files with 43 additions and 28 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -388,7 +388,7 @@ class AIAgent:
        
        while api_call_count < self.max_iterations:
            api_call_count += 1
-            print(f"\n🔄 Making API call #{api_call_count}...")
+            print(f"\n🔄 Making OpenAI-compatible API call #{api_call_count}...")
            
            # Log request details if verbose
            if self.verbose_logging:
@@ -397,8 +397,8 @@ class AIAgent:
            
            api_start_time = time.time()
            retry_count = 0
-            max_retries = 3
-            
+            max_retries = 6  # Increased to allow longer backoff periods
+
            while retry_count <= max_retries:
                try:
                    # Prepare messages for API call
@@ -407,30 +407,30 @@ class AIAgent:
                    if active_system_prompt:
                        # Insert system message at the beginning
                        api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
-                    
+
                    # Make API call with tools
                    response = self.client.chat.completions.create(
                        model=self.model,
                        messages=api_messages,
                        tools=self.tools if self.tools else None,
-                        timeout=60.0  # Add explicit timeout
+                        timeout=300.0  # 5 minute timeout for long-running agent tasks
                    )
-                    
+
                    api_duration = time.time() - api_start_time
-                    print(f"⏱️  API call completed in {api_duration:.2f}s")
-                    
+                    print(f"⏱️  OpenAI-compatible API call completed in {api_duration:.2f}s")
+
                    if self.verbose_logging:
                        logging.debug(f"API Response received - Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
-                    
+
                    break  # Success, exit retry loop
-                    
+
                except Exception as api_error:
                    retry_count += 1
                    if retry_count > max_retries:
                        raise api_error
-                    
-                    wait_time = min(2 ** retry_count, 10)  # Exponential backoff, max 10s
-                    print(f"⚠️  API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
+
+                    wait_time = min(2 ** retry_count, 60)  # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s
+                    print(f"⚠️  OpenAI-compatible API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
                    print(f"⏳ Retrying in {wait_time}s...")
                    logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
                    time.sleep(wait_time)
@@ -522,11 +522,11 @@ class AIAgent:
                        "content": final_response
                    })
                    
-                    print(f"🎉 Conversation completed after {api_call_count} API call(s)")
+                    print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
                    break
                
            except Exception as e:
-                error_msg = f"Error during API call #{api_call_count}: {str(e)}"
+                error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
                print(f"❌ {error_msg}")
                
                if self.verbose_logging:
--- a/tools/mixture_of_agents_tool.py
+++ b/tools/mixture_of_agents_tool.py
@@ -161,11 +161,11 @@ def _construct_aggregator_prompt(system_prompt: str, responses: List[str]) -> st


 async def _run_reference_model_safe(
-    model: str, 
-    user_prompt: str, 
+    model: str,
+    user_prompt: str,
    temperature: float = REFERENCE_TEMPERATURE,
    max_tokens: int = 32000,
-    max_retries: int = 3
+    max_retries: int = 6
 ) -> tuple[str, str, bool]:
    """
    Run a single reference model with retry logic and graceful failure handling.
@@ -212,8 +212,8 @@ async def _run_reference_model_safe(
                print(f"⚠️  {model} unknown error (attempt {attempt + 1}): {error_str}")
                
            if attempt < max_retries - 1:
-                # Exponential backoff for rate limiting
-                sleep_time = 2 ** attempt
+                # Exponential backoff for rate limiting: 2s, 4s, 8s, 16s, 32s, then capped at 60s
+                sleep_time = min(2 ** (attempt + 1), 60)
                print(f"   Retrying in {sleep_time}s...")
                await asyncio.sleep(sleep_time)
            else:
--- a/tools/simple_terminal_tool.py
+++ b/tools/simple_terminal_tool.py
@@ -34,7 +34,7 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi
 **Environment:**
 - Minimal Debian-based OS with internet access
 - Automatic VM lifecycle management (creates on-demand, reuses, cleans up)
- No state persistence - each command runs independently
+- Filesystem is persisted between tool calls but environment variables, venvs, etc are reset.

 **Command Execution:**
 - Simple commands: Just provide the 'command' parameter
@@ -43,13 +43,18 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi

 **Examples:**
 - Run command: `{"command": "ls -la"}`
- Background task: `{"command": "python server.py", "background": True}`
+- Background task: `{"command": "source path/to/my/venv/bin/activate && python server.py", "background": True}`
 - With timeout: `{"command": "long_task.sh", "timeout": 300}`

 **Best Practices:**
 - Run servers/long processes in background
 - Monitor disk usage for large tasks
- Install tools as needed with apt-get"""
+- Install whatever tools you need with sudo apt-get
+- Do not be afraid to run pip with --break-system-packages
+
+**Things to avoid**
+- Do NOT use interactive tools such as tmux, vim, nano, python repl - you will get stuck. Even git sometimes becomes interactive if the output is large. If you're not sure pipe to cat.
+"""

 # Global state for VM lifecycle management
 _active_instances: Dict[str, Any] = {}
@@ -89,7 +94,12 @@ def _cleanup_inactive_vms(vm_lifetime_seconds: int = 300):
                    del _last_activity[task_id]

            except Exception as e:
-                print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
+                # 404 errors are benign - VM already cleaned up by TTL
+                error_str = str(e)
+                if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
+                    print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
+                else:
+                    print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")


 def _cleanup_thread_worker():
@@ -150,7 +160,12 @@ def cleanup_vm(task_id: str):
                del _last_activity[task_id]

        except Exception as e:
-            print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
+            # 404 errors are benign - VM already cleaned up by TTL
+            error_str = str(e)
+            if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
+                print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
+            else:
+                print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")


 atexit.register(_stop_cleanup_thread)
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -184,10 +184,10 @@ Your goal is to preserve ALL important information while reducing length. Never
 Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""

        # Call the LLM asynchronously with retry logic for flaky API
-        max_retries = 3
+        max_retries = 6
        retry_delay = 2  # Start with 2 seconds
        last_error = None
-        
+
        for attempt in range(max_retries):
            try:
                response = await nous_client.chat.completions.create(
@@ -206,7 +206,7 @@ Create a markdown summary that captures all key information in a well-organized,
                    print(f"⚠️  LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
                    print(f"   Retrying in {retry_delay}s...")
                    await asyncio.sleep(retry_delay)
-                    retry_delay *= 2  # Exponential backoff: 2s, 4s, 8s
+                    retry_delay = min(retry_delay * 2, 60)  # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s
                else:
                    # All retries exhausted
                    raise last_error