rate limits
This commit is contained in:
30
run_agent.py
30
run_agent.py
@@ -388,7 +388,7 @@ class AIAgent:
|
||||
|
||||
while api_call_count < self.max_iterations:
|
||||
api_call_count += 1
|
||||
print(f"\n🔄 Making API call #{api_call_count}...")
|
||||
print(f"\n🔄 Making OpenAI-compatible API call #{api_call_count}...")
|
||||
|
||||
# Log request details if verbose
|
||||
if self.verbose_logging:
|
||||
@@ -397,8 +397,8 @@ class AIAgent:
|
||||
|
||||
api_start_time = time.time()
|
||||
retry_count = 0
|
||||
max_retries = 3
|
||||
|
||||
max_retries = 6 # Increased to allow longer backoff periods
|
||||
|
||||
while retry_count <= max_retries:
|
||||
try:
|
||||
# Prepare messages for API call
|
||||
@@ -407,30 +407,30 @@ class AIAgent:
|
||||
if active_system_prompt:
|
||||
# Insert system message at the beginning
|
||||
api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
|
||||
|
||||
|
||||
# Make API call with tools
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=api_messages,
|
||||
tools=self.tools if self.tools else None,
|
||||
timeout=60.0 # Add explicit timeout
|
||||
timeout=300.0 # 5 minute timeout for long-running agent tasks
|
||||
)
|
||||
|
||||
|
||||
api_duration = time.time() - api_start_time
|
||||
print(f"⏱️ API call completed in {api_duration:.2f}s")
|
||||
|
||||
print(f"⏱️ OpenAI-compatible API call completed in {api_duration:.2f}s")
|
||||
|
||||
if self.verbose_logging:
|
||||
logging.debug(f"API Response received - Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
|
||||
|
||||
|
||||
break # Success, exit retry loop
|
||||
|
||||
|
||||
except Exception as api_error:
|
||||
retry_count += 1
|
||||
if retry_count > max_retries:
|
||||
raise api_error
|
||||
|
||||
wait_time = min(2 ** retry_count, 10) # Exponential backoff, max 10s
|
||||
print(f"⚠️ API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
|
||||
|
||||
wait_time = min(2 ** retry_count, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
|
||||
print(f"⚠️ OpenAI-compatible API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
|
||||
print(f"⏳ Retrying in {wait_time}s...")
|
||||
logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
|
||||
time.sleep(wait_time)
|
||||
@@ -522,11 +522,11 @@ class AIAgent:
|
||||
"content": final_response
|
||||
})
|
||||
|
||||
print(f"🎉 Conversation completed after {api_call_count} API call(s)")
|
||||
print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error during API call #{api_call_count}: {str(e)}"
|
||||
error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
|
||||
print(f"❌ {error_msg}")
|
||||
|
||||
if self.verbose_logging:
|
||||
|
||||
@@ -161,11 +161,11 @@ def _construct_aggregator_prompt(system_prompt: str, responses: List[str]) -> st
|
||||
|
||||
|
||||
async def _run_reference_model_safe(
|
||||
model: str,
|
||||
user_prompt: str,
|
||||
model: str,
|
||||
user_prompt: str,
|
||||
temperature: float = REFERENCE_TEMPERATURE,
|
||||
max_tokens: int = 32000,
|
||||
max_retries: int = 3
|
||||
max_retries: int = 6
|
||||
) -> tuple[str, str, bool]:
|
||||
"""
|
||||
Run a single reference model with retry logic and graceful failure handling.
|
||||
@@ -212,8 +212,8 @@ async def _run_reference_model_safe(
|
||||
print(f"⚠️ {model} unknown error (attempt {attempt + 1}): {error_str}")
|
||||
|
||||
if attempt < max_retries - 1:
|
||||
# Exponential backoff for rate limiting
|
||||
sleep_time = 2 ** attempt
|
||||
# Exponential backoff for rate limiting: 2s, 4s, 8s, 16s, 32s, 60s
|
||||
sleep_time = min(2 ** (attempt + 1), 60)
|
||||
print(f" Retrying in {sleep_time}s...")
|
||||
await asyncio.sleep(sleep_time)
|
||||
else:
|
||||
|
||||
@@ -34,7 +34,7 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi
|
||||
**Environment:**
|
||||
- Minimal Debian-based OS with internet access
|
||||
- Automatic VM lifecycle management (creates on-demand, reuses, cleans up)
|
||||
- No state persistence - each command runs independently
|
||||
- Filesystem is persisted between tool calls but environment variables, venvs, etc are reset.
|
||||
|
||||
**Command Execution:**
|
||||
- Simple commands: Just provide the 'command' parameter
|
||||
@@ -43,13 +43,18 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi
|
||||
|
||||
**Examples:**
|
||||
- Run command: `{"command": "ls -la"}`
|
||||
- Background task: `{"command": "python server.py", "background": True}`
|
||||
- Background task: `{"command": "source path/to/my/venv/bin/activate && python server.py", "background": True}`
|
||||
- With timeout: `{"command": "long_task.sh", "timeout": 300}`
|
||||
|
||||
**Best Practices:**
|
||||
- Run servers/long processes in background
|
||||
- Monitor disk usage for large tasks
|
||||
- Install tools as needed with apt-get"""
|
||||
- Install whatever tools you need with sudo apt-get
|
||||
- Do not be afraid to run pip with --break-system-packages
|
||||
|
||||
**Things to avoid**
|
||||
- Do NOT use interactive tools such as tmux, vim, nano, python repl - you will get stuck. Even git sometimes becomes interactive if the output is large. If you're not sure pipe to cat.
|
||||
"""
|
||||
|
||||
# Global state for VM lifecycle management
|
||||
_active_instances: Dict[str, Any] = {}
|
||||
@@ -89,7 +94,12 @@ def _cleanup_inactive_vms(vm_lifetime_seconds: int = 300):
|
||||
del _last_activity[task_id]
|
||||
|
||||
except Exception as e:
|
||||
print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
|
||||
# 404 errors are benign - VM already cleaned up by TTL
|
||||
error_str = str(e)
|
||||
if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
|
||||
print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
|
||||
else:
|
||||
print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
|
||||
|
||||
|
||||
def _cleanup_thread_worker():
|
||||
@@ -150,7 +160,12 @@ def cleanup_vm(task_id: str):
|
||||
del _last_activity[task_id]
|
||||
|
||||
except Exception as e:
|
||||
print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
|
||||
# 404 errors are benign - VM already cleaned up by TTL
|
||||
error_str = str(e)
|
||||
if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
|
||||
print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
|
||||
else:
|
||||
print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
|
||||
|
||||
|
||||
atexit.register(_stop_cleanup_thread)
|
||||
|
||||
@@ -184,10 +184,10 @@ Your goal is to preserve ALL important information while reducing length. Never
|
||||
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
|
||||
|
||||
# Call the LLM asynchronously with retry logic for flaky API
|
||||
max_retries = 3
|
||||
max_retries = 6
|
||||
retry_delay = 2 # Start with 2 seconds
|
||||
last_error = None
|
||||
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
response = await nous_client.chat.completions.create(
|
||||
@@ -206,7 +206,7 @@ Create a markdown summary that captures all key information in a well-organized,
|
||||
print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
|
||||
print(f" Retrying in {retry_delay}s...")
|
||||
await asyncio.sleep(retry_delay)
|
||||
retry_delay *= 2 # Exponential backoff: 2s, 4s, 8s
|
||||
retry_delay = min(retry_delay * 2, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s
|
||||
else:
|
||||
# All retries exhausted
|
||||
raise last_error
|
||||
|
||||
Reference in New Issue
Block a user