From 5ec75e38b978e12bbabd915cf0ef716eedbcec77 Mon Sep 17 00:00:00 2001 From: teknium Date: Tue, 10 Feb 2026 22:51:18 +0000 Subject: [PATCH] Enhance tool execution and logging in HermesAgentLoop - Increased thread pool size for tool execution from 8 to 128 to improve concurrency and prevent starvation. - Added a function to resize the tool executor dynamically based on configuration. - Enhanced logging to track API call durations and tool execution times, including warnings for slow tools. - Improved overall performance monitoring by logging detailed information for each turn in the agent loop. --- environments/agent_loop.py | 61 +++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/environments/agent_loop.py b/environments/agent_loop.py index c7b311d7a..15a8ec614 100644 --- a/environments/agent_loop.py +++ b/environments/agent_loop.py @@ -15,6 +15,7 @@ import asyncio import concurrent.futures import json import logging +import os import uuid from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Set @@ -24,7 +25,22 @@ from model_tools import handle_function_call # Thread pool for running sync tool calls that internally use asyncio.run() # (e.g., mini-swe-agent's modal/docker backends). Running them in a separate # thread gives them a clean event loop so they don't deadlock inside Atropos's loop. -_tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=8) +# Size must be large enough for concurrent eval tasks (e.g., 89 TB2 tasks all +# making tool calls). Too small = thread pool starvation, tasks queue for minutes. +# Resized at runtime by HermesAgentBaseEnv.__init__ via resize_tool_pool(). +_tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=128) + + +def resize_tool_pool(max_workers: int): + """ + Replace the global tool executor with a new one of the given size. + + Called by HermesAgentBaseEnv.__init__ based on config.tool_pool_size. + Safe to call before any tasks are submitted. + """ + global _tool_executor + _tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) + logger.info("Tool thread pool resized to %d workers", max_workers) logger = logging.getLogger(__name__) @@ -155,7 +171,11 @@ class HermesAgentLoop: reasoning_per_turn = [] tool_errors: List[ToolError] = [] + import time as _time + for turn in range(self.max_turns): + turn_start = _time.monotonic() + # Build the chat_completion kwargs chat_kwargs = { "messages": messages, @@ -172,10 +192,12 @@ class HermesAgentLoop: chat_kwargs["max_tokens"] = self.max_tokens # Make the API call -- standard OpenAI spec + api_start = _time.monotonic() try: response = await self.server.chat_completion(**chat_kwargs) except Exception as e: - logger.error("API call failed on turn %d: %s", turn + 1, e) + api_elapsed = _time.monotonic() - api_start + logger.error("API call failed on turn %d (%.1fs): %s", turn + 1, api_elapsed, e) return AgentResult( messages=messages, managed_state=self._get_managed_state(), @@ -185,8 +207,10 @@ class HermesAgentLoop: tool_errors=tool_errors, ) + api_elapsed = _time.monotonic() - api_start + if not response or not response.choices: - logger.warning("Empty response on turn %d", turn + 1) + logger.warning("Empty response on turn %d (api=%.1fs)", turn + 1, api_elapsed) return AgentResult( messages=messages, managed_state=self._get_managed_state(), @@ -265,14 +289,16 @@ class HermesAgentLoop: try: if tool_name == "terminal": - import os backend = os.getenv("TERMINAL_ENV", "local") cmd_preview = args.get("command", "")[:80] - print(f" 🖥️ [{backend}] $ {cmd_preview}") + logger.info( + "[%s] $ %s", self.task_id[:8], cmd_preview, + ) # Run tool calls in a thread pool so backends that use # asyncio.run() internally (modal, docker) get a clean # event loop instead of deadlocking inside Atropos's loop. + tool_submit_time = _time.monotonic() loop = asyncio.get_event_loop() tool_result = await loop.run_in_executor( _tool_executor, @@ -280,6 +306,16 @@ class HermesAgentLoop: tool_name, args, task_id=self.task_id ), ) + tool_elapsed = _time.monotonic() - tool_submit_time + + # Log slow tools and thread pool stats for debugging + pool_active = _tool_executor._work_queue.qsize() + if tool_elapsed > 30: + logger.warning( + "[%s] turn %d: %s took %.1fs (pool queue=%d)", + self.task_id[:8], turn + 1, tool_name, + tool_elapsed, pool_active, + ) except Exception as e: tool_result = json.dumps( {"error": f"Tool execution failed: {type(e).__name__}: {str(e)}"} @@ -320,10 +356,11 @@ class HermesAgentLoop: } ) - logger.debug( - "Turn %d: %d tool calls executed", - turn + 1, - len(assistant_msg.tool_calls), + turn_elapsed = _time.monotonic() - turn_start + logger.info( + "[%s] turn %d: api=%.1fs, %d tools, turn_total=%.1fs", + self.task_id[:8], turn + 1, api_elapsed, + len(assistant_msg.tool_calls), turn_elapsed, ) else: @@ -336,8 +373,10 @@ class HermesAgentLoop: msg_dict["reasoning_content"] = reasoning messages.append(msg_dict) - logger.debug( - "Turn %d: model finished naturally (no tool calls)", turn + 1 + turn_elapsed = _time.monotonic() - turn_start + logger.info( + "[%s] turn %d: api=%.1fs, no tools (finished), turn_total=%.1fs", + self.task_id[:8], turn + 1, api_elapsed, turn_elapsed, ) return AgentResult(