From 85e629e9154c06b97ff0854947f7e37da182790e Mon Sep 17 00:00:00 2001
From: teknium
Date: Tue, 10 Feb 2026 23:48:49 +0000
Subject: [PATCH] Add cleanup functionality for orphaned sandboxes in TerminalBench2EvalEnv

- Implemented a cleanup process to terminate any remaining sandboxes after evaluation, addressing issues with orphaned thread pool workers.
- Enhanced logging to inform users about the cleanup process, ensuring better resource management and user awareness.
---
 .../benchmarks/terminalbench_2/terminalbench2_env.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/environments/benchmarks/terminalbench_2/terminalbench2_env.py b/environments/benchmarks/terminalbench_2/terminalbench2_env.py
index ef8484f79..99ee7eb1c 100644
--- a/environments/benchmarks/terminalbench_2/terminalbench2_env.py
+++ b/environments/benchmarks/terminalbench_2/terminalbench2_env.py
@@ -826,6 +826,13 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
         except Exception as e:
             print(f"Error logging evaluation results: {e}")
 
+        # Kill all remaining sandboxes. Timed-out tasks leave orphaned thread
+        # pool workers still executing commands -- cleanup_all stops them.
+        from tools.terminal_tool import cleanup_all_environments
+        print("\nCleaning up all sandboxes...")
+        cleanup_all_environments()
+        print("Done.")
+
     # =========================================================================
     # Wandb logging
     # =========================================================================