diff --git a/run_agent.py b/run_agent.py index fa2a930b2..f138bdcc5 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2839,7 +2839,10 @@ class AIAgent: spinner.start() _spinner_result = None try: - function_result = handle_function_call(function_name, function_args, effective_task_id) + function_result = handle_function_call( + function_name, function_args, effective_task_id, + enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None, + ) _spinner_result = function_result except Exception as tool_error: function_result = f"Error executing tool '{function_name}': {tool_error}" @@ -2850,7 +2853,10 @@ class AIAgent: spinner.stop(cute_msg) else: try: - function_result = handle_function_call(function_name, function_args, effective_task_id) + function_result = handle_function_call( + function_name, function_args, effective_task_id, + enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None, + ) except Exception as tool_error: function_result = f"Error executing tool '{function_name}': {tool_error}" logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True) diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 2d420dd08..5757a7829 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -601,7 +601,10 @@ class TestExecuteToolCalls: messages = [] with patch("run_agent.handle_function_call", return_value="search result") as mock_hfc: agent._execute_tool_calls(mock_msg, messages, "task-1") - mock_hfc.assert_called_once_with("web_search", {"q": "test"}, "task-1") + # enabled_tools passes the agent's own valid_tool_names + args, kwargs = mock_hfc.call_args + assert args[:3] == ("web_search", {"q": "test"}, "task-1") + assert set(kwargs.get("enabled_tools", [])) == agent.valid_tool_names assert len(messages) == 1 assert messages[0]["role"] == "tool" assert "search result" in messages[0]["content"] @@ -627,7 +630,9 @@ class TestExecuteToolCalls: with patch("run_agent.handle_function_call", return_value="ok") as mock_hfc: agent._execute_tool_calls(mock_msg, messages, "task-1") # Invalid JSON args should fall back to empty dict - mock_hfc.assert_called_once_with("web_search", {}, "task-1") + args, kwargs = mock_hfc.call_args + assert args[:3] == ("web_search", {}, "task-1") + assert set(kwargs.get("enabled_tools", [])) == agent.valid_tool_names assert len(messages) == 1 assert messages[0]["role"] == "tool" assert messages[0]["tool_call_id"] == "c1" diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 16a4416aa..5aff474a0 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -397,9 +397,9 @@ def execute_code( try: # Write the auto-generated hermes_tools module - tools_src = generate_hermes_tools_module( - list(sandbox_tools) if enabled_tools else list(SANDBOX_ALLOWED_TOOLS) - ) + # sandbox_tools is already the correct set (intersection with session + # tools, or SANDBOX_ALLOWED_TOOLS as fallback — see lines above). + tools_src = generate_hermes_tools_module(list(sandbox_tools)) with open(os.path.join(tmpdir, "hermes_tools.py"), "w") as f: f.write(tools_src)