diff --git a/run_agent.py b/run_agent.py
index fa2a930b2..f138bdcc5 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2839,7 +2839,10 @@ class AIAgent:
                 spinner.start()
                 _spinner_result = None
                 try:
-                    function_result = handle_function_call(function_name, function_args, effective_task_id)
+                    function_result = handle_function_call(
+                        function_name, function_args, effective_task_id,
+                        enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
+                    )
                     _spinner_result = function_result
                 except Exception as tool_error:
                     function_result = f"Error executing tool '{function_name}': {tool_error}"
@@ -2850,7 +2853,10 @@ class AIAgent:
                     spinner.stop(cute_msg)
             else:
                 try:
-                    function_result = handle_function_call(function_name, function_args, effective_task_id)
+                    function_result = handle_function_call(
+                        function_name, function_args, effective_task_id,
+                        enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
+                    )
                 except Exception as tool_error:
                     function_result = f"Error executing tool '{function_name}': {tool_error}"
                     logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py
index 2d420dd08..5757a7829 100644
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -601,7 +601,10 @@ class TestExecuteToolCalls:
         messages = []
         with patch("run_agent.handle_function_call", return_value="search result") as mock_hfc:
             agent._execute_tool_calls(mock_msg, messages, "task-1")
-            mock_hfc.assert_called_once_with("web_search", {"q": "test"}, "task-1")
+            # enabled_tools must carry the agent's own valid_tool_names
+            args, kwargs = mock_hfc.call_args
+            assert args[:3] == ("web_search", {"q": "test"}, "task-1")
+            assert set(kwargs.get("enabled_tools", [])) == agent.valid_tool_names
         assert len(messages) == 1
         assert messages[0]["role"] == "tool"
         assert "search result" in messages[0]["content"]
@@ -627,7 +630,9 @@ class TestExecuteToolCalls:
         with patch("run_agent.handle_function_call", return_value="ok") as mock_hfc:
             agent._execute_tool_calls(mock_msg, messages, "task-1")
             # Invalid JSON args should fall back to empty dict
-            mock_hfc.assert_called_once_with("web_search", {}, "task-1")
+            args, kwargs = mock_hfc.call_args
+            assert args[:3] == ("web_search", {}, "task-1")
+            assert set(kwargs.get("enabled_tools", [])) == agent.valid_tool_names
         assert len(messages) == 1
         assert messages[0]["role"] == "tool"
         assert messages[0]["tool_call_id"] == "c1"
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 16a4416aa..5aff474a0 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -397,9 +397,9 @@ def execute_code(
 
     try:
         # Write the auto-generated hermes_tools module
-        tools_src = generate_hermes_tools_module(
-            list(sandbox_tools) if enabled_tools else list(SANDBOX_ALLOWED_TOOLS)
-        )
+        # sandbox_tools was resolved earlier in this function (intersection
+        # with session tools, or SANDBOX_ALLOWED_TOOLS as the fallback).
+        tools_src = generate_hermes_tools_module(list(sandbox_tools))
         with open(os.path.join(tmpdir, "hermes_tools.py"), "w") as f:
             f.write(tools_src)