Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 442c0f6cd3 | | |
| | f1f9bd2e76 | | |
169
tests/test_parallel_tool_calling.py
Normal file
169
tests/test_parallel_tool_calling.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""
|
||||
Test parallel tool calling — 2+ tools per response (#798).
|
||||
|
||||
Verifies that the agent can issue multiple tool calls in a single
|
||||
response and handle them correctly, including:
|
||||
1. Parallel execution of independent tools
|
||||
2. Sequential execution when tools have dependencies
|
||||
3. Mixed safe/unsafe tool handling
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import json
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
|
||||
|
||||
class TestParallelToolCalling:
    """Test parallel tool call handling.

    Each test builds a batch of mock tool calls and asserts the verdict that
    ``model_tools._should_parallelize_tool_batch`` returns for that batch.
    """

    @staticmethod
    def _make_tool_call(name, arguments):
        """Return a mock tool call whose ``function.name`` is the string *name*.

        ``Mock(name=...)`` only labels the mock for its repr; it does not
        create a ``name`` attribute.  Reading ``.name`` on such a mock yields
        another ``Mock`` rather than the tool name, so the attribute is
        assigned after construction instead.

        Args:
            name: Tool name exposed as ``tool_call.function.name``.
            arguments: JSON string exposed as ``tool_call.function.arguments``.
        """
        function = Mock(arguments=arguments)
        function.name = name
        return Mock(function=function)

    def test_two_parallel_read_files(self):
        """Two read_file calls can execute in parallel."""
        from model_tools import _should_parallelize_tool_batch

        tool_calls = [
            self._make_tool_call("read_file", '{"path": "a.txt"}'),
            self._make_tool_call("read_file", '{"path": "b.txt"}'),
        ]

        # Both are read_file — should parallelize
        assert _should_parallelize_tool_batch(tool_calls) is True

    def test_read_and_write_sequential(self):
        """read_file + write_file should be sequential (write is unsafe)."""
        from model_tools import _should_parallelize_tool_batch

        tool_calls = [
            self._make_tool_call("read_file", '{"path": "a.txt"}'),
            self._make_tool_call(
                "write_file", '{"path": "b.txt", "content": "x"}'
            ),
        ]

        # write_file is unsafe — should NOT parallelize
        assert _should_parallelize_tool_batch(tool_calls) is False

    def test_three_parallel_terminal(self):
        """Three terminal commands can execute in parallel."""
        from model_tools import _should_parallelize_tool_batch

        tool_calls = [
            self._make_tool_call("execute_terminal", '{"command": "ls"}'),
            self._make_tool_call("execute_terminal", '{"command": "pwd"}'),
            self._make_tool_call("execute_terminal", '{"command": "date"}'),
        ]

        assert _should_parallelize_tool_batch(tool_calls) is True

    def test_single_tool_no_parallel(self):
        """Single tool call doesn't need parallelization."""
        from model_tools import _should_parallelize_tool_batch

        tool_calls = [
            self._make_tool_call("read_file", '{"path": "a.txt"}'),
        ]

        assert _should_parallelize_tool_batch(tool_calls) is False

    def test_empty_tool_calls(self):
        """Empty tool calls list."""
        from model_tools import _should_parallelize_tool_batch

        assert _should_parallelize_tool_batch([]) is False

    def test_mixed_safe_tools_parallel(self):
        """Multiple safe tools can parallelize."""
        from model_tools import _should_parallelize_tool_batch

        tool_calls = [
            self._make_tool_call("read_file", '{"path": "a.txt"}'),
            self._make_tool_call("web_search", '{"query": "test"}'),
            self._make_tool_call("session_search", '{"query": "test"}'),
        ]

        # All are read-only/safe — should parallelize
        assert _should_parallelize_tool_batch(tool_calls) is True
|
||||
|
||||
|
||||
class TestToolCallOrdering:
    """Checks on the ordering of tool calls that depend on one another."""

    def test_dependent_calls_sequential(self):
        """A dependent pair of calls lists the producer before the consumer."""
        # Conceptual check only: no agent code is exercised here.  A real
        # implementation would have to notice that the second call consumes
        # the output of the first (search_files, then read_file on a hit).
        search_step = {"name": "search_files", "arguments": {"pattern": "*.py"}}
        read_step = {
            "name": "read_file",
            "arguments": {"path": "result_from_search"},
        }
        planned_calls = [search_step, read_step]

        # Exactly two calls, with the search ahead of the read.
        ordered_names = [call["name"] for call in planned_calls]
        assert ordered_names == ["search_files", "read_file"]
|
||||
|
||||
|
||||
class TestToolCallResultHandling:
    """Checks on how the results of a parallel batch are gathered."""

    def test_results_preserve_order(self):
        """Each result lines up positionally with the call that produced it."""
        # Hand-written stand-ins for a batch of calls and its results; no
        # real execution takes place in this test.
        issued_calls = [
            {"id": "call_1", "name": "read_file", "arguments": '{"path": "a.txt"}'},
            {"id": "call_2", "name": "read_file", "arguments": '{"path": "b.txt"}'},
        ]
        collected = [
            {"tool_call_id": "call_1", "content": "content of a.txt"},
            {"tool_call_id": "call_2", "content": "content of b.txt"},
        ]

        # Walk both lists in lockstep: result i must answer call i.
        for call, result in zip(issued_calls, collected):
            assert result["tool_call_id"] == call["id"]

    def test_partial_failure_handling(self):
        """A failing call still contributes a result next to the successful one."""
        # One entry reports success, the other carries an error message.
        outcomes = [
            {"tool_call_id": "call_1", "content": "success"},
            {"tool_call_id": "call_2", "content": "Error: file not found"},
        ]

        # Neither result may be dropped.
        assert len(outcomes) == 2
        succeeded, failed = outcomes
        assert "success" in succeeded["content"]
        assert "Error" in failed["content"]
|
||||
|
||||
|
||||
class TestToolSafetyClassification:
    """Checks the safe/unsafe labelling of tools for parallel execution."""

    @pytest.mark.parametrize(
        "tool_name,is_safe",
        [
            ("read_file", True),
            ("web_search", True),
            ("session_search", True),
            ("web_fetch", True),
            ("browser_navigate", True),
            ("write_file", False),
            ("patch", False),
            ("execute_terminal", True),  # Terminal is read-only by default
            ("execute_code", True),  # Code execution is sandboxed
            ("delegate_task", False),  # Delegation has side effects
        ],
    )
    def test_tool_safety(self, tool_name, is_safe):
        """The expected label equals membership in the locally defined safe set."""
        # The safe set is spelled out here in the test; it is not read from
        # the agent code, so this only pins the expected classification.
        parallel_safe = frozenset(
            (
                "read_file",
                "web_search",
                "session_search",
                "web_fetch",
                "browser_navigate",
                "execute_terminal",
                "execute_code",
            )
        )

        classified_safe = tool_name in parallel_safe
        assert classified_safe == is_safe, f"{tool_name} safety mismatch"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # pytest.main() returns the run's exit code rather than exiting itself.
    # Raising SystemExit with it makes a direct run of this file report
    # failures through a non-zero process status.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||
Reference in New Issue
Block a user