fix: prevent tool name/arg concatenation for Ollama-compatible endpoints (#3582)
Ollama reuses index 0 for every tool call in a parallel batch, distinguishing them only by id. The streaming accumulator now detects a new non-empty id at an already-active index and redirects it to a fresh slot, preventing names and arguments from being concatenated into a single tool call. No-op for normal providers that use incrementing indices. Co-authored-by: dmater01 <dmater01@users.noreply.github.com>
This commit is contained in:
25
run_agent.py
25
run_agent.py
@@ -3883,6 +3883,12 @@ class AIAgent:
|
||||
content_parts: list = []
|
||||
tool_calls_acc: dict = {}
|
||||
tool_gen_notified: set = set()
|
||||
# Ollama-compatible endpoints reuse index 0 for every tool call
|
||||
# in a parallel batch, distinguishing them only by id. Track
|
||||
# the last seen id per raw index so we can detect a new tool
|
||||
# call starting at the same index and redirect it to a fresh slot.
|
||||
_last_id_at_idx: dict = {} # raw_index -> last seen non-empty id
|
||||
_active_slot_by_idx: dict = {} # raw_index -> current slot in tool_calls_acc
|
||||
finish_reason = None
|
||||
model_name = None
|
||||
role = "assistant"
|
||||
@@ -3945,7 +3951,24 @@ class AIAgent:
|
||||
# Accumulate tool call deltas — notify display on first name
|
||||
if delta and delta.tool_calls:
|
||||
for tc_delta in delta.tool_calls:
|
||||
idx = tc_delta.index if tc_delta.index is not None else 0
|
||||
raw_idx = tc_delta.index if tc_delta.index is not None else 0
|
||||
delta_id = tc_delta.id or ""
|
||||
|
||||
# Ollama fix: detect a new tool call reusing the same
|
||||
# raw index (different id) and redirect to a fresh slot.
|
||||
if raw_idx not in _active_slot_by_idx:
|
||||
_active_slot_by_idx[raw_idx] = raw_idx
|
||||
if (
|
||||
delta_id
|
||||
and raw_idx in _last_id_at_idx
|
||||
and delta_id != _last_id_at_idx[raw_idx]
|
||||
):
|
||||
new_slot = max(tool_calls_acc, default=-1) + 1
|
||||
_active_slot_by_idx[raw_idx] = new_slot
|
||||
if delta_id:
|
||||
_last_id_at_idx[raw_idx] = delta_id
|
||||
idx = _active_slot_by_idx[raw_idx]
|
||||
|
||||
if idx not in tool_calls_acc:
|
||||
tool_calls_acc[idx] = {
|
||||
"id": tc_delta.id or "",
|
||||
|
||||
@@ -2793,6 +2793,50 @@ class TestStreamingApiCall:
|
||||
assert tc[0].function.name == "search"
|
||||
assert tc[1].function.name == "read"
|
||||
|
||||
def test_ollama_reused_index_separate_tool_calls(self, agent):
|
||||
"""Ollama sends every tool call at index 0 with different ids.
|
||||
|
||||
Without the fix, names and arguments get concatenated into one slot.
|
||||
"""
|
||||
chunks = [
|
||||
_make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":"hello"}')]),
|
||||
# Second tool call at the SAME index 0, but different id
|
||||
_make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read_file", '{"path":"x.py"}')]),
|
||||
_make_chunk(finish_reason="tool_calls"),
|
||||
]
|
||||
agent.client.chat.completions.create.return_value = iter(chunks)
|
||||
|
||||
resp = agent._interruptible_streaming_api_call({"messages": []})
|
||||
|
||||
tc = resp.choices[0].message.tool_calls
|
||||
assert len(tc) == 2, f"Expected 2 tool calls, got {len(tc)}: {[t.function.name for t in tc]}"
|
||||
assert tc[0].function.name == "search"
|
||||
assert tc[0].function.arguments == '{"q":"hello"}'
|
||||
assert tc[0].id == "call_a"
|
||||
assert tc[1].function.name == "read_file"
|
||||
assert tc[1].function.arguments == '{"path":"x.py"}'
|
||||
assert tc[1].id == "call_b"
|
||||
|
||||
def test_ollama_reused_index_streamed_args(self, agent):
|
||||
"""Ollama with streamed arguments across multiple chunks at same index."""
|
||||
chunks = [
|
||||
_make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":')]),
|
||||
_make_chunk(tool_calls=[_make_tc_delta(0, None, None, '"hello"}')]),
|
||||
# New tool call, same index 0
|
||||
_make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read", '{}')]),
|
||||
_make_chunk(finish_reason="tool_calls"),
|
||||
]
|
||||
agent.client.chat.completions.create.return_value = iter(chunks)
|
||||
|
||||
resp = agent._interruptible_streaming_api_call({"messages": []})
|
||||
|
||||
tc = resp.choices[0].message.tool_calls
|
||||
assert len(tc) == 2
|
||||
assert tc[0].function.name == "search"
|
||||
assert tc[0].function.arguments == '{"q":"hello"}'
|
||||
assert tc[1].function.name == "read"
|
||||
assert tc[1].function.arguments == '{}'
|
||||
|
||||
def test_content_and_tool_calls_together(self, agent):
|
||||
chunks = [
|
||||
_make_chunk(content="I'll search"),
|
||||
|
||||
Reference in New Issue
Block a user