diff --git a/run_agent.py b/run_agent.py
index 1db61cf5..7cdaa327 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3883,6 +3883,12 @@ class AIAgent:
             content_parts: list = []
             tool_calls_acc: dict = {}
             tool_gen_notified: set = set()
+            # Ollama-compatible endpoints reuse index 0 for every tool call
+            # in a parallel batch, distinguishing them only by id. Track
+            # the last seen id per raw index so we can detect a new tool
+            # call starting at the same index and redirect it to a fresh slot.
+            _last_id_at_idx: dict = {}  # raw_index -> last seen non-empty id
+            _active_slot_by_idx: dict = {}  # raw_index -> current slot in tool_calls_acc
             finish_reason = None
             model_name = None
             role = "assistant"
@@ -3945,7 +3951,25 @@ class AIAgent:
                 # Accumulate tool call deltas — notify display on first name
                 if delta and delta.tool_calls:
                     for tc_delta in delta.tool_calls:
-                        idx = tc_delta.index if tc_delta.index is not None else 0
+                        raw_idx = tc_delta.index if tc_delta.index is not None else 0
+                        delta_id = tc_delta.id or ""
+
+                        # Ollama fix: a non-empty id that differs from the last
+                        # id seen at this raw index marks a NEW tool call, so it
+                        # is redirected to a fresh slot. A raw index seen for the
+                        # first time keeps its own number unless a redirected
+                        # call already occupies that slot (avoids merging them).
+                        prev_id = _last_id_at_idx.get(raw_idx)
+                        starts_new_call = bool(delta_id and prev_id and delta_id != prev_id)
+                        first_seen = raw_idx not in _active_slot_by_idx
+                        if starts_new_call or (first_seen and raw_idx in tool_calls_acc):
+                            _active_slot_by_idx[raw_idx] = max(tool_calls_acc, default=-1) + 1
+                        elif first_seen:
+                            _active_slot_by_idx[raw_idx] = raw_idx
+                        if delta_id:
+                            _last_id_at_idx[raw_idx] = delta_id
+                        idx = _active_slot_by_idx[raw_idx]
+
                         if idx not in tool_calls_acc:
                             tool_calls_acc[idx] = {
                                 "id": tc_delta.id or "",
diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py
index 7ad5ee9a..cbfe14f6 100644
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -2793,6 +2793,63 @@ class TestStreamingApiCall:
         assert tc[0].function.name == "search"
         assert tc[1].function.name == "read"
 
+    def test_ollama_reused_index_separate_tool_calls(self, agent):
+        """Ollama sends every tool call at index 0 with different ids.
+
+        Without the fix, names and arguments get concatenated into one slot.
+        """
+        chunks = [
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":"hello"}')]),
+            # Second tool call at the SAME index 0, but different id
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read_file", '{"path":"x.py"}')]),
+            _make_chunk(finish_reason="tool_calls"),
+        ]
+        agent.client.chat.completions.create.return_value = iter(chunks)
+
+        resp = agent._interruptible_streaming_api_call({"messages": []})
+
+        tc = resp.choices[0].message.tool_calls
+        assert len(tc) == 2, f"Expected 2 tool calls, got {len(tc)}: {[t.function.name for t in tc]}"
+        assert tc[0].function.name == "search"
+        assert tc[0].function.arguments == '{"q":"hello"}'
+        assert tc[0].id == "call_a"
+        assert tc[1].function.name == "read_file"
+        assert tc[1].function.arguments == '{"path":"x.py"}'
+        assert tc[1].id == "call_b"
+
+    def test_ollama_reused_index_streamed_args(self, agent):
+        """Ollama with streamed arguments across multiple chunks at same index."""
+        chunks = [
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":')]),
+            _make_chunk(tool_calls=[_make_tc_delta(0, None, None, '"hello"}')]),
+            # New tool call, same index 0
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read", '{}')]),
+            _make_chunk(finish_reason="tool_calls"),
+        ]
+        agent.client.chat.completions.create.return_value = iter(chunks)
+
+        resp = agent._interruptible_streaming_api_call({"messages": []})
+
+        tc = resp.choices[0].message.tool_calls
+        assert len(tc) == 2
+        assert tc[0].function.name == "search"
+        assert tc[0].function.arguments == '{"q":"hello"}'
+        assert tc[1].function.name == "read"
+        assert tc[1].function.arguments == '{}'
+
+    def test_ollama_reused_index_then_new_index(self, agent):
+        """A redirected call must not collide with a later, distinct raw index."""
+        chunks = [
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{}')]),
+            # Same index 0, new id -> redirected to slot 1
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read", '{}')]),
+            # Raw index 1 arrives afterwards and needs its own slot
+            _make_chunk(tool_calls=[_make_tc_delta(1, "call_c", "write", '{}')]),
+            _make_chunk(finish_reason="tool_calls"),
+        ]
+        agent.client.chat.completions.create.return_value = iter(chunks)
+
+        resp = agent._interruptible_streaming_api_call({"messages": []})
+
+        tc = resp.choices[0].message.tool_calls
+        assert [t.id for t in tc] == ["call_a", "call_b", "call_c"]
+        assert [t.function.name for t in tc] == ["search", "read", "write"]
+        assert [t.function.arguments for t in tc] == ['{}', '{}', '{}']
+
     def test_content_and_tool_calls_together(self, agent):
         chunks = [
             _make_chunk(content="I'll search"),