fix: prevent tool name/arg concatenation for Ollama-compatible endpoints (#3582)

Ollama reuses index 0 for every tool call in a parallel batch, distinguishing them only by id. The streaming accumulator now detects a new non-empty id at an already-active index and redirects it to a fresh slot, preventing names and arguments from being concatenated into a single tool call. No-op for normal providers that use incrementing indices. Co-authored-by: dmater01 <dmater01@users.noreply.github.com>
2026-03-28 14:08:26 -07:00
parent ba3bbf5b53
commit 924857c3e3
2 changed files with 68 additions and 1 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -3883,6 +3883,12 @@ class AIAgent:
            content_parts: list = []
            tool_calls_acc: dict = {}
            tool_gen_notified: set = set()
+            # Ollama-compatible endpoints reuse index 0 for every tool call
+            # in a parallel batch, distinguishing them only by id.  Track
+            # the last seen id per raw index so we can detect a new tool
+            # call starting at the same index and redirect it to a fresh slot.
+            _last_id_at_idx: dict = {}      # raw_index -> last seen non-empty id
+            _active_slot_by_idx: dict = {}  # raw_index -> current slot in tool_calls_acc
            finish_reason = None
            model_name = None
            role = "assistant"
@@ -3945,7 +3951,24 @@ class AIAgent:
                # Accumulate tool call deltas — notify display on first name
                if delta and delta.tool_calls:
                    for tc_delta in delta.tool_calls:
-                        idx = tc_delta.index if tc_delta.index is not None else 0
+                        raw_idx = tc_delta.index if tc_delta.index is not None else 0
+                        delta_id = tc_delta.id or ""
+
+                        # Ollama fix: detect a new tool call reusing the same
+                        # raw index (different id) and redirect to a fresh slot.
+                        if raw_idx not in _active_slot_by_idx:
+                            _active_slot_by_idx[raw_idx] = raw_idx
+                        if (
+                            delta_id
+                            and raw_idx in _last_id_at_idx
+                            and delta_id != _last_id_at_idx[raw_idx]
+                        ):
+                            new_slot = max(tool_calls_acc, default=-1) + 1
+                            _active_slot_by_idx[raw_idx] = new_slot
+                        if delta_id:
+                            _last_id_at_idx[raw_idx] = delta_id
+                        idx = _active_slot_by_idx[raw_idx]
+
                        if idx not in tool_calls_acc:
                            tool_calls_acc[idx] = {
                                "id": tc_delta.id or "",
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -2793,6 +2793,50 @@ class TestStreamingApiCall:
        assert tc[0].function.name == "search"
        assert tc[1].function.name == "read"

+    def test_ollama_reused_index_separate_tool_calls(self, agent):
+        """Ollama sends every tool call at index 0 with different ids.
+
+        Without the fix, names and arguments get concatenated into one slot.
+        """
+        chunks = [
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":"hello"}')]),
+            # Second tool call at the SAME index 0, but different id
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read_file", '{"path":"x.py"}')]),
+            _make_chunk(finish_reason="tool_calls"),
+        ]
+        agent.client.chat.completions.create.return_value = iter(chunks)
+
+        resp = agent._interruptible_streaming_api_call({"messages": []})
+
+        tc = resp.choices[0].message.tool_calls
+        assert len(tc) == 2, f"Expected 2 tool calls, got {len(tc)}: {[t.function.name for t in tc]}"
+        assert tc[0].function.name == "search"
+        assert tc[0].function.arguments == '{"q":"hello"}'
+        assert tc[0].id == "call_a"
+        assert tc[1].function.name == "read_file"
+        assert tc[1].function.arguments == '{"path":"x.py"}'
+        assert tc[1].id == "call_b"
+
+    def test_ollama_reused_index_streamed_args(self, agent):
+        """Ollama with streamed arguments across multiple chunks at same index."""
+        chunks = [
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":')]),
+            _make_chunk(tool_calls=[_make_tc_delta(0, None, None, '"hello"}')]),
+            # New tool call, same index 0
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read", '{}')]),
+            _make_chunk(finish_reason="tool_calls"),
+        ]
+        agent.client.chat.completions.create.return_value = iter(chunks)
+
+        resp = agent._interruptible_streaming_api_call({"messages": []})
+
+        tc = resp.choices[0].message.tool_calls
+        assert len(tc) == 2
+        assert tc[0].function.name == "search"
+        assert tc[0].function.arguments == '{"q":"hello"}'
+        assert tc[1].function.name == "read"
+        assert tc[1].function.arguments == '{}'
+
    def test_content_and_tool_calls_together(self, agent):
        chunks = [
            _make_chunk(content="I'll search"),