From 46db7aeffd022ff4e6bb6586a3b3780c392fcc16 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:49:12 +0300
Subject: [PATCH] fix: streaming tool call parsing, error handling, and fake HA
state mutation
- Fix Gemini streaming tool call merge bug: multiple tool calls with same
index but different IDs are now parsed as separate calls instead of
concatenating names (e.g. ha_call_serviceha_call_service)
- Handle partial results in voice mode: show error and stop continuous
mode when agent returns partial/failed results with empty response
- Fix error display during streaming TTS: error messages are shown in
full response box even when streaming box was already opened
- Add duplicate sentence filter in TTS: skip near-duplicate sentences
from LLM repetition
- Fix fake HA server state mutation: turn_on/turn_off/set_temperature
correctly update entity states; temperature sensor simulates change
when thermostat is adjusted
---
cli.py | 6 ++++--
run_agent.py | 16 +++++++++++++++-
tests/fakes/fake_ha_server.py | 17 +++++++++++++++--
tools/tts_tool.py | 7 +++++++
4 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/cli.py b/cli.py
index 32c88ec96..dd15151ee 100755
--- a/cli.py
+++ b/cli.py
@@ -4238,8 +4238,10 @@ class HermesCLI:
# Get the final response
response = result.get("final_response", "") if result else ""
- # Handle failed results (e.g., non-retryable errors like invalid model)
- if result and result.get("failed") and not response:
+ # Handle failed or partial results (e.g., non-retryable errors, rate limits,
+ # truncated output, invalid tool calls). Both "failed" and "partial" with
+ # an empty final_response mean the agent couldn't produce a usable answer.
+ if result and (result.get("failed") or result.get("partial")) and not response:
error_detail = result.get("error", "Unknown error")
response = f"Error: {error_detail}"
# Stop continuous voice mode on persistent errors (e.g. 429 rate limit)
diff --git a/run_agent.py b/run_agent.py
index 475a797fc..152d6092e 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2646,7 +2646,21 @@ class AIAgent:
# Tool call deltas
if delta and delta.tool_calls:
for tc_delta in delta.tool_calls:
- idx = tc_delta.index
+ idx = tc_delta.index if tc_delta.index is not None else 0
+ # Gemini may reuse index 0 for multiple tool calls,
+ # sending a new id each time. Detect this and assign
+ # a fresh virtual index so calls don't merge.
+ if idx in tool_calls_acc and tc_delta.id and tc_delta.id != tool_calls_acc[idx]["id"]:
+ # Look for existing entry with this id first
+ # (follow-up deltas for an already-created tool call)
+ matched = False
+ for eidx, eentry in tool_calls_acc.items():
+ if eentry["id"] == tc_delta.id:
+ idx = eidx
+ matched = True
+ break
+ if not matched:
+ idx = (max(k for k in tool_calls_acc if isinstance(k, int)) + 1) if tool_calls_acc else 0
if idx not in tool_calls_acc:
tool_calls_acc[idx] = {
"id": tc_delta.id or "",
diff --git a/tests/fakes/fake_ha_server.py b/tests/fakes/fake_ha_server.py
index 1d51bf51b..b5119da36 100644
--- a/tests/fakes/fake_ha_server.py
+++ b/tests/fakes/fake_ha_server.py
@@ -275,12 +275,25 @@ class FakeHAServer:
affected = []
entity_id = body.get("entity_id")
if entity_id:
- new_state = "on" if service == "turn_on" else "off"
for s in ENTITY_STATES:
if s["entity_id"] == entity_id:
+ if service == "turn_on":
+ s["state"] = "on"
+ elif service == "turn_off":
+ s["state"] = "off"
+ elif service == "set_temperature" and "temperature" in body:
+ s["attributes"]["temperature"] = body["temperature"]
+ # Keep current state or set to heat if off
+ if s["state"] == "off":
+ s["state"] = "heat"
+ # Simulate temperature sensor approaching the target
+ for ts in ENTITY_STATES:
+ if ts["entity_id"] == "sensor.temperature":
+ ts["state"] = str(body["temperature"] - 0.5)
+ break
affected.append({
"entity_id": entity_id,
- "state": new_state,
+ "state": s["state"],
"attributes": s.get("attributes", {}),
})
break
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 988fa653a..6c4e53787 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -522,6 +522,7 @@ def stream_tts_to_speaker(
min_sentence_len = 20
long_flush_len = 100
queue_timeout = 0.5
+ _spoken_sentences: list[str] = [] # track spoken sentences to skip duplicates
# Regex to strip complete <think>...</think> blocks from buffer
_think_block_re = re.compile(r'<think>.*?</think>', flags=re.DOTALL)
@@ -532,6 +533,12 @@ def stream_tts_to_speaker(
cleaned = _strip_markdown_for_tts(sentence).strip()
if not cleaned:
return
+ # Skip duplicate/near-duplicate sentences (LLM repetition)
+ cleaned_lower = cleaned.lower().rstrip(".!,")
+ for prev in _spoken_sentences:
+ if prev.lower().rstrip(".!,") == cleaned_lower:
+ return
+ _spoken_sentences.append(cleaned)
# Display raw sentence on screen before TTS processing
if display_callback is not None:
display_callback(sentence)