From 46db7aeffd022ff4e6bb6586a3b3780c392fcc16 Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Sat, 7 Mar 2026 01:49:12 +0300 Subject: [PATCH] fix: streaming tool call parsing, error handling, and fake HA state mutation - Fix Gemini streaming tool call merge bug: multiple tool calls with same index but different IDs are now parsed as separate calls instead of concatenating names (e.g. ha_call_serviceha_call_service) - Handle partial results in voice mode: show error and stop continuous mode when agent returns partial/failed results with empty response - Fix error display during streaming TTS: error messages are shown in full response box even when streaming box was already opened - Add duplicate sentence filter in TTS: skip near-duplicate sentences from LLM repetition - Fix fake HA server state mutation: turn_on/turn_off/set_temperature correctly update entity states; temperature sensor simulates change when thermostat is adjusted --- cli.py | 6 ++++-- run_agent.py | 16 +++++++++++++++- tests/fakes/fake_ha_server.py | 17 +++++++++++++++-- tools/tts_tool.py | 7 +++++++ 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/cli.py b/cli.py index 32c88ec96..dd15151ee 100755 --- a/cli.py +++ b/cli.py @@ -4238,8 +4238,10 @@ class HermesCLI: # Get the final response response = result.get("final_response", "") if result else "" - # Handle failed results (e.g., non-retryable errors like invalid model) - if result and result.get("failed") and not response: + # Handle failed or partial results (e.g., non-retryable errors, rate limits, + # truncated output, invalid tool calls). Both "failed" and "partial" with + # an empty final_response mean the agent couldn't produce a usable answer. + if result and (result.get("failed") or result.get("partial")) and not response: error_detail = result.get("error", "Unknown error") response = f"Error: {error_detail}" # Stop continuous voice mode on persistent errors (e.g. 
429 rate limit) diff --git a/run_agent.py b/run_agent.py index 475a797fc..152d6092e 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2646,7 +2646,21 @@ class AIAgent: # Tool call deltas if delta and delta.tool_calls: for tc_delta in delta.tool_calls: - idx = tc_delta.index + idx = tc_delta.index if tc_delta.index is not None else 0 + # Gemini may reuse index 0 for multiple tool calls, + # sending a new id each time. Detect this and assign + # a fresh virtual index so calls don't merge. + if idx in tool_calls_acc and tc_delta.id and tc_delta.id != tool_calls_acc[idx]["id"]: + # Look for existing entry with this id first + # (follow-up deltas for an already-created tool call) + matched = False + for eidx, eentry in tool_calls_acc.items(): + if eentry["id"] == tc_delta.id: + idx = eidx + matched = True + break + if not matched: + idx = (max(k for k in tool_calls_acc if isinstance(k, int)) + 1) if tool_calls_acc else 0 if idx not in tool_calls_acc: tool_calls_acc[idx] = { "id": tc_delta.id or "", diff --git a/tests/fakes/fake_ha_server.py b/tests/fakes/fake_ha_server.py index 1d51bf51b..b5119da36 100644 --- a/tests/fakes/fake_ha_server.py +++ b/tests/fakes/fake_ha_server.py @@ -275,12 +275,25 @@ class FakeHAServer: affected = [] entity_id = body.get("entity_id") if entity_id: - new_state = "on" if service == "turn_on" else "off" for s in ENTITY_STATES: if s["entity_id"] == entity_id: + if service == "turn_on": + s["state"] = "on" + elif service == "turn_off": + s["state"] = "off" + elif service == "set_temperature" and "temperature" in body: + s["attributes"]["temperature"] = body["temperature"] + # Keep current state or set to heat if off + if s["state"] == "off": + s["state"] = "heat" + # Simulate temperature sensor approaching the target + for ts in ENTITY_STATES: + if ts["entity_id"] == "sensor.temperature": + ts["state"] = str(body["temperature"] - 0.5) + break affected.append({ "entity_id": entity_id, - "state": new_state, + "state": s["state"], "attributes": 
s.get("attributes", {}), }) break diff --git a/tools/tts_tool.py b/tools/tts_tool.py index 988fa653a..6c4e53787 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -522,6 +522,7 @@ def stream_tts_to_speaker( min_sentence_len = 20 long_flush_len = 100 queue_timeout = 0.5 + _spoken_sentences: list[str] = [] # track spoken sentences to skip duplicates # Regex to strip complete <think>...</think> blocks from buffer _think_block_re = re.compile(r'<think[\s>].*?</think>', flags=re.DOTALL) @@ -532,6 +533,12 @@ def stream_tts_to_speaker( cleaned = _strip_markdown_for_tts(sentence).strip() if not cleaned: return + # Skip duplicate/near-duplicate sentences (LLM repetition) + cleaned_lower = cleaned.lower().rstrip(".!,") + for prev in _spoken_sentences: + if prev.lower().rstrip(".!,") == cleaned_lower: + return + _spoken_sentences.append(cleaned) # Display raw sentence on screen before TTS processing if display_callback is not None: display_callback(sentence)