fix(gateway): prevent TTS voice messages from accumulating across turns

Fixes #160

The issue was that MEDIA tags were being extracted from ALL messages
in the conversation history, not just messages from the current turn.
This caused TTS voice messages generated in earlier turns to be
re-attached to every subsequent reply.

The fix:
- Record history_len = len(agent_history) before calling run_conversation
- Only scan the messages added during the current turn, i.e.
  result["messages"][history_len:], for MEDIA tags
- Add comprehensive tests to prevent regression

This ensures each voice message is sent exactly once, when it's
generated, not on every subsequent message in the session.
This commit is contained in:
Bartok9
2026-02-28 03:38:27 -05:00
parent de5a88bd97
commit 35655298e6
2 changed files with 195 additions and 1 deletion

View File

@@ -1701,6 +1701,9 @@ class GatewayRunner:
content = f"[Delivered from {mirror_src}] {content}"
agent_history.append({"role": role, "content": content})
# Track history length to only scan NEW messages for MEDIA tags
history_len = len(agent_history)
result = agent.run_conversation(message, conversation_history=agent_history)
result_holder[0] = result
@@ -1721,10 +1724,17 @@ class GatewayRunner:
# doesn't include them. We collect unique tags from tool results and
# append any that aren't already present in the final response, so the
# adapter's extract_media() can find and deliver the files exactly once.
#
# IMPORTANT: Only scan messages from the CURRENT turn (after history_len),
# not the full history. This prevents TTS voice messages from earlier
# turns being re-attached to every subsequent reply. (Fixes #160)
if "MEDIA:" not in final_response:
media_tags = []
has_voice_directive = False
for msg in result.get("messages", []):
all_messages = result.get("messages", [])
# Only process new messages from this turn
new_messages = all_messages[history_len:] if len(all_messages) > history_len else []
for msg in new_messages:
if msg.get("role") == "tool" or msg.get("role") == "function":
content = msg.get("content", "")
if "MEDIA:" in content: