Merge PR #395: fix(gateway): use filtered history length for transcript message extraction
Authored by PercyDikec. Fixes #394. The transcript extraction used len(history) to find new messages, but history includes session_meta entries stripped before reaching the agent. This caused 1 message lost per turn from turn 2 onwards. Fix returns history_offset (filtered length) from _run_agent and uses it for the slice.
This commit is contained in:
@@ -945,9 +945,12 @@ class GatewayRunner:
|
||||
}
|
||||
)
|
||||
|
||||
# Find only the NEW messages from this turn (skip history we loaded)
|
||||
history_len = len(history)
|
||||
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else agent_messages
|
||||
# Find only the NEW messages from this turn (skip history we loaded).
|
||||
# Use the filtered history length (history_offset) that was actually
|
||||
# passed to the agent, not len(history) which includes session_meta
|
||||
# entries that were stripped before the agent saw them.
|
||||
history_len = agent_result.get("history_offset", len(history))
|
||||
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
|
||||
|
||||
# If no new messages found (edge case), fall back to simple user/assistant
|
||||
if not new_messages:
|
||||
@@ -2070,6 +2073,7 @@ class GatewayRunner:
|
||||
"messages": result_holder[0].get("messages", []) if result_holder[0] else [],
|
||||
"api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
|
||||
"tools": tools_holder[0] or [],
|
||||
"history_offset": len(agent_history),
|
||||
}
|
||||
|
||||
# Start progress message sender if enabled
|
||||
|
||||
267
tests/gateway/test_transcript_offset.py
Normal file
267
tests/gateway/test_transcript_offset.py
Normal file
@@ -0,0 +1,267 @@
|
||||
"""Tests for transcript history offset fix.
|
||||
|
||||
Regression tests for a bug where the gateway transcript lost 1 message
|
||||
per turn from turn 2 onwards. The raw transcript history includes
|
||||
``session_meta`` entries that are filtered out before being passed to
|
||||
the agent. The agent returns messages built from this filtered history
|
||||
plus new messages from the current turn.
|
||||
|
||||
The old code used ``len(history)`` (raw count, includes session_meta)
|
||||
to slice ``agent_messages``, which caused the slice to skip valid new
|
||||
messages. The fix adds ``history_offset`` (the filtered history length)
|
||||
to ``_run_agent``'s return dict and uses it for the slice.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers - replicate the filtering logic from _run_agent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _filter_history(history: list) -> list:
|
||||
"""Replicate the agent_history filtering from GatewayRunner._run_agent.
|
||||
|
||||
Strips session_meta and system messages, exactly as the real code does.
|
||||
"""
|
||||
agent_history = []
|
||||
for msg in history:
|
||||
role = msg.get("role")
|
||||
if not role:
|
||||
continue
|
||||
if role in ("session_meta",):
|
||||
continue
|
||||
if role == "system":
|
||||
continue
|
||||
|
||||
has_tool_calls = "tool_calls" in msg
|
||||
has_tool_call_id = "tool_call_id" in msg
|
||||
is_tool_message = role == "tool"
|
||||
|
||||
if has_tool_calls or has_tool_call_id or is_tool_message:
|
||||
clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}
|
||||
agent_history.append(clean_msg)
|
||||
else:
|
||||
content = msg.get("content")
|
||||
if content:
|
||||
agent_history.append({"role": role, "content": content})
|
||||
return agent_history
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTranscriptHistoryOffset:
|
||||
"""Verify the transcript extraction uses the filtered history length."""
|
||||
|
||||
def test_session_meta_causes_offset_mismatch(self):
|
||||
"""Turn 2: session_meta makes len(history) > len(agent_history).
|
||||
|
||||
- history (raw): 1 session_meta + 2 conversation = 3 entries
|
||||
- agent_history (filtered): 2 entries
|
||||
- Agent returns 2 old + 2 new = 4 messages
|
||||
- OLD: agent_messages[3:] = 1 message (lost the user message)
|
||||
- FIX: agent_messages[2:] = 2 messages (correct)
|
||||
"""
|
||||
history = [
|
||||
{"role": "session_meta", "tools": [], "model": "gpt-4",
|
||||
"platform": "telegram", "timestamp": "t0"},
|
||||
{"role": "user", "content": "Hello", "timestamp": "t1"},
|
||||
{"role": "assistant", "content": "Hi there!", "timestamp": "t1"},
|
||||
]
|
||||
|
||||
agent_history = _filter_history(history)
|
||||
assert len(agent_history) == 2 # session_meta stripped
|
||||
|
||||
# Agent returns: filtered history (2) + new turn (2)
|
||||
agent_messages = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"},
|
||||
{"role": "user", "content": "What is Python?"},
|
||||
{"role": "assistant", "content": "A programming language."},
|
||||
]
|
||||
|
||||
# OLD behavior: len(history) = 3, skips too many
|
||||
old_offset = len(history)
|
||||
old_new = (agent_messages[old_offset:]
|
||||
if len(agent_messages) > old_offset
|
||||
else agent_messages)
|
||||
assert len(old_new) == 1 # BUG: lost the user message
|
||||
|
||||
# FIXED behavior: history_offset = 2
|
||||
history_offset = len(agent_history)
|
||||
fixed_new = (agent_messages[history_offset:]
|
||||
if len(agent_messages) > history_offset
|
||||
else [])
|
||||
assert len(fixed_new) == 2
|
||||
assert fixed_new[0]["content"] == "What is Python?"
|
||||
assert fixed_new[1]["content"] == "A programming language."
|
||||
|
||||
def test_no_session_meta_same_result(self):
|
||||
"""First turn has no session_meta, so both approaches agree."""
|
||||
history = []
|
||||
agent_history = _filter_history(history)
|
||||
assert len(agent_history) == 0
|
||||
|
||||
agent_messages = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi!"},
|
||||
]
|
||||
|
||||
old_new = (agent_messages[len(history):]
|
||||
if len(agent_messages) > len(history)
|
||||
else agent_messages)
|
||||
fixed_new = (agent_messages[len(agent_history):]
|
||||
if len(agent_messages) > len(agent_history)
|
||||
else [])
|
||||
|
||||
assert old_new == fixed_new
|
||||
assert len(fixed_new) == 2
|
||||
|
||||
def test_multiple_session_meta_larger_drift(self):
|
||||
"""Two session_meta entries double the offset error.
|
||||
|
||||
This can happen when the session spans tool definition changes
|
||||
or model switches that each write a new session_meta record.
|
||||
"""
|
||||
history = [
|
||||
{"role": "session_meta", "tools": [], "timestamp": "t0"},
|
||||
{"role": "user", "content": "msg1", "timestamp": "t1"},
|
||||
{"role": "assistant", "content": "reply1", "timestamp": "t1"},
|
||||
{"role": "session_meta", "tools": ["new_tool"], "timestamp": "t2"},
|
||||
{"role": "user", "content": "msg2", "timestamp": "t3"},
|
||||
{"role": "assistant", "content": "reply2", "timestamp": "t3"},
|
||||
]
|
||||
|
||||
agent_history = _filter_history(history)
|
||||
assert len(agent_history) == 4
|
||||
assert len(history) == 6 # 2 extra session_meta entries
|
||||
|
||||
# Agent returns 4 old + 2 new = 6 total
|
||||
agent_messages = [
|
||||
{"role": "user", "content": "msg1"},
|
||||
{"role": "assistant", "content": "reply1"},
|
||||
{"role": "user", "content": "msg2"},
|
||||
{"role": "assistant", "content": "reply2"},
|
||||
{"role": "user", "content": "msg3"},
|
||||
{"role": "assistant", "content": "reply3"},
|
||||
]
|
||||
|
||||
# OLD: len(history) == len(agent_messages) == 6 -> else branch
|
||||
old_offset = len(history)
|
||||
old_new = (agent_messages[old_offset:]
|
||||
if len(agent_messages) > old_offset
|
||||
else agent_messages)
|
||||
# BUG: treats ALL messages as new (duplicates entire history)
|
||||
assert old_new == agent_messages
|
||||
|
||||
# FIXED: history_offset = 4
|
||||
fixed_new = (agent_messages[len(agent_history):]
|
||||
if len(agent_messages) > len(agent_history)
|
||||
else [])
|
||||
assert len(fixed_new) == 2
|
||||
assert fixed_new[0]["content"] == "msg3"
|
||||
assert fixed_new[1]["content"] == "reply3"
|
||||
|
||||
def test_system_messages_also_filtered(self):
|
||||
"""system messages in history are also stripped from agent_history."""
|
||||
history = [
|
||||
{"role": "session_meta", "tools": [], "timestamp": "t0"},
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Hi", "timestamp": "t1"},
|
||||
{"role": "assistant", "content": "Hello!", "timestamp": "t1"},
|
||||
]
|
||||
|
||||
agent_history = _filter_history(history)
|
||||
assert len(agent_history) == 2 # only user + assistant
|
||||
|
||||
agent_messages = [
|
||||
{"role": "user", "content": "Hi"},
|
||||
{"role": "assistant", "content": "Hello!"},
|
||||
{"role": "user", "content": "New question"},
|
||||
{"role": "assistant", "content": "New answer"},
|
||||
]
|
||||
|
||||
# OLD: len(history) = 4, skips everything
|
||||
old_offset = len(history)
|
||||
old_new = (agent_messages[old_offset:]
|
||||
if len(agent_messages) > old_offset
|
||||
else agent_messages)
|
||||
assert old_new == agent_messages # BUG: all treated as new
|
||||
|
||||
# FIXED
|
||||
fixed_new = (agent_messages[len(agent_history):]
|
||||
if len(agent_messages) > len(agent_history)
|
||||
else [])
|
||||
assert len(fixed_new) == 2
|
||||
assert fixed_new[0]["content"] == "New question"
|
||||
|
||||
def test_else_branch_returns_empty_list(self):
|
||||
"""When agent has fewer messages than offset, return [] not all.
|
||||
|
||||
The old code had ``else agent_messages`` which would treat the
|
||||
entire message list as new when the agent compressed or dropped
|
||||
messages. The fix changes this to ``else []``, falling through
|
||||
to the simple user/assistant fallback path.
|
||||
"""
|
||||
history = [
|
||||
{"role": "session_meta", "tools": [], "timestamp": "t0"},
|
||||
{"role": "user", "content": "Hello", "timestamp": "t1"},
|
||||
{"role": "assistant", "content": "Hi!", "timestamp": "t1"},
|
||||
]
|
||||
|
||||
# Agent compressed and returned fewer messages than history
|
||||
agent_messages = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi!"},
|
||||
]
|
||||
|
||||
history_offset = len(_filter_history(history)) # 2
|
||||
new_messages = (agent_messages[history_offset:]
|
||||
if len(agent_messages) > history_offset
|
||||
else [])
|
||||
# 2 == 2, so no new messages - falls to fallback
|
||||
assert new_messages == []
|
||||
|
||||
def test_tool_call_messages_preserved_in_filter(self):
|
||||
"""Tool call messages pass through the filter, keeping offset correct."""
|
||||
history = [
|
||||
{"role": "session_meta", "tools": [], "timestamp": "t0"},
|
||||
{"role": "user", "content": "Search for cats", "timestamp": "t1"},
|
||||
{"role": "assistant", "content": None, "timestamp": "t1",
|
||||
"tool_calls": [{"id": "tc1", "function": {"name": "web_search"}}]},
|
||||
{"role": "tool", "tool_call_id": "tc1",
|
||||
"content": "Results about cats", "timestamp": "t1"},
|
||||
{"role": "assistant", "content": "Here are results.",
|
||||
"timestamp": "t1"},
|
||||
]
|
||||
|
||||
agent_history = _filter_history(history)
|
||||
# session_meta filtered, but tool_calls/tool messages kept
|
||||
assert len(agent_history) == 4
|
||||
assert len(history) == 5 # 1 session_meta extra
|
||||
|
||||
agent_messages = [
|
||||
{"role": "user", "content": "Search for cats"},
|
||||
{"role": "assistant", "content": None,
|
||||
"tool_calls": [{"id": "tc1", "function": {"name": "web_search"}}]},
|
||||
{"role": "tool", "tool_call_id": "tc1", "content": "Results about cats"},
|
||||
{"role": "assistant", "content": "Here are results."},
|
||||
{"role": "user", "content": "Now search for dogs"},
|
||||
{"role": "assistant", "content": "Dog results here."},
|
||||
]
|
||||
|
||||
# OLD: len(history) = 5, agent_messages[5:] = 1 message (lost user msg)
|
||||
old_new = (agent_messages[len(history):]
|
||||
if len(agent_messages) > len(history)
|
||||
else agent_messages)
|
||||
assert len(old_new) == 1 # BUG
|
||||
|
||||
# FIXED
|
||||
fixed_new = (agent_messages[len(agent_history):]
|
||||
if len(agent_messages) > len(agent_history)
|
||||
else [])
|
||||
assert len(fixed_new) == 2
|
||||
assert fixed_new[0]["content"] == "Now search for dogs"
|
||||
assert fixed_new[1]["content"] == "Dog results here."
|
||||
Reference in New Issue
Block a user