Files
hermes-agent/tests/test_compression_boundary.py
Teknium d132e344d7 fix(agent): prevent silent tool result loss during context compression (#1993)
_align_boundary_backward only checked messages[idx-1] to decide if
the compress-end boundary splits a tool_call/result group. When an
assistant issues 3+ parallel tool calls, their results span multiple
consecutive messages. If the boundary fell in the middle of that group,
the parent assistant was summarized away and orphaned tool results were
silently deleted by _sanitize_tool_pairs.

Now walks backward through all consecutive tool results to find the
parent assistant, then pulls the boundary before the entire group.

6 regression tests added in tests/test_compression_boundary.py.

Co-authored-by: Guts <Gutslabs@users.noreply.github.com>
2026-03-18 15:22:51 -07:00

200 lines
9.0 KiB
Python

"""Tests for context compression boundary alignment.
Verifies that _align_boundary_backward correctly handles tool result groups
so that parallel tool calls are never split during compression.
"""
import pytest
from unittest.mock import patch, MagicMock
from agent.context_compressor import ContextCompressor
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _tc(call_id: str) -> dict:
"""Create a minimal tool_call dict."""
return {"id": call_id, "type": "function", "function": {"name": "test", "arguments": "{}"}}
def _tool_result(call_id: str, content: str = "result") -> dict:
"""Create a tool result message."""
return {"role": "tool", "tool_call_id": call_id, "content": content}
def _assistant_with_tools(*call_ids: str) -> dict:
"""Create an assistant message with tool_calls."""
return {"role": "assistant", "tool_calls": [_tc(cid) for cid in call_ids], "content": None}
def _make_compressor(**kwargs) -> ContextCompressor:
defaults = dict(
model="test-model",
threshold_percent=0.75,
protect_first_n=3,
protect_last_n=4,
quiet_mode=True,
)
defaults.update(kwargs)
with patch("agent.context_compressor.get_model_context_length", return_value=8000):
return ContextCompressor(**defaults)
# ---------------------------------------------------------------------------
# _align_boundary_backward tests
# ---------------------------------------------------------------------------
class TestAlignBoundaryBackward:
"""Test that compress-end boundary never splits a tool_call/result group."""
def test_boundary_at_clean_position(self):
"""Boundary after a user message — no adjustment needed."""
comp = _make_compressor()
messages = [
{"role": "system", "content": "sys"},
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "hi"},
{"role": "user", "content": "do something"},
_assistant_with_tools("tc_1"),
_tool_result("tc_1", "done"),
{"role": "user", "content": "thanks"}, # idx=6
{"role": "assistant", "content": "np"},
]
# Boundary at 7, messages[6] = user — no adjustment
assert comp._align_boundary_backward(messages, 7) == 7
def test_boundary_after_assistant_with_tools(self):
"""Original case: boundary right after assistant with tool_calls."""
comp = _make_compressor()
messages = [
{"role": "system", "content": "sys"},
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "hi"},
_assistant_with_tools("tc_1", "tc_2"), # idx=3
_tool_result("tc_1"), # idx=4
_tool_result("tc_2"), # idx=5
{"role": "user", "content": "next"},
]
# Boundary at 4, messages[3] = assistant with tool_calls → pull back to 3
assert comp._align_boundary_backward(messages, 4) == 3
def test_boundary_in_middle_of_tool_results(self):
"""THE BUG: boundary falls between tool results of the same group."""
comp = _make_compressor()
messages = [
{"role": "system", "content": "sys"},
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "hi"},
{"role": "user", "content": "do 5 things"},
_assistant_with_tools("tc_A", "tc_B", "tc_C", "tc_D", "tc_E"), # idx=4
_tool_result("tc_A", "result A"), # idx=5
_tool_result("tc_B", "result B"), # idx=6
_tool_result("tc_C", "result C"), # idx=7
_tool_result("tc_D", "result D"), # idx=8
_tool_result("tc_E", "result E"), # idx=9
{"role": "user", "content": "ok"},
{"role": "assistant", "content": "done"},
]
# Boundary at 8 — in middle of tool results. messages[7] = tool result.
# Must walk back to idx=4 (the parent assistant).
assert comp._align_boundary_backward(messages, 8) == 4
def test_boundary_at_last_tool_result(self):
"""Boundary right after last tool result — messages[idx-1] is tool."""
comp = _make_compressor()
messages = [
{"role": "system", "content": "sys"},
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "hi"},
_assistant_with_tools("tc_1", "tc_2", "tc_3"), # idx=3
_tool_result("tc_1"), # idx=4
_tool_result("tc_2"), # idx=5
_tool_result("tc_3"), # idx=6
{"role": "user", "content": "next"},
]
# Boundary at 7 — messages[6] is last tool result.
# Walk back: [6]=tool, [5]=tool, [4]=tool, [3]=assistant with tools → idx=3
assert comp._align_boundary_backward(messages, 7) == 3
def test_boundary_with_consecutive_tool_groups(self):
"""Two consecutive tool groups — only walk back to the nearest parent."""
comp = _make_compressor()
messages = [
{"role": "system", "content": "sys"},
{"role": "user", "content": "hello"},
_assistant_with_tools("tc_1"), # idx=2
_tool_result("tc_1"), # idx=3
{"role": "user", "content": "more"},
_assistant_with_tools("tc_2", "tc_3"), # idx=5
_tool_result("tc_2"), # idx=6
_tool_result("tc_3"), # idx=7
{"role": "user", "content": "done"},
]
# Boundary at 7 — messages[6] = tool result for tc_2 group
# Walk back: [6]=tool, [5]=assistant with tools → idx=5
assert comp._align_boundary_backward(messages, 7) == 5
# ---------------------------------------------------------------------------
# End-to-end: compression must not lose tool results
# ---------------------------------------------------------------------------
class TestCompressionToolResultPreservation:
"""Verify that compress() never silently drops tool results."""
def test_parallel_tool_results_not_lost(self):
"""The exact scenario that triggered silent data loss before the fix."""
comp = _make_compressor(protect_first_n=3, protect_last_n=4)
messages = [
{"role": "system", "content": "You are helpful."}, # 0
{"role": "user", "content": "Hello"}, # 1
{"role": "assistant", "content": "Hi there!"}, # 2 (end of head)
{"role": "user", "content": "Read 7 files for me"}, # 3
_assistant_with_tools("tc_A", "tc_B", "tc_C", "tc_D", "tc_E", "tc_F", "tc_G"), # 4
_tool_result("tc_A", "content of file A"), # 5
_tool_result("tc_B", "content of file B"), # 6
_tool_result("tc_C", "content of file C"), # 7
_tool_result("tc_D", "content of file D"), # 8
_tool_result("tc_E", "content of file E"), # 9
_tool_result("tc_F", "content of file F"), # 10
_tool_result("tc_G", "CRITICAL DATA in file G"), # 11 ← compress_end=15-4=11
{"role": "user", "content": "Now summarize them"}, # 12
{"role": "assistant", "content": "Here is the summary..."}, # 13
{"role": "user", "content": "Thanks"}, # 14
]
# 15 messages. compress_end = 15 - 4 = 11 (before fix: splits tool group)
fake_summary = "[Summary of earlier conversation]"
with patch.object(comp, "_generate_summary", return_value=fake_summary):
result = comp.compress(messages, current_tokens=7000)
# After compression, no tool results should be orphaned/lost.
# All tool results in the result must have a matching assistant tool_call.
assistant_call_ids = set()
for msg in result:
if msg.get("role") == "assistant":
for tc in msg.get("tool_calls") or []:
cid = tc.get("id", "")
if cid:
assistant_call_ids.add(cid)
tool_result_ids = set()
for msg in result:
if msg.get("role") == "tool":
cid = msg.get("tool_call_id")
if cid:
tool_result_ids.add(cid)
# Every tool result must have a parent — no orphans
orphaned = tool_result_ids - assistant_call_ids
assert not orphaned, f"Orphaned tool results found (data loss!): {orphaned}"
# Every assistant tool_call must have a real result (not a stub)
for msg in result:
if msg.get("role") == "tool":
assert msg["content"] != "[Result from earlier conversation — see context summary above]", \
f"Stub result found for {msg.get('tool_call_id')} — real result was lost"