When context compression fires during run_conversation() in the gateway, the compressed messages were silently lost on the next turn. Two bugs: 1. Agent-side: _flush_messages_to_session_db() calculated flush_from = max(len(conversation_history), _last_flushed_db_idx). After compression, _last_flushed_db_idx was correctly reset to 0, but conversation_history still had its original pre-compression length (e.g. 200). Since compressed messages are shorter (~30), messages[200:] was empty — nothing written to the new session's SQLite. Fix: Set conversation_history = None after each _compress_context() call so start_idx = 0 and all compressed messages are flushed. 2. Gateway-side: history_offset was always len(agent_history) — the original pre-compression length. After compression shortened the message list, agent_messages[200:] was empty, causing the gateway to fall back to writing only a user/assistant pair, losing the compressed summary and tail context. Fix: Detect session splits (agent.session_id != original) and set history_offset = 0 so all compressed messages are written to JSONL.
203 lines · 8.8 KiB · Python
"""Tests for context compression persistence in the gateway.
|
|
|
|
Verifies that when context compression fires during run_conversation(),
|
|
the compressed messages are properly persisted to both SQLite (via the
|
|
agent) and JSONL (via the gateway).
|
|
|
|
Bug scenario (pre-fix):
|
|
1. Gateway loads 200-message history, passes to agent
|
|
2. Agent's run_conversation() compresses to ~30 messages mid-run
|
|
3. _compress_context() resets _last_flushed_db_idx = 0
|
|
4. On exit, _flush_messages_to_session_db() calculates:
|
|
flush_from = max(len(conversation_history=200), _last_flushed_db_idx=0) = 200
|
|
5. messages[200:] is empty (only ~30 messages after compression)
|
|
6. Nothing written to new session's SQLite — compressed context lost
|
|
7. Gateway's history_offset was still 200, producing empty new_messages
|
|
8. Fallback wrote only user/assistant pair — summary lost
|
|
"""
|
|
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Part 1: Agent-side — _flush_messages_to_session_db after compression
# ---------------------------------------------------------------------------
|
|
|
|
class TestFlushAfterCompression:
    """Check that the new session's SQLite receives every compressed message,
    even when the (stale) conversation_history carried over from the original
    session is longer than the compressed message list."""

    def _make_agent(self, session_db):
        # Build a minimally-configured AIAgent bound to session_db; the fake
        # API key only needs to exist so construction does not bail out.
        with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
            from run_agent import AIAgent
            agent = AIAgent(
                model="test/model",
                quiet_mode=True,
                session_db=session_db,
                session_id="original-session",
                skip_context_files=True,
                skip_memory=True,
            )
        return agent

    def test_flush_after_compression_with_long_history(self):
        """Regression test for the core bug: a stale conversation_history that
        is longer than the compressed message list must not suppress the flush.

        Pre-fix, flush_from = max(len(conversation_history), 0) = 200 while
        the compressed list held only a handful of entries, so messages[200:]
        was empty.  Post-fix, conversation_history is cleared to None after
        compression, giving flush_from = max(0, 0) = 0, so every compressed
        message is written.
        """
        from hermes_state import SessionDB

        with tempfile.TemporaryDirectory() as tmpdir:
            db = SessionDB(db_path=Path(tmpdir) / "test.db")
            agent = self._make_agent(db)

            # A 200-entry alternating user/assistant transcript stands in for
            # the original (pre-compression) session history.
            long_history = []
            for i in range(200):
                speaker = "user" if i % 2 == 0 else "assistant"
                long_history.append({"role": speaker, "content": f"message {i}"})

            # Persist the long history into the original session first.
            agent._flush_messages_to_session_db(long_history, [])
            assert len(db.get_messages("original-session")) == 200

            # Simulate what _compress_context() does: fresh session id,
            # flush index reset, and a much shorter message list.
            agent.session_id = "compressed-session"
            db.create_session(session_id="compressed-session", source="test")
            agent._last_flushed_db_idx = 0

            # The compressed messages: summary + tail + the new turn.
            compacted = [
                {"role": "user", "content": "[CONTEXT COMPACTION] Summary of work..."},
                {"role": "user", "content": "What should we do next?"},
                {"role": "assistant", "content": "Let me check..."},
                {"role": "user", "content": "new question"},
                {"role": "assistant", "content": "new answer"},
            ]

            # Post-fix the agent passes conversation_history=None here, so
            # the flush starts at index 0 instead of the stale length.
            agent._flush_messages_to_session_db(compacted, None)

            new_rows = db.get_messages("compressed-session")
            assert len(new_rows) == 5, (
                f"Expected 5 compressed messages in new session, got {len(new_rows)}. "
                f"Compression persistence bug: messages not written to SQLite."
            )

    def test_flush_with_stale_history_loses_messages(self):
        """Demonstrates the bug condition: stale conversation_history causes data loss."""
        from hermes_state import SessionDB

        with tempfile.TemporaryDirectory() as tmpdir:
            db = SessionDB(db_path=Path(tmpdir) / "test.db")
            agent = self._make_agent(db)

            # Pretend compression just happened: new session, index reset.
            agent.session_id = "new-session"
            db.create_session(session_id="new-session", source="test")
            agent._last_flushed_db_idx = 0

            compressed = [
                {"role": "user", "content": "summary"},
                {"role": "assistant", "content": "continuing..."},
            ]

            # Handing over a 100-entry stale history pushes flush_from to 100,
            # far past the end of the 2-entry compressed list.
            stale = [{"role": "user", "content": f"msg{i}"} for i in range(100)]
            agent._flush_messages_to_session_db(compressed, stale)

            rows = db.get_messages("new-session")
            # With the stale history, flush_from = max(100, 0) = 100, but the
            # compressed list has only 2 entries, so nothing gets written.
            assert len(rows) == 0, (
                "Expected 0 messages with stale conversation_history "
                "(this test verifies the bug condition exists)"
            )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Part 2: Gateway-side — history_offset after session split
# ---------------------------------------------------------------------------
|
|
|
|
class TestGatewayHistoryOffsetAfterSplit:
    """When compression forces the agent onto a fresh session id, the gateway
    must use a history offset of zero so that every compressed message is
    written to the JSONL transcript."""

    def test_history_offset_zero_on_session_split(self):
        """When agent.session_id differs from the original, history_offset must be 0."""
        # Mirrors the calculation in gateway/run.py run_sync():
        #   _session_was_split = agent.session_id != session_id
        #   _effective_history_offset = 0 if _session_was_split else len(agent_history)
        gateway_session = "session-abc"
        agent_session = "session-compressed-xyz"  # differs => compression occurred
        prior_history_len = 200

        # Post-fix offset calculation.
        split_detected = gateway_session != agent_session
        offset = 0 if split_detected else prior_history_len

        assert split_detected is True
        assert offset == 0

    def test_history_offset_preserved_without_split(self):
        """When no compression happened, history_offset is the original length."""
        gateway_session = "session-abc"
        agent_session = "session-abc"  # identical => no compression
        prior_history_len = 200

        split_detected = gateway_session != agent_session
        offset = 0 if split_detected else prior_history_len

        assert split_detected is False
        assert offset == 200

    def test_new_messages_extraction_after_split(self):
        """After compression with offset=0, new_messages should be ALL agent messages."""
        # Same slicing the gateway performs when persisting to JSONL.
        transcript = [
            {"role": "user", "content": "[CONTEXT COMPACTION] Summary..."},
            {"role": "user", "content": "recent question"},
            {"role": "assistant", "content": "recent answer"},
            {"role": "user", "content": "new question"},
            {"role": "assistant", "content": "new answer"},
        ]
        offset = 0  # post-fix value whenever the session was split

        if len(transcript) > offset:
            new_messages = transcript[offset:]
        else:
            new_messages = []
        assert len(new_messages) == 5, (
            f"Expected all 5 messages with offset=0, got {len(new_messages)}"
        )

    def test_new_messages_empty_with_stale_offset(self):
        """Demonstrates the bug: stale offset produces empty new_messages."""
        transcript = [
            {"role": "user", "content": "summary"},
            {"role": "assistant", "content": "answer"},
        ]
        offset = 200  # pre-fix: the stale pre-compression history length

        if len(transcript) > offset:
            new_messages = transcript[offset:]
        else:
            new_messages = []
        assert len(new_messages) == 0, (
            "Expected 0 messages with stale offset=200 (demonstrates the bug)"
        )