From 5c479eedf1baa8d7229c867513b2805d58e7873c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 14 Mar 2026 02:33:31 -0700 Subject: [PATCH] feat: improve context compaction handoff summaries (#1273) Adapt PR #916 onto current main by replacing the old context summary marker with a clearer handoff wrapper, updating the summarization prompt for resume-oriented summaries, and preserving the current call_llm-based compression path. --- agent/context_compressor.py | 41 +++++++++++++++++++------- tests/agent/test_context_compressor.py | 30 ++++++++++++++----- tests/test_413_compression.py | 3 +- 3 files changed, 55 insertions(+), 19 deletions(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 5c0e0edf4..aa05a8daa 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -17,6 +17,16 @@ from agent.model_metadata import ( logger = logging.getLogger(__name__) +SUMMARY_PREFIX = ( + "[CONTEXT COMPACTION] Earlier turns in this conversation were compacted " + "to save context space. The summary below describes work that was " + "already completed, and the current session state may still reflect " + "that work (for example, files may already be changed). Use the summary " + "and the current state to continue from where things left off, and " + "avoid repeating work:" +) +LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:" + class ContextCompressor: """Compresses conversation context when approaching the model's context limit. @@ -102,22 +112,22 @@ class ContextCompressor: parts.append(f"[{role.upper()}]: {content}") content_to_summarize = "\n\n".join(parts) - prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history. + prompt = f"""Create a concise handoff summary for a later assistant that will continue this conversation after earlier turns are compacted. -Write from a neutral perspective describing: +Describe: 1. What actions were taken (tool calls, searches, file operations) 2. Key information or results obtained -3. Important decisions or findings -4. Relevant data, file names, or outputs +3. Important decisions, constraints, or user preferences +4. Relevant data, file names, outputs, or next steps needed to continue -Keep factual and informative. Target ~{self.summary_target_tokens} tokens. +Keep it factual, concise, and focused on helping the next assistant resume without repeating work. Target ~{self.summary_target_tokens} tokens. --- TURNS TO SUMMARIZE: {content_to_summarize} --- -Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" +Write only the summary body. Do not include any preamble or prefix; the system will add the handoff wrapper.""" # Use the centralized LLM router — handles provider resolution, # auth, and fallback internally. @@ -137,9 +147,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" if not isinstance(content, str): content = str(content) if content else "" summary = content.strip() - if not summary.startswith("[CONTEXT SUMMARY]:"): - summary = "[CONTEXT SUMMARY]: " + summary - return summary + return self._with_summary_prefix(summary) except RuntimeError: logging.warning("Context compression: no provider available for " "summary. Middle turns will be dropped without summary.") @@ -148,6 +156,16 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" logging.warning("Failed to generate context summary: %s", e) return None + @staticmethod + def _with_summary_prefix(summary: str) -> str: + """Normalize summary text to the current compaction handoff format.""" + text = (summary or "").strip() + for prefix in (LEGACY_SUMMARY_PREFIX, SUMMARY_PREFIX): + if text.startswith(prefix): + text = text[len(prefix):].lstrip() + break + return f"{SUMMARY_PREFIX}\n{text}" if text else SUMMARY_PREFIX + # ------------------------------------------------------------------ # Tool-call / tool-result pair integrity helpers # ------------------------------------------------------------------ @@ -287,7 +305,10 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" for i in range(compress_start): msg = messages[i].copy() if i == 0 and msg.get("role") == "system" and self.compression_count == 0: - msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]" + msg["content"] = ( + (msg.get("content") or "") + + "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]" + ) compressed.append(msg) if summary: diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index dac64aaf6..1f62490e3 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -3,7 +3,7 @@ import pytest from unittest.mock import patch, MagicMock -from agent.context_compressor import ContextCompressor +from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX @pytest.fixture() @@ -138,7 +138,7 @@ class TestGenerateSummaryNoneContent: with patch("agent.context_compressor.call_llm", return_value=mock_response): summary = c._generate_summary(messages) assert isinstance(summary, str) - assert "CONTEXT SUMMARY" in summary + assert summary.startswith(SUMMARY_PREFIX) def test_none_content_in_system_message_compress(self): """System message with content=None should not crash during compress.""" @@ -172,7 +172,7 @@ class TestNonStringContent: with patch("agent.context_compressor.call_llm", return_value=mock_response): summary = c._generate_summary(messages) assert isinstance(summary, str) - assert "CONTEXT SUMMARY" in summary + assert summary.startswith(SUMMARY_PREFIX) def test_none_content_coerced_to_empty(self): mock_response = MagicMock() @@ -189,9 +189,19 @@ class TestNonStringContent: with patch("agent.context_compressor.call_llm", return_value=mock_response): summary = c._generate_summary(messages) - # None content → empty string → "[CONTEXT SUMMARY]: " prefix added + # None content → empty string → standardized compaction handoff prefix added assert summary is not None - assert "CONTEXT SUMMARY" in summary + assert summary == SUMMARY_PREFIX + + +class TestSummaryPrefixNormalization: + def test_legacy_prefix_is_replaced(self): + summary = ContextCompressor._with_summary_prefix("[CONTEXT SUMMARY]: did work") + assert summary == f"{SUMMARY_PREFIX}\ndid work" + + def test_existing_new_prefix_is_not_duplicated(self): + summary = ContextCompressor._with_summary_prefix(f"{SUMMARY_PREFIX}\ndid work") + assert summary == f"{SUMMARY_PREFIX}\ndid work" class TestCompressWithClient: @@ -211,7 +221,7 @@ class TestCompressWithClient: # Should have summary message in the middle contents = [m.get("content", "") for m in result] - assert any("CONTEXT SUMMARY" in c for c in contents) + assert any(c.startswith(SUMMARY_PREFIX) for c in contents) assert len(result) < len(msgs) def test_summarization_does_not_split_tool_call_pairs(self): @@ -283,7 +293,9 @@ class TestCompressWithClient: ] with patch("agent.context_compressor.call_llm", return_value=mock_response): result = c.compress(msgs) - summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")] + summary_msg = [ + m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX) + ] assert len(summary_msg) == 1 assert summary_msg[0]["role"] == "user" @@ -311,7 +323,9 @@ class TestCompressWithClient: ] with patch("agent.context_compressor.call_llm", return_value=mock_response): result = c.compress(msgs) - summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")] + summary_msg = [ + m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX) + ] assert len(summary_msg) == 1 assert summary_msg[0]["role"] == "assistant" diff --git a/tests/test_413_compression.py b/tests/test_413_compression.py index e35f67b4d..da78cd3e4 100644 --- a/tests/test_413_compression.py +++ b/tests/test_413_compression.py @@ -17,6 +17,7 @@ from unittest.mock import MagicMock, patch import pytest +from agent.context_compressor import SUMMARY_PREFIX from run_agent import AIAgent @@ -340,7 +341,7 @@ class TestPreflightCompression: # Simulate compression reducing messages mock_compress.return_value = ( [ - {"role": "user", "content": "[CONTEXT SUMMARY]: Previous conversation"}, + {"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"}, {"role": "user", "content": "hello"}, ], "new system prompt",