From 5c479eedf1baa8d7229c867513b2805d58e7873c Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 14 Mar 2026 02:33:31 -0700
Subject: [PATCH] feat: improve context compaction handoff summaries (#1273)

Adapt PR #916 onto current main by replacing the old context summary marker
with a clearer handoff wrapper, updating the summarization prompt for
resume-oriented summaries, and preserving the current call_llm-based
compression path.
---
 agent/context_compressor.py            | 41 +++++++++++++++++++-------
 tests/agent/test_context_compressor.py | 30 ++++++++++++++-----
 tests/test_413_compression.py          |  3 +-
 3 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 5c0e0edf4..aa05a8daa 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -17,6 +17,16 @@ from agent.model_metadata import (
 
 logger = logging.getLogger(__name__)
 
+SUMMARY_PREFIX = (  # wrapper text placed ahead of every generated summary body
+    "[CONTEXT COMPACTION] Earlier turns in this conversation were compacted "
+    "to save context space. The summary below describes work that was "
+    "already completed, and the current session state may still reflect "
+    "that work (for example, files may already be changed). Use the summary "
+    "and the current state to continue from where things left off, and "
+    "avoid repeating work:"
+)
+LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"  # previous summary marker; stripped by _with_summary_prefix when present
+
 
 class ContextCompressor:
     """Compresses conversation context when approaching the model's context limit.
@@ -102,22 +112,22 @@ class ContextCompressor:
             parts.append(f"[{role.upper()}]: {content}")
 
         content_to_summarize = "\n\n".join(parts)
-        prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
+        prompt = f"""Create a concise handoff summary for a later assistant that will continue this conversation after earlier turns are compacted.
 
-Write from a neutral perspective describing:
+Describe:
 1. What actions were taken (tool calls, searches, file operations)
 2. Key information or results obtained
-3. Important decisions or findings
-4. Relevant data, file names, or outputs
+3. Important decisions, constraints, or user preferences
+4. Relevant data, file names, outputs, or next steps needed to continue
 
-Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
+Keep it factual, concise, and focused on helping the next assistant resume without repeating work. Target ~{self.summary_target_tokens} tokens.
 
 ---
 TURNS TO SUMMARIZE:
 {content_to_summarize}
 ---
 
-Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
+Write only the summary body. Do not include any preamble or prefix; the system will add the handoff wrapper."""
 
         # Use the centralized LLM router — handles provider resolution,
         # auth, and fallback internally.
@@ -137,9 +147,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
             if not isinstance(content, str):
                 content = str(content) if content else ""
             summary = content.strip()
-            if not summary.startswith("[CONTEXT SUMMARY]:"):
-                summary = "[CONTEXT SUMMARY]: " + summary
-            return summary
+            return self._with_summary_prefix(summary)
         except RuntimeError:
             logging.warning("Context compression: no provider available for "
                             "summary. Middle turns will be dropped without summary.")
@@ -148,6 +156,16 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
             logging.warning("Failed to generate context summary: %s", e)
             return None
 
+    @staticmethod
+    def _with_summary_prefix(summary: str) -> str:
+        """Strip one leading legacy/current prefix from *summary* and return the body under SUMMARY_PREFIX (prefix alone if the body is empty)."""
+        text = (summary or "").strip()  # None/empty input is treated as ""
+        for prefix in (LEGACY_SUMMARY_PREFIX, SUMMARY_PREFIX):
+            if text.startswith(prefix):
+                text = text[len(prefix):].lstrip()
+                break  # at most one leading prefix is removed
+        return f"{SUMMARY_PREFIX}\n{text}" if text else SUMMARY_PREFIX
+
     # ------------------------------------------------------------------
     # Tool-call / tool-result pair integrity helpers
     # ------------------------------------------------------------------
@@ -287,7 +305,10 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
         for i in range(compress_start):
             msg = messages[i].copy()
             if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
-                msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
+                msg["content"] = (
+                    (msg.get("content") or "")
+                    + "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
+                )
             compressed.append(msg)
 
         if summary:
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index dac64aaf6..1f62490e3 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -3,7 +3,7 @@
 import pytest
 from unittest.mock import patch, MagicMock
 
-from agent.context_compressor import ContextCompressor
+from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX
 
 
 @pytest.fixture()
@@ -138,7 +138,7 @@ class TestGenerateSummaryNoneContent:
         with patch("agent.context_compressor.call_llm", return_value=mock_response):
             summary = c._generate_summary(messages)
         assert isinstance(summary, str)
-        assert "CONTEXT SUMMARY" in summary
+        assert summary.startswith(SUMMARY_PREFIX)
 
     def test_none_content_in_system_message_compress(self):
         """System message with content=None should not crash during compress."""
@@ -172,7 +172,7 @@ class TestNonStringContent:
         with patch("agent.context_compressor.call_llm", return_value=mock_response):
             summary = c._generate_summary(messages)
         assert isinstance(summary, str)
-        assert "CONTEXT SUMMARY" in summary
+        assert summary.startswith(SUMMARY_PREFIX)
 
     def test_none_content_coerced_to_empty(self):
         mock_response = MagicMock()
@@ -189,9 +189,19 @@ class TestNonStringContent:
 
         with patch("agent.context_compressor.call_llm", return_value=mock_response):
             summary = c._generate_summary(messages)
-        # None content → empty string → "[CONTEXT SUMMARY]: " prefix added
+        # None content → empty string → standardized compaction handoff prefix added
         assert summary is not None
-        assert "CONTEXT SUMMARY" in summary
+        assert summary == SUMMARY_PREFIX
+
+
+class TestSummaryPrefixNormalization:  # unit tests for ContextCompressor._with_summary_prefix
+    def test_legacy_prefix_is_replaced(self):  # old "[CONTEXT SUMMARY]:" marker is swapped for SUMMARY_PREFIX
+        summary = ContextCompressor._with_summary_prefix("[CONTEXT SUMMARY]: did work")
+        assert summary == f"{SUMMARY_PREFIX}\ndid work"
+
+    def test_existing_new_prefix_is_not_duplicated(self):  # input already carrying SUMMARY_PREFIX comes back unchanged
+        summary = ContextCompressor._with_summary_prefix(f"{SUMMARY_PREFIX}\ndid work")
+        assert summary == f"{SUMMARY_PREFIX}\ndid work"
 
 
 class TestCompressWithClient:
@@ -211,7 +221,7 @@ class TestCompressWithClient:
 
         # Should have summary message in the middle
         contents = [m.get("content", "") for m in result]
-        assert any("CONTEXT SUMMARY" in c for c in contents)
+        assert any(c.startswith(SUMMARY_PREFIX) for c in contents)
         assert len(result) < len(msgs)
 
     def test_summarization_does_not_split_tool_call_pairs(self):
@@ -283,7 +293,9 @@ class TestCompressWithClient:
         ]
         with patch("agent.context_compressor.call_llm", return_value=mock_response):
             result = c.compress(msgs)
-        summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
+        summary_msg = [
+            m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)
+        ]
         assert len(summary_msg) == 1
         assert summary_msg[0]["role"] == "user"
 
@@ -311,7 +323,9 @@ class TestCompressWithClient:
         ]
         with patch("agent.context_compressor.call_llm", return_value=mock_response):
             result = c.compress(msgs)
-        summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
+        summary_msg = [
+            m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)
+        ]
         assert len(summary_msg) == 1
         assert summary_msg[0]["role"] == "assistant"
 
diff --git a/tests/test_413_compression.py b/tests/test_413_compression.py
index e35f67b4d..da78cd3e4 100644
--- a/tests/test_413_compression.py
+++ b/tests/test_413_compression.py
@@ -17,6 +17,7 @@ from unittest.mock import MagicMock, patch
 
 import pytest
 
+from agent.context_compressor import SUMMARY_PREFIX
 from run_agent import AIAgent
 
 
@@ -340,7 +341,7 @@ class TestPreflightCompression:
             # Simulate compression reducing messages
             mock_compress.return_value = (
                 [
-                    {"role": "user", "content": "[CONTEXT SUMMARY]: Previous conversation"},
+                    {"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
                     {"role": "user", "content": "hello"},
                 ],
                 "new system prompt",