feat: improve context compaction handoff summaries (#1273)

Adapt PR #916 onto current main by replacing the old context summary marker
with a clearer handoff wrapper, updating the summarization prompt for
resume-oriented summaries, and preserving the current call_llm-based
compression path.
This commit is contained in:
Teknium
2026-03-14 02:33:31 -07:00
committed by GitHub
parent 728fa66ef0
commit 5c479eedf1
3 changed files with 55 additions and 19 deletions

View File

@@ -17,6 +17,16 @@ from agent.model_metadata import (
logger = logging.getLogger(__name__)
SUMMARY_PREFIX = (
"[CONTEXT COMPACTION] Earlier turns in this conversation were compacted "
"to save context space. The summary below describes work that was "
"already completed, and the current session state may still reflect "
"that work (for example, files may already be changed). Use the summary "
"and the current state to continue from where things left off, and "
"avoid repeating work:"
)
LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
class ContextCompressor:
"""Compresses conversation context when approaching the model's context limit.
@@ -102,22 +112,22 @@ class ContextCompressor:
parts.append(f"[{role.upper()}]: {content}")
content_to_summarize = "\n\n".join(parts)
prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
prompt = f"""Create a concise handoff summary for a later assistant that will continue this conversation after earlier turns are compacted.
Write from a neutral perspective describing:
Describe:
1. What actions were taken (tool calls, searches, file operations)
2. Key information or results obtained
3. Important decisions or findings
4. Relevant data, file names, or outputs
3. Important decisions, constraints, or user preferences
4. Relevant data, file names, outputs, or next steps needed to continue
Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
Keep it factual, concise, and focused on helping the next assistant resume without repeating work. Target ~{self.summary_target_tokens} tokens.
---
TURNS TO SUMMARIZE:
{content_to_summarize}
---
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
Write only the summary body. Do not include any preamble or prefix; the system will add the handoff wrapper."""
# Use the centralized LLM router — handles provider resolution,
# auth, and fallback internally.
@@ -137,9 +147,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
if not isinstance(content, str):
content = str(content) if content else ""
summary = content.strip()
if not summary.startswith("[CONTEXT SUMMARY]:"):
summary = "[CONTEXT SUMMARY]: " + summary
return summary
return self._with_summary_prefix(summary)
except RuntimeError:
logging.warning("Context compression: no provider available for "
"summary. Middle turns will be dropped without summary.")
@@ -148,6 +156,16 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
logging.warning("Failed to generate context summary: %s", e)
return None
@staticmethod
def _with_summary_prefix(summary: str) -> str:
    """Normalize summary text to the current compaction handoff format.

    Strips at most one known prefix (legacy "[CONTEXT SUMMARY]:" form is
    checked first, then the current handoff wrapper) from the raw summary,
    then prepends the current SUMMARY_PREFIX. An empty summary yields the
    bare prefix with no trailing newline.
    """
    body = (summary or "").strip()
    known_prefixes = (LEGACY_SUMMARY_PREFIX, SUMMARY_PREFIX)
    matched = next((p for p in known_prefixes if body.startswith(p)), None)
    if matched is not None:
        body = body[len(matched):].lstrip()
    if not body:
        return SUMMARY_PREFIX
    return f"{SUMMARY_PREFIX}\n{body}"
# ------------------------------------------------------------------
# Tool-call / tool-result pair integrity helpers
# ------------------------------------------------------------------
@@ -287,7 +305,10 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
for i in range(compress_start):
msg = messages[i].copy()
if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
msg["content"] = (
(msg.get("content") or "")
+ "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
)
compressed.append(msg)
if summary:

View File

@@ -3,7 +3,7 @@
import pytest
from unittest.mock import patch, MagicMock
from agent.context_compressor import ContextCompressor
from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX
@pytest.fixture()
@@ -138,7 +138,7 @@ class TestGenerateSummaryNoneContent:
with patch("agent.context_compressor.call_llm", return_value=mock_response):
summary = c._generate_summary(messages)
assert isinstance(summary, str)
assert "CONTEXT SUMMARY" in summary
assert summary.startswith(SUMMARY_PREFIX)
def test_none_content_in_system_message_compress(self):
"""System message with content=None should not crash during compress."""
@@ -172,7 +172,7 @@ class TestNonStringContent:
with patch("agent.context_compressor.call_llm", return_value=mock_response):
summary = c._generate_summary(messages)
assert isinstance(summary, str)
assert "CONTEXT SUMMARY" in summary
assert summary.startswith(SUMMARY_PREFIX)
def test_none_content_coerced_to_empty(self):
mock_response = MagicMock()
@@ -189,9 +189,19 @@ class TestNonStringContent:
with patch("agent.context_compressor.call_llm", return_value=mock_response):
summary = c._generate_summary(messages)
# None content → empty string → "[CONTEXT SUMMARY]: " prefix added
# None content → empty string → standardized compaction handoff prefix added
assert summary is not None
assert "CONTEXT SUMMARY" in summary
assert summary == SUMMARY_PREFIX
class TestSummaryPrefixNormalization:
    """Checks that _with_summary_prefix converts any input to the current wrapper."""

    def test_legacy_prefix_is_replaced(self):
        # A summary carrying the old-style prefix is rewrapped with the new one.
        result = ContextCompressor._with_summary_prefix(
            "[CONTEXT SUMMARY]: did work"
        )
        assert result == f"{SUMMARY_PREFIX}\ndid work"

    def test_existing_new_prefix_is_not_duplicated(self):
        # Already-wrapped input passes through unchanged (no double prefix).
        already_wrapped = f"{SUMMARY_PREFIX}\ndid work"
        result = ContextCompressor._with_summary_prefix(already_wrapped)
        assert result == already_wrapped
class TestCompressWithClient:
@@ -211,7 +221,7 @@ class TestCompressWithClient:
# Should have summary message in the middle
contents = [m.get("content", "") for m in result]
assert any("CONTEXT SUMMARY" in c for c in contents)
assert any(c.startswith(SUMMARY_PREFIX) for c in contents)
assert len(result) < len(msgs)
def test_summarization_does_not_split_tool_call_pairs(self):
@@ -283,7 +293,9 @@ class TestCompressWithClient:
]
with patch("agent.context_compressor.call_llm", return_value=mock_response):
result = c.compress(msgs)
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
summary_msg = [
m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)
]
assert len(summary_msg) == 1
assert summary_msg[0]["role"] == "user"
@@ -311,7 +323,9 @@ class TestCompressWithClient:
]
with patch("agent.context_compressor.call_llm", return_value=mock_response):
result = c.compress(msgs)
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
summary_msg = [
m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)
]
assert len(summary_msg) == 1
assert summary_msg[0]["role"] == "assistant"

View File

@@ -17,6 +17,7 @@ from unittest.mock import MagicMock, patch
import pytest
from agent.context_compressor import SUMMARY_PREFIX
from run_agent import AIAgent
@@ -340,7 +341,7 @@ class TestPreflightCompression:
# Simulate compression reducing messages
mock_compress.return_value = (
[
{"role": "user", "content": "[CONTEXT SUMMARY]: Previous conversation"},
{"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
{"role": "user", "content": "hello"},
],
"new system prompt",