feat: improve context compaction handoff summaries (#1273)
Adapt PR #916 onto current main by replacing the old context summary marker with a clearer handoff wrapper, updating the summarization prompt for resume-oriented summaries, and preserving the current call_llm-based compression path.
This commit is contained in:
@@ -17,6 +17,16 @@ from agent.model_metadata import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SUMMARY_PREFIX = (
|
||||
"[CONTEXT COMPACTION] Earlier turns in this conversation were compacted "
|
||||
"to save context space. The summary below describes work that was "
|
||||
"already completed, and the current session state may still reflect "
|
||||
"that work (for example, files may already be changed). Use the summary "
|
||||
"and the current state to continue from where things left off, and "
|
||||
"avoid repeating work:"
|
||||
)
|
||||
LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
|
||||
|
||||
|
||||
class ContextCompressor:
|
||||
"""Compresses conversation context when approaching the model's context limit.
|
||||
@@ -102,22 +112,22 @@ class ContextCompressor:
|
||||
parts.append(f"[{role.upper()}]: {content}")
|
||||
|
||||
content_to_summarize = "\n\n".join(parts)
|
||||
prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
|
||||
prompt = f"""Create a concise handoff summary for a later assistant that will continue this conversation after earlier turns are compacted.
|
||||
|
||||
Write from a neutral perspective describing:
|
||||
Describe:
|
||||
1. What actions were taken (tool calls, searches, file operations)
|
||||
2. Key information or results obtained
|
||||
3. Important decisions or findings
|
||||
4. Relevant data, file names, or outputs
|
||||
3. Important decisions, constraints, or user preferences
|
||||
4. Relevant data, file names, outputs, or next steps needed to continue
|
||||
|
||||
Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
|
||||
Keep it factual, concise, and focused on helping the next assistant resume without repeating work. Target ~{self.summary_target_tokens} tokens.
|
||||
|
||||
---
|
||||
TURNS TO SUMMARIZE:
|
||||
{content_to_summarize}
|
||||
---
|
||||
|
||||
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
Write only the summary body. Do not include any preamble or prefix; the system will add the handoff wrapper."""
|
||||
|
||||
# Use the centralized LLM router — handles provider resolution,
|
||||
# auth, and fallback internally.
|
||||
@@ -137,9 +147,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
if not isinstance(content, str):
|
||||
content = str(content) if content else ""
|
||||
summary = content.strip()
|
||||
if not summary.startswith("[CONTEXT SUMMARY]:"):
|
||||
summary = "[CONTEXT SUMMARY]: " + summary
|
||||
return summary
|
||||
return self._with_summary_prefix(summary)
|
||||
except RuntimeError:
|
||||
logging.warning("Context compression: no provider available for "
|
||||
"summary. Middle turns will be dropped without summary.")
|
||||
@@ -148,6 +156,16 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
logging.warning("Failed to generate context summary: %s", e)
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _with_summary_prefix(summary: str) -> str:
|
||||
"""Normalize summary text to the current compaction handoff format."""
|
||||
text = (summary or "").strip()
|
||||
for prefix in (LEGACY_SUMMARY_PREFIX, SUMMARY_PREFIX):
|
||||
if text.startswith(prefix):
|
||||
text = text[len(prefix):].lstrip()
|
||||
break
|
||||
return f"{SUMMARY_PREFIX}\n{text}" if text else SUMMARY_PREFIX
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tool-call / tool-result pair integrity helpers
|
||||
# ------------------------------------------------------------------
|
||||
@@ -287,7 +305,10 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
for i in range(compress_start):
|
||||
msg = messages[i].copy()
|
||||
if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
|
||||
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
|
||||
msg["content"] = (
|
||||
(msg.get("content") or "")
|
||||
+ "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
|
||||
)
|
||||
compressed.append(msg)
|
||||
|
||||
if summary:
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from agent.context_compressor import ContextCompressor
|
||||
from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
@@ -138,7 +138,7 @@ class TestGenerateSummaryNoneContent:
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
summary = c._generate_summary(messages)
|
||||
assert isinstance(summary, str)
|
||||
assert "CONTEXT SUMMARY" in summary
|
||||
assert summary.startswith(SUMMARY_PREFIX)
|
||||
|
||||
def test_none_content_in_system_message_compress(self):
|
||||
"""System message with content=None should not crash during compress."""
|
||||
@@ -172,7 +172,7 @@ class TestNonStringContent:
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
summary = c._generate_summary(messages)
|
||||
assert isinstance(summary, str)
|
||||
assert "CONTEXT SUMMARY" in summary
|
||||
assert summary.startswith(SUMMARY_PREFIX)
|
||||
|
||||
def test_none_content_coerced_to_empty(self):
|
||||
mock_response = MagicMock()
|
||||
@@ -189,9 +189,19 @@ class TestNonStringContent:
|
||||
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
summary = c._generate_summary(messages)
|
||||
# None content → empty string → "[CONTEXT SUMMARY]: " prefix added
|
||||
# None content → empty string → standardized compaction handoff prefix added
|
||||
assert summary is not None
|
||||
assert "CONTEXT SUMMARY" in summary
|
||||
assert summary == SUMMARY_PREFIX
|
||||
|
||||
|
||||
class TestSummaryPrefixNormalization:
|
||||
def test_legacy_prefix_is_replaced(self):
|
||||
summary = ContextCompressor._with_summary_prefix("[CONTEXT SUMMARY]: did work")
|
||||
assert summary == f"{SUMMARY_PREFIX}\ndid work"
|
||||
|
||||
def test_existing_new_prefix_is_not_duplicated(self):
|
||||
summary = ContextCompressor._with_summary_prefix(f"{SUMMARY_PREFIX}\ndid work")
|
||||
assert summary == f"{SUMMARY_PREFIX}\ndid work"
|
||||
|
||||
|
||||
class TestCompressWithClient:
|
||||
@@ -211,7 +221,7 @@ class TestCompressWithClient:
|
||||
|
||||
# Should have summary message in the middle
|
||||
contents = [m.get("content", "") for m in result]
|
||||
assert any("CONTEXT SUMMARY" in c for c in contents)
|
||||
assert any(c.startswith(SUMMARY_PREFIX) for c in contents)
|
||||
assert len(result) < len(msgs)
|
||||
|
||||
def test_summarization_does_not_split_tool_call_pairs(self):
|
||||
@@ -283,7 +293,9 @@ class TestCompressWithClient:
|
||||
]
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
result = c.compress(msgs)
|
||||
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
|
||||
summary_msg = [
|
||||
m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)
|
||||
]
|
||||
assert len(summary_msg) == 1
|
||||
assert summary_msg[0]["role"] == "user"
|
||||
|
||||
@@ -311,7 +323,9 @@ class TestCompressWithClient:
|
||||
]
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
result = c.compress(msgs)
|
||||
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
|
||||
summary_msg = [
|
||||
m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)
|
||||
]
|
||||
assert len(summary_msg) == 1
|
||||
assert summary_msg[0]["role"] == "assistant"
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.context_compressor import SUMMARY_PREFIX
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
@@ -340,7 +341,7 @@ class TestPreflightCompression:
|
||||
# Simulate compression reducing messages
|
||||
mock_compress.return_value = (
|
||||
[
|
||||
{"role": "user", "content": "[CONTEXT SUMMARY]: Previous conversation"},
|
||||
{"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
|
||||
{"role": "user", "content": "hello"},
|
||||
],
|
||||
"new system prompt",
|
||||
|
||||
Reference in New Issue
Block a user