fix: keep honcho recall out of cached system prefix (#1201)

Attach later-turn Honcho recall to the current-turn user message at API
call time instead of appending it to the system prompt. This preserves the
stable system-prefix cache while keeping Honcho continuity context
available for the turn.

Also adds regression coverage for the injection helper and for continuing
sessions so Honcho recall stays out of the system prompt.
This commit is contained in:
Teknium
2026-03-13 21:07:00 -07:00
committed by GitHub
parent 07d70a0345
commit 938e887b4c
2 changed files with 87 additions and 7 deletions

View File

@@ -202,6 +202,32 @@ _NEVER_PARALLEL_TOOLS = frozenset({"clarify"})
_MAX_TOOL_WORKERS = 8
def _inject_honcho_turn_context(content, turn_context: str):
"""Append Honcho recall to the current-turn user message without mutating history.
The returned content is sent to the API for this turn only. Keeping Honcho
recall out of the system prompt preserves the stable cache prefix while
still giving the model continuity context.
"""
if not turn_context:
return content
note = (
"[System note: The following Honcho memory was retrieved from prior "
"sessions. It is continuity context for this turn only, not new user "
"input.]\n\n"
f"{turn_context}"
)
if isinstance(content, list):
return list(content) + [{"type": "text", "text": note}]
text = "" if content is None else str(content)
if not text.strip():
return note
return f"{text}\n\n{note}"
class AIAgent:
"""
AI Agent with tool calling capabilities.
@@ -3909,10 +3935,11 @@ class AIAgent:
# Honcho prefetch consumption:
# - First turn: bake into cached system prompt (stable for the session).
# - Later turns: inject as ephemeral system context for this API call only.
# - Later turns: attach recall to the current-turn user message at
# API-call time only (never persisted to history / session DB).
#
# This keeps the persisted/cached prompt stable while still allowing
# turn N to consume background prefetch results from turn N-1.
# This keeps the system-prefix cache stable while still allowing turn N
# to consume background prefetch results from turn N-1.
self._honcho_context = ""
self._honcho_turn_context = ""
_recall_mode = (self._honcho_config.recall_mode if self._honcho_config else "hybrid")
@@ -3930,6 +3957,7 @@ class AIAgent:
# Add user message
user_msg = {"role": "user", "content": user_message}
messages.append(user_msg)
current_turn_user_idx = len(messages) - 1
if not self.quiet_mode:
print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
@@ -4079,9 +4107,14 @@ class AIAgent:
# However, providers like Moonshot AI require a separate 'reasoning_content' field
# on assistant messages with tool_calls. We handle both cases here.
api_messages = []
for msg in messages:
for idx, msg in enumerate(messages):
api_msg = msg.copy()
if idx == current_turn_user_idx and msg.get("role") == "user" and self._honcho_turn_context:
api_msg["content"] = _inject_honcho_turn_context(
api_msg.get("content", ""), self._honcho_turn_context
)
# For ALL assistant messages, pass reasoning back to the API
# This ensures multi-turn reasoning context is preserved
if msg.get("role") == "assistant":
@@ -4109,11 +4142,11 @@ class AIAgent:
# Build the final system message: cached prompt + ephemeral system prompt.
# Ephemeral additions are API-call-time only (not persisted to session DB).
# Honcho later-turn recall is intentionally kept OUT of the system prompt
# so the stable cache prefix remains unchanged.
effective_system = active_system_prompt or ""
if self.ephemeral_system_prompt:
effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
if self._honcho_turn_context:
effective_system = (effective_system + "\n\n" + self._honcho_turn_context).strip()
if effective_system:
api_messages = [{"role": "system", "content": effective_system}] + api_messages

View File

@@ -14,7 +14,7 @@ from unittest.mock import MagicMock, patch
import pytest
from honcho_integration.client import HonchoClientConfig
from run_agent import AIAgent
from run_agent import AIAgent, _inject_honcho_turn_context
from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
@@ -1441,6 +1441,53 @@ class TestSystemPromptStability:
should_prefetch = bool(conversation_history) and recall_mode != "tools"
assert should_prefetch is True
def test_inject_honcho_turn_context_appends_system_note(self):
    """The helper should merge the user text with a clearly labeled recall note."""
    merged = _inject_honcho_turn_context("hello", "## Honcho Memory\nprior context")
    # Original user text, the provenance label, and the recall body must all
    # survive in the combined content sent to the API.
    for fragment in (
        "hello",
        "Honcho memory was retrieved from prior sessions",
        "## Honcho Memory",
    ):
        assert fragment in merged
def test_honcho_continuing_session_keeps_turn_context_out_of_system_prompt(self, agent):
    """Later-turn Honcho recall must ride on the user message, never the system prompt."""
    seen_kwargs = {}

    def _capture_api_call(api_kwargs):
        # Record exactly what would be sent to the provider for inspection below.
        seen_kwargs.update(api_kwargs)
        return _mock_response(content="done", finish_reason="stop")

    # Simulate a continuing session with an active Honcho integration.
    agent._honcho = object()
    agent._honcho_session_key = "session-1"
    agent._honcho_config = SimpleNamespace(
        ai_peer="hermes",
        memory_mode="hybrid",
        write_frequency="async",
        recall_mode="hybrid",
    )
    agent._use_prompt_caching = False
    history = [
        {"role": "user", "content": "hello"},
        {"role": "assistant", "content": "hi there"},
    ]
    with (
        patch.object(agent, "_honcho_prefetch", return_value="## Honcho Memory\nprior context"),
        patch.object(agent, "_queue_honcho_prefetch"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
        patch.object(agent, "_interruptible_api_call", side_effect=_capture_api_call),
    ):
        result = agent.run_conversation(
            "what were we doing?", conversation_history=history
        )

    assert result["completed"] is True
    sent = seen_kwargs["messages"]
    system_msg, turn_msg = sent[0], sent[-1]
    # Recall stays out of the cached system prefix...
    assert system_msg["role"] == "system"
    assert "prior context" not in system_msg["content"]
    # ...and is attached to the current-turn user message instead.
    assert turn_msg["role"] == "user"
    assert "what were we doing?" in turn_msg["content"]
    assert "prior context" in turn_msg["content"]
    assert "Honcho memory was retrieved from prior sessions" in turn_msg["content"]
def test_honcho_prefetch_runs_on_first_turn(self):
"""Honcho prefetch should run when conversation_history is empty."""
conversation_history = []