diff --git a/run_agent.py b/run_agent.py index 5a69a3870..0d7037c42 100644 --- a/run_agent.py +++ b/run_agent.py @@ -202,6 +202,32 @@ _NEVER_PARALLEL_TOOLS = frozenset({"clarify"}) _MAX_TOOL_WORKERS = 8 +def _inject_honcho_turn_context(content, turn_context: str): + """Append Honcho recall to the current-turn user message without mutating history. + + The returned content is sent to the API for this turn only. Keeping Honcho + recall out of the system prompt preserves the stable cache prefix while + still giving the model continuity context. + """ + if not turn_context: + return content + + note = ( + "[System note: The following Honcho memory was retrieved from prior " + "sessions. It is continuity context for this turn only, not new user " + "input.]\n\n" + f"{turn_context}" + ) + + if isinstance(content, list): + return list(content) + [{"type": "text", "text": note}] + + text = "" if content is None else str(content) + if not text.strip(): + return note + return f"{text}\n\n{note}" + + class AIAgent: """ AI Agent with tool calling capabilities. @@ -3909,10 +3935,11 @@ class AIAgent: # Honcho prefetch consumption: # - First turn: bake into cached system prompt (stable for the session). - # - Later turns: inject as ephemeral system context for this API call only. + # - Later turns: attach recall to the current-turn user message at + # API-call time only (never persisted to history / session DB). # - # This keeps the persisted/cached prompt stable while still allowing - # turn N to consume background prefetch results from turn N-1. + # This keeps the system-prefix cache stable while still allowing turn N + # to consume background prefetch results from turn N-1. 
self._honcho_context = "" self._honcho_turn_context = "" _recall_mode = (self._honcho_config.recall_mode if self._honcho_config else "hybrid") @@ -3930,6 +3957,7 @@ class AIAgent: # Add user message user_msg = {"role": "user", "content": user_message} messages.append(user_msg) + current_turn_user_idx = len(messages) - 1 if not self.quiet_mode: print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'") @@ -4079,9 +4107,14 @@ class AIAgent: # However, providers like Moonshot AI require a separate 'reasoning_content' field # on assistant messages with tool_calls. We handle both cases here. api_messages = [] - for msg in messages: + for idx, msg in enumerate(messages): api_msg = msg.copy() + if idx == current_turn_user_idx and msg.get("role") == "user" and self._honcho_turn_context: + api_msg["content"] = _inject_honcho_turn_context( + api_msg.get("content", ""), self._honcho_turn_context + ) + # For ALL assistant messages, pass reasoning back to the API # This ensures multi-turn reasoning context is preserved if msg.get("role") == "assistant": @@ -4109,11 +4142,11 @@ class AIAgent: # Build the final system message: cached prompt + ephemeral system prompt. # Ephemeral additions are API-call-time only (not persisted to session DB). + # Honcho later-turn recall is intentionally kept OUT of the system prompt + # so the stable cache prefix remains unchanged. 
def test_inject_honcho_turn_context_appends_system_note(self):
    """The helper keeps the user text and appends the recall note and memory."""
    merged = _inject_honcho_turn_context("hello", "## Honcho Memory\nprior context")
    expected_fragments = (
        "hello",
        "Honcho memory was retrieved from prior sessions",
        "## Honcho Memory",
    )
    for fragment in expected_fragments:
        assert fragment in merged


def test_honcho_continuing_session_keeps_turn_context_out_of_system_prompt(self, agent):
    """On a continuing session, recall rides on the user message, not the system prompt."""
    seen_kwargs = {}

    def _record_api_call(api_kwargs):
        # Remember what was sent to the API, then answer with a terminal reply.
        seen_kwargs.update(api_kwargs)
        return _mock_response(content="done", finish_reason="stop")

    prior_turns = [
        {"role": "user", "content": "hello"},
        {"role": "assistant", "content": "hi there"},
    ]

    agent._use_prompt_caching = False
    agent._honcho = object()
    agent._honcho_session_key = "session-1"
    agent._honcho_config = SimpleNamespace(
        ai_peer="hermes",
        memory_mode="hybrid",
        write_frequency="async",
        recall_mode="hybrid",
    )

    with (
        patch.object(agent, "_honcho_prefetch", return_value="## Honcho Memory\nprior context"),
        patch.object(agent, "_queue_honcho_prefetch"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
        patch.object(agent, "_interruptible_api_call", side_effect=_record_api_call),
    ):
        outcome = agent.run_conversation("what were we doing?", conversation_history=prior_turns)

    assert outcome["completed"] is True

    sent_messages = seen_kwargs["messages"]
    system_msg = sent_messages[0]
    last_msg = sent_messages[-1]

    # The system prompt must stay free of the per-turn recall.
    assert system_msg["role"] == "system"
    assert "prior context" not in system_msg["content"]

    # The recall is attached to the current-turn user message instead.
    assert last_msg["role"] == "user"
    assert "what were we doing?" in last_msg["content"]
    assert "prior context" in last_msg["content"]
    assert "Honcho memory was retrieved from prior sessions" in last_msg["content"]