diff --git a/run_agent.py b/run_agent.py
index f6b3730ec..04522912b 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3092,9 +3092,14 @@ class AIAgent:
             )
             self._iters_since_skill = 0
 
-        # Honcho prefetch: retrieve user context for system prompt injection
+        # Honcho prefetch: retrieve user context for system prompt injection.
+        # Only on the FIRST turn of a session (empty history). On subsequent
+        # turns the model already has all prior context in its conversation
+        # history, and the Honcho context is baked into the stored system
+        # prompt — re-fetching it would change the system message and break
+        # Anthropic prompt caching.
         self._honcho_context = ""
-        if self._honcho and self._honcho_session_key:
+        if self._honcho and self._honcho_session_key and not conversation_history:
             try:
                 self._honcho_context = self._honcho_prefetch(user_message)
             except Exception as e:
@@ -3112,14 +3117,42 @@ class AIAgent:
         # Built once on first call, reused for all subsequent calls.
         # Only rebuilt after context compression events (which invalidate
         # the cache and reload memory from disk).
+        #
+        # For continuing sessions (gateway creates a fresh AIAgent per
+        # message), we load the stored system prompt from the session DB
+        # instead of rebuilding. Rebuilding would pick up memory changes
+        # from disk that the model already knows about (it wrote them!),
+        # producing a different system prompt and breaking the Anthropic
+        # prefix cache.
         if self._cached_system_prompt is None:
-            self._cached_system_prompt = self._build_system_prompt(system_message)
-            # Store the system prompt snapshot in SQLite
-            if self._session_db:
+            stored_prompt = None
+            if conversation_history and self._session_db:
                 try:
-                    self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt)
-                except Exception as e:
-                    logger.debug("Session DB update_system_prompt failed: %s", e)
+                    session_row = self._session_db.get_session(self.session_id)
+                    if session_row:
+                        stored_prompt = session_row.get("system_prompt") or None
+                except Exception as e:
+                    logger.debug("Session DB get_session failed; rebuilding system prompt: %s", e)
+
+            if stored_prompt:
+                # Continuing session — reuse the exact system prompt from
+                # the previous turn so the Anthropic cache prefix matches.
+                self._cached_system_prompt = stored_prompt
+            else:
+                # First turn of a new session — build from scratch.
+                self._cached_system_prompt = self._build_system_prompt(system_message)
+                # Bake Honcho context into the prompt so it's stable for
+                # the entire session (not re-fetched per turn).
+                if self._honcho_context:
+                    self._cached_system_prompt = (
+                        self._cached_system_prompt + "\n\n" + self._honcho_context
+                    ).strip()
+                # Store the system prompt snapshot in SQLite
+                if self._session_db:
+                    try:
+                        self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt)
+                    except Exception as e:
+                        logger.debug("Session DB update_system_prompt failed: %s", e)
 
         active_system_prompt = self._cached_system_prompt
 
@@ -3244,11 +3277,13 @@ class AIAgent:
             # Build the final system message: cached prompt + ephemeral system prompt.
             # The ephemeral part is appended here (not baked into the cached prompt)
             # so it stays out of the session DB and logs.
+            # Note: Honcho context is baked into _cached_system_prompt on the first
+            # turn and stored in the session DB, so it does NOT need to be injected
+            # here. This keeps the system message identical across all turns in a
+            # session, maximizing Anthropic prompt cache hits.
             effective_system = active_system_prompt or ""
             if self.ephemeral_system_prompt:
                 effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
-            if self._honcho_context:
-                effective_system = (effective_system + "\n\n" + self._honcho_context).strip()
             if effective_system:
                 api_messages = [{"role": "system", "content": effective_system}] + api_messages
 
diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py
index 55f96f942..64de980d5 100644
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -1040,3 +1040,136 @@ class TestMaxTokensParam:
         agent.base_url = "https://openrouter.ai/api/v1/api.openai.com"
         result = agent._max_tokens_param(4096)
         assert result == {"max_tokens": 4096}
+
+
+# ---------------------------------------------------------------------------
+# System prompt stability for prompt caching
+# ---------------------------------------------------------------------------
+
+class TestSystemPromptStability:
+    """Verify that the system prompt stays stable across turns for cache hits."""
+
+    def test_stored_prompt_reused_for_continuing_session(self, agent):
+        """When conversation_history is non-empty and session DB has a stored
+        prompt, it should be reused instead of rebuilding from disk."""
+        stored = "You are helpful. [stored from turn 1]"
+        mock_db = MagicMock()
+        mock_db.get_session.return_value = {"system_prompt": stored}
+        agent._session_db = mock_db
+
+        # Simulate a continuing session with history
+        history = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+        ]
+
+        # First call — _cached_system_prompt is None, history is non-empty
+        agent._cached_system_prompt = None
+
+        # Patch run_conversation internals to just test the system prompt logic.
+        # We'll call the prompt caching block directly by simulating what
+        # run_conversation does.
+        conversation_history = history
+
+        # The block under test (from run_conversation):
+        if agent._cached_system_prompt is None:
+            stored_prompt = None
+            if conversation_history and agent._session_db:
+                try:
+                    session_row = agent._session_db.get_session(agent.session_id)
+                    if session_row:
+                        stored_prompt = session_row.get("system_prompt") or None
+                except Exception:
+                    pass
+
+            if stored_prompt:
+                agent._cached_system_prompt = stored_prompt
+
+        assert agent._cached_system_prompt == stored
+        mock_db.get_session.assert_called_once_with(agent.session_id)
+
+    def test_fresh_build_when_no_history(self, agent):
+        """On the first turn (no history), system prompt should be built fresh."""
+        mock_db = MagicMock()
+        agent._session_db = mock_db
+
+        agent._cached_system_prompt = None
+        conversation_history = []
+
+        # The block under test:
+        if agent._cached_system_prompt is None:
+            stored_prompt = None
+            if conversation_history and agent._session_db:
+                session_row = agent._session_db.get_session(agent.session_id)
+                if session_row:
+                    stored_prompt = session_row.get("system_prompt") or None
+
+            if stored_prompt:
+                agent._cached_system_prompt = stored_prompt
+            else:
+                agent._cached_system_prompt = agent._build_system_prompt()
+
+        # Should have built fresh, not queried the DB
+        mock_db.get_session.assert_not_called()
+        assert agent._cached_system_prompt is not None
+        assert "Hermes Agent" in agent._cached_system_prompt
+
+    def test_fresh_build_when_db_has_no_prompt(self, agent):
+        """If the session DB has no stored prompt, build fresh even with history."""
+        mock_db = MagicMock()
+        mock_db.get_session.return_value = {"system_prompt": ""}
+        agent._session_db = mock_db
+
+        agent._cached_system_prompt = None
+        conversation_history = [{"role": "user", "content": "hi"}]
+
+        if agent._cached_system_prompt is None:
+            stored_prompt = None
+            if conversation_history and agent._session_db:
+                try:
+                    session_row = agent._session_db.get_session(agent.session_id)
+                    if session_row:
+                        stored_prompt = session_row.get("system_prompt") or None
+                except Exception:
+                    pass
+
+            if stored_prompt:
+                agent._cached_system_prompt = stored_prompt
+            else:
+                agent._cached_system_prompt = agent._build_system_prompt()
+
+        # Empty string is falsy, so should fall through to fresh build
+        assert "Hermes Agent" in agent._cached_system_prompt
+
+    def test_honcho_context_baked_into_prompt_on_first_turn(self, agent):
+        """Honcho context should be baked into _cached_system_prompt on
+        the first turn, not injected separately per API call."""
+        agent._honcho_context = "User prefers Python over JavaScript."
+        agent._cached_system_prompt = None
+
+        # Simulate first turn: build fresh and bake in Honcho
+        agent._cached_system_prompt = agent._build_system_prompt()
+        if agent._honcho_context:
+            agent._cached_system_prompt = (
+                agent._cached_system_prompt + "\n\n" + agent._honcho_context
+            ).strip()
+
+        assert "User prefers Python over JavaScript" in agent._cached_system_prompt
+
+    def test_honcho_prefetch_skipped_on_continuing_session(self):
+        """Honcho prefetch should not be called when conversation_history
+        is non-empty (continuing session)."""
+        conversation_history = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi there"},
+        ]
+
+        # The guard: `not conversation_history` is False when history exists
+        should_prefetch = not conversation_history
+        assert should_prefetch is False
+
+    def test_honcho_prefetch_runs_on_first_turn(self):
+        """Honcho prefetch should run when conversation_history is empty."""
+        conversation_history = []
+        should_prefetch = not conversation_history
+        assert should_prefetch is True