diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index b8371f79e..f1dbcf758 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -71,15 +71,17 @@ DEFAULT_AGENT_IDENTITY = ( ) MEMORY_GUIDANCE = ( - "You have persistent memory across sessions. Proactively save important things " - "you learn (user preferences, environment details, useful approaches) and do " - "(like a diary!) using the memory tool -- don't wait to be asked." + "You have persistent memory across sessions. Save durable facts using the memory " + "tool: user preferences, environment details, tool quirks, and stable conventions. " + "Memory is injected into every turn, so keep it compact. Do NOT save task progress, " + "session outcomes, or completed-work logs to memory; use session_search to recall " + "those from past transcripts." ) SESSION_SEARCH_GUIDANCE = ( "When the user references something from a past conversation or you suspect " - "relevant prior context exists, use session_search to recall it before asking " - "them to repeat themselves." + "relevant cross-session context exists, use session_search to recall it before " + "asking them to repeat themselves." 
) SKILLS_GUIDANCE = ( diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index 4a09b4a9f..b5c10bee6 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -15,10 +15,30 @@ from agent.prompt_builder import ( build_context_files_prompt, CONTEXT_FILE_MAX_CHARS, DEFAULT_AGENT_IDENTITY, + MEMORY_GUIDANCE, + SESSION_SEARCH_GUIDANCE, PLATFORM_HINTS, ) +# ========================================================================= +# Guidance constants +# ========================================================================= + + +class TestGuidanceConstants: + def test_memory_guidance_discourages_task_logs(self): + assert "durable facts" in MEMORY_GUIDANCE + assert "Do NOT save task progress" in MEMORY_GUIDANCE + assert "session_search" in MEMORY_GUIDANCE + assert "like a diary" not in MEMORY_GUIDANCE + assert ">80%" not in MEMORY_GUIDANCE + + def test_session_search_guidance_is_simple_cross_session_recall(self): + assert "relevant cross-session context exists" in SESSION_SEARCH_GUIDANCE + assert "recent turns of the current session" not in SESSION_SEARCH_GUIDANCE + + # ========================================================================= # Context injection scanning # ========================================================================= diff --git a/tests/tools/test_memory_tool.py b/tests/tools/test_memory_tool.py index 0ed3b12e6..48cb6a83c 100644 --- a/tests/tools/test_memory_tool.py +++ b/tests/tools/test_memory_tool.py @@ -9,9 +9,24 @@ from tools.memory_tool import ( memory_tool, _scan_memory_content, ENTRY_DELIMITER, + MEMORY_SCHEMA, ) +# ========================================================================= +# Tool schema guidance +# ========================================================================= + +class TestMemorySchema: + def test_discourages_diary_style_task_logs(self): + description = MEMORY_SCHEMA["description"] + assert "Do NOT save task progress" in description + 
assert "session_search" in description + assert "like a diary" not in description + assert "temporary task state" in description + assert ">80%" not in description + + # ========================================================================= # Security scanning # ========================================================================= diff --git a/tests/tools/test_session_search.py b/tests/tools/test_session_search.py index c36247148..0d7414764 100644 --- a/tests/tools/test_session_search.py +++ b/tests/tools/test_session_search.py @@ -9,9 +9,21 @@ from tools.session_search_tool import ( _format_conversation, _truncate_around_matches, MAX_SESSION_CHARS, + SESSION_SEARCH_SCHEMA, ) +# ========================================================================= +# Tool schema guidance +# ========================================================================= + +class TestSessionSearchSchema: + def test_keeps_cross_session_recall_guidance_without_current_session_nudge(self): + description = SESSION_SEARCH_SCHEMA["description"] + assert "past conversations" in description + assert "recent turns of the current session" not in description + + # ========================================================================= # _format_timestamp # ========================================================================= diff --git a/tools/memory_tool.py b/tools/memory_tool.py index 2ce763124..b921a84f7 100644 --- a/tools/memory_tool.py +++ b/tools/memory_tool.py @@ -435,24 +435,25 @@ def check_memory_requirements() -> bool: MEMORY_SCHEMA = { "name": "memory", "description": ( - "Save important information to persistent memory that survives across sessions. " - "Your memory appears in your system prompt at session start -- it's how you " - "remember things about the user and your environment between conversations.\n\n" + "Save durable information to persistent memory that survives across sessions. 
" + "Memory is injected into future turns, so keep it compact and focused on facts " + "that will still matter later.\n\n" "WHEN TO SAVE (do this proactively, don't wait to be asked):\n" "- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n" "- You discover something about the environment (OS, installed tools, project structure)\n" "- User corrects you or says 'remember this' / 'don't do that again'\n" "- You learn a convention, API quirk, or workflow specific to this user's setup\n" - "- You completed something - log it like a diary entry\n" - "- After completing a complex task, save a brief note about what was done\n\n" - "- If you've discovered a new way to do something, solved a problem that could be necessary later, save it as a skill with the skill tool\n\n" + "- You identify a stable fact that will be useful again in future sessions\n\n" + "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO " + "state to memory; use session_search to recall those from past transcripts.\n" + "If you've discovered a new way to do something, solved a problem that could be " + "necessary later, save it as a skill with the skill tool.\n\n" "TWO TARGETS:\n" "- 'user': who the user is -- name, role, preferences, communication style, pet peeves\n" "- 'memory': your notes -- environment facts, project conventions, tool quirks, lessons learned\n\n" "ACTIONS: add (new entry), replace (update existing -- old_text identifies it), " - "remove (delete -- old_text identifies it).\n" - "Capacity shown in system prompt. When >80%, consolidate entries before adding new ones.\n\n" - "SKIP: trivial/obvious info, things easily re-discovered, raw data dumps." + "remove (delete -- old_text identifies it).\n\n" + "SKIP: trivial/obvious info, things easily re-discovered, raw data dumps, and temporary task state." 
), "parameters": { "type": "object", diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index cd1b98fd5..f4143fa12 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -341,8 +341,8 @@ SESSION_SEARCH_SCHEMA = { "- The user references a project, person, or concept that seems familiar but isn't in memory\n" "- You want to check if you've solved a similar problem before\n" "- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n" - "Don't hesitate to search -- it's fast and cheap. Better to search and confirm " - "than to guess or ask the user to repeat themselves.\n\n" + "Don't hesitate to search when the context you need is actually cross-session -- it's fast and cheap. " + "Better to search and confirm than to guess or ask the user to repeat themselves.\n\n" "Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), " "phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). " "IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "