fix: tighten memory and session recall guidance

Remove diary-style memory framing from the system prompt and memory tool
schema, explicitly steer task/session logs to session_search, and clarify
that session_search is for cross-session recall after checking the current
conversation first. Add regression tests for the updated guidance text.
This commit is contained in:
teknium1
2026-03-14 11:26:18 -07:00
parent 6d8286f396
commit 5319bb6ac4
6 changed files with 66 additions and 16 deletions

View File

@@ -71,15 +71,17 @@ DEFAULT_AGENT_IDENTITY = (
)
MEMORY_GUIDANCE = (
"You have persistent memory across sessions. Proactively save important things "
"you learn (user preferences, environment details, useful approaches) and do "
"(like a diary!) using the memory tool -- don't wait to be asked."
"You have persistent memory across sessions. Save durable facts using the memory "
"tool: user preferences, environment details, tool quirks, and stable conventions. "
"Memory is injected into every turn, so keep it compact. Do NOT save task progress, "
"session outcomes, or completed-work logs to memory; use session_search to recall "
"those from past transcripts."
)
SESSION_SEARCH_GUIDANCE = (
"When the user references something from a past conversation or you suspect "
"relevant prior context exists, use session_search to recall it before asking "
"them to repeat themselves."
"relevant cross-session context exists, use session_search to recall it before "
"asking them to repeat themselves."
)
SKILLS_GUIDANCE = (

View File

@@ -15,10 +15,30 @@ from agent.prompt_builder import (
build_context_files_prompt,
CONTEXT_FILE_MAX_CHARS,
DEFAULT_AGENT_IDENTITY,
MEMORY_GUIDANCE,
SESSION_SEARCH_GUIDANCE,
PLATFORM_HINTS,
)
# =========================================================================
# Guidance constants
# =========================================================================
class TestGuidanceConstants:
def test_memory_guidance_discourages_task_logs(self):
assert "durable facts" in MEMORY_GUIDANCE
assert "Do NOT save task progress" in MEMORY_GUIDANCE
assert "session_search" in MEMORY_GUIDANCE
assert "like a diary" not in MEMORY_GUIDANCE
assert ">80%" not in MEMORY_GUIDANCE
def test_session_search_guidance_is_simple_cross_session_recall(self):
assert "relevant cross-session context exists" in SESSION_SEARCH_GUIDANCE
assert "recent turns of the current session" not in SESSION_SEARCH_GUIDANCE
# =========================================================================
# Context injection scanning
# =========================================================================

View File

@@ -9,9 +9,24 @@ from tools.memory_tool import (
memory_tool,
_scan_memory_content,
ENTRY_DELIMITER,
MEMORY_SCHEMA,
)
# =========================================================================
# Tool schema guidance
# =========================================================================
class TestMemorySchema:
def test_discourages_diary_style_task_logs(self):
description = MEMORY_SCHEMA["description"]
assert "Do NOT save task progress" in description
assert "session_search" in description
assert "like a diary" not in description
assert "temporary task state" in description
assert ">80%" not in description
# =========================================================================
# Security scanning
# =========================================================================

View File

@@ -9,9 +9,21 @@ from tools.session_search_tool import (
_format_conversation,
_truncate_around_matches,
MAX_SESSION_CHARS,
SESSION_SEARCH_SCHEMA,
)
# =========================================================================
# Tool schema guidance
# =========================================================================
class TestSessionSearchSchema:
def test_keeps_cross_session_recall_guidance_without_current_session_nudge(self):
description = SESSION_SEARCH_SCHEMA["description"]
assert "past conversations" in description
assert "recent turns of the current session" not in description
# =========================================================================
# _format_timestamp
# =========================================================================

View File

@@ -435,24 +435,25 @@ def check_memory_requirements() -> bool:
MEMORY_SCHEMA = {
"name": "memory",
"description": (
"Save important information to persistent memory that survives across sessions. "
"Your memory appears in your system prompt at session start -- it's how you "
"remember things about the user and your environment between conversations.\n\n"
"Save durable information to persistent memory that survives across sessions. "
"Memory is injected into future turns, so keep it compact and focused on facts "
"that will still matter later.\n\n"
"WHEN TO SAVE (do this proactively, don't wait to be asked):\n"
"- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n"
"- You discover something about the environment (OS, installed tools, project structure)\n"
"- User corrects you or says 'remember this' / 'don't do that again'\n"
"- You learn a convention, API quirk, or workflow specific to this user's setup\n"
"- You completed something - log it like a diary entry\n"
"- After completing a complex task, save a brief note about what was done\n\n"
"- If you've discovered a new way to do something, solved a problem that could be necessary later, save it as a skill with the skill tool\n\n"
"- You identify a stable fact that will be useful again in future sessions\n\n"
"Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
"state to memory; use session_search to recall those from past transcripts.\n"
"If you've discovered a new way to do something, solved a problem that could be "
"necessary later, save it as a skill with the skill tool.\n\n"
"TWO TARGETS:\n"
"- 'user': who the user is -- name, role, preferences, communication style, pet peeves\n"
"- 'memory': your notes -- environment facts, project conventions, tool quirks, lessons learned\n\n"
"ACTIONS: add (new entry), replace (update existing -- old_text identifies it), "
"remove (delete -- old_text identifies it).\n"
"Capacity shown in system prompt. When >80%, consolidate entries before adding new ones.\n\n"
"SKIP: trivial/obvious info, things easily re-discovered, raw data dumps."
"remove (delete -- old_text identifies it).\n\n"
"SKIP: trivial/obvious info, things easily re-discovered, raw data dumps, and temporary task state."
),
"parameters": {
"type": "object",

View File

@@ -341,8 +341,8 @@ SESSION_SEARCH_SCHEMA = {
"- The user references a project, person, or concept that seems familiar but isn't in memory\n"
"- You want to check if you've solved a similar problem before\n"
"- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n"
"Don't hesitate to search -- it's fast and cheap. Better to search and confirm "
"than to guess or ask the user to repeat themselves.\n\n"
"Don't hesitate to search when it is actually cross-session -- it's fast and cheap. "
"Better to search and confirm than to guess or ask the user to repeat themselves.\n\n"
"Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
"phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
"IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "