Merge pull request #1329 from NousResearch/hermes/hermes-2f2b4807

fix: tighten memory and session recall guidance
2026-03-14 11:38:54 -07:00
parent 80a243efe6 5319bb6ac4
commit 1114841a2c
6 changed files with 66 additions and 16 deletions
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -71,15 +71,17 @@ DEFAULT_AGENT_IDENTITY = (
 )

 MEMORY_GUIDANCE = (
-    "You have persistent memory across sessions. Proactively save important things "
-    "you learn (user preferences, environment details, useful approaches) and do "
-    "(like a diary!) using the memory tool -- don't wait to be asked."
+    "You have persistent memory across sessions. Save durable facts using the memory "
+    "tool: user preferences, environment details, tool quirks, and stable conventions. "
+    "Memory is injected into every turn, so keep it compact. Do NOT save task progress, "
+    "session outcomes, or completed-work logs to memory; use session_search to recall "
+    "those from past transcripts."
 )

 SESSION_SEARCH_GUIDANCE = (
    "When the user references something from a past conversation or you suspect "
-    "relevant prior context exists, use session_search to recall it before asking "
-    "them to repeat themselves."
+    "relevant cross-session context exists, use session_search to recall it before "
+    "asking them to repeat themselves."
 )

 SKILLS_GUIDANCE = (
--- a/tests/agent/test_prompt_builder.py
+++ b/tests/agent/test_prompt_builder.py
@@ -15,10 +15,30 @@ from agent.prompt_builder import (
    build_context_files_prompt,
    CONTEXT_FILE_MAX_CHARS,
    DEFAULT_AGENT_IDENTITY,
+    MEMORY_GUIDANCE,
+    SESSION_SEARCH_GUIDANCE,
    PLATFORM_HINTS,
 )


+# =========================================================================
+# Guidance constants
+# =========================================================================
+
+
+class TestGuidanceConstants:
+    def test_memory_guidance_discourages_task_logs(self):
+        assert "durable facts" in MEMORY_GUIDANCE
+        assert "Do NOT save task progress" in MEMORY_GUIDANCE
+        assert "session_search" in MEMORY_GUIDANCE
+        assert "like a diary" not in MEMORY_GUIDANCE
+        assert ">80%" not in MEMORY_GUIDANCE
+
+    def test_session_search_guidance_is_simple_cross_session_recall(self):
+        assert "relevant cross-session context exists" in SESSION_SEARCH_GUIDANCE
+        assert "recent turns of the current session" not in SESSION_SEARCH_GUIDANCE
+
+
 # =========================================================================
 # Context injection scanning
 # =========================================================================
--- a/tests/tools/test_memory_tool.py
+++ b/tests/tools/test_memory_tool.py
@@ -9,9 +9,24 @@ from tools.memory_tool import (
    memory_tool,
    _scan_memory_content,
    ENTRY_DELIMITER,
+    MEMORY_SCHEMA,
 )


+# =========================================================================
+# Tool schema guidance
+# =========================================================================
+
+class TestMemorySchema:
+    def test_discourages_diary_style_task_logs(self):
+        description = MEMORY_SCHEMA["description"]
+        assert "Do NOT save task progress" in description
+        assert "session_search" in description
+        assert "like a diary" not in description
+        assert "temporary task state" in description
+        assert ">80%" not in description
+
+
 # =========================================================================
 # Security scanning
 # =========================================================================
--- a/tests/tools/test_session_search.py
+++ b/tests/tools/test_session_search.py
@@ -9,9 +9,21 @@ from tools.session_search_tool import (
    _format_conversation,
    _truncate_around_matches,
    MAX_SESSION_CHARS,
+    SESSION_SEARCH_SCHEMA,
 )


+# =========================================================================
+# Tool schema guidance
+# =========================================================================
+
+class TestSessionSearchSchema:
+    def test_keeps_cross_session_recall_guidance_without_current_session_nudge(self):
+        description = SESSION_SEARCH_SCHEMA["description"]
+        assert "past conversations" in description
+        assert "recent turns of the current session" not in description
+
+
 # =========================================================================
 # _format_timestamp
 # =========================================================================
--- a/tools/memory_tool.py
+++ b/tools/memory_tool.py
@@ -435,24 +435,25 @@ def check_memory_requirements() -> bool:
 MEMORY_SCHEMA = {
    "name": "memory",
    "description": (
-        "Save important information to persistent memory that survives across sessions. "
-        "Your memory appears in your system prompt at session start -- it's how you "
-        "remember things about the user and your environment between conversations.\n\n"
+        "Save durable information to persistent memory that survives across sessions. "
+        "Memory is injected into future turns, so keep it compact and focused on facts "
+        "that will still matter later.\n\n"
        "WHEN TO SAVE (do this proactively, don't wait to be asked):\n"
        "- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n"
        "- You discover something about the environment (OS, installed tools, project structure)\n"
        "- User corrects you or says 'remember this' / 'don't do that again'\n"
        "- You learn a convention, API quirk, or workflow specific to this user's setup\n"
-        "- You completed something - log it like a diary entry\n"
-        "- After completing a complex task, save a brief note about what was done\n\n"
-        "- If you've discovered a new way to do something, solved a problem that could be necessary later, save it as a skill with the skill tool\n\n"
+        "- You identify a stable fact that will be useful again in future sessions\n\n"
+        "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
+        "state to memory; use session_search to recall those from past transcripts.\n"
+        "If you've discovered a new way to do something, solved a problem that could be "
+        "necessary later, save it as a skill with the skill tool.\n\n"
        "TWO TARGETS:\n"
        "- 'user': who the user is -- name, role, preferences, communication style, pet peeves\n"
        "- 'memory': your notes -- environment facts, project conventions, tool quirks, lessons learned\n\n"
        "ACTIONS: add (new entry), replace (update existing -- old_text identifies it), "
-        "remove (delete -- old_text identifies it).\n"
-        "Capacity shown in system prompt. When >80%, consolidate entries before adding new ones.\n\n"
-        "SKIP: trivial/obvious info, things easily re-discovered, raw data dumps."
+        "remove (delete -- old_text identifies it).\n\n"
+        "SKIP: trivial/obvious info, things easily re-discovered, raw data dumps, and temporary task state."
    ),
    "parameters": {
        "type": "object",
--- a/tools/session_search_tool.py
+++ b/tools/session_search_tool.py
@@ -341,8 +341,8 @@ SESSION_SEARCH_SCHEMA = {
        "- The user references a project, person, or concept that seems familiar but isn't in memory\n"
        "- You want to check if you've solved a similar problem before\n"
        "- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n"
-        "Don't hesitate to search -- it's fast and cheap. Better to search and confirm "
-        "than to guess or ask the user to repeat themselves.\n\n"
+        "Don't hesitate to search when it is actually cross-session -- it's fast and cheap. "
+        "Better to search and confirm than to guess or ask the user to repeat themselves.\n\n"
        "Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
        "phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
        "IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "