From 0aed9bfde1d9d0204f54c6a6defc842ff6e43385 Mon Sep 17 00:00:00 2001
From: Erosika <eri@plasticlabs.ai>
Date: Thu, 12 Mar 2026 16:26:10 -0400
Subject: [PATCH] refactor(honcho): rename memory tools to Honcho tools,
 clarify recall mode language

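Replace "memory tools" with "Honcho tools" and "pre-warmed"/"prefetch"
wording with "auto-injected context" (or plain "fetch"/"cached" where the
pipeline itself is described) in user-facing strings, code comments, and
docs. Only string literals, comments, and documentation change.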
---
 honcho_integration/cli.py                  | 12 ++++++------
 honcho_integration/client.py               |  6 +++---
 run_agent.py                               | 14 +++++++-------
 website/docs/user-guide/features/honcho.md | 20 ++++++++++----------
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py
index 0af9923f0..9526b1a14 100644
--- a/honcho_integration/cli.py
+++ b/honcho_integration/cli.py
@@ -153,9 +153,9 @@ def cmd_setup(args) -> None:
     # Recall mode
     current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid")
     print(f"\n  Recall mode options:")
-    print("    hybrid  — pre-warmed context + memory tools available (default)")
-    print("    context — pre-warmed context only, memory tools suppressed")
-    print("    tools   — no pre-loaded context, rely on tool calls only")
+    print("    hybrid  — auto-injected context + Honcho tools available (default)")
+    print("    context — auto-injected context only, Honcho tools hidden")
+    print("    tools   — Honcho tools only, no auto-injected context")
     new_recall = _prompt("Recall mode", default=current_recall)
     if new_recall in ("hybrid", "context", "tools"):
         hermes_host["recallMode"] = new_recall
@@ -199,7 +199,7 @@ def cmd_setup(args) -> None:
         _mode_str = f"{hcfg.memory_mode}  (peers: {overrides})"
     print(f"  Mode:      {_mode_str}")
     print(f"  Frequency: {hcfg.write_frequency}")
-    print(f"\n  Tools available in chat:")
+    print(f"\n  Honcho tools available in chat:")
     print(f"    honcho_context  — ask Honcho a question about you (LLM-synthesized)")
     print(f"    honcho_search       — semantic search over your history (no LLM)")
     print(f"    honcho_profile      — your peer card, key facts (no LLM)")
@@ -702,7 +702,7 @@ def cmd_migrate(args) -> None:
     print()
     print("  Context injection")
     print("    OpenClaw: file excerpts injected synchronously before each LLM call.")
-    print("    Hermes:   Honcho context prefetched async at turn end, injected next turn.")
+    print("    Hermes:   Honcho context fetched async at turn end, injected next turn.")
     print("              First turn has no Honcho context; subsequent turns are loaded.")
     print()
     print("  Memory growth")
@@ -710,7 +710,7 @@ def cmd_migrate(args) -> None:
     print("    Hermes:   Honcho observes every message and updates representations")
     print("              automatically. Files become the seed, not the live store.")
     print()
-    print("  Tool surface (available to the agent during conversation)")
+    print("  Honcho tools (available to the agent during conversation)")
     print("    honcho_context   — ask Honcho a question, get a synthesized answer (LLM)")
     print("    honcho_search        — semantic search over stored context (no LLM)")
     print("    honcho_profile       — fast peer card snapshot (no LLM)")
diff --git a/honcho_integration/client.py b/honcho_integration/client.py
index 04ee946e8..446176bce 100644
--- a/honcho_integration/client.py
+++ b/honcho_integration/client.py
@@ -90,9 +90,9 @@ class HonchoClientConfig:
     # Max chars of dialectic result to inject into Hermes system prompt
     dialectic_max_chars: int = 600
     # Recall mode: how memory retrieval works when Honcho is active.
-    # "hybrid"  — pre-warmed context + memory tools available (model decides)
-    # "context" — pre-warmed context only, honcho memory tools removed
-    # "tools"   — no pre-loaded context, rely on tool calls only
+    # "hybrid"  — auto-injected context + Honcho tools available (model decides)
+    # "context" — auto-injected context only, Honcho tools hidden
+    # "tools"   — Honcho tools only, no auto-injected context
     recall_mode: str = "hybrid"
     # Session resolution
     session_strategy: str = "per-session"
diff --git a/run_agent.py b/run_agent.py
index 78f68c9dd..61d123201 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1423,7 +1423,7 @@ class AIAgent:
         if hcfg.recall_mode == "context":
             self._strip_honcho_tools_from_surface()
             if not self.quiet_mode:
-                print("  Honcho active — recall_mode: context (tools suppressed)")
+                print("  Honcho active — recall_mode: context (Honcho tools hidden)")
         else:
             if not self.quiet_mode:
                 print(f"  Honcho active — recall_mode: {hcfg.recall_mode}")
@@ -1617,14 +1617,14 @@ class AIAgent:
             )
             if recall_mode == "context":
                 honcho_block += (
-                    "Honcho context is pre-loaded into this system prompt below. "
-                    "All memory retrieval comes from this context — no memory tools "
+                    "Honcho context is injected into this system prompt below. "
+                    "All memory retrieval comes from this context — no Honcho tools "
                     "are available. Answer questions about the user, prior sessions, "
                     "and recent work directly from the Honcho Memory section.\n"
                 )
             elif recall_mode == "tools":
                 honcho_block += (
-                    "Memory tools:\n"
+                    "Honcho tools:\n"
                     "  honcho_context <question>           — ask Honcho a question, LLM-synthesized answer\n"
                     "  honcho_search <query>                   — semantic search, raw excerpts, no LLM\n"
                     "  honcho_profile                          — user's peer card, key facts, no LLM\n"
@@ -1633,11 +1633,11 @@ class AIAgent:
             else:  # hybrid
                 honcho_block += (
                     "Honcho context (user representation, peer card, and recent session summary) "
-                    "is pre-loaded into this system prompt below. Use it to answer continuity "
+                    "is injected into this system prompt below. Use it to answer continuity "
                     "questions ('where were we?', 'what were we working on?') WITHOUT calling "
-                    "any tools. Only call memory tools when you need information beyond what is "
+                    "any tools. Only call Honcho tools when you need information beyond what is "
                     "already present in the Honcho Memory section.\n"
-                    "Memory tools:\n"
+                    "Honcho tools:\n"
                     "  honcho_context <question>           — ask Honcho a question, LLM-synthesized answer\n"
                     "  honcho_search <query>                   — semantic search, raw excerpts, no LLM\n"
                     "  honcho_profile                          — user's peer card, key facts, no LLM\n"
diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md
index 578ea4706..da4dd1535 100644
--- a/website/docs/user-guide/features/honcho.md
+++ b/website/docs/user-guide/features/honcho.md
@@ -109,7 +109,7 @@ Settings are scoped to `hosts.hermes` and fall back to root-level globals when t
 | `recallMode` | `"hybrid"` | Retrieval strategy: `hybrid`, `context`, or `tools` |
 | `sessionStrategy` | `"per-session"` | How sessions are scoped |
 | `sessionPeerPrefix` | `false` | Prefix session names with peer name |
-| `contextTokens` | *(Honcho default)* | Max tokens for context prefetch |
+| `contextTokens` | *(Honcho default)* | Max tokens for auto-injected context |
 | `dialecticReasoningLevel` | `"low"` | Floor for dialectic reasoning: `minimal` / `low` / `medium` / `high` / `max` |
 | `dialecticMaxChars` | `600` | Char cap on dialectic results injected into system prompt |
 | `linkedHosts` | `[]` | Other host keys whose workspaces to cross-reference |
@@ -142,9 +142,9 @@ Controls how Honcho context reaches the agent:
 
 | Mode | Behavior |
 |------|----------|
-| `hybrid` | Prefetch context into system prompt + expose tools (default) |
-| `context` | Context injection only — no Honcho tools available |
-| `tools` | Tools only — no prefetch into system prompt |
+| `hybrid` | Auto-injected context + Honcho tools available (default) |
+| `context` | Auto-injected context only — Honcho tools hidden |
+| `tools` | Honcho tools only — no auto-injected context |
 
 ### Write Frequency
 
@@ -203,23 +203,23 @@ honcho: {}
 
 ## How It Works
 
-### Async Prefetch Pipeline
+### Async Context Pipeline
 
 Honcho context is fetched asynchronously to avoid blocking the response path:
 
 ```
 Turn N:
   user message
-    → pop prefetch result from cache (from previous turn)
+    → consume cached context (from previous turn's background fetch)
     → inject into system prompt (user representation, AI representation, dialectic)
     → LLM call
     → response
-    → fire prefetch in background threads
-         → prefetch_context()   ─┐
-         → prefetch_dialectic() ─┴→ cache for Turn N+1
+    → fire background fetch for next turn
+         → fetch context    ─┐
+         → fetch dialectic  ─┴→ cache for Turn N+1
 ```
 
-Turn 1 is a cold start (no cache). All subsequent turns consume pre-warmed results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider.
+Turn 1 is a cold start (no cache). All subsequent turns consume cached results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider.
 
 ### Dual-Peer Architecture