refactor(honcho): rename memory tools to Honcho tools, clarify recall mode language

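Replace "memory tools" with "Honcho tools" and replace "pre-warmed"/"pre-loaded"/"prefetch" wording with "auto-injected context" (or "fetched"/"cached" where the async pipeline itself is described) in user-facing strings, code comments, and docs.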
2026-03-12 16:26:10 -04:00
parent ae2a5e5743
commit 0aed9bfde1
4 changed files with 26 additions and 26 deletions
--- a/honcho_integration/cli.py
+++ b/honcho_integration/cli.py
@@ -153,9 +153,9 @@ def cmd_setup(args) -> None:
    # Recall mode
    current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid")
    print(f"\n  Recall mode options:")
-    print("    hybrid  — pre-warmed context + memory tools available (default)")
-    print("    context — pre-warmed context only, memory tools suppressed")
-    print("    tools   — no pre-loaded context, rely on tool calls only")
+    print("    hybrid  — auto-injected context + Honcho tools available (default)")
+    print("    context — auto-injected context only, Honcho tools hidden")
+    print("    tools   — Honcho tools only, no auto-injected context")
    new_recall = _prompt("Recall mode", default=current_recall)
    if new_recall in ("hybrid", "context", "tools"):
        hermes_host["recallMode"] = new_recall
@@ -199,7 +199,7 @@ def cmd_setup(args) -> None:
        _mode_str = f"{hcfg.memory_mode}  (peers: {overrides})"
    print(f"  Mode:      {_mode_str}")
    print(f"  Frequency: {hcfg.write_frequency}")
-    print(f"\n  Tools available in chat:")
+    print(f"\n  Honcho tools available in chat:")
    print(f"    honcho_context  — ask Honcho a question about you (LLM-synthesized)")
    print(f"    honcho_search       — semantic search over your history (no LLM)")
    print(f"    honcho_profile      — your peer card, key facts (no LLM)")
@@ -702,7 +702,7 @@ def cmd_migrate(args) -> None:
    print()
    print("  Context injection")
    print("    OpenClaw: file excerpts injected synchronously before each LLM call.")
-    print("    Hermes:   Honcho context prefetched async at turn end, injected next turn.")
+    print("    Hermes:   Honcho context fetched async at turn end, injected next turn.")
    print("              First turn has no Honcho context; subsequent turns are loaded.")
    print()
    print("  Memory growth")
@@ -710,7 +710,7 @@ def cmd_migrate(args) -> None:
    print("    Hermes:   Honcho observes every message and updates representations")
    print("              automatically. Files become the seed, not the live store.")
    print()
-    print("  Tool surface (available to the agent during conversation)")
+    print("  Honcho tools (available to the agent during conversation)")
    print("    honcho_context   — ask Honcho a question, get a synthesized answer (LLM)")
    print("    honcho_search        — semantic search over stored context (no LLM)")
    print("    honcho_profile       — fast peer card snapshot (no LLM)")
--- a/honcho_integration/client.py
+++ b/honcho_integration/client.py
@@ -90,9 +90,9 @@ class HonchoClientConfig:
    # Max chars of dialectic result to inject into Hermes system prompt
    dialectic_max_chars: int = 600
    # Recall mode: how memory retrieval works when Honcho is active.
-    # "hybrid"  — pre-warmed context + memory tools available (model decides)
-    # "context" — pre-warmed context only, honcho memory tools removed
-    # "tools"   — no pre-loaded context, rely on tool calls only
+    # "hybrid"  — auto-injected context + Honcho tools available (model decides)
+    # "context" — auto-injected context only, Honcho tools hidden
+    # "tools"   — Honcho tools only, no auto-injected context
    recall_mode: str = "hybrid"
    # Session resolution
    session_strategy: str = "per-session"
--- a/run_agent.py
+++ b/run_agent.py
@@ -1423,7 +1423,7 @@ class AIAgent:
        if hcfg.recall_mode == "context":
            self._strip_honcho_tools_from_surface()
            if not self.quiet_mode:
-                print("  Honcho active — recall_mode: context (tools suppressed)")
+                print("  Honcho active — recall_mode: context (Honcho tools hidden)")
        else:
            if not self.quiet_mode:
                print(f"  Honcho active — recall_mode: {hcfg.recall_mode}")
@@ -1617,14 +1617,14 @@ class AIAgent:
            )
            if recall_mode == "context":
                honcho_block += (
-                    "Honcho context is pre-loaded into this system prompt below. "
-                    "All memory retrieval comes from this context — no memory tools "
+                    "Honcho context is injected into this system prompt below. "
+                    "All memory retrieval comes from this context — no Honcho tools "
                    "are available. Answer questions about the user, prior sessions, "
                    "and recent work directly from the Honcho Memory section.\n"
                )
            elif recall_mode == "tools":
                honcho_block += (
-                    "Memory tools:\n"
+                    "Honcho tools:\n"
                    "  honcho_context <question>           — ask Honcho a question, LLM-synthesized answer\n"
                    "  honcho_search <query>                   — semantic search, raw excerpts, no LLM\n"
                    "  honcho_profile                          — user's peer card, key facts, no LLM\n"
@@ -1633,11 +1633,11 @@ class AIAgent:
            else:  # hybrid
                honcho_block += (
                    "Honcho context (user representation, peer card, and recent session summary) "
-                    "is pre-loaded into this system prompt below. Use it to answer continuity "
+                    "is injected into this system prompt below. Use it to answer continuity "
                    "questions ('where were we?', 'what were we working on?') WITHOUT calling "
-                    "any tools. Only call memory tools when you need information beyond what is "
+                    "any tools. Only call Honcho tools when you need information beyond what is "
                    "already present in the Honcho Memory section.\n"
-                    "Memory tools:\n"
+                    "Honcho tools:\n"
                    "  honcho_context <question>           — ask Honcho a question, LLM-synthesized answer\n"
                    "  honcho_search <query>                   — semantic search, raw excerpts, no LLM\n"
                    "  honcho_profile                          — user's peer card, key facts, no LLM\n"
--- a/website/docs/user-guide/features/honcho.md
+++ b/website/docs/user-guide/features/honcho.md
@@ -109,7 +109,7 @@ Settings are scoped to `hosts.hermes` and fall back to root-level globals when t
 | `recallMode` | `"hybrid"` | Retrieval strategy: `hybrid`, `context`, or `tools` |
 | `sessionStrategy` | `"per-session"` | How sessions are scoped |
 | `sessionPeerPrefix` | `false` | Prefix session names with peer name |
-| `contextTokens` | *(Honcho default)* | Max tokens for context prefetch |
+| `contextTokens` | *(Honcho default)* | Max tokens for auto-injected context |
 | `dialecticReasoningLevel` | `"low"` | Floor for dialectic reasoning: `minimal` / `low` / `medium` / `high` / `max` |
 | `dialecticMaxChars` | `600` | Char cap on dialectic results injected into system prompt |
 | `linkedHosts` | `[]` | Other host keys whose workspaces to cross-reference |
@@ -142,9 +142,9 @@ Controls how Honcho context reaches the agent:

 | Mode | Behavior |
 |------|----------|
-| `hybrid` | Prefetch context into system prompt + expose tools (default) |
-| `context` | Context injection only — no Honcho tools available |
-| `tools` | Tools only — no prefetch into system prompt |
+| `hybrid` | Auto-injected context + Honcho tools available (default) |
+| `context` | Auto-injected context only — Honcho tools hidden |
+| `tools` | Honcho tools only — no auto-injected context |

 ### Write Frequency

@@ -203,23 +203,23 @@ honcho: {}

 ## How It Works

-### Async Prefetch Pipeline
+### Async Context Pipeline

 Honcho context is fetched asynchronously to avoid blocking the response path:

 ```
 Turn N:
  user message
-    → pop prefetch result from cache (from previous turn)
+    → consume cached context (from previous turn's background fetch)
    → inject into system prompt (user representation, AI representation, dialectic)
    → LLM call
    → response
-    → fire prefetch in background threads
-         → prefetch_context()   ─┐
-         → prefetch_dialectic() ─┴→ cache for Turn N+1
+    → fire background fetch for next turn
+         → fetch context    ─┐
+         → fetch dialectic  ─┴→ cache for Turn N+1
 ```

-Turn 1 is a cold start (no cache). All subsequent turns consume pre-warmed results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider.
+Turn 1 is a cold start (no cache). All subsequent turns consume cached results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider.

 ### Dual-Peer Architecture