From 0aed9bfde1d9d0204f54c6a6defc842ff6e43385 Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 12 Mar 2026 16:26:10 -0400 Subject: [PATCH] refactor(honcho): rename memory tools to Honcho tools, clarify recall mode language Replace "memory tools" with "Honcho tools" and "pre-warmed/prefetch" with "auto-injected context" in all user-facing strings and docs. --- honcho_integration/cli.py | 12 ++++++------ honcho_integration/client.py | 6 +++--- run_agent.py | 14 +++++++------- website/docs/user-guide/features/honcho.md | 20 ++++++++++---------- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index 0af9923f0..9526b1a14 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -153,9 +153,9 @@ def cmd_setup(args) -> None: # Recall mode current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid") print(f"\n Recall mode options:") - print(" hybrid — pre-warmed context + memory tools available (default)") - print(" context — pre-warmed context only, memory tools suppressed") - print(" tools — no pre-loaded context, rely on tool calls only") + print(" hybrid — auto-injected context + Honcho tools available (default)") + print(" context — auto-injected context only, Honcho tools hidden") + print(" tools — Honcho tools only, no auto-injected context") new_recall = _prompt("Recall mode", default=current_recall) if new_recall in ("hybrid", "context", "tools"): hermes_host["recallMode"] = new_recall @@ -199,7 +199,7 @@ def cmd_setup(args) -> None: _mode_str = f"{hcfg.memory_mode} (peers: {overrides})" print(f" Mode: {_mode_str}") print(f" Frequency: {hcfg.write_frequency}") - print(f"\n Tools available in chat:") + print(f"\n Honcho tools available in chat:") print(f" honcho_context — ask Honcho a question about you (LLM-synthesized)") print(f" honcho_search — semantic search over your history (no LLM)") print(f" honcho_profile — your peer card, key facts (no LLM)") @@ -702,7 +702,7 @@ def cmd_migrate(args) -> None: print() print(" Context injection") print(" OpenClaw: file excerpts injected synchronously before each LLM call.") - print(" Hermes: Honcho context prefetched async at turn end, injected next turn.") + print(" Hermes: Honcho context fetched async at turn end, injected next turn.") print(" First turn has no Honcho context; subsequent turns are loaded.") print() print(" Memory growth") @@ -710,7 +710,7 @@ def cmd_migrate(args) -> None: print(" Hermes: Honcho observes every message and updates representations") print(" automatically. Files become the seed, not the live store.") print() - print(" Tool surface (available to the agent during conversation)") + print(" Honcho tools (available to the agent during conversation)") print(" honcho_context — ask Honcho a question, get a synthesized answer (LLM)") print(" honcho_search — semantic search over stored context (no LLM)") print(" honcho_profile — fast peer card snapshot (no LLM)") diff --git a/honcho_integration/client.py b/honcho_integration/client.py index 04ee946e8..446176bce 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -90,9 +90,9 @@ class HonchoClientConfig: # Max chars of dialectic result to inject into Hermes system prompt dialectic_max_chars: int = 600 # Recall mode: how memory retrieval works when Honcho is active. - # "hybrid" — pre-warmed context + memory tools available (model decides) - # "context" — pre-warmed context only, honcho memory tools removed - # "tools" — no pre-loaded context, rely on tool calls only + # "hybrid" — auto-injected context + Honcho tools available (model decides) + # "context" — auto-injected context only, Honcho tools removed + # "tools" — Honcho tools only, no auto-injected context recall_mode: str = "hybrid" # Session resolution session_strategy: str = "per-session" diff --git a/run_agent.py b/run_agent.py index 78f68c9dd..61d123201 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1423,7 +1423,7 @@ class AIAgent: if hcfg.recall_mode == "context": self._strip_honcho_tools_from_surface() if not self.quiet_mode: - print(" Honcho active — recall_mode: context (tools suppressed)") + print(" Honcho active — recall_mode: context (Honcho tools hidden)") else: if not self.quiet_mode: print(f" Honcho active — recall_mode: {hcfg.recall_mode}") @@ -1617,14 +1617,14 @@ class AIAgent: ) if recall_mode == "context": honcho_block += ( - "Honcho context is pre-loaded into this system prompt below. " - "All memory retrieval comes from this context — no memory tools " + "Honcho context is injected into this system prompt below. " + "All memory retrieval comes from this context — no Honcho tools " "are available. Answer questions about the user, prior sessions, " "and recent work directly from the Honcho Memory section.\n" ) elif recall_mode == "tools": honcho_block += ( - "Memory tools:\n" + "Honcho tools:\n" " honcho_context — ask Honcho a question, LLM-synthesized answer\n" " honcho_search — semantic search, raw excerpts, no LLM\n" " honcho_profile — user's peer card, key facts, no LLM\n" @@ -1633,11 +1633,11 @@ class AIAgent: else: # hybrid honcho_block += ( "Honcho context (user representation, peer card, and recent session summary) " - "is pre-loaded into this system prompt below. Use it to answer continuity " + "is injected into this system prompt below. Use it to answer continuity " "questions ('where were we?', 'what were we working on?') WITHOUT calling " - "any tools. Only call memory tools when you need information beyond what is " + "any tools. Only call Honcho tools when you need information beyond what is " "already present in the Honcho Memory section.\n" - "Memory tools:\n" + "Honcho tools:\n" " honcho_context — ask Honcho a question, LLM-synthesized answer\n" " honcho_search — semantic search, raw excerpts, no LLM\n" " honcho_profile — user's peer card, key facts, no LLM\n" diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index 578ea4706..da4dd1535 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -109,7 +109,7 @@ Settings are scoped to `hosts.hermes` and fall back to root-level globals when t | `recallMode` | `"hybrid"` | Retrieval strategy: `hybrid`, `context`, or `tools` | | `sessionStrategy` | `"per-session"` | How sessions are scoped | | `sessionPeerPrefix` | `false` | Prefix session names with peer name | -| `contextTokens` | *(Honcho default)* | Max tokens for context prefetch | +| `contextTokens` | *(Honcho default)* | Max tokens for auto-injected context | | `dialecticReasoningLevel` | `"low"` | Floor for dialectic reasoning: `minimal` / `low` / `medium` / `high` / `max` | | `dialecticMaxChars` | `600` | Char cap on dialectic results injected into system prompt | | `linkedHosts` | `[]` | Other host keys whose workspaces to cross-reference | @@ -142,9 +142,9 @@ Controls how Honcho context reaches the agent: | Mode | Behavior | |------|----------| -| `hybrid` | Prefetch context into system prompt + expose tools (default) | -| `context` | Context injection only — no Honcho tools available | -| `tools` | Tools only — no prefetch into system prompt | +| `hybrid` | Auto-injected context + Honcho tools available (default) | +| `context` | Auto-injected context only — Honcho tools hidden | +| `tools` | Honcho tools only — no auto-injected context | ### Write Frequency @@ -203,23 +203,23 @@ honcho: {} ## How It Works -### Async Prefetch Pipeline +### Async Context Pipeline Honcho context is fetched asynchronously to avoid blocking the response path: ``` Turn N: user message - → pop prefetch result from cache (from previous turn) + → consume cached context (from previous turn's background fetch) → inject into system prompt (user representation, AI representation, dialectic) → LLM call → response - → fire prefetch in background threads - → prefetch_context() ─┐ - → prefetch_dialectic() ─┴→ cache for Turn N+1 + → fire background fetch for next turn + → fetch context ─┐ + → fetch dialectic ─┴→ cache for Turn N+1 ``` -Turn 1 is a cold start (no cache). All subsequent turns consume pre-warmed results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider. +Turn 1 is a cold start (no cache). All subsequent turns consume cached results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider. ### Dual-Peer Architecture