refactor(honcho): rename memory tools to Honcho tools, clarify recall mode language

Replace "memory tools" with "Honcho tools" and "pre-warmed/prefetch"
with "auto-injected context" in all user-facing strings and docs.
This commit is contained in:
Erosika
2026-03-12 16:26:10 -04:00
parent ae2a5e5743
commit 0aed9bfde1
4 changed files with 26 additions and 26 deletions

View File

@@ -153,9 +153,9 @@ def cmd_setup(args) -> None:
# Recall mode
current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid")
print(f"\n Recall mode options:")
print(" hybrid — pre-warmed context + memory tools available (default)")
print(" context — pre-warmed context only, memory tools suppressed")
print(" tools — no pre-loaded context, rely on tool calls only")
print(" hybrid — auto-injected context + Honcho tools available (default)")
print(" context — auto-injected context only, Honcho tools hidden")
print(" tools — Honcho tools only, no auto-injected context")
new_recall = _prompt("Recall mode", default=current_recall)
if new_recall in ("hybrid", "context", "tools"):
hermes_host["recallMode"] = new_recall
@@ -199,7 +199,7 @@ def cmd_setup(args) -> None:
_mode_str = f"{hcfg.memory_mode} (peers: {overrides})"
print(f" Mode: {_mode_str}")
print(f" Frequency: {hcfg.write_frequency}")
print(f"\n Tools available in chat:")
print(f"\n Honcho tools available in chat:")
print(f" honcho_context — ask Honcho a question about you (LLM-synthesized)")
print(f" honcho_search — semantic search over your history (no LLM)")
print(f" honcho_profile — your peer card, key facts (no LLM)")
@@ -702,7 +702,7 @@ def cmd_migrate(args) -> None:
print()
print(" Context injection")
print(" OpenClaw: file excerpts injected synchronously before each LLM call.")
print(" Hermes: Honcho context prefetched async at turn end, injected next turn.")
print(" Hermes: Honcho context fetched async at turn end, injected next turn.")
print(" First turn has no Honcho context; subsequent turns are loaded.")
print()
print(" Memory growth")
@@ -710,7 +710,7 @@ def cmd_migrate(args) -> None:
print(" Hermes: Honcho observes every message and updates representations")
print(" automatically. Files become the seed, not the live store.")
print()
print(" Tool surface (available to the agent during conversation)")
print(" Honcho tools (available to the agent during conversation)")
print(" honcho_context — ask Honcho a question, get a synthesized answer (LLM)")
print(" honcho_search — semantic search over stored context (no LLM)")
print(" honcho_profile — fast peer card snapshot (no LLM)")

View File

@@ -90,9 +90,9 @@ class HonchoClientConfig:
# Max chars of dialectic result to inject into Hermes system prompt
dialectic_max_chars: int = 600
# Recall mode: how memory retrieval works when Honcho is active.
# "hybrid" — pre-warmed context + memory tools available (model decides)
# "context" — pre-warmed context only, honcho memory tools removed
# "tools" — no pre-loaded context, rely on tool calls only
# "hybrid" — auto-injected context + Honcho tools available (model decides)
# "context" — auto-injected context only, Honcho tools removed
# "tools" — Honcho tools only, no auto-injected context
recall_mode: str = "hybrid"
# Session resolution
session_strategy: str = "per-session"

View File

@@ -1423,7 +1423,7 @@ class AIAgent:
if hcfg.recall_mode == "context":
self._strip_honcho_tools_from_surface()
if not self.quiet_mode:
print(" Honcho active — recall_mode: context (tools suppressed)")
print(" Honcho active — recall_mode: context (Honcho tools hidden)")
else:
if not self.quiet_mode:
print(f" Honcho active — recall_mode: {hcfg.recall_mode}")
@@ -1617,14 +1617,14 @@ class AIAgent:
)
if recall_mode == "context":
honcho_block += (
"Honcho context is pre-loaded into this system prompt below. "
"All memory retrieval comes from this context — no memory tools "
"Honcho context is injected into this system prompt below. "
"All memory retrieval comes from this context — no Honcho tools "
"are available. Answer questions about the user, prior sessions, "
"and recent work directly from the Honcho Memory section.\n"
)
elif recall_mode == "tools":
honcho_block += (
"Memory tools:\n"
"Honcho tools:\n"
" honcho_context <question> — ask Honcho a question, LLM-synthesized answer\n"
" honcho_search <query> — semantic search, raw excerpts, no LLM\n"
" honcho_profile — user's peer card, key facts, no LLM\n"
@@ -1633,11 +1633,11 @@ class AIAgent:
else: # hybrid
honcho_block += (
"Honcho context (user representation, peer card, and recent session summary) "
"is pre-loaded into this system prompt below. Use it to answer continuity "
"is injected into this system prompt below. Use it to answer continuity "
"questions ('where were we?', 'what were we working on?') WITHOUT calling "
"any tools. Only call memory tools when you need information beyond what is "
"any tools. Only call Honcho tools when you need information beyond what is "
"already present in the Honcho Memory section.\n"
"Memory tools:\n"
"Honcho tools:\n"
" honcho_context <question> — ask Honcho a question, LLM-synthesized answer\n"
" honcho_search <query> — semantic search, raw excerpts, no LLM\n"
" honcho_profile — user's peer card, key facts, no LLM\n"

View File

@@ -109,7 +109,7 @@ Settings are scoped to `hosts.hermes` and fall back to root-level globals when t
| `recallMode` | `"hybrid"` | Retrieval strategy: `hybrid`, `context`, or `tools` |
| `sessionStrategy` | `"per-session"` | How sessions are scoped |
| `sessionPeerPrefix` | `false` | Prefix session names with peer name |
| `contextTokens` | *(Honcho default)* | Max tokens for context prefetch |
| `contextTokens` | *(Honcho default)* | Max tokens for auto-injected context |
| `dialecticReasoningLevel` | `"low"` | Floor for dialectic reasoning: `minimal` / `low` / `medium` / `high` / `max` |
| `dialecticMaxChars` | `600` | Char cap on dialectic results injected into system prompt |
| `linkedHosts` | `[]` | Other host keys whose workspaces to cross-reference |
@@ -142,9 +142,9 @@ Controls how Honcho context reaches the agent:
| Mode | Behavior |
|------|----------|
| `hybrid` | Prefetch context into system prompt + expose tools (default) |
| `context` | Context injection only — no Honcho tools available |
| `tools` | Tools only — no prefetch into system prompt |
| `hybrid` | Auto-injected context + Honcho tools available (default) |
| `context` | Auto-injected context only — Honcho tools hidden |
| `tools` | Honcho tools only — no auto-injected context |
### Write Frequency
@@ -203,23 +203,23 @@ honcho: {}
## How It Works
### Async Prefetch Pipeline
### Async Context Pipeline
Honcho context is fetched asynchronously to avoid blocking the response path:
```
Turn N:
user message
pop prefetch result from cache (from previous turn)
→ consume cached context (from previous turn's background fetch)
→ inject into system prompt (user representation, AI representation, dialectic)
→ LLM call
→ response
→ fire prefetch in background threads
prefetch_context() ─┐
prefetch_dialectic() ─┴→ cache for Turn N+1
→ fire background fetch for next turn
→ fetch context ─┐
→ fetch dialectic ─┴→ cache for Turn N+1
```
Turn 1 is a cold start (no cache). All subsequent turns consume pre-warmed results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider.
Turn 1 is a cold start (no cache). All subsequent turns consume cached results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider.
### Dual-Peer Architecture