refactor(honcho): rename memory tools to Honcho tools, clarify recall mode language
Replace "memory tools" with "Honcho tools" and "pre-warmed/prefetch" with "auto-injected context" in all user-facing strings and docs.
This commit is contained in:
@@ -153,9 +153,9 @@ def cmd_setup(args) -> None:
|
||||
# Recall mode
|
||||
current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid")
|
||||
print(f"\n Recall mode options:")
|
||||
print(" hybrid — pre-warmed context + memory tools available (default)")
|
||||
print(" context — pre-warmed context only, memory tools suppressed")
|
||||
print(" tools — no pre-loaded context, rely on tool calls only")
|
||||
print(" hybrid — auto-injected context + Honcho tools available (default)")
|
||||
print(" context — auto-injected context only, Honcho tools hidden")
|
||||
print(" tools — Honcho tools only, no auto-injected context")
|
||||
new_recall = _prompt("Recall mode", default=current_recall)
|
||||
if new_recall in ("hybrid", "context", "tools"):
|
||||
hermes_host["recallMode"] = new_recall
|
||||
@@ -199,7 +199,7 @@ def cmd_setup(args) -> None:
|
||||
_mode_str = f"{hcfg.memory_mode} (peers: {overrides})"
|
||||
print(f" Mode: {_mode_str}")
|
||||
print(f" Frequency: {hcfg.write_frequency}")
|
||||
print(f"\n Tools available in chat:")
|
||||
print(f"\n Honcho tools available in chat:")
|
||||
print(f" honcho_context — ask Honcho a question about you (LLM-synthesized)")
|
||||
print(f" honcho_search — semantic search over your history (no LLM)")
|
||||
print(f" honcho_profile — your peer card, key facts (no LLM)")
|
||||
@@ -702,7 +702,7 @@ def cmd_migrate(args) -> None:
|
||||
print()
|
||||
print(" Context injection")
|
||||
print(" OpenClaw: file excerpts injected synchronously before each LLM call.")
|
||||
print(" Hermes: Honcho context prefetched async at turn end, injected next turn.")
|
||||
print(" Hermes: Honcho context fetched async at turn end, injected next turn.")
|
||||
print(" First turn has no Honcho context; subsequent turns are loaded.")
|
||||
print()
|
||||
print(" Memory growth")
|
||||
@@ -710,7 +710,7 @@ def cmd_migrate(args) -> None:
|
||||
print(" Hermes: Honcho observes every message and updates representations")
|
||||
print(" automatically. Files become the seed, not the live store.")
|
||||
print()
|
||||
print(" Tool surface (available to the agent during conversation)")
|
||||
print(" Honcho tools (available to the agent during conversation)")
|
||||
print(" honcho_context — ask Honcho a question, get a synthesized answer (LLM)")
|
||||
print(" honcho_search — semantic search over stored context (no LLM)")
|
||||
print(" honcho_profile — fast peer card snapshot (no LLM)")
|
||||
|
||||
@@ -90,9 +90,9 @@ class HonchoClientConfig:
|
||||
# Max chars of dialectic result to inject into Hermes system prompt
|
||||
dialectic_max_chars: int = 600
|
||||
# Recall mode: how memory retrieval works when Honcho is active.
|
||||
# "hybrid" — pre-warmed context + memory tools available (model decides)
|
||||
# "context" — pre-warmed context only, honcho memory tools removed
|
||||
# "tools" — no pre-loaded context, rely on tool calls only
|
||||
# "hybrid" — auto-injected context + Honcho tools available (model decides)
|
||||
# "context" — auto-injected context only, Honcho tools removed
|
||||
# "tools" — Honcho tools only, no auto-injected context
|
||||
recall_mode: str = "hybrid"
|
||||
# Session resolution
|
||||
session_strategy: str = "per-session"
|
||||
|
||||
14
run_agent.py
14
run_agent.py
@@ -1423,7 +1423,7 @@ class AIAgent:
|
||||
if hcfg.recall_mode == "context":
|
||||
self._strip_honcho_tools_from_surface()
|
||||
if not self.quiet_mode:
|
||||
print(" Honcho active — recall_mode: context (tools suppressed)")
|
||||
print(" Honcho active — recall_mode: context (Honcho tools hidden)")
|
||||
else:
|
||||
if not self.quiet_mode:
|
||||
print(f" Honcho active — recall_mode: {hcfg.recall_mode}")
|
||||
@@ -1617,14 +1617,14 @@ class AIAgent:
|
||||
)
|
||||
if recall_mode == "context":
|
||||
honcho_block += (
|
||||
"Honcho context is pre-loaded into this system prompt below. "
|
||||
"All memory retrieval comes from this context — no memory tools "
|
||||
"Honcho context is injected into this system prompt below. "
|
||||
"All memory retrieval comes from this context — no Honcho tools "
|
||||
"are available. Answer questions about the user, prior sessions, "
|
||||
"and recent work directly from the Honcho Memory section.\n"
|
||||
)
|
||||
elif recall_mode == "tools":
|
||||
honcho_block += (
|
||||
"Memory tools:\n"
|
||||
"Honcho tools:\n"
|
||||
" honcho_context <question> — ask Honcho a question, LLM-synthesized answer\n"
|
||||
" honcho_search <query> — semantic search, raw excerpts, no LLM\n"
|
||||
" honcho_profile — user's peer card, key facts, no LLM\n"
|
||||
@@ -1633,11 +1633,11 @@ class AIAgent:
|
||||
else: # hybrid
|
||||
honcho_block += (
|
||||
"Honcho context (user representation, peer card, and recent session summary) "
|
||||
"is pre-loaded into this system prompt below. Use it to answer continuity "
|
||||
"is injected into this system prompt below. Use it to answer continuity "
|
||||
"questions ('where were we?', 'what were we working on?') WITHOUT calling "
|
||||
"any tools. Only call memory tools when you need information beyond what is "
|
||||
"any tools. Only call Honcho tools when you need information beyond what is "
|
||||
"already present in the Honcho Memory section.\n"
|
||||
"Memory tools:\n"
|
||||
"Honcho tools:\n"
|
||||
" honcho_context <question> — ask Honcho a question, LLM-synthesized answer\n"
|
||||
" honcho_search <query> — semantic search, raw excerpts, no LLM\n"
|
||||
" honcho_profile — user's peer card, key facts, no LLM\n"
|
||||
|
||||
@@ -109,7 +109,7 @@ Settings are scoped to `hosts.hermes` and fall back to root-level globals when t
|
||||
| `recallMode` | `"hybrid"` | Retrieval strategy: `hybrid`, `context`, or `tools` |
|
||||
| `sessionStrategy` | `"per-session"` | How sessions are scoped |
|
||||
| `sessionPeerPrefix` | `false` | Prefix session names with peer name |
|
||||
| `contextTokens` | *(Honcho default)* | Max tokens for context prefetch |
|
||||
| `contextTokens` | *(Honcho default)* | Max tokens for auto-injected context |
|
||||
| `dialecticReasoningLevel` | `"low"` | Floor for dialectic reasoning: `minimal` / `low` / `medium` / `high` / `max` |
|
||||
| `dialecticMaxChars` | `600` | Char cap on dialectic results injected into system prompt |
|
||||
| `linkedHosts` | `[]` | Other host keys whose workspaces to cross-reference |
|
||||
@@ -142,9 +142,9 @@ Controls how Honcho context reaches the agent:
|
||||
|
||||
| Mode | Behavior |
|
||||
|------|----------|
|
||||
| `hybrid` | Prefetch context into system prompt + expose tools (default) |
|
||||
| `context` | Context injection only — no Honcho tools available |
|
||||
| `tools` | Tools only — no prefetch into system prompt |
|
||||
| `hybrid` | Auto-injected context + Honcho tools available (default) |
|
||||
| `context` | Auto-injected context only — Honcho tools hidden |
|
||||
| `tools` | Honcho tools only — no auto-injected context |
|
||||
|
||||
### Write Frequency
|
||||
|
||||
@@ -203,23 +203,23 @@ honcho: {}
|
||||
|
||||
## How It Works
|
||||
|
||||
### Async Prefetch Pipeline
|
||||
### Async Context Pipeline
|
||||
|
||||
Honcho context is fetched asynchronously to avoid blocking the response path:
|
||||
|
||||
```
|
||||
Turn N:
|
||||
user message
|
||||
→ pop prefetch result from cache (from previous turn)
|
||||
→ consume cached context (from previous turn's background fetch)
|
||||
→ inject into system prompt (user representation, AI representation, dialectic)
|
||||
→ LLM call
|
||||
→ response
|
||||
→ fire prefetch in background threads
|
||||
→ prefetch_context() ─┐
|
||||
→ prefetch_dialectic() ─┴→ cache for Turn N+1
|
||||
→ fire background fetch for next turn
|
||||
→ fetch context ─┐
|
||||
→ fetch dialectic ─┴→ cache for Turn N+1
|
||||
```
|
||||
|
||||
Turn 1 is a cold start (no cache). All subsequent turns consume pre-warmed results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider.
|
||||
Turn 1 is a cold start (no cached Honcho context yet). All subsequent turns consume the results cached by the previous turn's background fetch, adding zero HTTP latency to the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider.
|
||||
|
||||
### Dual-Peer Architecture
|
||||
|
||||
|
||||
Reference in New Issue
Block a user