feat: replace inline nudges with background memory/skill review (#2235)

Remove the memory and skill nudges that were appended directly to user
messages, causing backward-looking system instructions to compete with
forward-looking user tasks. Found in 43% of user messages across 15
sessions, with confirmed cases of the agent spending tool calls on
nudge responses before starting the user's actual request.

Replace with a background review agent that runs AFTER the main agent
finishes responding:
- Spawns a background thread with a snapshot of the conversation
- Uses the main model (not auxiliary) for high-precision memory/skill work
- Only has memory + skill_manage tools (5 iteration budget)
- Shares the memory store for direct writes
- Never modifies the main conversation history
- Never competes with the user's task for model attention
- Zero latency impact (runs after response is delivered)
- Same token cost (processes the same context, just on a separate track)

The trigger conditions are unchanged (every 10 user turns for memory,
after 10+ tool iterations for skills). Only the execution path changes:
from inline injection to background fork.

Closes #2227.

Co-authored-by: Test <test@test.com>
This commit is contained in:
Teknium
2026-03-20 18:51:31 -07:00
committed by GitHub
parent 2416b2b7af
commit 45058b4105

View File

@@ -974,7 +974,7 @@ class AIAgent:
self._skill_nudge_interval = 10
try:
skills_config = _agent_cfg.get("skills", {})
self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 15))
self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 10))
except Exception:
pass
@@ -1303,6 +1303,98 @@ class AIAgent:
if self.verbose_logging:
logging.warning(f"Failed to cleanup browser for task {task_id}: {e}")
# ------------------------------------------------------------------
# Background memory/skill review
# ------------------------------------------------------------------
# Prompts for the background review fork. One of these is appended as the
# forked agent's next user turn (see _spawn_background_review), depending on
# which trigger(s) fired. All three end with an explicit "Nothing to save."
# escape hatch so the fork can terminate cheaply when there is nothing to do.

# Used when only the memory trigger fired (turn-count based).
_MEMORY_REVIEW_PROMPT = (
    "Review the conversation above and consider saving to memory if appropriate.\n\n"
    "Focus on:\n"
    "1. Has the user revealed things about themselves — their persona, desires, "
    "preferences, or personal details worth remembering?\n"
    "2. Has the user expressed expectations about how you should behave, their work "
    "style, or ways they want you to operate?\n\n"
    "If something stands out, save it using the memory tool. "
    "If nothing is worth saving, just say 'Nothing to save.' and stop."
)
# Used when only the skill trigger fired (tool-iteration based).
_SKILL_REVIEW_PROMPT = (
    "Review the conversation above and consider saving or updating a skill if appropriate.\n\n"
    "Focus on: was a non-trivial approach used to complete a task that required trial "
    "and error, or changing course due to experiential findings along the way, or did "
    "the user expect or desire a different method or outcome?\n\n"
    "If a relevant skill already exists, update it with what you learned. "
    "Otherwise, create a new skill if the approach is reusable.\n"
    "If nothing is worth saving, just say 'Nothing to save.' and stop."
)
# Used when both triggers fired in the same turn; covers both concerns in a
# single forked run instead of spawning two review threads.
_COMBINED_REVIEW_PROMPT = (
    "Review the conversation above and consider two things:\n\n"
    "**Memory**: Has the user revealed things about themselves — their persona, "
    "desires, preferences, or personal details? Has the user expressed expectations "
    "about how you should behave, their work style, or ways they want you to operate? "
    "If so, save using the memory tool.\n\n"
    "**Skills**: Was a non-trivial approach used to complete a task that required trial "
    "and error, or changing course due to experiential findings along the way, or did "
    "the user expect or desire a different method or outcome? If a relevant skill "
    "already exists, update it. Otherwise, create a new one if the approach is reusable.\n\n"
    "Only act if there's something genuinely worth saving. "
    "If nothing stands out, just say 'Nothing to save.' and stop."
)
def _spawn_background_review(
    self,
    messages_snapshot: List[Dict],
    review_memory: bool = False,
    review_skills: bool = False,
) -> None:
    """Kick off a daemon thread that reviews the conversation for saves.

    A fresh AIAgent is forked with the same model/platform/provider as this
    session and handed ``messages_snapshot`` plus a review prompt as its
    next user turn. Memory/skill writes go straight to the shared stores;
    the main conversation history is never touched and nothing is surfaced
    to the user.

    Args:
        messages_snapshot: Copy of the conversation to review.
        review_memory: True when the turn-based memory trigger fired.
        review_skills: True when the iteration-based skill trigger fired.
    """
    import threading

    # Dispatch on exactly which trigger(s) fired; skill-only (and the
    # shouldn't-happen neither case) falls through to the skill prompt.
    prompt = {
        (True, True): self._COMBINED_REVIEW_PROMPT,
        (True, False): self._MEMORY_REVIEW_PROMPT,
    }.get((review_memory, review_skills), self._SKILL_REVIEW_PROMPT)

    def _review_worker():
        try:
            # Fork the full agent — identical model, tools, and context,
            # just quieter and with a smaller iteration budget. Keeping the
            # full toolset avoids invalidating prompt caching on the tool
            # schema.
            fork = AIAgent(
                model=self.model,
                max_iterations=8,
                quiet_mode=True,
                platform=self.platform,
                provider=self.provider,
            )
            # Point the fork at the live memory store so saves hit disk.
            fork._memory_store = self._memory_store
            fork._memory_enabled = self._memory_enabled
            fork._user_profile_enabled = self._user_profile_enabled
            # The fork must never trigger its own background review.
            fork._memory_nudge_interval = 0
            fork._skill_nudge_interval = 0
            fork.run_conversation(
                user_message=prompt,
                conversation_history=messages_snapshot,
            )
        except Exception as e:
            logger.debug("Background memory/skill review failed: %s", e)

    worker = threading.Thread(target=_review_worker, daemon=True, name="bg-review")
    worker.start()
def _apply_persist_user_message_override(self, messages: List[Dict]) -> None:
"""Rewrite the current-turn user message before persistence/return.
@@ -5218,36 +5310,22 @@ class AIAgent:
# Track user turns for memory flush and periodic nudge logic
self._user_turn_count += 1
# Preserve the original user message before nudge injection.
# Preserve the original user message (no nudge injection).
# Honcho should receive the actual user input, not system nudges.
original_user_message = persist_user_message if persist_user_message is not None else user_message
# Periodic memory nudge: remind the model to consider saving memories.
# Counter resets whenever the memory tool is actually used.
# Track memory nudge trigger (turn-based, checked here).
# Skill trigger is checked AFTER the agent loop completes, based on
# how many tool iterations THIS turn used.
_should_review_memory = False
if (self._memory_nudge_interval > 0
and "memory" in self.valid_tool_names
and self._memory_store):
self._turns_since_memory += 1
if self._turns_since_memory >= self._memory_nudge_interval:
user_message += (
"\n\n[System: You've had several exchanges. Consider: "
"has the user shared preferences, corrected you, or revealed "
"something about their workflow worth remembering for future sessions?]"
)
_should_review_memory = True
self._turns_since_memory = 0
# Skill creation nudge: fires on the first user message after a long tool loop.
# The counter increments per API iteration in the tool loop and is checked here.
if (self._skill_nudge_interval > 0
and self._iters_since_skill >= self._skill_nudge_interval
and "skill_manage" in self.valid_tool_names):
user_message += (
"\n\n[System: The previous task involved many tool calls. "
"Save the approach as a skill if it's reusable, or update "
"any existing skill you used if it was wrong or incomplete.]"
)
self._iters_since_skill = 0
# Honcho prefetch consumption:
# - First turn: bake into cached system prompt (stable for the session).
# - Later turns: attach recall to the current-turn user message at
@@ -6893,6 +6971,26 @@ class AIAgent:
# Clear stream callback so it doesn't leak into future calls
self._stream_callback = None
# Check skill trigger NOW — based on how many tool iterations THIS turn used.
_should_review_skills = False
if (self._skill_nudge_interval > 0
and self._iters_since_skill >= self._skill_nudge_interval
and "skill_manage" in self.valid_tool_names):
_should_review_skills = True
self._iters_since_skill = 0
# Background memory/skill review — runs AFTER the response is delivered
# so it never competes with the user's task for model attention.
if final_response and not interrupted and (_should_review_memory or _should_review_skills):
try:
self._spawn_background_review(
messages_snapshot=list(messages),
review_memory=_should_review_memory,
review_skills=_should_review_skills,
)
except Exception:
pass # Background review is best-effort
return result
def chat(self, message: str, stream_callback: Optional[callable] = None) -> str: