feat: replace inline nudges with background memory/skill review (#2235)
Remove the memory and skill nudges that were appended directly to user messages, causing backward-looking system instructions to compete with forward-looking user tasks. Found in 43% of user messages across 15 sessions, with confirmed cases of the agent spending tool calls on nudge responses before starting the user's actual request. Replace with a background review agent that runs AFTER the main agent finishes responding: - Spawns a background thread with a snapshot of the conversation - Uses the main model (not auxiliary) for high-precision memory/skill work - Only has memory + skill_manage tools (5 iteration budget) - Shares the memory store for direct writes - Never modifies the main conversation history - Never competes with the user's task for model attention - Zero latency impact (runs after response is delivered) - Same token cost (processes the same context, just on a separate track) The trigger conditions are unchanged (every 10 user turns for memory, after 10+ tool iterations for skills). Only the execution path changes: from inline injection to background fork. Closes #2227. Co-authored-by: Test <test@test.com>
This commit is contained in:
140
run_agent.py
140
run_agent.py
@@ -974,7 +974,7 @@ class AIAgent:
|
||||
self._skill_nudge_interval = 10
|
||||
try:
|
||||
skills_config = _agent_cfg.get("skills", {})
|
||||
self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 15))
|
||||
self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 10))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -1303,6 +1303,98 @@ class AIAgent:
|
||||
if self.verbose_logging:
|
||||
logging.warning(f"Failed to cleanup browser for task {task_id}: {e}")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Background memory/skill review
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
_MEMORY_REVIEW_PROMPT = (
|
||||
"Review the conversation above and consider saving to memory if appropriate.\n\n"
|
||||
"Focus on:\n"
|
||||
"1. Has the user revealed things about themselves — their persona, desires, "
|
||||
"preferences, or personal details worth remembering?\n"
|
||||
"2. Has the user expressed expectations about how you should behave, their work "
|
||||
"style, or ways they want you to operate?\n\n"
|
||||
"If something stands out, save it using the memory tool. "
|
||||
"If nothing is worth saving, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
|
||||
_SKILL_REVIEW_PROMPT = (
|
||||
"Review the conversation above and consider saving or updating a skill if appropriate.\n\n"
|
||||
"Focus on: was a non-trivial approach used to complete a task that required trial "
|
||||
"and error, or changing course due to experiential findings along the way, or did "
|
||||
"the user expect or desire a different method or outcome?\n\n"
|
||||
"If a relevant skill already exists, update it with what you learned. "
|
||||
"Otherwise, create a new skill if the approach is reusable.\n"
|
||||
"If nothing is worth saving, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
|
||||
_COMBINED_REVIEW_PROMPT = (
|
||||
"Review the conversation above and consider two things:\n\n"
|
||||
"**Memory**: Has the user revealed things about themselves — their persona, "
|
||||
"desires, preferences, or personal details? Has the user expressed expectations "
|
||||
"about how you should behave, their work style, or ways they want you to operate? "
|
||||
"If so, save using the memory tool.\n\n"
|
||||
"**Skills**: Was a non-trivial approach used to complete a task that required trial "
|
||||
"and error, or changing course due to experiential findings along the way, or did "
|
||||
"the user expect or desire a different method or outcome? If a relevant skill "
|
||||
"already exists, update it. Otherwise, create a new one if the approach is reusable.\n\n"
|
||||
"Only act if there's something genuinely worth saving. "
|
||||
"If nothing stands out, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
|
||||
def _spawn_background_review(
|
||||
self,
|
||||
messages_snapshot: List[Dict],
|
||||
review_memory: bool = False,
|
||||
review_skills: bool = False,
|
||||
) -> None:
|
||||
"""Spawn a background thread to review the conversation for memory/skill saves.
|
||||
|
||||
Creates a full AIAgent fork with the same model, tools, and context as the
|
||||
main session. The review prompt is appended as the next user turn in the
|
||||
forked conversation. Writes directly to the shared memory/skill stores.
|
||||
Never modifies the main conversation history or produces user-visible output.
|
||||
"""
|
||||
import threading
|
||||
|
||||
# Pick the right prompt based on which triggers fired
|
||||
if review_memory and review_skills:
|
||||
prompt = self._COMBINED_REVIEW_PROMPT
|
||||
elif review_memory:
|
||||
prompt = self._MEMORY_REVIEW_PROMPT
|
||||
else:
|
||||
prompt = self._SKILL_REVIEW_PROMPT
|
||||
|
||||
def _run_review():
|
||||
try:
|
||||
# Full agent fork — same model, tools, context. Just a tighter
|
||||
# iteration budget and quiet mode. No toolset filtering so we
|
||||
# don't break prompt caching on the tool schema.
|
||||
review_agent = AIAgent(
|
||||
model=self.model,
|
||||
max_iterations=8,
|
||||
quiet_mode=True,
|
||||
platform=self.platform,
|
||||
provider=self.provider,
|
||||
)
|
||||
# Share the memory store so writes persist to disk
|
||||
review_agent._memory_store = self._memory_store
|
||||
review_agent._memory_enabled = self._memory_enabled
|
||||
review_agent._user_profile_enabled = self._user_profile_enabled
|
||||
# Disable nudges in the review agent to prevent recursion
|
||||
review_agent._memory_nudge_interval = 0
|
||||
review_agent._skill_nudge_interval = 0
|
||||
|
||||
review_agent.run_conversation(
|
||||
user_message=prompt,
|
||||
conversation_history=messages_snapshot,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Background memory/skill review failed: %s", e)
|
||||
|
||||
t = threading.Thread(target=_run_review, daemon=True, name="bg-review")
|
||||
t.start()
|
||||
|
||||
def _apply_persist_user_message_override(self, messages: List[Dict]) -> None:
|
||||
"""Rewrite the current-turn user message before persistence/return.
|
||||
|
||||
@@ -5218,36 +5310,22 @@ class AIAgent:
|
||||
# Track user turns for memory flush and periodic nudge logic
|
||||
self._user_turn_count += 1
|
||||
|
||||
# Preserve the original user message before nudge injection.
|
||||
# Preserve the original user message (no nudge injection).
|
||||
# Honcho should receive the actual user input, not system nudges.
|
||||
original_user_message = persist_user_message if persist_user_message is not None else user_message
|
||||
|
||||
# Periodic memory nudge: remind the model to consider saving memories.
|
||||
# Counter resets whenever the memory tool is actually used.
|
||||
# Track memory nudge trigger (turn-based, checked here).
|
||||
# Skill trigger is checked AFTER the agent loop completes, based on
|
||||
# how many tool iterations THIS turn used.
|
||||
_should_review_memory = False
|
||||
if (self._memory_nudge_interval > 0
|
||||
and "memory" in self.valid_tool_names
|
||||
and self._memory_store):
|
||||
self._turns_since_memory += 1
|
||||
if self._turns_since_memory >= self._memory_nudge_interval:
|
||||
user_message += (
|
||||
"\n\n[System: You've had several exchanges. Consider: "
|
||||
"has the user shared preferences, corrected you, or revealed "
|
||||
"something about their workflow worth remembering for future sessions?]"
|
||||
)
|
||||
_should_review_memory = True
|
||||
self._turns_since_memory = 0
|
||||
|
||||
# Skill creation nudge: fires on the first user message after a long tool loop.
|
||||
# The counter increments per API iteration in the tool loop and is checked here.
|
||||
if (self._skill_nudge_interval > 0
|
||||
and self._iters_since_skill >= self._skill_nudge_interval
|
||||
and "skill_manage" in self.valid_tool_names):
|
||||
user_message += (
|
||||
"\n\n[System: The previous task involved many tool calls. "
|
||||
"Save the approach as a skill if it's reusable, or update "
|
||||
"any existing skill you used if it was wrong or incomplete.]"
|
||||
)
|
||||
self._iters_since_skill = 0
|
||||
|
||||
# Honcho prefetch consumption:
|
||||
# - First turn: bake into cached system prompt (stable for the session).
|
||||
# - Later turns: attach recall to the current-turn user message at
|
||||
@@ -6893,6 +6971,26 @@ class AIAgent:
|
||||
# Clear stream callback so it doesn't leak into future calls
|
||||
self._stream_callback = None
|
||||
|
||||
# Check skill trigger NOW — based on how many tool iterations THIS turn used.
|
||||
_should_review_skills = False
|
||||
if (self._skill_nudge_interval > 0
|
||||
and self._iters_since_skill >= self._skill_nudge_interval
|
||||
and "skill_manage" in self.valid_tool_names):
|
||||
_should_review_skills = True
|
||||
self._iters_since_skill = 0
|
||||
|
||||
# Background memory/skill review — runs AFTER the response is delivered
|
||||
# so it never competes with the user's task for model attention.
|
||||
if final_response and not interrupted and (_should_review_memory or _should_review_skills):
|
||||
try:
|
||||
self._spawn_background_review(
|
||||
messages_snapshot=list(messages),
|
||||
review_memory=_should_review_memory,
|
||||
review_skills=_should_review_skills,
|
||||
)
|
||||
except Exception:
|
||||
pass # Background review is best-effort
|
||||
|
||||
return result
|
||||
|
||||
def chat(self, message: str, stream_callback: Optional[callable] = None) -> str:
|
||||
|
||||
Reference in New Issue
Block a user