feat: replace inline nudges with background memory/skill review (#2235)

Remove the memory and skill nudges that were appended directly to user
messages, causing backward-looking system instructions to compete with
forward-looking user tasks. Found in 43% of user messages across 15
sessions, with confirmed cases of the agent spending tool calls on
nudge responses before starting the user's actual request.

Replace with a background review agent that runs AFTER the main agent
finishes responding:
- Spawns a background thread with a snapshot of the conversation
- Uses the main model (not auxiliary) for high-precision memory/skill work
- Only has memory + skill_manage tools (5 iteration budget)
- Shares the memory store for direct writes
- Never modifies the main conversation history
- Never competes with the user's task for model attention
- Zero latency impact (runs after response is delivered)
- Same token cost (processes the same context, just on a separate track)

The trigger conditions are unchanged (every 10 user turns for memory,
after 10+ tool iterations for skills). Only the execution path changes:
from inline injection to background fork.

Closes #2227.

Co-authored-by: Test <test@test.com>
This commit is contained in:
Teknium
2026-03-20 18:51:31 -07:00
committed by GitHub
parent 2416b2b7af
commit 45058b4105

View File

@@ -974,7 +974,7 @@ class AIAgent:
self._skill_nudge_interval = 10
try:
skills_config = _agent_cfg.get("skills", {})
self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 15))
self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 10))
except Exception:
pass
@@ -1303,6 +1303,98 @@ class AIAgent:
if self.verbose_logging:
logging.warning(f"Failed to cleanup browser for task {task_id}: {e}")
# ------------------------------------------------------------------
# Background memory/skill review
# ------------------------------------------------------------------
# Prompts for the background review fork. One of these is appended as the
# forked agent's next user turn (see _spawn_background_review), depending on
# which trigger(s) fired. All three end with an explicit "Nothing to save."
# escape hatch so the fork can terminate cheaply when there is nothing to do.

# Used when only the memory trigger fired (turn-count based).
_MEMORY_REVIEW_PROMPT = (
    "Review the conversation above and consider saving to memory if appropriate.\n\n"
    "Focus on:\n"
    "1. Has the user revealed things about themselves — their persona, desires, "
    "preferences, or personal details worth remembering?\n"
    "2. Has the user expressed expectations about how you should behave, their work "
    "style, or ways they want you to operate?\n\n"
    "If something stands out, save it using the memory tool. "
    "If nothing is worth saving, just say 'Nothing to save.' and stop."
)
# Used when only the skill trigger fired (tool-iteration based).
_SKILL_REVIEW_PROMPT = (
    "Review the conversation above and consider saving or updating a skill if appropriate.\n\n"
    "Focus on: was a non-trivial approach used to complete a task that required trial "
    "and error, or changing course due to experiential findings along the way, or did "
    "the user expect or desire a different method or outcome?\n\n"
    "If a relevant skill already exists, update it with what you learned. "
    "Otherwise, create a new skill if the approach is reusable.\n"
    "If nothing is worth saving, just say 'Nothing to save.' and stop."
)
# Used when both triggers fired in the same turn; covers both concerns in a
# single forked run instead of spawning two review threads.
_COMBINED_REVIEW_PROMPT = (
    "Review the conversation above and consider two things:\n\n"
    "**Memory**: Has the user revealed things about themselves — their persona, "
    "desires, preferences, or personal details? Has the user expressed expectations "
    "about how you should behave, their work style, or ways they want you to operate? "
    "If so, save using the memory tool.\n\n"
    "**Skills**: Was a non-trivial approach used to complete a task that required trial "
    "and error, or changing course due to experiential findings along the way, or did "
    "the user expect or desire a different method or outcome? If a relevant skill "
    "already exists, update it. Otherwise, create a new one if the approach is reusable.\n\n"
    "Only act if there's something genuinely worth saving. "
    "If nothing stands out, just say 'Nothing to save.' and stop."
)
def _spawn_background_review(
    self,
    messages_snapshot: List[Dict],
    review_memory: bool = False,
    review_skills: bool = False,
) -> None:
    """Kick off a daemon thread that reviews the conversation for saves.

    A fresh AIAgent is forked with the same model/platform/provider as this
    session and handed ``messages_snapshot`` plus a review prompt as its
    next user turn. Memory/skill writes go straight to the shared stores;
    the main conversation history is never touched and nothing is surfaced
    to the user.

    Args:
        messages_snapshot: Copy of the conversation to review.
        review_memory: True when the turn-based memory trigger fired.
        review_skills: True when the iteration-based skill trigger fired.
    """
    import threading

    # Dispatch on exactly which trigger(s) fired; skill-only (and the
    # shouldn't-happen neither case) falls through to the skill prompt.
    prompt = {
        (True, True): self._COMBINED_REVIEW_PROMPT,
        (True, False): self._MEMORY_REVIEW_PROMPT,
    }.get((review_memory, review_skills), self._SKILL_REVIEW_PROMPT)

    def _review_worker():
        try:
            # Fork the full agent — identical model, tools, and context,
            # just quieter and with a smaller iteration budget. Keeping the
            # full toolset avoids invalidating prompt caching on the tool
            # schema.
            fork = AIAgent(
                model=self.model,
                max_iterations=8,
                quiet_mode=True,
                platform=self.platform,
                provider=self.provider,
            )
            # Point the fork at the live memory store so saves hit disk.
            fork._memory_store = self._memory_store
            fork._memory_enabled = self._memory_enabled
            fork._user_profile_enabled = self._user_profile_enabled
            # The fork must never trigger its own background review.
            fork._memory_nudge_interval = 0
            fork._skill_nudge_interval = 0
            fork.run_conversation(
                user_message=prompt,
                conversation_history=messages_snapshot,
            )
        except Exception as e:
            logger.debug("Background memory/skill review failed: %s", e)

    worker = threading.Thread(target=_review_worker, daemon=True, name="bg-review")
    worker.start()
def _apply_persist_user_message_override(self, messages: List[Dict]) -> None:
"""Rewrite the current-turn user message before persistence/return.
@@ -5218,36 +5310,22 @@ class AIAgent:
# Track user turns for memory flush and periodic nudge logic
self._user_turn_count += 1
# Preserve the original user message before nudge injection.
# Preserve the original user message (no nudge injection).
# Honcho should receive the actual user input, not system nudges.
original_user_message = persist_user_message if persist_user_message is not None else user_message
# Periodic memory nudge: remind the model to consider saving memories.
# Counter resets whenever the memory tool is actually used.
# Track memory nudge trigger (turn-based, checked here).
# Skill trigger is checked AFTER the agent loop completes, based on
# how many tool iterations THIS turn used.
_should_review_memory = False
if (self._memory_nudge_interval > 0
and "memory" in self.valid_tool_names
and self._memory_store):
self._turns_since_memory += 1
if self._turns_since_memory >= self._memory_nudge_interval:
user_message += (
"\n\n[System: You've had several exchanges. Consider: "
"has the user shared preferences, corrected you, or revealed "
"something about their workflow worth remembering for future sessions?]"
)
_should_review_memory = True
self._turns_since_memory = 0
# Skill creation nudge: fires on the first user message after a long tool loop.
# The counter increments per API iteration in the tool loop and is checked here.
if (self._skill_nudge_interval > 0
and self._iters_since_skill >= self._skill_nudge_interval
and "skill_manage" in self.valid_tool_names):
user_message += (
"\n\n[System: The previous task involved many tool calls. "
"Save the approach as a skill if it's reusable, or update "
"any existing skill you used if it was wrong or incomplete.]"
)
self._iters_since_skill = 0
# Honcho prefetch consumption:
# - First turn: bake into cached system prompt (stable for the session).
# - Later turns: attach recall to the current-turn user message at
@@ -6893,6 +6971,26 @@ class AIAgent:
# Clear stream callback so it doesn't leak into future calls
self._stream_callback = None
# Check skill trigger NOW — based on how many tool iterations THIS turn used.
_should_review_skills = False
if (self._skill_nudge_interval > 0
and self._iters_since_skill >= self._skill_nudge_interval
and "skill_manage" in self.valid_tool_names):
_should_review_skills = True
self._iters_since_skill = 0
# Background memory/skill review — runs AFTER the response is delivered
# so it never competes with the user's task for model attention.
if final_response and not interrupted and (_should_review_memory or _should_review_skills):
try:
self._spawn_background_review(
messages_snapshot=list(messages),
review_memory=_should_review_memory,
review_skills=_should_review_skills,
)
except Exception:
pass # Background review is best-effort
return result
def chat(self, message: str, stream_callback: Optional[callable] = None) -> str: