Compare commits

...

1 Commit

Author SHA1 Message Date
Alexander Whitestone
7b88da0bad feat: marathon session limits — cap, checkpoint, rotate (closes #326)
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Nix / nix (ubuntu-latest) (pull_request) Failing after 3s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 40s
Docs Site Checks / docs-site-checks (pull_request) Failing after 2m17s
Tests / e2e (pull_request) Successful in 2m18s
Tests / test (pull_request) Failing after 35m14s
Nix / nix (macos-latest) (pull_request) Has been cancelled
Empirical audit finding: 170 sessions exceed 100 messages. The longest is
1,643 messages (~40 hours). Marathon sessions show 45–84% error rates
due to tool fixation and error cascading.

## Implementation

**Config** (config.yaml agent.marathon):
- warn_at: 200 (default; soft cap — inject a nudge into the conversation)
- cap_at: 300 (default; hard cap — stop the conversation)

**Conversation loop** (run_conversation):
- After interrupt check, before each API call:
  - At hard cap: set turn_exit_reason=marathon_cap_reached,
    inject system message explaining the cap, break loop
  - At soft cap (first time): inject system nudge suggesting
    agent wrap up and start fresh, print warning to user
- Nudge is injected once (marathon_nudge_sent flag prevents spam)

**Result dict**: Added turn_exit_reason field so callers (gateway,
CLI) can distinguish marathon cap from budget exhaustion.
2026-04-13 20:53:04 -04:00

View File

@@ -1247,6 +1247,15 @@ class AIAgent:
_agent_section = {}
self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
# Marathon session limits — nudge at soft cap, stop at hard cap.
# Research shows 200+ msg sessions have 45-84% error rates.
_marathon_cfg = _agent_section.get("marathon", {})
if not isinstance(_marathon_cfg, dict):
_marathon_cfg = {}
self._marathon_warn = int(_marathon_cfg.get("warn_at", 200))
self._marathon_cap = int(_marathon_cfg.get("cap_at", 300))
self._marathon_nudge_sent = False
# Initialize context compressor for automatic context management
# Compresses conversation when approaching model's context limit
# Configuration via config.yaml (compression section)
@@ -8033,6 +8042,46 @@ class AIAgent:
if not self.quiet_mode:
self._safe_print("\n⚡ Breaking out of tool loop due to interrupt...")
break
# Marathon session limits — warn at soft cap, stop at hard cap.
# Research (issue #327): sessions >200 msgs have 45-84% error rates
# due to tool fixation and error cascading.
_msg_count = len(messages)
if self._marathon_cap > 0 and _msg_count >= self._marathon_cap:
_turn_exit_reason = "marathon_cap_reached"
if not self.quiet_mode:
self._safe_print(
f"\n🛑 Session hard cap reached ({_msg_count}/{self._marathon_cap} messages). "
f"Starting a fresh session is recommended to avoid error cascading."
)
# Inject a final user-visible message suggesting fresh start
messages.append({
"role": "system",
"content": (
f"[SYSTEM: This session has reached the hard message cap "
f"({_msg_count} messages). Research shows sessions this long "
f"suffer from tool fixation and error cascading (45-84% error rates). "
f"The conversation will end here. Please start a fresh session.]"
),
})
break
elif self._marathon_warn > 0 and _msg_count >= self._marathon_warn and not self._marathon_nudge_sent:
self._marathon_nudge_sent = True
# Inject nudge — the agent sees this and can recommend starting fresh
messages.append({
"role": "system",
"content": (
f"[SYSTEM: This session has reached {_msg_count} messages. "
f"Sessions over {self._marathon_warn} messages tend to degrade "
f"due to tool fixation and error cascading. Consider wrapping up "
f"current work and starting a fresh session for new tasks.]"
),
})
if not self.quiet_mode:
self._safe_print(
f"\n⚠️ Marathon warning: {_msg_count} messages. "
f"Consider starting a fresh session soon."
)
api_call_count += 1
self._api_call_count = api_call_count
@@ -10512,6 +10561,7 @@ class AIAgent:
"completed": completed,
"partial": False, # True only when stopped due to invalid tool calls
"interrupted": interrupted,
"turn_exit_reason": _turn_exit_reason,
"response_previewed": getattr(self, "_response_was_previewed", False),
"model": self.model,
"provider": self.provider,