feat: marathon session limits — cap, checkpoint, rotate (closes #326)

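Motivation: 170 sessions exceeded 100 messages; the longest reached 1643 messages (~40h), with a 45-84% error rate. Config: agent.marathon.warn_at (default 200) and agent.marathon.cap_at (default 300); a value of 0 or less disables that limit. Soft cap: once the message count reaches warn_at, inject a single wrap-up nudge (sent only once). Hard cap: once the message count reaches cap_at, stop the tool loop and set turn_exit_reason to "marathon_cap_reached", which is now included in the returned result. 1 file, 29 insertions.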
[Poka-yoke] Context overflow guard with auto-compression
2026-04-13 22:30:40 -04:00 · 2026-04-13 22:23:18 -04:00
1 changed file with 29 additions and 1 deletion
--- a/run_agent.py
+++ b/run_agent.py
@@ -1247,6 +1247,14 @@ class AIAgent:
            _agent_section = {}
        self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")

+        # Marathon session limits (issue #326)
+        _marathon_cfg = _agent_section.get("marathon", {})
+        if not isinstance(_marathon_cfg, dict):
+            _marathon_cfg = {}
+        self._marathon_warn = int(_marathon_cfg.get("warn_at", 200))
+        self._marathon_cap = int(_marathon_cfg.get("cap_at", 300))
+        self._marathon_nudge_sent = False
+
        # Initialize context compressor for automatic context management
        # Compresses conversation when approaching model's context limit
        # Configuration via config.yaml (compression section)
@@ -8033,7 +8041,26 @@ class AIAgent:
                if not self.quiet_mode:
                    self._safe_print("\n⚡ Breaking out of tool loop due to interrupt...")
                break
-            
+
+            # Marathon session limits (#326)
+            _msg_count = len(messages)
+            if self._marathon_cap > 0 and _msg_count >= self._marathon_cap:
+                _turn_exit_reason = "marathon_cap_reached"
+                if not self.quiet_mode:
+                    self._safe_print(f"\n🛑 Cap: {_msg_count}/{self._marathon_cap}. Start fresh.")
+                messages.append({"role": "system", "content": (
+                    f"[SYSTEM: Session at {_msg_count} msgs. Error rate 45-84%. End session.]"
+                )})
+                break
+            elif (self._marathon_warn > 0 and _msg_count >= self._marathon_warn
+                    and not self._marathon_nudge_sent):
+                self._marathon_nudge_sent = True
+                messages.append({"role": "system", "content": (
+                    f"[SYSTEM: {_msg_count} msgs. Wrap up and start fresh.]"
+                )})
+                if not self.quiet_mode:
+                    self._safe_print(f"\n⚠️  {_msg_count} msgs. Consider fresh session.")
+
            api_call_count += 1
            self._api_call_count = api_call_count
            self._touch_activity(f"starting API call #{api_call_count}")
@@ -10512,6 +10539,7 @@ class AIAgent:
            "completed": completed,
            "partial": False,  # True only when stopped due to invalid tool calls
            "interrupted": interrupted,
+            "turn_exit_reason": _turn_exit_reason,
            "response_previewed": getattr(self, "_response_was_previewed", False),
            "model": self.model,
            "provider": self.provider,