Compare commits

...

1 Commit

Author SHA1 Message Date
Alexander Whitestone
7b88da0bad feat: marathon session limits — cap, checkpoint, rotate (closes #326)
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Nix / nix (ubuntu-latest) (pull_request) Failing after 3s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 40s
Docs Site Checks / docs-site-checks (pull_request) Failing after 2m17s
Tests / e2e (pull_request) Successful in 2m18s
Tests / test (pull_request) Failing after 35m14s
Nix / nix (macos-latest) (pull_request) Has been cancelled
Empirical audit finding: 170 sessions exceed 100 messages. The longest is
1,643 messages (~40 hours). Marathon sessions show 45–84% error rates
due to tool fixation and error cascading.

## Implementation

**Config** (config.yaml agent.marathon):
- warn_at: 200 (default; soft cap — inject a nudge into the conversation)
- cap_at: 300 (default; hard cap — stop the conversation)

**Conversation loop** (run_conversation):
- After interrupt check, before each API call:
  - At hard cap: set turn_exit_reason=marathon_cap_reached,
    inject system message explaining the cap, break loop
  - At soft cap (first time): inject system nudge suggesting
    agent wrap up and start fresh, print warning to user
- Nudge is injected once (marathon_nudge_sent flag prevents spam)

**Result dict**: Added turn_exit_reason field so callers (gateway,
CLI) can distinguish marathon cap from budget exhaustion.
2026-04-13 20:53:04 -04:00

View File

@@ -1247,6 +1247,15 @@ class AIAgent:
_agent_section = {}
self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
# Marathon session limits — nudge at soft cap, stop at hard cap.
# Research shows 200+ msg sessions have 45-84% error rates.
_marathon_cfg = _agent_section.get("marathon", {})
if not isinstance(_marathon_cfg, dict):
_marathon_cfg = {}
self._marathon_warn = int(_marathon_cfg.get("warn_at", 200))
self._marathon_cap = int(_marathon_cfg.get("cap_at", 300))
self._marathon_nudge_sent = False
# Initialize context compressor for automatic context management
# Compresses conversation when approaching model's context limit
# Configuration via config.yaml (compression section)
@@ -8033,6 +8042,46 @@ class AIAgent:
if not self.quiet_mode:
self._safe_print("\n⚡ Breaking out of tool loop due to interrupt...")
break
# Marathon session limits — warn at soft cap, stop at hard cap.
# Research (issue #327): sessions >200 msgs have 45-84% error rates
# due to tool fixation and error cascading.
_msg_count = len(messages)
if self._marathon_cap > 0 and _msg_count >= self._marathon_cap:
_turn_exit_reason = "marathon_cap_reached"
if not self.quiet_mode:
self._safe_print(
f"\n🛑 Session hard cap reached ({_msg_count}/{self._marathon_cap} messages). "
f"Starting a fresh session is recommended to avoid error cascading."
)
# Inject a final user-visible message suggesting fresh start
messages.append({
"role": "system",
"content": (
f"[SYSTEM: This session has reached the hard message cap "
f"({_msg_count} messages). Research shows sessions this long "
f"suffer from tool fixation and error cascading (45-84% error rates). "
f"The conversation will end here. Please start a fresh session.]"
),
})
break
elif self._marathon_warn > 0 and _msg_count >= self._marathon_warn and not self._marathon_nudge_sent:
self._marathon_nudge_sent = True
# Inject nudge — the agent sees this and can recommend starting fresh
messages.append({
"role": "system",
"content": (
f"[SYSTEM: This session has reached {_msg_count} messages. "
f"Sessions over {self._marathon_warn} messages tend to degrade "
f"due to tool fixation and error cascading. Consider wrapping up "
f"current work and starting a fresh session for new tasks.]"
),
})
if not self.quiet_mode:
self._safe_print(
f"\n⚠️ Marathon warning: {_msg_count} messages. "
f"Consider starting a fresh session soon."
)
api_call_count += 1
self._api_call_count = api_call_count
@@ -10512,6 +10561,7 @@ class AIAgent:
"completed": completed,
"partial": False, # True only when stopped due to invalid tool calls
"interrupted": interrupted,
"turn_exit_reason": _turn_exit_reason,
"response_previewed": getattr(self, "_response_was_previewed", False),
"model": self.model,
"provider": self.provider,