Files
hermes-agent/tests/gateway/test_marathon_session_limits.py
Timmy e334c5256c
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 54s
feat: marathon session limits — cap, checkpoint, rotate (#326)
- Add max_messages (default 200) to SessionResetPolicy
- Track message_count in SessionEntry (persisted to sessions.json)
- Add 'message_limit' reset reason to _should_reset
- Auto-checkpoint filesystem before session rotation
- Inject near-limit warnings (85%/100%) into agent ephemeral prompt
- Auto-rotate sessions when message cap is hit
- Add get_message_limit_info() and reset_message_count() APIs
- 24 new tests covering all limit behaviors

Evidence: 170 sessions exceed 100 msgs, longest 1,643 msgs (40h).
Marathon sessions show 45-84% error rates from tool fixation.
Cap + checkpoint + restart breaks the death spiral.
2026-04-13 18:51:23 -04:00

185 lines
5.9 KiB
Python

"""Tests for marathon session limits (#326)."""
import pytest
from datetime import datetime
from pathlib import Path
from tempfile import mkdtemp
from gateway.config import GatewayConfig, Platform, SessionResetPolicy
from gateway.session import SessionEntry, SessionSource, SessionStore
def _source(platform=Platform.LOCAL, chat_id="test"):
    """Build the ``SessionSource`` used as the caller identity in these tests.

    Args:
        platform: Platform passed through to ``SessionSource``.
        chat_id: Chat identifier passed through to ``SessionSource``.

    Returns:
        A ``SessionSource`` with ``chat_type="dm"`` and ``user_id="u1"``.
    """
    source_kwargs = {
        "platform": platform,
        "chat_id": chat_id,
        "chat_type": "dm",
        "user_id": "u1",
    }
    return SessionSource(**source_kwargs)
def _store(max_messages=200, mode="both"):
    """Create a ``SessionStore`` rooted in a fresh temporary directory.

    Args:
        max_messages: Forwarded to ``SessionResetPolicy(max_messages=...)``.
        mode: Forwarded to ``SessionResetPolicy(mode=...)``.

    Returns:
        A ``SessionStore`` built from the temporary directory and a
        ``GatewayConfig`` whose ``default_reset_policy`` is the policy
        described by the arguments.
    """
    # Function-scope imports keep this helper self-contained.
    import atexit
    import shutil

    cfg = GatewayConfig()
    cfg.default_reset_policy = SessionResetPolicy(
        mode=mode,
        max_messages=max_messages,
    )

    store_dir = Path(mkdtemp())
    # mkdtemp() leaves deletion to the caller; without this, every call
    # leaves a directory behind. Cleanup is best-effort (errors are ignored)
    # and deferred to interpreter exit so the store stays usable for the
    # whole test run.
    atexit.register(shutil.rmtree, store_dir, ignore_errors=True)
    return SessionStore(store_dir, cfg)
class TestSessionResetPolicyMaxMessages:
    """Checks on ``SessionResetPolicy.max_messages``: default, overrides, dict I/O."""

    def test_default(self):
        """A policy built without arguments reports 200."""
        policy = SessionResetPolicy()
        assert policy.max_messages == 200

    def test_custom(self):
        """An explicit value is kept as given."""
        policy = SessionResetPolicy(max_messages=500)
        assert policy.max_messages == 500

    def test_unlimited(self):
        """Zero is accepted and stored unchanged."""
        policy = SessionResetPolicy(max_messages=0)
        assert policy.max_messages == 0

    def test_to_dict(self):
        """``to_dict()`` exposes the value under the ``max_messages`` key."""
        serialized = SessionResetPolicy(max_messages=300).to_dict()
        assert serialized["max_messages"] == 300

    def test_from_dict(self):
        """``from_dict()`` reads the value from the ``max_messages`` key."""
        policy = SessionResetPolicy.from_dict({"max_messages": 150})
        assert policy.max_messages == 150

    def test_from_dict_default(self):
        """``from_dict()`` on an empty dict yields 200."""
        policy = SessionResetPolicy.from_dict({})
        assert policy.max_messages == 200
class TestSessionEntryMessageCount:
    """Checks on the ``message_count`` field of ``SessionEntry``."""

    def test_default(self):
        """An entry created without ``message_count`` starts at zero."""
        entry = SessionEntry(
            session_key="k",
            session_id="s",
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )
        assert entry.message_count == 0

    def test_to_dict(self):
        """``to_dict()`` includes the count under ``message_count``."""
        entry = SessionEntry(
            session_key="k",
            session_id="s",
            created_at=datetime.now(),
            updated_at=datetime.now(),
            message_count=42,
        )
        serialized = entry.to_dict()
        assert serialized["message_count"] == 42

    def test_from_dict(self):
        """``from_dict()`` restores the count from ``message_count``."""
        payload = {
            "session_key": "k",
            "session_id": "s",
            "created_at": "2026-01-01T00:00:00",
            "updated_at": "2026-01-01T00:00:00",
            "message_count": 99,
        }
        entry = SessionEntry.from_dict(payload)
        assert entry.message_count == 99
class TestShouldResetMessageLimit:
    """Checks on ``SessionStore._should_reset`` against the message cap."""

    @staticmethod
    def _reset_reason(message_count, **store_kwargs):
        """Return ``_should_reset`` for a new session forced to *message_count*.

        ``store_kwargs`` are forwarded to the module-level ``_store`` helper.
        """
        store = _store(**store_kwargs)
        source = _source()
        entry = store.get_or_create_session(source)
        entry.message_count = message_count
        return store._should_reset(entry, source)

    def test_at_limit(self):
        """A count equal to the default cap of 200 reports ``message_limit``."""
        assert self._reset_reason(200) == "message_limit"

    def test_over_limit(self):
        """A count above the default cap also reports ``message_limit``."""
        assert self._reset_reason(250) == "message_limit"

    def test_below_limit(self):
        """A count of 100 under the default cap gives no reset reason."""
        assert self._reset_reason(100) is None

    def test_unlimited(self):
        """With ``max_messages=0`` and ``mode="none"`` a huge count never resets."""
        assert self._reset_reason(9999, max_messages=0, mode="none") is None

    def test_custom_limit(self):
        """A count equal to a custom cap of 50 reports ``message_limit``."""
        assert self._reset_reason(50, max_messages=50) == "message_limit"

    def test_just_under(self):
        """One message short of a custom cap of 50 gives no reset reason."""
        assert self._reset_reason(49, max_messages=50) is None
class TestAppendIncrementsCount:
    """Checks how ``append_to_transcript`` affects a session's ``message_count``."""

    def test_user_message(self):
        """One appended user message gives a count of 1."""
        store = _store()
        source = _source()
        session_id = store.get_or_create_session(source).session_id
        store.append_to_transcript(session_id, {"role": "user", "content": "hi"})
        # Fetch the entry again so the assertion sees the store's current state.
        refreshed = store.get_or_create_session(source)
        assert refreshed.message_count == 1

    def test_assistant_message(self):
        """A user message followed by an assistant message gives a count of 2."""
        store = _store()
        source = _source()
        session_id = store.get_or_create_session(source).session_id
        store.append_to_transcript(session_id, {"role": "user", "content": "hi"})
        store.append_to_transcript(
            session_id, {"role": "assistant", "content": "hello"}
        )
        refreshed = store.get_or_create_session(source)
        assert refreshed.message_count == 2

    def test_meta_not_counted(self):
        """A ``session_meta`` record leaves the count at 0."""
        store = _store()
        source = _source()
        session_id = store.get_or_create_session(source).session_id
        store.append_to_transcript(session_id, {"role": "session_meta", "tools": []})
        refreshed = store.get_or_create_session(source)
        assert refreshed.message_count == 0
class TestGetMessageLimitInfo:
    """Checks on the dict returned by ``SessionStore.get_message_limit_info``."""

    def test_at_limit(self):
        """At 200 of 200 messages both flags are set and nothing remains."""
        store = _store()
        entry = store.get_or_create_session(_source())
        entry.message_count = 200
        limit_info = store.get_message_limit_info(entry.session_key)
        assert limit_info["at_limit"] is True
        assert limit_info["near_limit"] is True
        assert limit_info["remaining"] == 0

    def test_near_limit(self):
        """At 180 of 200 messages only ``near_limit`` is set; 20 remain."""
        store = _store()
        entry = store.get_or_create_session(_source())
        entry.message_count = 180
        limit_info = store.get_message_limit_info(entry.session_key)
        assert limit_info["near_limit"] is True
        assert limit_info["at_limit"] is False
        assert limit_info["remaining"] == 20

    def test_well_below(self):
        """At 50 of 200 messages neither flag is set."""
        store = _store()
        entry = store.get_or_create_session(_source())
        entry.message_count = 50
        limit_info = store.get_message_limit_info(entry.session_key)
        assert limit_info["near_limit"] is False
        assert limit_info["at_limit"] is False

    def test_unknown(self):
        """A session key the store has never seen is not reported as at the limit."""
        store = _store()
        limit_info = store.get_message_limit_info("nonexistent")
        assert limit_info["at_limit"] is False
class TestResetMessageCount:
    """Checks on ``SessionStore.reset_message_count``."""

    def test_reset(self):
        """After a reset, the limit info reports a ``message_count`` of 0."""
        store = _store()
        entry = store.get_or_create_session(_source())
        entry.message_count = 150
        session_key = entry.session_key
        store.reset_message_count(session_key)
        limit_info = store.get_message_limit_info(session_key)
        assert limit_info["message_count"] == 0
class TestSessionRotation:
    """Checks on the entry returned by ``SessionStore.reset_session``."""

    def test_fresh_count_after_reset(self):
        """Resetting a session at the cap yields a new id and a zero count."""
        store = _store()
        previous = store.get_or_create_session(_source())
        previous.message_count = 200
        rotated = store.reset_session(previous.session_key)
        assert rotated is not None
        assert rotated.message_count == 0
        # The old entry's id is read after the reset, as in the original test.
        assert rotated.session_id != previous.session_id