Compare commits
1 Commits
burn/317-1
...
burn/326-1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e334c5256c |
@@ -1,11 +1,10 @@
|
||||
"""Helpers for optional cheap-vs-strong and time-aware model routing."""
|
||||
"""Helpers for optional cheap-vs-strong model routing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from utils import is_truthy_value
|
||||
|
||||
@@ -193,105 +192,3 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any
|
||||
tuple(runtime.get("args") or ()),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Time-aware cron model routing
|
||||
# =========================================================================
|
||||
#
|
||||
# Empirical finding: cron error rate peaks at 18:00 (9.4%) vs 4.0% at 09:00.
|
||||
# During high-error windows, route cron jobs to more capable models.
|
||||
#
|
||||
# Config (config.yaml):
|
||||
# cron_model_routing:
|
||||
# enabled: true
|
||||
# fallback_model: "anthropic/claude-sonnet-4"
|
||||
# fallback_provider: "openrouter"
|
||||
# windows:
|
||||
# - start_hour: 17
|
||||
# end_hour: 22
|
||||
# reason: "evening_error_peak"
|
||||
# - start_hour: 2
|
||||
# end_hour: 5
|
||||
# reason: "overnight_api_instability"
|
||||
# =========================================================================
|
||||
|
||||
def _hour_in_window(hour: int, start: int, end: int) -> bool:
|
||||
"""Check if hour falls in [start, end) window, handling midnight wrap."""
|
||||
if start <= end:
|
||||
return start <= hour < end
|
||||
else:
|
||||
# Wraps midnight: e.g., 22-06
|
||||
return hour >= start or hour < end
|
||||
|
||||
|
||||
def resolve_cron_model(
|
||||
base_model: str,
|
||||
routing_config: Optional[Dict[str, Any]],
|
||||
now: Optional[datetime] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Apply time-aware model override for cron jobs.
|
||||
|
||||
During configured high-error windows, returns a stronger model config.
|
||||
Outside windows, returns the base model unchanged.
|
||||
|
||||
Args:
|
||||
base_model: The model string already resolved (from job/config/env).
|
||||
routing_config: The cron_model_routing dict from config.yaml.
|
||||
now: Override current time (for testing). Defaults to datetime.now().
|
||||
|
||||
Returns:
|
||||
Dict with keys: model, provider, overridden, reason.
|
||||
- model: the effective model string to use
|
||||
- provider: provider override (empty string = use default)
|
||||
- overridden: True if time-based override was applied
|
||||
- reason: why override was applied (empty string if not)
|
||||
"""
|
||||
cfg = routing_config or {}
|
||||
|
||||
if not _coerce_bool(cfg.get("enabled"), False):
|
||||
return {"model": base_model, "provider": "", "overridden": False, "reason": ""}
|
||||
|
||||
windows = cfg.get("windows") or []
|
||||
if not isinstance(windows, list) or not windows:
|
||||
return {"model": base_model, "provider": "", "overridden": False, "reason": ""}
|
||||
|
||||
current = now or datetime.now()
|
||||
current_hour = current.hour
|
||||
|
||||
matched_window = None
|
||||
for window in windows:
|
||||
if not isinstance(window, dict):
|
||||
continue
|
||||
start = _coerce_int(window.get("start_hour"), -1)
|
||||
end = _coerce_int(window.get("end_hour"), -1)
|
||||
if start < 0 or end < 0:
|
||||
continue
|
||||
if _hour_in_window(current_hour, start, end):
|
||||
matched_window = window
|
||||
break
|
||||
|
||||
if not matched_window:
|
||||
return {"model": base_model, "provider": "", "overridden": False, "reason": ""}
|
||||
|
||||
# Window matched — use the override model from window or global fallback
|
||||
override_model = str(matched_window.get("model") or "").strip()
|
||||
override_provider = str(matched_window.get("provider") or "").strip()
|
||||
|
||||
if not override_model:
|
||||
override_model = str(cfg.get("fallback_model") or "").strip()
|
||||
if not override_provider:
|
||||
override_provider = str(cfg.get("fallback_provider") or "").strip()
|
||||
|
||||
if not override_model:
|
||||
# No override configured — use base model
|
||||
return {"model": base_model, "provider": "", "overridden": False, "reason": ""}
|
||||
|
||||
reason = str(matched_window.get("reason") or "time_window").strip()
|
||||
|
||||
return {
|
||||
"model": override_model,
|
||||
"provider": override_provider,
|
||||
"overridden": True,
|
||||
"reason": f"cron_routing:{reason}(hour={current_hour})",
|
||||
}
|
||||
|
||||
@@ -717,22 +717,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
|
||||
# Reasoning config from env or config.yaml
|
||||
from hermes_constants import parse_reasoning_effort
|
||||
|
||||
# Time-aware cron model routing — override model during high-error windows
|
||||
try:
|
||||
from agent.smart_model_routing import resolve_cron_model
|
||||
_cron_routing_cfg = (_cfg.get("cron_model_routing") or {})
|
||||
_cron_route = resolve_cron_model(model, _cron_routing_cfg)
|
||||
if _cron_route["overridden"]:
|
||||
_original_model = model
|
||||
model = _cron_route["model"]
|
||||
logger.info(
|
||||
"Job '%s': cron model override %s → %s (%s)",
|
||||
job_id, _original_model, model, _cron_route["reason"],
|
||||
)
|
||||
except Exception as _e:
|
||||
logger.debug("Job '%s': cron model routing skipped: %s", job_id, _e)
|
||||
|
||||
effort = os.getenv("HERMES_REASONING_EFFORT", "")
|
||||
if not effort:
|
||||
effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()
|
||||
|
||||
@@ -107,6 +107,7 @@ class SessionResetPolicy:
|
||||
mode: str = "both" # "daily", "idle", "both", or "none"
|
||||
at_hour: int = 4 # Hour for daily reset (0-23, local time)
|
||||
idle_minutes: int = 1440 # Minutes of inactivity before reset (24 hours)
|
||||
max_messages: int = 200 # Max messages per session before forced checkpoint+restart (0 = unlimited)
|
||||
notify: bool = True # Send a notification to the user when auto-reset occurs
|
||||
notify_exclude_platforms: tuple = ("api_server", "webhook") # Platforms that don't get reset notifications
|
||||
|
||||
@@ -115,6 +116,7 @@ class SessionResetPolicy:
|
||||
"mode": self.mode,
|
||||
"at_hour": self.at_hour,
|
||||
"idle_minutes": self.idle_minutes,
|
||||
"max_messages": self.max_messages,
|
||||
"notify": self.notify,
|
||||
"notify_exclude_platforms": list(self.notify_exclude_platforms),
|
||||
}
|
||||
@@ -125,12 +127,14 @@ class SessionResetPolicy:
|
||||
mode = data.get("mode")
|
||||
at_hour = data.get("at_hour")
|
||||
idle_minutes = data.get("idle_minutes")
|
||||
max_messages = data.get("max_messages")
|
||||
notify = data.get("notify")
|
||||
exclude = data.get("notify_exclude_platforms")
|
||||
return cls(
|
||||
mode=mode if mode is not None else "both",
|
||||
at_hour=at_hour if at_hour is not None else 4,
|
||||
idle_minutes=idle_minutes if idle_minutes is not None else 1440,
|
||||
max_messages=max_messages if max_messages is not None else 200,
|
||||
notify=notify if notify is not None else True,
|
||||
notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
|
||||
)
|
||||
|
||||
@@ -2343,6 +2343,12 @@ class GatewayRunner:
|
||||
reset_reason = getattr(session_entry, 'auto_reset_reason', None) or 'idle'
|
||||
if reset_reason == "daily":
|
||||
context_note = "[System note: The user's session was automatically reset by the daily schedule. This is a fresh conversation with no prior context.]"
|
||||
elif reset_reason == "message_limit":
|
||||
context_note = (
|
||||
"[System note: The user's previous session reached the message limit "
|
||||
"and was automatically checkpointed and rotated. This is a fresh session. "
|
||||
"If the user references something from before, you can search session history.]"
|
||||
)
|
||||
else:
|
||||
context_note = "[System note: The user's previous session expired due to inactivity. This is a fresh conversation with no prior context.]"
|
||||
context_prompt = context_note + "\n\n" + context_prompt
|
||||
@@ -2368,16 +2374,18 @@ class GatewayRunner:
|
||||
if adapter:
|
||||
if reset_reason == "daily":
|
||||
reason_text = f"daily schedule at {policy.at_hour}:00"
|
||||
elif reset_reason == "message_limit":
|
||||
reason_text = f"reached {policy.max_messages} message limit"
|
||||
else:
|
||||
hours = policy.idle_minutes // 60
|
||||
mins = policy.idle_minutes % 60
|
||||
duration = f"{hours}h" if not mins else f"{hours}h {mins}m" if hours else f"{mins}m"
|
||||
reason_text = f"inactive for {duration}"
|
||||
notice = (
|
||||
f"◐ Session automatically reset ({reason_text}). "
|
||||
f"Conversation history cleared.\n"
|
||||
f"◐ Session automatically rotated ({reason_text}). "
|
||||
f"Conversation was preserved via checkpoint.\n"
|
||||
f"Use /resume to browse and restore a previous session.\n"
|
||||
f"Adjust reset timing in config.yaml under session_reset."
|
||||
f"Adjust limits in config.yaml under session_reset."
|
||||
)
|
||||
try:
|
||||
session_info = self._format_session_info()
|
||||
@@ -3073,6 +3081,39 @@ class GatewayRunner:
|
||||
last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
|
||||
)
|
||||
|
||||
# Marathon session limit (#326): check if we hit the message cap.
|
||||
# Auto-checkpoint filesystem and rotate session.
|
||||
try:
|
||||
_post_limit = self.session_store.get_message_limit_info(session_key)
|
||||
if _post_limit["at_limit"] and _post_limit["max_messages"] > 0:
|
||||
logger.info(
|
||||
"[Marathon] Session %s hit message limit (%d/%d). Rotating.",
|
||||
session_key, _post_limit["message_count"], _post_limit["max_messages"],
|
||||
)
|
||||
# Attempt filesystem checkpoint before rotation
|
||||
try:
|
||||
from tools.checkpoint_manager import CheckpointManager
|
||||
_cp_cfg_path = _hermes_home / "config.yaml"
|
||||
if _cp_cfg_path.exists():
|
||||
import yaml as _cp_yaml
|
||||
with open(_cp_cfg_path, encoding="utf-8") as _cpf:
|
||||
_cp_data = _cp_yaml.safe_load(_cpf) or {}
|
||||
_cp_settings = _cp_data.get("checkpoints", {})
|
||||
if _cp_settings.get("enabled"):
|
||||
_cwd = _cp_settings.get("working_dir") or os.getcwd()
|
||||
mgr = CheckpointManager(max_checkpoints=_cp_settings.get("max_checkpoints", 20))
|
||||
cp = mgr.create_checkpoint(str(_cwd), label=f"marathon-{session_entry.session_id[:8]}")
|
||||
if cp:
|
||||
logger.info("[Marathon] Checkpoint: %s", cp.label)
|
||||
except Exception as cp_err:
|
||||
logger.debug("[Marathon] Checkpoint failed (non-fatal): %s", cp_err)
|
||||
|
||||
new_entry = self.session_store.reset_session(session_key)
|
||||
if new_entry:
|
||||
logger.info("[Marathon] Rotated: %s -> %s", session_entry.session_id, new_entry.session_id)
|
||||
except Exception as rot_err:
|
||||
logger.debug("[Marathon] Rotation check failed: %s", rot_err)
|
||||
|
||||
# Auto voice reply: send TTS audio before the text response
|
||||
_already_sent = bool(agent_result.get("already_sent"))
|
||||
if self._should_send_voice_reply(event, response, agent_messages, already_sent=_already_sent):
|
||||
@@ -6538,6 +6579,26 @@ class GatewayRunner:
|
||||
if self._ephemeral_system_prompt:
|
||||
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
|
||||
|
||||
# Marathon session limit warning (#326)
|
||||
try:
|
||||
_limit_info = self.session_store.get_message_limit_info(session_key)
|
||||
if _limit_info["near_limit"] and not _limit_info["at_limit"]:
|
||||
_remaining = _limit_info["remaining"]
|
||||
_limit_warn = (
|
||||
f"[SESSION LIMIT: This session has {_limit_info['message_count']} messages. "
|
||||
f"Only {_remaining} message(s) remain before automatic session rotation at "
|
||||
f"{_limit_info['max_messages']} messages. Start wrapping up and save important state.]"
|
||||
)
|
||||
combined_ephemeral = (combined_ephemeral + "\n\n" + _limit_warn).strip()
|
||||
elif _limit_info["at_limit"]:
|
||||
_limit_warn = (
|
||||
f"[SESSION LIMIT REACHED: This session has hit the {_limit_info['max_messages']} "
|
||||
f"message limit. This is your FINAL response. Summarize accomplishments and next steps.]"
|
||||
)
|
||||
combined_ephemeral = (combined_ephemeral + "\n\n" + _limit_warn).strip()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Re-read .env and config for fresh credentials (gateway is long-lived,
|
||||
# keys may change without restart).
|
||||
try:
|
||||
|
||||
@@ -383,7 +383,11 @@ class SessionEntry:
|
||||
# survives gateway restarts (the old in-memory _pre_flushed_sessions
|
||||
# set was lost on restart, causing redundant re-flushes).
|
||||
memory_flushed: bool = False
|
||||
|
||||
|
||||
# Marathon session limit tracking (#326).
|
||||
# Counts total messages (user + assistant + tool) in this session.
|
||||
message_count: int = 0
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
result = {
|
||||
"session_key": self.session_key,
|
||||
@@ -402,6 +406,7 @@ class SessionEntry:
|
||||
"estimated_cost_usd": self.estimated_cost_usd,
|
||||
"cost_status": self.cost_status,
|
||||
"memory_flushed": self.memory_flushed,
|
||||
"message_count": self.message_count,
|
||||
}
|
||||
if self.origin:
|
||||
result["origin"] = self.origin.to_dict()
|
||||
@@ -438,6 +443,7 @@ class SessionEntry:
|
||||
estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
|
||||
cost_status=data.get("cost_status", "unknown"),
|
||||
memory_flushed=data.get("memory_flushed", False),
|
||||
message_count=data.get("message_count", 0),
|
||||
)
|
||||
|
||||
|
||||
@@ -643,6 +649,9 @@ class SessionStore:
|
||||
)
|
||||
|
||||
if policy.mode == "none":
|
||||
# Even with mode=none, enforce message_limit if set
|
||||
if policy.max_messages > 0 and entry.message_count >= policy.max_messages:
|
||||
return "message_limit"
|
||||
return None
|
||||
|
||||
now = _now()
|
||||
@@ -664,7 +673,11 @@ class SessionStore:
|
||||
|
||||
if entry.updated_at < today_reset:
|
||||
return "daily"
|
||||
|
||||
|
||||
# Marathon session limit (#326): force checkpoint+restart at max_messages
|
||||
if policy.max_messages > 0 and entry.message_count >= policy.max_messages:
|
||||
return "message_limit"
|
||||
|
||||
return None
|
||||
|
||||
def has_any_sessions(self) -> bool:
|
||||
@@ -822,6 +835,43 @@ class SessionStore:
|
||||
entry.last_prompt_tokens = last_prompt_tokens
|
||||
self._save()
|
||||
|
||||
def get_message_limit_info(self, session_key: str) -> Dict[str, Any]:
|
||||
"""Get message count and limit info for a session (#326)."""
|
||||
with self._lock:
|
||||
self._ensure_loaded_locked()
|
||||
entry = self._entries.get(session_key)
|
||||
|
||||
if not entry:
|
||||
return {"message_count": 0, "max_messages": 0, "remaining": 0,
|
||||
"near_limit": False, "at_limit": False, "threshold": 0.0}
|
||||
|
||||
policy = self.config.get_reset_policy(
|
||||
platform=entry.platform,
|
||||
session_type=entry.chat_type,
|
||||
)
|
||||
max_msgs = policy.max_messages
|
||||
count = entry.message_count
|
||||
remaining = max(0, max_msgs - count) if max_msgs > 0 else float("inf")
|
||||
threshold = count / max_msgs if max_msgs > 0 else 0.0
|
||||
|
||||
return {
|
||||
"message_count": count,
|
||||
"max_messages": max_msgs,
|
||||
"remaining": remaining,
|
||||
"near_limit": max_msgs > 0 and count >= int(max_msgs * 0.85),
|
||||
"at_limit": max_msgs > 0 and count >= max_msgs,
|
||||
"threshold": threshold,
|
||||
}
|
||||
|
||||
def reset_message_count(self, session_key: str) -> None:
|
||||
"""Reset the message count to zero for a session (#326)."""
|
||||
with self._lock:
|
||||
self._ensure_loaded_locked()
|
||||
entry = self._entries.get(session_key)
|
||||
if entry:
|
||||
entry.message_count = 0
|
||||
self._save()
|
||||
|
||||
def reset_session(self, session_key: str) -> Optional[SessionEntry]:
|
||||
"""Force reset a session, creating a new session ID."""
|
||||
db_end_session_id = None
|
||||
@@ -849,6 +899,7 @@ class SessionStore:
|
||||
display_name=old_entry.display_name,
|
||||
platform=old_entry.platform,
|
||||
chat_type=old_entry.chat_type,
|
||||
message_count=0, # Fresh count after rotation (#326)
|
||||
)
|
||||
|
||||
self._entries[session_key] = new_entry
|
||||
@@ -908,6 +959,7 @@ class SessionStore:
|
||||
display_name=old_entry.display_name,
|
||||
platform=old_entry.platform,
|
||||
chat_type=old_entry.chat_type,
|
||||
message_count=0, # Fresh count after rotation (#326)
|
||||
)
|
||||
|
||||
self._entries[session_key] = new_entry
|
||||
@@ -966,6 +1018,16 @@ class SessionStore:
|
||||
transcript_path = self.get_transcript_path(session_id)
|
||||
with open(transcript_path, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(message, ensure_ascii=False) + "\n")
|
||||
|
||||
# Increment message count for marathon session tracking (#326)
|
||||
# Skip counting session_meta entries (tool defs, metadata)
|
||||
if message.get("role") != "session_meta":
|
||||
with self._lock:
|
||||
for entry in self._entries.values():
|
||||
if entry.session_id == session_id:
|
||||
entry.message_count += 1
|
||||
self._save()
|
||||
break
|
||||
|
||||
def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
|
||||
"""Replace the entire transcript for a session with new messages.
|
||||
|
||||
184
tests/gateway/test_marathon_session_limits.py
Normal file
184
tests/gateway/test_marathon_session_limits.py
Normal file
@@ -0,0 +1,184 @@
|
||||
"""Tests for marathon session limits (#326)."""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from tempfile import mkdtemp
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, SessionResetPolicy
|
||||
from gateway.session import SessionEntry, SessionSource, SessionStore
|
||||
|
||||
|
||||
def _source(platform=Platform.LOCAL, chat_id="test"):
|
||||
return SessionSource(platform=platform, chat_id=chat_id, chat_type="dm", user_id="u1")
|
||||
|
||||
|
||||
def _store(max_messages=200, mode="both"):
|
||||
cfg = GatewayConfig()
|
||||
cfg.default_reset_policy = SessionResetPolicy(mode=mode, max_messages=max_messages)
|
||||
return SessionStore(Path(mkdtemp()), cfg)
|
||||
|
||||
|
||||
class TestSessionResetPolicyMaxMessages:
|
||||
def test_default(self):
|
||||
assert SessionResetPolicy().max_messages == 200
|
||||
|
||||
def test_custom(self):
|
||||
assert SessionResetPolicy(max_messages=500).max_messages == 500
|
||||
|
||||
def test_unlimited(self):
|
||||
assert SessionResetPolicy(max_messages=0).max_messages == 0
|
||||
|
||||
def test_to_dict(self):
|
||||
d = SessionResetPolicy(max_messages=300).to_dict()
|
||||
assert d["max_messages"] == 300
|
||||
|
||||
def test_from_dict(self):
|
||||
p = SessionResetPolicy.from_dict({"max_messages": 150})
|
||||
assert p.max_messages == 150
|
||||
|
||||
def test_from_dict_default(self):
|
||||
assert SessionResetPolicy.from_dict({}).max_messages == 200
|
||||
|
||||
|
||||
class TestSessionEntryMessageCount:
|
||||
def test_default(self):
|
||||
e = SessionEntry(session_key="k", session_id="s", created_at=datetime.now(), updated_at=datetime.now())
|
||||
assert e.message_count == 0
|
||||
|
||||
def test_to_dict(self):
|
||||
e = SessionEntry(session_key="k", session_id="s", created_at=datetime.now(), updated_at=datetime.now(), message_count=42)
|
||||
assert e.to_dict()["message_count"] == 42
|
||||
|
||||
def test_from_dict(self):
|
||||
e = SessionEntry.from_dict({"session_key": "k", "session_id": "s", "created_at": "2026-01-01T00:00:00", "updated_at": "2026-01-01T00:00:00", "message_count": 99})
|
||||
assert e.message_count == 99
|
||||
|
||||
|
||||
class TestShouldResetMessageLimit:
|
||||
def test_at_limit(self):
|
||||
s = _store()
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
e.message_count = 200
|
||||
assert s._should_reset(e, src) == "message_limit"
|
||||
|
||||
def test_over_limit(self):
|
||||
s = _store()
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
e.message_count = 250
|
||||
assert s._should_reset(e, src) == "message_limit"
|
||||
|
||||
def test_below_limit(self):
|
||||
s = _store()
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
e.message_count = 100
|
||||
assert s._should_reset(e, src) is None
|
||||
|
||||
def test_unlimited(self):
|
||||
s = _store(max_messages=0, mode="none")
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
e.message_count = 9999
|
||||
assert s._should_reset(e, src) is None
|
||||
|
||||
def test_custom_limit(self):
|
||||
s = _store(max_messages=50)
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
e.message_count = 50
|
||||
assert s._should_reset(e, src) == "message_limit"
|
||||
|
||||
def test_just_under(self):
|
||||
s = _store(max_messages=50)
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
e.message_count = 49
|
||||
assert s._should_reset(e, src) is None
|
||||
|
||||
|
||||
class TestAppendIncrementsCount:
|
||||
def test_user_message(self):
|
||||
s = _store()
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
s.append_to_transcript(e.session_id, {"role": "user", "content": "hi"})
|
||||
e = s.get_or_create_session(src)
|
||||
assert e.message_count == 1
|
||||
|
||||
def test_assistant_message(self):
|
||||
s = _store()
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
s.append_to_transcript(e.session_id, {"role": "user", "content": "hi"})
|
||||
s.append_to_transcript(e.session_id, {"role": "assistant", "content": "hello"})
|
||||
e = s.get_or_create_session(src)
|
||||
assert e.message_count == 2
|
||||
|
||||
def test_meta_not_counted(self):
|
||||
s = _store()
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
s.append_to_transcript(e.session_id, {"role": "session_meta", "tools": []})
|
||||
e = s.get_or_create_session(src)
|
||||
assert e.message_count == 0
|
||||
|
||||
|
||||
class TestGetMessageLimitInfo:
|
||||
def test_at_limit(self):
|
||||
s = _store()
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
e.message_count = 200
|
||||
info = s.get_message_limit_info(e.session_key)
|
||||
assert info["at_limit"] is True
|
||||
assert info["near_limit"] is True
|
||||
assert info["remaining"] == 0
|
||||
|
||||
def test_near_limit(self):
|
||||
s = _store()
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
e.message_count = 180
|
||||
info = s.get_message_limit_info(e.session_key)
|
||||
assert info["near_limit"] is True
|
||||
assert info["at_limit"] is False
|
||||
assert info["remaining"] == 20
|
||||
|
||||
def test_well_below(self):
|
||||
s = _store()
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
e.message_count = 50
|
||||
info = s.get_message_limit_info(e.session_key)
|
||||
assert info["near_limit"] is False
|
||||
assert info["at_limit"] is False
|
||||
|
||||
def test_unknown(self):
|
||||
s = _store()
|
||||
info = s.get_message_limit_info("nonexistent")
|
||||
assert info["at_limit"] is False
|
||||
|
||||
|
||||
class TestResetMessageCount:
|
||||
def test_reset(self):
|
||||
s = _store()
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
e.message_count = 150
|
||||
s.reset_message_count(e.session_key)
|
||||
assert s.get_message_limit_info(e.session_key)["message_count"] == 0
|
||||
|
||||
|
||||
class TestSessionRotation:
|
||||
def test_fresh_count_after_reset(self):
|
||||
s = _store()
|
||||
src = _source()
|
||||
e = s.get_or_create_session(src)
|
||||
e.message_count = 200
|
||||
new = s.reset_session(e.session_key)
|
||||
assert new is not None
|
||||
assert new.message_count == 0
|
||||
assert new.session_id != e.session_id
|
||||
@@ -1,194 +0,0 @@
|
||||
"""Tests for time-aware cron model routing — Issue #317."""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
|
||||
from agent.smart_model_routing import resolve_cron_model, _hour_in_window
|
||||
|
||||
|
||||
class TestHourInWindow:
|
||||
"""Hour-in-window detection including midnight wrap."""
|
||||
|
||||
def test_normal_window(self):
|
||||
assert _hour_in_window(18, 17, 22) is True
|
||||
assert _hour_in_window(16, 17, 22) is False
|
||||
assert _hour_in_window(22, 17, 22) is False # [start, end) exclusive end
|
||||
|
||||
def test_midnight_wrap(self):
|
||||
assert _hour_in_window(23, 22, 6) is True
|
||||
assert _hour_in_window(3, 22, 6) is True
|
||||
assert _hour_in_window(10, 22, 6) is False
|
||||
|
||||
def test_edge_cases(self):
|
||||
assert _hour_in_window(0, 0, 24) is True
|
||||
assert _hour_in_window(23, 0, 24) is True
|
||||
assert _hour_in_window(0, 22, 6) is True
|
||||
assert _hour_in_window(5, 22, 6) is True
|
||||
assert _hour_in_window(6, 22, 6) is False
|
||||
|
||||
|
||||
class TestResolveCronModel:
|
||||
"""Time-aware model resolution for cron jobs."""
|
||||
|
||||
def _config(self, **overrides):
|
||||
base = {
|
||||
"enabled": True,
|
||||
"fallback_model": "anthropic/claude-sonnet-4",
|
||||
"fallback_provider": "openrouter",
|
||||
"windows": [
|
||||
{"start_hour": 17, "end_hour": 22, "reason": "evening_error_peak"},
|
||||
],
|
||||
}
|
||||
base.update(overrides)
|
||||
return base
|
||||
|
||||
def test_disabled_returns_base_model(self):
|
||||
result = resolve_cron_model(
|
||||
"xiaomi/mimo-v2-pro",
|
||||
{"enabled": False},
|
||||
now=datetime(2026, 4, 12, 18, 0),
|
||||
)
|
||||
assert result["model"] == "xiaomi/mimo-v2-pro"
|
||||
assert result["overridden"] is False
|
||||
|
||||
def test_no_config_returns_base_model(self):
|
||||
result = resolve_cron_model("xiaomi/mimo-v2-pro", None)
|
||||
assert result["model"] == "xiaomi/mimo-v2-pro"
|
||||
assert result["overridden"] is False
|
||||
|
||||
def test_no_windows_returns_base_model(self):
|
||||
result = resolve_cron_model(
|
||||
"xiaomi/mimo-v2-pro",
|
||||
{"enabled": True, "windows": []},
|
||||
now=datetime(2026, 4, 12, 18, 0),
|
||||
)
|
||||
assert result["overridden"] is False
|
||||
|
||||
def test_evening_window_overrides(self):
|
||||
"""18:00 falls in [17, 22) — should override to stronger model."""
|
||||
result = resolve_cron_model(
|
||||
"xiaomi/mimo-v2-pro",
|
||||
self._config(),
|
||||
now=datetime(2026, 4, 12, 18, 0),
|
||||
)
|
||||
assert result["model"] == "anthropic/claude-sonnet-4"
|
||||
assert result["provider"] == "openrouter"
|
||||
assert result["overridden"] is True
|
||||
assert "evening_error_peak" in result["reason"]
|
||||
assert "hour=18" in result["reason"]
|
||||
|
||||
def test_outside_window_keeps_base(self):
|
||||
"""09:00 is outside [17, 22) — keep base model."""
|
||||
result = resolve_cron_model(
|
||||
"xiaomi/mimo-v2-pro",
|
||||
self._config(),
|
||||
now=datetime(2026, 4, 12, 9, 0),
|
||||
)
|
||||
assert result["model"] == "xiaomi/mimo-v2-pro"
|
||||
assert result["overridden"] is False
|
||||
|
||||
def test_window_boundary_start_inclusive(self):
|
||||
"""17:00 is start of window — should override."""
|
||||
result = resolve_cron_model(
|
||||
"xiaomi/mimo-v2-pro",
|
||||
self._config(),
|
||||
now=datetime(2026, 4, 12, 17, 0),
|
||||
)
|
||||
assert result["overridden"] is True
|
||||
|
||||
def test_window_boundary_end_exclusive(self):
|
||||
"""22:00 is end of window — should NOT override."""
|
||||
result = resolve_cron_model(
|
||||
"xiaomi/mimo-v2-pro",
|
||||
self._config(),
|
||||
now=datetime(2026, 4, 12, 22, 0),
|
||||
)
|
||||
assert result["overridden"] is False
|
||||
|
||||
def test_midnight_window(self):
|
||||
"""Overnight window [22, 6) wraps midnight."""
|
||||
config = self._config(windows=[
|
||||
{"start_hour": 22, "end_hour": 6, "reason": "overnight_instability"},
|
||||
])
|
||||
# 23:00 — in window
|
||||
result = resolve_cron_model("mimo", config, now=datetime(2026, 4, 12, 23, 0))
|
||||
assert result["overridden"] is True
|
||||
assert "overnight_instability" in result["reason"]
|
||||
|
||||
# 03:00 — in window (past midnight)
|
||||
result = resolve_cron_model("mimo", config, now=datetime(2026, 4, 13, 3, 0))
|
||||
assert result["overridden"] is True
|
||||
|
||||
# 10:00 — outside window
|
||||
result = resolve_cron_model("mimo", config, now=datetime(2026, 4, 12, 10, 0))
|
||||
assert result["overridden"] is False
|
||||
|
||||
def test_per_window_model_override(self):
|
||||
"""Window-specific model takes precedence over global fallback."""
|
||||
config = self._config(windows=[
|
||||
{
|
||||
"start_hour": 17,
|
||||
"end_hour": 22,
|
||||
"model": "anthropic/claude-opus-4-6",
|
||||
"provider": "anthropic",
|
||||
"reason": "peak_hours",
|
||||
},
|
||||
])
|
||||
result = resolve_cron_model("mimo", config, now=datetime(2026, 4, 12, 18, 0))
|
||||
assert result["model"] == "anthropic/claude-opus-4-6"
|
||||
assert result["provider"] == "anthropic"
|
||||
|
||||
def test_first_matching_window_wins(self):
|
||||
"""When windows overlap, first match wins."""
|
||||
config = self._config(windows=[
|
||||
{"start_hour": 17, "end_hour": 20, "model": "strong-1", "provider": "p1", "reason": "w1"},
|
||||
{"start_hour": 19, "end_hour": 22, "model": "strong-2", "provider": "p2", "reason": "w2"},
|
||||
])
|
||||
# 19:00 matches both — first wins
|
||||
result = resolve_cron_model("mimo", config, now=datetime(2026, 4, 12, 19, 0))
|
||||
assert result["model"] == "strong-1"
|
||||
|
||||
def test_no_fallback_model_configured(self):
|
||||
"""If no fallback_model, keeps base model even in window."""
|
||||
config = {"enabled": True, "windows": [
|
||||
{"start_hour": 17, "end_hour": 22, "reason": "test"},
|
||||
]}
|
||||
result = resolve_cron_model("mimo", config, now=datetime(2026, 4, 12, 18, 0))
|
||||
assert result["overridden"] is False
|
||||
assert result["model"] == "mimo"
|
||||
|
||||
def test_malformed_windows_skipped(self):
|
||||
"""Non-dict or missing hours in windows are skipped safely."""
|
||||
config = self._config(windows=[
|
||||
"not-a-dict",
|
||||
{"start_hour": 17}, # missing end_hour
|
||||
{"end_hour": 22}, # missing start_hour
|
||||
{"start_hour": "bad", "end_hour": "bad"},
|
||||
{"start_hour": 17, "end_hour": 22, "reason": "valid"},
|
||||
])
|
||||
result = resolve_cron_model("mimo", config, now=datetime(2026, 4, 12, 18, 0))
|
||||
assert result["overridden"] is True
|
||||
assert "valid" in result["reason"]
|
||||
|
||||
def test_empty_provider_defaults_to_empty(self):
|
||||
"""When window has no provider, falls back to global, then empty."""
|
||||
config = self._config(
|
||||
fallback_provider="",
|
||||
windows=[{"start_hour": 17, "end_hour": 22, "reason": "test"}],
|
||||
)
|
||||
result = resolve_cron_model("mimo", config, now=datetime(2026, 4, 12, 18, 0))
|
||||
assert result["provider"] == ""
|
||||
|
||||
def test_multiple_windows_coverage(self):
|
||||
"""Two non-overlapping windows cover evening and overnight."""
|
||||
config = self._config(windows=[
|
||||
{"start_hour": 17, "end_hour": 22, "reason": "evening"},
|
||||
{"start_hour": 2, "end_hour": 5, "reason": "overnight"},
|
||||
])
|
||||
# Evening
|
||||
assert resolve_cron_model("mimo", config, now=datetime(2026, 4, 12, 20, 0))["overridden"] is True
|
||||
# Overnight
|
||||
assert resolve_cron_model("mimo", config, now=datetime(2026, 4, 13, 3, 0))["overridden"] is True
|
||||
# Safe hours
|
||||
assert resolve_cron_model("mimo", config, now=datetime(2026, 4, 12, 10, 0))["overridden"] is False
|
||||
assert resolve_cron_model("mimo", config, now=datetime(2026, 4, 12, 1, 0))["overridden"] is False
|
||||
Reference in New Issue
Block a user