Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 1m1s
Empirical audit: cron error rate peaks at 18:00 (9.4%) vs 4.0% at 09:00.
During configured high-error windows, automatically route cron jobs to
more capable models when the user is not present to correct errors.
- agent/smart_model_routing.py: resolve_cron_model() + _hour_in_window()
- cron/scheduler.py: wired into run_job() after base model resolution
- tests/test_cron_model_routing.py: 16 tests
Config:
  cron_model_routing:
    enabled: true
    fallback_model: "anthropic/claude-sonnet-4"
    fallback_provider: "openrouter"
    windows:
      - {start_hour: 17, end_hour: 22, reason: evening_error_peak}
      - {start_hour: 2, end_hour: 5, reason: overnight_api_instability}
Features: midnight-wrap, per-window overrides, first-match-wins,
graceful degradation on malformed config.
Closes #317
297 lines
9.2 KiB
Python
297 lines
9.2 KiB
Python
"""Helpers for optional cheap-vs-strong and time-aware model routing."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import re
|
|
from datetime import datetime
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from utils import is_truthy_value
|
|
|
|
_COMPLEX_KEYWORDS = {
|
|
"debug",
|
|
"debugging",
|
|
"implement",
|
|
"implementation",
|
|
"refactor",
|
|
"patch",
|
|
"traceback",
|
|
"stacktrace",
|
|
"exception",
|
|
"error",
|
|
"analyze",
|
|
"analysis",
|
|
"investigate",
|
|
"architecture",
|
|
"design",
|
|
"compare",
|
|
"benchmark",
|
|
"optimize",
|
|
"optimise",
|
|
"review",
|
|
"terminal",
|
|
"shell",
|
|
"tool",
|
|
"tools",
|
|
"pytest",
|
|
"test",
|
|
"tests",
|
|
"plan",
|
|
"planning",
|
|
"delegate",
|
|
"subagent",
|
|
"cron",
|
|
"docker",
|
|
"kubernetes",
|
|
}
|
|
|
|
_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)
|
|
|
|
|
|
def _coerce_bool(value: Any, default: bool = False) -> bool:
    """Interpret *value* as a boolean by delegating to ``is_truthy_value``.

    *default* is forwarded unchanged as the ``default`` keyword argument.
    """
    coerced = is_truthy_value(value, default=default)
    return coerced
|
|
|
|
|
|
def _coerce_int(value: Any, default: int) -> int:
|
|
try:
|
|
return int(value)
|
|
except (TypeError, ValueError):
|
|
return default
|
|
|
|
|
|
def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Pick the configured cheap-model route for a turn that looks trivial.

    Deliberately conservative: any hint of code, tooling, debugging, or
    long-form work returns ``None`` so the caller stays on the primary model.

    Returns:
        A copy of the ``cheap_model`` config with normalised ``provider`` and
        ``model`` plus ``routing_reason="simple_turn"``, or ``None`` when
        routing is disabled, misconfigured, or the message is not simple.
    """
    settings = routing_config or {}
    if not _coerce_bool(settings.get("enabled"), False):
        return None

    cheap_cfg = settings.get("cheap_model") or {}
    if not isinstance(cheap_cfg, dict):
        return None

    provider_name = str(cheap_cfg.get("provider") or "").strip().lower()
    model_name = str(cheap_cfg.get("model") or "").strip()
    if not (provider_name and model_name):
        return None

    message = (user_message or "").strip()
    if not message:
        return None

    char_limit = _coerce_int(settings.get("max_simple_chars"), 160)
    word_limit = _coerce_int(settings.get("max_simple_words"), 28)

    # Structural signals: too long, multi-line, contains code markup or a URL.
    looks_complex = (
        len(message) > char_limit
        or len(message.split()) > word_limit
        or message.count("\n") > 1
        or "```" in message
        or "`" in message
        or _URL_RE.search(message) is not None
    )
    if looks_complex:
        return None

    # Lexical signals: compare punctuation-stripped, lower-cased tokens.
    tokens = {piece.strip(".,:;!?()[]{}\"'`") for piece in message.lower().split()}
    if not tokens.isdisjoint(_COMPLEX_KEYWORDS):
        return None

    return {
        **cheap_cfg,
        "provider": provider_name,
        "model": model_name,
        "routing_reason": "simple_turn",
    }
|
|
|
|
|
|
def _primary_turn_route(primary: Dict[str, Any]) -> Dict[str, Any]:
    """Build the route dict that keeps a turn on the primary model.

    Every field is read from *primary* with ``dict.get``, so absent keys
    become ``None``; a missing ``args`` becomes an empty list / tuple.
    """
    return {
        "model": primary.get("model"),
        "runtime": {
            "api_key": primary.get("api_key"),
            "base_url": primary.get("base_url"),
            "provider": primary.get("provider"),
            "api_mode": primary.get("api_mode"),
            "command": primary.get("command"),
            "args": list(primary.get("args") or []),
            "credential_pool": primary.get("credential_pool"),
        },
        "label": None,
        "signature": (
            primary.get("model"),
            primary.get("provider"),
            primary.get("base_url"),
            primary.get("api_mode"),
            primary.get("command"),
            tuple(primary.get("args") or ()),
        ),
    }


def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]:
    """Resolve the effective model/runtime for one turn.

    Args:
        user_message: The user's message for this turn.
        routing_config: Cheap-model routing settings, or ``None``.
        primary: The primary model/runtime settings used when no cheap route
            applies.

    Returns:
        A dict with ``model``, ``runtime``, ``label`` and ``signature`` keys.
        ``label`` is ``None`` when the primary model is kept and a
        human-readable "smart route" string when the cheap model is chosen.
    """
    route = choose_cheap_model_route(user_message, routing_config)
    if not route:
        return _primary_turn_route(primary)

    # Kept as a function-local import, as in the original code (presumably to
    # avoid an import cycle or load-time cost -- confirm before hoisting).
    from hermes_cli.runtime_provider import resolve_runtime_provider

    # An optional ``api_key_env`` names the environment variable holding the
    # key for the cheap route; an unset or empty variable yields ``None``.
    api_key_env = str(route.get("api_key_env") or "").strip()
    explicit_api_key = (os.getenv(api_key_env) or None) if api_key_env else None

    try:
        runtime = resolve_runtime_provider(
            requested=route.get("provider"),
            explicit_api_key=explicit_api_key,
            explicit_base_url=route.get("base_url"),
        )
    except Exception:
        # Deliberate best-effort: if the cheap route cannot be resolved for
        # any reason, fall back to the primary model rather than failing.
        return _primary_turn_route(primary)

    return {
        "model": route.get("model"),
        "runtime": {
            "api_key": runtime.get("api_key"),
            "base_url": runtime.get("base_url"),
            "provider": runtime.get("provider"),
            "api_mode": runtime.get("api_mode"),
            "command": runtime.get("command"),
            "args": list(runtime.get("args") or []),
        },
        "label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
        "signature": (
            route.get("model"),
            runtime.get("provider"),
            runtime.get("base_url"),
            runtime.get("api_mode"),
            runtime.get("command"),
            tuple(runtime.get("args") or ()),
        ),
    }
|
|
|
|
|
|
# =========================================================================
# Time-aware cron model routing
# =========================================================================
#
# Empirical finding: cron error rate peaks at 18:00 (9.4%) vs 4.0% at 09:00.
# During high-error windows, route cron jobs to more capable models.
#
# Config (config.yaml):
#   cron_model_routing:
#     enabled: true
#     fallback_model: "anthropic/claude-sonnet-4"
#     fallback_provider: "openrouter"
#     windows:
#       - start_hour: 17
#         end_hour: 22
#         reason: "evening_error_peak"
#       - start_hour: 2
#         end_hour: 5
#         reason: "overnight_api_instability"
# =========================================================================
|
|
|
|
def _hour_in_window(hour: int, start: int, end: int) -> bool:
|
|
"""Check if hour falls in [start, end) window, handling midnight wrap."""
|
|
if start <= end:
|
|
return start <= hour < end
|
|
else:
|
|
# Wraps midnight: e.g., 22-06
|
|
return hour >= start or hour < end
|
|
|
|
|
|
def resolve_cron_model(
    base_model: str,
    routing_config: Optional[Dict[str, Any]],
    now: Optional[datetime] = None,
) -> Dict[str, Any]:
    """Apply a time-aware model override for cron jobs.

    During configured high-error windows, returns a stronger model config.
    Outside windows, or whenever the config is disabled or malformed, returns
    the base model unchanged.

    Args:
        base_model: The model string already resolved (from job/config/env).
        routing_config: The cron_model_routing dict from config.yaml. Any
            non-dict value is treated as "routing disabled".
        now: Override current time (for testing). Defaults to
            ``datetime.now()``, i.e. naive local time.

    Returns:
        Dict with keys: model, provider, overridden, reason.
        - model: the effective model string to use
        - provider: provider override (empty string = use default)
        - overridden: True if time-based override was applied
        - reason: why override was applied (empty string if not)
    """
    passthrough = {"model": base_model, "provider": "", "overridden": False, "reason": ""}

    # A non-mapping value (e.g. ``cron_model_routing: true``) is malformed
    # config; degrade to "disabled" instead of raising on ``.get``.
    cfg = routing_config if isinstance(routing_config, dict) else {}

    if not _coerce_bool(cfg.get("enabled"), False):
        return passthrough

    windows = cfg.get("windows") or []
    if not isinstance(windows, list) or not windows:
        return passthrough

    current = now or datetime.now()
    current_hour = current.hour

    # First matching window wins; malformed entries are skipped.
    matched_window = None
    for window in windows:
        if not isinstance(window, dict):
            continue
        start = _coerce_int(window.get("start_hour"), -1)
        end = _coerce_int(window.get("end_hour"), -1)
        # Missing, non-numeric, or out-of-range hours make the window invalid.
        # Without the upper bound, e.g. start=25/end=3 would be read as a
        # midnight wrap and silently match hours 0-2.
        if not (0 <= start <= 24 and 0 <= end <= 24):
            continue
        if _hour_in_window(current_hour, start, end):
            matched_window = window
            break

    if matched_window is None:
        return passthrough

    # Window matched: per-window model/provider take precedence, with the
    # global fallback_* values filling in whichever one is missing.
    override_model = str(matched_window.get("model") or "").strip()
    override_provider = str(matched_window.get("provider") or "").strip()

    if not override_model:
        override_model = str(cfg.get("fallback_model") or "").strip()
    if not override_provider:
        override_provider = str(cfg.get("fallback_provider") or "").strip()

    # Nothing to route to: keep the base model.
    if not override_model:
        return passthrough

    reason = str(matched_window.get("reason") or "time_window").strip()

    return {
        "model": override_model,
        "provider": override_provider,
        "overridden": True,
        "reason": f"cron_routing:{reason}(hour={current_hour})",
    }
|