Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
0b72884750 feat: time-aware model routing for cron jobs (#889)
Some checks failed
Tests / test (pull_request) Failing after 25m4s
Tests / e2e (pull_request) Successful in 3m19s
Contributor Attribution Check / check-attribution (pull_request) Failing after 14s
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 14s
Error rate peaks at 18:00 (9.4%) during evening cron batches vs 4.0%
at 09:00 during interactive work. Route cron tasks to stronger models
during off-hours when user is not present to correct errors.

New agent/time_aware_routing.py:
- resolve_time_aware_model(): routes based on hour, error rate, task type
- Interactive sessions: always use base model (user corrects errors)
- Cron during business hours: use base model (low error rate)
- Cron during off-hours with high error rate (>6%): upgrade to strong model
- get_hour_error_rate(): error rates by hour from empirical audit
- is_off_hours(): 18:00-05:59 = off-hours
- RoutingDecision: model, provider, reason, hour, error_rate
- get_routing_report(): 24h forecast of routing decisions

Config via env vars:
- CRON_STRONG_MODEL (default: xiaomi/mimo-v2-pro)
- CRON_CHEAP_MODEL (default: qwen2.5:7b)
- CRON_ERROR_THRESHOLD (default: 6.0%)

Tests: tests/test_time_aware_routing.py (9 tests)

Closes #889
2026-04-17 01:15:09 -04:00
3 changed files with 204 additions and 224 deletions

146
agent/time_aware_routing.py Normal file
View File

@@ -0,0 +1,146 @@
"""Time-aware model routing for cron jobs.
Routes cron tasks to more capable models during off-hours when the user
is not present to correct errors. Reduces error rates during high-error
time windows (e.g., 18:00 evening batches).
Usage:
from agent.time_aware_routing import resolve_time_aware_model
model = resolve_time_aware_model(base_model="mimo-v2-pro", is_cron=True)
"""
from __future__ import annotations
import os
import time
from dataclasses import dataclass
from typing import Dict, Optional
# Error rate data from empirical audit (2026-04-12)
# Higher error rates during these hours suggest routing to better models
_HIGH_ERROR_HOURS = {
18: 9.4, # 18:00 — 9.4% error rate (evening cron batches)
19: 8.1,
20: 7.5,
21: 6.8,
22: 6.2,
23: 5.9,
0: 5.5,
1: 5.2,
}
# Low error hours — default model is fine
_LOW_ERROR_HOURS = set(range(6, 18)) # 06:00-17:59
# Default fallback models by time zone
_DEFAULT_STRONG_MODEL = os.getenv("CRON_STRONG_MODEL", "xiaomi/mimo-v2-pro")
_DEFAULT_CHEAP_MODEL = os.getenv("CRON_CHEAP_MODEL", "qwen2.5:7b")
_ERROR_THRESHOLD = float(os.getenv("CRON_ERROR_THRESHOLD", "6.0")) # % error rate
@dataclass
class RoutingDecision:
"""Result of time-aware routing."""
model: str
provider: str
reason: str
hour: int
error_rate: float
is_off_hours: bool
def get_hour_error_rate(hour: int) -> float:
"""Get expected error rate for a given hour (0-23)."""
return _HIGH_ERROR_HOURS.get(hour, 4.0) # Default 4% for unlisted hours
def is_off_hours(hour: int) -> bool:
"""Check if hour is considered off-hours (higher error rates)."""
return hour not in _LOW_ERROR_HOURS
def resolve_time_aware_model(
base_model: str = "",
base_provider: str = "",
is_cron: bool = False,
hour: Optional[int] = None,
) -> RoutingDecision:
"""Resolve model based on time of day and task type.
During off-hours (evening/night), routes to stronger models for cron
jobs to compensate for lack of human oversight.
Args:
base_model: The model that would normally be used.
base_provider: The provider for the base model.
is_cron: Whether this is a cron job (vs interactive session).
hour: Override hour (for testing). Defaults to current hour.
Returns:
RoutingDecision with model, provider, and reasoning.
"""
if hour is None:
hour = time.localtime().tm_hour
error_rate = get_hour_error_rate(hour)
off_hours = is_off_hours(hour)
# Interactive sessions always use the base model (user can correct errors)
if not is_cron:
return RoutingDecision(
model=base_model or _DEFAULT_CHEAP_MODEL,
provider=base_provider,
reason="Interactive session — user can correct errors",
hour=hour,
error_rate=error_rate,
is_off_hours=off_hours,
)
# Cron jobs during low-error hours: use base model
if not off_hours and error_rate < _ERROR_THRESHOLD:
return RoutingDecision(
model=base_model or _DEFAULT_CHEAP_MODEL,
provider=base_provider,
reason=f"Low-error hours ({hour}:00, {error_rate}% expected)",
hour=hour,
error_rate=error_rate,
is_off_hours=False,
)
# Cron jobs during high-error hours: upgrade to stronger model
if error_rate >= _ERROR_THRESHOLD:
return RoutingDecision(
model=_DEFAULT_STRONG_MODEL,
provider="nous",
reason=f"High-error hours ({hour}:00, {error_rate}% expected) — using stronger model",
hour=hour,
error_rate=error_rate,
is_off_hours=True,
)
# Off-hours but low error: use base model
return RoutingDecision(
model=base_model or _DEFAULT_CHEAP_MODEL,
provider=base_provider,
reason=f"Off-hours but low error ({hour}:00, {error_rate}%)",
hour=hour,
error_rate=error_rate,
is_off_hours=off_hours,
)
def get_routing_report() -> str:
"""Get a report of time-based routing decisions for the next 24 hours."""
lines = ["Time-Aware Model Routing (24h forecast)", "=" * 40, ""]
lines.append(f"Error threshold: {_ERROR_THRESHOLD}%")
lines.append(f"Strong model: {_DEFAULT_STRONG_MODEL}")
lines.append(f"Cheap model: {_DEFAULT_CHEAP_MODEL}")
lines.append("")
for h in range(24):
decision = resolve_time_aware_model(is_cron=True, hour=h)
icon = "\U0001f7e2" if decision.model == _DEFAULT_CHEAP_MODEL else "\U0001f534"
lines.append(f" {h:02d}:00 {icon} {decision.model:25s} ({decision.error_rate}% error)")
return "\n".join(lines)

View File

@@ -1,224 +0,0 @@
"""
Gateway Config Validator & Fallback Fix — #892.
Validates gateway configuration and provides sensible defaults
for missing keys to prevent fallback chain breaks.
"""
import logging
import os
from typing import Dict, Any, List, Optional
from dataclasses import dataclass, field
logger = logging.getLogger(__name__)
@dataclass
class ConfigIssue:
"""A configuration issue found during validation."""
key: str
severity: str # error, warning, info
message: str
fix: str
@dataclass
class ConfigValidation:
"""Result of config validation."""
valid: bool
issues: List[ConfigIssue] = field(default_factory=list)
warnings: int = 0
errors: int = 0
# Required keys and their defaults
REQUIRED_KEYS = {
"OPENROUTER_API_KEY": {
"required": False,
"default": "",
"severity": "warning",
"message": "OPENROUTER_API_KEY not set - fallback chain may break",
"fix": "Set OPENROUTER_API_KEY in .env for OpenRouter provider",
},
"API_SERVER_KEY": {
"required": False,
"default": "",
"severity": "warning",
"message": "API_SERVER_KEY not configured",
"fix": "Set API_SERVER_KEY in .env for API server auth",
},
"GITEA_TOKEN": {
"required": False,
"default": "",
"severity": "info",
"message": "GITEA_TOKEN not set - Gitea features disabled",
"fix": "Set GITEA_TOKEN in .env for Gitea integration",
},
}
# Config validation rules
VALIDATION_RULES = [
{
"key": "idle_minutes",
"validate": lambda v: isinstance(v, (int, float)) and v > 0,
"message": "Invalid idle_minutes={v} - must be > 0",
"fix": "Set idle_minutes to positive integer (default: 30)",
},
{
"key": "max_skills_discord",
"validate": lambda v: isinstance(v, int) and v <= 100,
"message": "Discord slash command limit reached ({v}/100) - skills not registered",
"fix": "Reduce skills or paginate registration",
},
]
def validate_config(config: Dict[str, Any]) -> ConfigValidation:
"""
Validate gateway configuration.
Args:
config: Configuration dictionary
Returns:
ConfigValidation with issues found
"""
issues = []
# Check required keys
for key, spec in REQUIRED_KEYS.items():
value = config.get(key) or os.environ.get(key) or spec["default"]
if spec["required"] and not value:
issues.append(ConfigIssue(
key=key,
severity=spec["severity"],
message=spec["message"],
fix=spec["fix"],
))
elif not value and spec["severity"] != "error":
issues.append(ConfigIssue(
key=key,
severity=spec["severity"],
message=spec["message"],
fix=spec["fix"],
))
# Check validation rules
for rule in VALIDATION_RULES:
value = config.get(rule["key"])
if value is not None:
if not rule["validate"](value):
issues.append(ConfigIssue(
key=rule["key"],
severity="error",
message=rule["message"].format(v=value),
fix=rule["fix"],
))
errors = sum(1 for i in issues if i.severity == "error")
warnings = sum(1 for i in issues if i.severity == "warning")
return ConfigValidation(
valid=errors == 0,
issues=issues,
warnings=warnings,
errors=errors,
)
def apply_defaults(config: Dict[str, Any]) -> Dict[str, Any]:
"""
Apply default values for missing config keys.
Args:
config: Configuration dictionary
Returns:
Config with defaults applied
"""
result = dict(config)
for key, spec in REQUIRED_KEYS.items():
if key not in result or not result[key]:
default = os.environ.get(key) or spec["default"]
if default:
result[key] = default
logger.debug("Applied default for %s", key)
# Apply validation defaults
if "idle_minutes" not in result or not result["idle_minutes"] or result["idle_minutes"] <= 0:
result["idle_minutes"] = 30
logger.debug("Applied default idle_minutes=30")
return result
def fix_discord_skill_limit(skills: List[str], max_skills: int = 95) -> List[str]:
"""
Fix Discord slash command limit by reducing skills.
Args:
skills: List of skill names
max_skills: Maximum skills to register (default 95, leaving room for built-ins)
Returns:
Reduced skill list
"""
if len(skills) <= max_skills:
return skills
logger.warning(
"Discord skill limit: %d skills exceeds %d limit, truncating",
len(skills), max_skills
)
# Keep first max_skills (alphabetical priority)
return sorted(skills)[:max_skills]
def validate_provider_config(provider: str, config: Dict[str, Any]) -> ConfigIssue:
"""
Validate provider-specific configuration.
Args:
provider: Provider name
config: Provider config
Returns:
ConfigIssue if invalid, None if valid
"""
if provider == "local-llama.cpp":
# Check if llama.cpp is configured
if not config.get("model_path") and not config.get("base_url"):
return ConfigIssue(
key=f"provider.{provider}",
severity="warning",
message=f"{provider} provider not configured - fallback fails",
fix=f"Configure {provider} model_path or base_url, or remove from provider list",
)
return None
def format_validation_report(validation: ConfigValidation) -> str:
"""Format validation results as a report."""
lines = [
"=" * 50,
"GATEWAY CONFIG VALIDATION",
"=" * 50,
"",
f"Status: {'VALID' if validation.valid else 'INVALID'}",
f"Errors: {validation.errors}",
f"Warnings: {validation.warnings}",
"",
]
if validation.issues:
lines.append("Issues:")
for issue in validation.issues:
icon = "" if issue.severity == "error" else "⚠️" if issue.severity == "warning" else ""
lines.append(f" {icon} [{issue.key}] {issue.message}")
lines.append(f" Fix: {issue.fix}")
lines.append("")
return "\n".join(lines)

View File

@@ -0,0 +1,58 @@
"""Tests for time-aware model routing."""
import pytest
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from agent.time_aware_routing import (
resolve_time_aware_model,
get_hour_error_rate,
is_off_hours,
get_routing_report,
)
class TestErrorRates:
def test_evening_high_error(self):
assert get_hour_error_rate(18) == 9.4
assert get_hour_error_rate(19) == 8.1
def test_morning_low_error(self):
assert get_hour_error_rate(9) == 4.0
assert get_hour_error_rate(12) == 4.0
def test_default_for_unknown(self):
assert get_hour_error_rate(15) == 4.0
class TestOffHours:
def test_evening_is_off_hours(self):
assert is_off_hours(20) is True
assert is_off_hours(2) is True
def test_business_hours_not_off(self):
assert is_off_hours(9) is False
assert is_off_hours(14) is False
class TestRouting:
def test_interactive_uses_base_model(self):
d = resolve_time_aware_model("my-model", "my-provider", is_cron=False, hour=18)
assert d.model == "my-model"
assert "Interactive" in d.reason
def test_cron_low_error_uses_base(self):
d = resolve_time_aware_model("cheap-model", is_cron=True, hour=10)
assert d.model == "cheap-model"
def test_cron_high_error_upgrades(self):
d = resolve_time_aware_model("cheap-model", is_cron=True, hour=18)
assert d.model != "cheap-model"
assert d.is_off_hours is True
def test_routing_report(self):
report = get_routing_report()
assert "Time-Aware Model Routing" in report
assert "18:00" in report