# hermes-agent/agent/smart_model_routing.py

"""Helpers for optional cheap-vs-strong and time-aware model routing."""

from __future__ import annotations

import os
import re
from datetime import datetime
from typing import Any, Dict, List, Optional

from utils import is_truthy_value

# Lower-case words that mark a message as "not simple".
# choose_cheap_model_route() lower-cases the message, splits it on whitespace,
# strips surrounding punctuation from each token, and keeps the primary model
# if any resulting token is in this set. Matching is whole-token only, so
# e.g. "debugger" does not match "debug".
_COMPLEX_KEYWORDS = {
    "debug",
    "debugging",
    "implement",
    "implementation",
    "refactor",
    "patch",
    "traceback",
    "stacktrace",
    "exception",
    "error",
    "analyze",
    "analysis",
    "investigate",
    "architecture",
    "design",
    "compare",
    "benchmark",
    "optimize",
    "optimise",
    "review",
    "terminal",
    "shell",
    "tool",
    "tools",
    "pytest",
    "test",
    "tests",
    "plan",
    "planning",
    "delegate",
    "subagent",
    "cron",
    "docker",
    "kubernetes",
}

# Matches anything that looks like a link: an "http://" / "https://" scheme or
# a "www." prefix anywhere in the text (case-insensitive). Used with .search(),
# so the match is not anchored to the start of the message.
_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)


def _coerce_bool(value: Any, default: bool = False) -> bool:
    """Interpret a loosely-typed config value as a boolean.

    Thin wrapper around ``utils.is_truthy_value``; *default* is forwarded as
    that helper's ``default`` keyword.
    NOTE(review): presumably ``default`` is what the helper returns for
    missing/unrecognised values -- confirm against ``utils.is_truthy_value``.
    """
    return is_truthy_value(value, default=default)


def _coerce_int(value: Any, default: int) -> int:
    """Convert a loosely-typed config value to ``int``, or return *default*.

    Args:
        value: Anything ``int()`` accepts (ints, floats, numeric strings, ...).
        default: Value returned when the conversion is not possible.

    Returns:
        ``int(value)`` on success; *default* when ``int()`` rejects the value
        (``None``, non-numeric strings, NaN, +/-infinity, unsupported types).
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) -- reachable from YAML ".inf".
        return default


def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Pick the cheap-model route for a turn, or ``None`` to stay on primary.

    The cheap route is only offered when routing is enabled, a cheap model
    with both ``provider`` and ``model`` is configured, and the message is
    short, (nearly) single-line, free of backticks and URLs, and contains
    none of the ``_COMPLEX_KEYWORDS``. Any doubt keeps the primary model.

    Args:
        user_message: Raw text of the user's turn.
        routing_config: Routing settings; keys read here are ``enabled``,
            ``cheap_model``, ``max_simple_chars`` (default 160) and
            ``max_simple_words`` (default 28).

    Returns:
        A copy of the ``cheap_model`` mapping with normalised ``provider``
        (stripped, lower-cased) and ``model`` (stripped), plus
        ``routing_reason="simple_turn"``; ``None`` when the cheap route does
        not apply.
    """
    settings = routing_config or {}
    if not _coerce_bool(settings.get("enabled"), False):
        return None

    cheap_cfg = settings.get("cheap_model") or {}
    if not isinstance(cheap_cfg, dict):
        return None
    provider_name = str(cheap_cfg.get("provider") or "").strip().lower()
    model_name = str(cheap_cfg.get("model") or "").strip()
    if not (provider_name and model_name):
        return None

    message = (user_message or "").strip()
    if not message:
        return None

    char_limit = _coerce_int(settings.get("max_simple_chars"), 160)
    word_limit = _coerce_int(settings.get("max_simple_words"), 28)

    # Structural signs of non-trivial work: long text, several lines,
    # inline/fenced code (any backtick covers both), or a link.
    looks_heavy = (
        len(message) > char_limit
        or len(message.split()) > word_limit
        or message.count("\n") > 1
        or "`" in message
        or _URL_RE.search(message) is not None
    )
    if looks_heavy:
        return None

    # Whole-token keyword check; surrounding punctuation is ignored so that
    # "debug?" or "(tests)" still count.
    punctuation = ".,:;!?()[]{}\"'`"
    if any(token.strip(punctuation) in _COMPLEX_KEYWORDS for token in message.lower().split()):
        return None

    return {
        **cheap_cfg,
        "provider": provider_name,
        "model": model_name,
        "routing_reason": "simple_turn",
    }


def _primary_turn_route(primary: Dict[str, Any]) -> Dict[str, Any]:
    """Build the turn route that keeps the request on the primary model."""
    args = primary.get("args") or []
    return {
        "model": primary.get("model"),
        "runtime": {
            "api_key": primary.get("api_key"),
            "base_url": primary.get("base_url"),
            "provider": primary.get("provider"),
            "api_mode": primary.get("api_mode"),
            "command": primary.get("command"),
            "args": list(args),
            "credential_pool": primary.get("credential_pool"),
        },
        "label": None,
        "signature": (
            primary.get("model"),
            primary.get("provider"),
            primary.get("base_url"),
            primary.get("api_mode"),
            primary.get("command"),
            tuple(args),
        ),
    }


def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]:
    """Resolve the effective model/runtime for one turn.

    Uses the cheap route from ``choose_cheap_model_route`` when one applies
    and its provider runtime can be resolved; otherwise stays on *primary*.

    Args:
        user_message: Raw text of the user's turn.
        routing_config: Smart-routing settings, forwarded unchanged to
            ``choose_cheap_model_route``.
        primary: Already-resolved primary model settings. Keys read:
            ``model``, ``api_key``, ``base_url``, ``provider``, ``api_mode``,
            ``command``, ``args``, ``credential_pool``.

    Returns:
        Dict with:
        - ``model``: model to use for this turn.
        - ``runtime``: connection/runtime settings for that model.
        - ``label``: human-readable note when the cheap route is used,
          ``None`` when the primary model is used.
        - ``signature``: tuple of (model, provider, base_url, api_mode,
          command, args) identifying the selected configuration.
    """
    route = choose_cheap_model_route(user_message, routing_config)
    if not route:
        return _primary_turn_route(primary)

    import logging

    from hermes_cli.runtime_provider import resolve_runtime_provider

    # An optional per-route env var may supply the API key; an unset or empty
    # variable means "no explicit key".
    api_key_env = str(route.get("api_key_env") or "").strip()
    explicit_api_key = (os.getenv(api_key_env) or None) if api_key_env else None

    try:
        runtime = resolve_runtime_provider(
            requested=route.get("provider"),
            explicit_api_key=explicit_api_key,
            explicit_base_url=route.get("base_url"),
        )
    except Exception:
        # Best-effort: a broken cheap-route config must never block the turn,
        # but leave a trace so the misconfiguration can be diagnosed.
        logging.getLogger(__name__).debug(
            "Smart routing: could not resolve runtime for provider %r; using primary model",
            route.get("provider"),
            exc_info=True,
        )
        return _primary_turn_route(primary)

    cheap_args = runtime.get("args") or []
    # NOTE(review): unlike the primary route, this runtime dict carries no
    # "credential_pool" key -- confirm that is intentional.
    return {
        "model": route.get("model"),
        "runtime": {
            "api_key": runtime.get("api_key"),
            "base_url": runtime.get("base_url"),
            "provider": runtime.get("provider"),
            "api_mode": runtime.get("api_mode"),
            "command": runtime.get("command"),
            "args": list(cheap_args),
        },
        "label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
        "signature": (
            route.get("model"),
            runtime.get("provider"),
            runtime.get("base_url"),
            runtime.get("api_mode"),
            runtime.get("command"),
            tuple(cheap_args),
        ),
    }


# =========================================================================
# Time-aware cron model routing
# =========================================================================
#
# Empirical finding: cron error rate peaks at 18:00 (9.4%) vs 4.0% at 09:00.
# During high-error windows, route cron jobs to more capable models.
#
# Config (config.yaml):
#   cron_model_routing:
#     enabled: true
#     fallback_model: "anthropic/claude-sonnet-4"
#     fallback_provider: "openrouter"
#     windows:
#       - start_hour: 17
#         end_hour: 22
#         reason: "evening_error_peak"
#       - start_hour: 2
#         end_hour: 5
#         reason: "overnight_api_instability"
# =========================================================================

def _hour_in_window(hour: int, start: int, end: int) -> bool:
    """Tell whether *hour* lies in the half-open window ``[start, end)``.

    When ``start > end`` the window wraps past midnight (e.g. 22 -> 6) and
    covers every hour outside ``[end, start)``. ``start == end`` is an empty
    window.
    """
    if start > end:
        # Wrapping window: everything except the gap between end and start.
        return not (end <= hour < start)
    return start <= hour < end


def resolve_cron_model(
    base_model: str,
    routing_config: Optional[Dict[str, Any]],
    now: Optional[datetime] = None,
) -> Dict[str, Any]:
    """Pick the model for a cron job, honouring time-of-day override windows.

    The first configured window that contains the current hour wins. Its own
    ``model`` / ``provider`` are used when set, otherwise the config-level
    ``fallback_model`` / ``fallback_provider``. If routing is disabled, no
    window matches, or no override model can be determined, the base model
    is returned untouched.

    Args:
        base_model: The model string already resolved (from job/config/env).
        routing_config: The cron_model_routing dict from config.yaml.
        now: Override current time (for testing). Defaults to datetime.now().

    Returns:
        Dict with keys: model, provider, overridden, reason.
        - model: the effective model string to use
        - provider: provider override (empty string = use default)
        - overridden: True if time-based override was applied
        - reason: why override was applied (empty string if not)
    """
    settings = routing_config or {}
    unchanged = {"model": base_model, "provider": "", "overridden": False, "reason": ""}

    if not _coerce_bool(settings.get("enabled"), False):
        return unchanged

    candidate_windows = settings.get("windows") or []
    if not (isinstance(candidate_windows, list) and candidate_windows):
        return unchanged

    hour_now = (now or datetime.now()).hour

    def _covers_current_hour(entry: Any) -> bool:
        # Malformed entries (non-dicts, missing or non-integer hours) never match.
        if not isinstance(entry, dict):
            return False
        first = _coerce_int(entry.get("start_hour"), -1)
        last = _coerce_int(entry.get("end_hour"), -1)
        return first >= 0 and last >= 0 and _hour_in_window(hour_now, first, last)

    active = next((entry for entry in candidate_windows if _covers_current_hour(entry)), None)
    if not active:
        return unchanged

    # Per-window settings take precedence; config-level fallbacks fill gaps.
    chosen_model = (
        str(active.get("model") or "").strip()
        or str(settings.get("fallback_model") or "").strip()
    )
    chosen_provider = (
        str(active.get("provider") or "").strip()
        or str(settings.get("fallback_provider") or "").strip()
    )
    if not chosen_model:
        return unchanged

    label = str(active.get("reason") or "time_window").strip()
    return {
        "model": chosen_model,
        "provider": chosen_provider,
        "overridden": True,
        "reason": f"cron_routing:{label}(hour={hour_now})",
    }