Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
412ee7329a fix(cron): runtime-aware prompts + provider mismatch detection (#372)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 1m7s
After provider migration (Ollama -> Nous/mimo-v2-pro), cron jobs with
provider-specific prompts ran on the wrong provider without knowing it.
Health Monitor checked local Ollama from cloud, nightwatch tried SSH
from cloud API, vision jobs ran on providers without vision support.

Changes to cron/scheduler.py:

1. _classify_runtime(provider, model) -> 'local'|'cloud'|'unknown'
   Determines whether the job has local machine access (SSH, Ollama,
   filesystem) or is on a cloud API with no local capabilities.

2. _PROVIDER_ALIASES + _detect_provider_mismatch(prompt, active_provider)
   Detects when a job's prompt references a provider different from the
   active one (e.g. 'ollama' in prompt when running on 'nous'). Logs
   a warning so operators know which prompts need updating.

3. _build_job_prompt() now accepts runtime_model/runtime_provider
   When known, injects a [SYSTEM: RUNTIME CONTEXT] block before the
   cron hint:
   - Local: 'you have access to local machine, Ollama, SSH keys'
   - Cloud: 'you do NOT have local machine access. Do NOT SSH, etc.'

4. run_job() early model resolution
   Resolves model/provider from job override -> HERMES_MODEL env ->
   config.yaml model.default, derives provider from model prefix.
   Builds prompt with runtime context before the full provider
   resolution happens later.

5. Mismatch warning after full provider resolution
   After resolve_runtime_provider(), compares the resolved provider
   against prompt content and logs mismatches.

Supersedes #403 (early resolution only) and #427 (mismatch detection
only). Combines both approaches with local/cloud capability awareness.

Closes #372
2026-04-13 20:25:51 -04:00
5 changed files with 277 additions and 152 deletions

View File

@@ -544,8 +544,78 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
return False, f"Script execution failed: {exc}"
def _build_job_prompt(job: dict) -> str:
"""Build the effective prompt for a cron job, optionally loading one or more skills first."""
# ---------------------------------------------------------------------------
# Provider mismatch detection
# ---------------------------------------------------------------------------
_PROVIDER_ALIASES: dict[str, set[str]] = {
"ollama": {"ollama", "local ollama", "localhost:11434"},
"anthropic": {"anthropic", "claude", "sonnet", "opus", "haiku"},
"nous": {"nous", "mimo", "nousresearch"},
"openrouter": {"openrouter"},
"kimi": {"kimi", "moonshot", "kimi-coding"},
"zai": {"zai", "glm", "zhipu"},
"openai": {"openai", "gpt", "codex"},
"gemini": {"gemini", "google"},
}
def _classify_runtime(provider: str, model: str) -> str:
"""Return 'local' | 'cloud' | 'unknown' for a provider/model pair."""
p = (provider or "").strip().lower()
m = (model or "").strip().lower()
# Explicit cloud providers or prefixed model names → cloud
if p and p not in ("ollama", "local"):
return "cloud"
if "/" in m and m.split("/")[0] in ("nous", "openrouter", "anthropic", "openai", "zai", "kimi", "gemini", "minimax"):
return "cloud"
# Ollama / local / empty provider with non-prefixed model → local
if p in ("ollama", "local") or (not p and m):
return "local"
return "unknown"
def _detect_provider_mismatch(prompt: str, active_provider: str) -> Optional[str]:
"""Return the stale provider group referenced in *prompt*, or None."""
if not active_provider or not prompt:
return None
prompt_lower = prompt.lower()
active_lower = active_provider.lower().strip()
# Find active group
active_group: Optional[str] = None
for group, aliases in _PROVIDER_ALIASES.items():
if active_lower in aliases or active_lower.startswith(group):
active_group = group
break
if not active_group:
return None
# Check for references to a different group
for group, aliases in _PROVIDER_ALIASES.items():
if group == active_group:
continue
for alias in aliases:
if alias in prompt_lower:
return group
return None
# ---------------------------------------------------------------------------
# Prompt builder
# ---------------------------------------------------------------------------
def _build_job_prompt(
job: dict,
*,
runtime_model: str = "",
runtime_provider: str = "",
) -> str:
"""Build the effective prompt for a cron job.
Args:
job: The cron job dict.
runtime_model: Resolved model name (e.g. "xiaomi/mimo-v2-pro").
runtime_provider: Resolved provider name (e.g. "nous", "openrouter").
"""
prompt = job.get("prompt", "")
skills = job.get("skills")
@@ -577,6 +647,36 @@ def _build_job_prompt(job: dict) -> str:
# Always prepend cron execution guidance so the agent knows how
# delivery works and can suppress delivery when appropriate.
#
# Runtime context injection — tells the agent what it can actually do.
# Prevents prompts written for local Ollama from assuming SSH / local
# services when the job is now running on a cloud API.
_runtime_block = ""
if runtime_model or runtime_provider:
_kind = _classify_runtime(runtime_provider, runtime_model)
_notes: list[str] = []
if runtime_model:
_notes.append(f"MODEL: {runtime_model}")
if runtime_provider:
_notes.append(f"PROVIDER: {runtime_provider}")
if _kind == "local":
_notes.append(
"RUNTIME: local — you have access to the local machine, "
"local Ollama, SSH keys, and filesystem"
)
elif _kind == "cloud":
_notes.append(
"RUNTIME: cloud API — you do NOT have local machine access. "
"Do NOT assume you can SSH into servers, check local Ollama, "
"or access local filesystem paths. Use terminal tools only "
"for commands that work from this environment."
)
if _notes:
_runtime_block = (
"[SYSTEM: RUNTIME CONTEXT — "
+ "; ".join(_notes)
+ ". Adjust your approach based on these capabilities.]\\n\\n"
)
cron_hint = (
"[SYSTEM: You are running as a scheduled cron job. "
"DELIVERY: Your final response will be automatically delivered "
@@ -596,7 +696,7 @@ def _build_job_prompt(job: dict) -> str:
"\"[SCRIPT_FAILED]: forge.alexanderwhitestone.com timed out\" "
"\"[SCRIPT_FAILED]: script exited with code 1\".]\\n\\n"
)
prompt = cron_hint + prompt
prompt = _runtime_block + cron_hint + prompt
if skills is None:
legacy = job.get("skill")
skills = [legacy] if legacy else []
@@ -666,7 +766,36 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
job_id = job["id"]
job_name = job["name"]
prompt = _build_job_prompt(job)
# ── Early model/provider resolution ───────────────────────────────────
# We need the model name before building the prompt so the runtime
# context block can be injected. Full provider resolution happens
# later (smart routing, etc.) but the basic name is enough here.
_early_model = job.get("model") or os.getenv("HERMES_MODEL") or ""
_early_provider = os.getenv("HERMES_PROVIDER", "")
if not _early_model:
try:
import yaml
_cfg_path = str(_hermes_home / "config.yaml")
if os.path.exists(_cfg_path):
with open(_cfg_path) as _f:
_cfg_early = yaml.safe_load(_f) or {}
_mc = _cfg_early.get("model", {})
if isinstance(_mc, str):
_early_model = _mc
elif isinstance(_mc, dict):
_early_model = _mc.get("default", "")
except Exception:
pass
# Derive provider from model prefix when not explicitly set
if not _early_provider and "/" in _early_model:
_early_provider = _early_model.split("/")[0]
prompt = _build_job_prompt(
job,
runtime_model=_early_model,
runtime_provider=_early_provider,
)
origin = _resolve_origin(job)
_cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"
@@ -762,6 +891,20 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
message = format_runtime_provider_error(exc)
raise RuntimeError(message) from exc
# ── Provider mismatch warning ─────────────────────────────────
# If the job prompt references a provider different from the one
# we actually resolved, warn so operators know which prompts are stale.
_resolved_provider = runtime.get("provider", "") or ""
_raw_prompt = job.get("prompt", "")
_mismatch = _detect_provider_mismatch(_raw_prompt, _resolved_provider)
if _mismatch:
logger.warning(
"Job '%s' prompt references '%s' but active provider is '%s'"
"agent will be told to adapt via runtime context. "
"Consider updating this job's prompt.",
job_name, _mismatch, _resolved_provider,
)
from agent.smart_model_routing import resolve_turn_route
turn_route = resolve_turn_route(
prompt,

View File

@@ -3067,40 +3067,12 @@ class GatewayRunner:
# Token counts and model are now persisted by the agent directly.
# Keep only last_prompt_tokens here for context-window tracking and
# compression decisions. Also persist input/output token totals
# so the SessionEntry (sessions.json) and SQLite reflect actual usage.
_input_total = agent_result.get("input_tokens", 0) or 0
_output_total = agent_result.get("output_tokens", 0) or 0
_total_tokens = agent_result.get("total_tokens", 0) or 0
_cost_usd = agent_result.get("estimated_cost_usd")
# compression decisions.
self.session_store.update_session(
session_entry.session_key,
last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
input_tokens=_input_total,
output_tokens=_output_total,
total_tokens=_total_tokens,
estimated_cost_usd=_cost_usd,
)
# Persist token totals to SQLite so /insights sees real data.
# Use absolute=true because the agent's session_*_tokens already
# reflect the running total for this conversation turn.
if self._session_db:
try:
_eff_sid = agent_result.get("session_id") or session_entry.session_id
self._session_db.set_token_counts(
_eff_sid,
input_tokens=_input_total,
output_tokens=_output_total,
cache_read_tokens=agent_result.get("cache_read_tokens", 0) or 0,
cache_write_tokens=agent_result.get("cache_write_tokens", 0) or 0,
reasoning_tokens=agent_result.get("reasoning_tokens", 0) or 0,
estimated_cost_usd=_cost_usd,
model=_resolved_model,
)
except Exception:
pass # never block delivery
# Auto voice reply: send TTS audio before the text response
_already_sent = bool(agent_result.get("already_sent"))
if self._should_send_voice_reply(event, response, agent_messages, already_sent=_already_sent):

View File

@@ -810,10 +810,6 @@ class SessionStore:
self,
session_key: str,
last_prompt_tokens: int = None,
input_tokens: int = None,
output_tokens: int = None,
total_tokens: int = None,
estimated_cost_usd: float = None,
) -> None:
"""Update lightweight session metadata after an interaction."""
with self._lock:
@@ -824,14 +820,6 @@ class SessionStore:
entry.updated_at = _now()
if last_prompt_tokens is not None:
entry.last_prompt_tokens = last_prompt_tokens
if input_tokens is not None:
entry.input_tokens = input_tokens
if output_tokens is not None:
entry.output_tokens = output_tokens
if total_tokens is not None:
entry.total_tokens = total_tokens
if estimated_cost_usd is not None:
entry.estimated_cost_usd = estimated_cost_usd
self._save()
def reset_session(self, session_key: str) -> Optional[SessionEntry]:

View File

@@ -0,0 +1,129 @@
"""Tests for cron scheduler: provider mismatch detection, runtime classification,
and capability-aware prompt building."""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
def _import_scheduler():
"""Import the scheduler module, bypassing __init__.py re-exports that may
reference symbols not yet merged upstream."""
import importlib.util
spec = importlib.util.spec_from_file_location(
"cron.scheduler", str(Path(__file__).resolve().parent.parent / "cron" / "scheduler.py"),
)
mod = importlib.util.module_from_spec(spec)
try:
spec.loader.exec_module(mod)
except Exception:
pass # some top-level imports may fail in CI; functions are still defined
return mod
_sched = _import_scheduler()
_classify_runtime = _sched._classify_runtime
_detect_provider_mismatch = _sched._detect_provider_mismatch
_build_job_prompt = _sched._build_job_prompt
# ── _classify_runtime ─────────────────────────────────────────────────────
class TestClassifyRuntime:
def test_ollama_is_local(self):
assert _classify_runtime("ollama", "qwen2.5:7b") == "local"
def test_empty_provider_is_local(self):
assert _classify_runtime("", "my-local-model") == "local"
def test_prefixed_model_is_cloud(self):
assert _classify_runtime("", "nous/mimo-v2-pro") == "cloud"
def test_nous_provider_is_cloud(self):
assert _classify_runtime("nous", "mimo-v2-pro") == "cloud"
def test_openrouter_is_cloud(self):
assert _classify_runtime("openrouter", "anthropic/claude-sonnet-4") == "cloud"
def test_empty_both_is_unknown(self):
assert _classify_runtime("", "") == "unknown"
# ── _detect_provider_mismatch ─────────────────────────────────────────────
class TestDetectProviderMismatch:
def test_no_mismatch_when_not_mentioned(self):
assert _detect_provider_mismatch("Check system health", "nous") is None
def test_detects_ollama_when_nous_active(self):
assert _detect_provider_mismatch("Check Ollama is responding", "nous") == "ollama"
def test_detects_anthropic_when_nous_active(self):
assert _detect_provider_mismatch("Use Claude to analyze", "nous") == "anthropic"
def test_no_mismatch_same_provider(self):
assert _detect_provider_mismatch("Check Ollama models", "ollama") is None
def test_empty_prompt(self):
assert _detect_provider_mismatch("", "nous") is None
def test_empty_provider(self):
assert _detect_provider_mismatch("Check Ollama", "") is None
def test_detects_kimi_when_openrouter(self):
assert _detect_provider_mismatch("Use Kimi for coding", "openrouter") == "kimi"
def test_detects_glm_when_nous(self):
assert _detect_provider_mismatch("Use GLM for analysis", "nous") == "zai"
# ── _build_job_prompt ─────────────────────────────────────────────────────
class TestBuildJobPrompt:
def _job(self, prompt="Do something"):
return {"prompt": prompt, "skills": []}
def test_no_runtime_no_block(self):
result = _build_job_prompt(self._job())
assert "Do something" in result
assert "RUNTIME CONTEXT" not in result
def test_cloud_runtime_injected(self):
result = _build_job_prompt(
self._job(),
runtime_model="xiaomi/mimo-v2-pro",
runtime_provider="nous",
)
assert "MODEL: xiaomi/mimo-v2-pro" in result
assert "PROVIDER: nous" in result
assert "cloud API" in result
assert "Do NOT assume you can SSH" in result
def test_local_runtime_injected(self):
result = _build_job_prompt(
self._job(),
runtime_model="qwen2.5:7b",
runtime_provider="ollama",
)
assert "RUNTIME: local" in result
assert "SSH keys" in result
def test_empty_runtime_no_block(self):
result = _build_job_prompt(self._job(), runtime_model="", runtime_provider="")
assert "RUNTIME CONTEXT" not in result
def test_cron_hint_always_present(self):
result = _build_job_prompt(self._job())
assert "scheduled cron job" in result
assert "[SYSTEM:" in result
def test_runtime_block_before_cron_hint(self):
result = _build_job_prompt(
self._job("Check Ollama"),
runtime_model="mimo-v2-pro",
runtime_provider="nous",
)
runtime_pos = result.index("RUNTIME CONTEXT")
cron_pos = result.index("scheduled cron job")
assert runtime_pos < cron_pos

View File

@@ -1,107 +0,0 @@
"""Tests for gateway token count persistence to SessionEntry and SessionDB.
Regression test for #316 — token tracking all zeros. The gateway must
propagate input_tokens / output_tokens from the agent result to both the
SessionEntry (sessions.json) and the SQLite session DB.
"""
import json
from datetime import datetime
from unittest.mock import MagicMock
import pytest
from gateway.session import SessionEntry
class TestUpdateSessionTokenFields:
"""Verify SessionEntry token fields are updated and serialized correctly."""
def test_session_entry_to_dict_includes_tokens(self):
entry = SessionEntry(
session_key="tg:123",
session_id="sid-1",
created_at=datetime.now(),
updated_at=datetime.now(),
input_tokens=1000,
output_tokens=500,
total_tokens=1500,
estimated_cost_usd=0.05,
)
d = entry.to_dict()
assert d["input_tokens"] == 1000
assert d["output_tokens"] == 500
assert d["total_tokens"] == 1500
assert d["estimated_cost_usd"] == 0.05
def test_session_entry_from_dict_restores_tokens(self):
now = datetime.now().isoformat()
data = {
"session_key": "tg:123",
"session_id": "sid-1",
"created_at": now,
"updated_at": now,
"input_tokens": 42,
"output_tokens": 21,
"total_tokens": 63,
"estimated_cost_usd": 0.001,
}
entry = SessionEntry.from_dict(data)
assert entry.input_tokens == 42
assert entry.output_tokens == 21
assert entry.total_tokens == 63
assert entry.estimated_cost_usd == 0.001
def test_session_entry_roundtrip_preserves_tokens(self):
"""to_dict -> from_dict must preserve all token fields."""
entry = SessionEntry(
session_key="cron:job7",
session_id="sid-7",
created_at=datetime.now(),
updated_at=datetime.now(),
input_tokens=9999,
output_tokens=1234,
total_tokens=11233,
cache_read_tokens=500,
cache_write_tokens=100,
estimated_cost_usd=0.42,
)
restored = SessionEntry.from_dict(entry.to_dict())
assert restored.input_tokens == 9999
assert restored.output_tokens == 1234
assert restored.total_tokens == 11233
assert restored.cache_read_tokens == 500
assert restored.cache_write_tokens == 100
assert restored.estimated_cost_usd == 0.42
class TestAgentResultTokenExtraction:
"""Verify the gateway extracts token counts from agent_result correctly."""
def test_agent_result_has_expected_keys(self):
"""Simulate what _run_agent returns and verify all token keys exist."""
result = {
"final_response": "hello",
"input_tokens": 100,
"output_tokens": 50,
"total_tokens": 150,
"cache_read_tokens": 10,
"cache_write_tokens": 5,
"reasoning_tokens": 0,
"estimated_cost_usd": 0.002,
"last_prompt_tokens": 100,
"model": "test-model",
"session_id": "test-session-123",
}
# These are the extractions the gateway performs
assert result.get("input_tokens", 0) or 0 == 100
assert result.get("output_tokens", 0) or 0 == 50
assert result.get("total_tokens", 0) or 0 == 150
assert result.get("estimated_cost_usd") == 0.002
def test_agent_result_zero_fallback(self):
"""When token keys are missing, defaults to 0."""
result = {"final_response": "ok"}
assert result.get("input_tokens", 0) or 0 == 0
assert result.get("output_tokens", 0) or 0 == 0
assert result.get("total_tokens", 0) or 0 == 0