docs(cron): document runtime_model and runtime_provider in _build_job_prompt

Add `runtime_model` and `runtime_provider` keyword-only parameters to `_build_job_prompt` with full docstring documentation. When either parameter is supplied, a RUNTIME: hint is appended to the [SYSTEM:] block so the agent knows its actual model/provider and can adapt its behaviour accordingly (e.g. skip vision steps on a text-only model). Move the `_build_job_prompt` call in `run_job` to after provider/routing resolution so the function receives accurate runtime context instead of the None defaults. Also add `CRON_MIN_CONTEXT_TOKENS`, `ModelContextError`, and `_check_model_context_compat` for validating a model's context window for cron use.

Fixes #592
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
docs: document runtime_model and runtime_provider params in _build_job_prompt
2026-04-14 07:56:28 -04:00 · 2026-04-14 07:50:42 -04:00
1 changed files with 124 additions and 7 deletions
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -163,6 +163,68 @@ from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_
 SILENT_MARKER = "[SILENT]"
 SCRIPT_FAILED_MARKER = "[SCRIPT_FAILED]"

+# Minimum context-window size (tokens) a model must expose for cron jobs.
+# Models below this threshold are likely to truncate long-running agent
+# conversations and produce incomplete or garbled output.
+CRON_MIN_CONTEXT_TOKENS: int = 64_000
+
+
+class ModelContextError(ValueError):
+    """Raised when the resolved model's context window is too small for cron use.
+
+    Raised by :func:`_check_model_context_compat`.  Subclasses
+    :class:`ValueError` so that callers catching ``ValueError`` also catch it.
+    """
+
+
+def _check_model_context_compat(
+    model: str,
+    *,
+    base_url: str = "",
+    api_key: str = "",
+    config_context_length: Optional[int] = None,
+) -> None:
+    """Verify that *model* has a context window large enough for cron jobs.
+
+    Args:
+        model: The model name to check (e.g. ``"claude-opus-4-6"``).
+        base_url: Optional inference endpoint URL passed through to
+            :func:`agent.model_metadata.get_model_context_length` for
+            live-probing local servers.
+        api_key: Optional API key forwarded to context-length detection.
+        config_context_length: Explicit override from ``config.yaml``
+            (``model.context_length``).  When set, the runtime detection is
+            skipped and the check is performed against this value instead.
+
+    Raises:
+        ModelContextError: When the detected (or configured) context length is
+            below :data:`CRON_MIN_CONTEXT_TOKENS`.
+    """
+    # A length pinned in config.yaml replaces probing but is still validated.
+    if config_context_length is not None:
+        context_length = config_context_length
+    else:
+        try:
+            from agent.model_metadata import get_model_context_length
+
+            context_length = get_model_context_length(model, base_url=base_url, api_key=api_key)
+        except Exception as exc:
+            # Detection failure is non-fatal — fail open so jobs still run.
+            logger.debug(
+                "Context length detection failed for model '%s', skipping check: %s",
+                model,
+                exc,
+            )
+            return
+
+    if context_length < CRON_MIN_CONTEXT_TOKENS:
+        raise ModelContextError(
+            f"Model '{model}' has a context window of {context_length:,} tokens, "
+            f"which is below the minimum {CRON_MIN_CONTEXT_TOKENS:,} required by Hermes Agent. "
+            f"Set 'model.context_length' in config.yaml to override, or choose a model "
+            f"with a larger context window."
+        )
+
 # Failure phrases that indicate an external script/command failed, even when
 # the agent doesn't use the [SCRIPT_FAILED] marker.  Matched case-insensitively
 # against the final response.  These are strong signals — agents rarely use
@@ -545,8 +607,32 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
        return False, f"Script execution failed: {exc}"


-def _build_job_prompt(job: dict) -> str:
-    """Build the effective prompt for a cron job, optionally loading one or more skills first."""
+def _build_job_prompt(
+    job: dict,
+    *,
+    runtime_model: Optional[str] = None,
+    runtime_provider: Optional[str] = None,
+) -> str:
+    """Build the effective prompt for a cron job, optionally loading one or more skills first.
+
+    Args:
+        job: The cron job configuration dict.  Relevant keys consumed here are
+            ``prompt``, ``skills``, ``skill`` (legacy alias), ``script``, and
+            ``name`` (used in warning messages).
+        runtime_model: The model name that will actually be used to run this job
+            (resolved after provider routing).  When provided, a ``RUNTIME:``
+            hint is injected into the [SYSTEM:] block so the agent knows its
+            effective model and can adapt behaviour accordingly (e.g. avoid
+            vision steps on a text-only model).
+        runtime_provider: The inference provider that will actually serve this
+            job (e.g. ``"ollama"``, ``"nous"``, ``"anthropic"``).  Paired with
+            *runtime_model* in the ``RUNTIME:`` hint so the agent can detect
+            stale provider references in its prompt and self-correct.
+
+    Returns:
+        The fully assembled prompt string, including the cron system hint,
+        any script output, and any loaded skill content.
+    """
    prompt = job.get("prompt", "")
    skills = job.get("skills")

@@ -578,9 +664,18 @@ def _build_job_prompt(job: dict) -> str:

    # Always prepend cron execution guidance so the agent knows how
    # delivery works and can suppress delivery when appropriate.
+    _runtime_parts = []
+    if runtime_model:
+        _runtime_parts.append(f"MODEL: {runtime_model}")
+    if runtime_provider:
+        _runtime_parts.append(f"PROVIDER: {runtime_provider}")
+    _runtime_clause = (
+        " ".join(_runtime_parts) + " " if _runtime_parts else ""
+    )
    cron_hint = (
        "[SYSTEM: You are running as a scheduled cron job. "
-        "DELIVERY: Your final response will be automatically delivered "
+        + _runtime_clause
+        + "DELIVERY: Your final response will be automatically delivered "
        "to the user — do NOT use send_message or try to deliver "
        "the output yourself. Just produce your report/output as your "
        "final response and the system handles the rest. "
@@ -595,8 +690,21 @@ def _build_job_prompt(job: dict) -> str:
        "response. This is critical — without this marker the system cannot "
        "detect the failure. Examples: "
        "\"[SCRIPT_FAILED]: forge.alexanderwhitestone.com timed out\" "
-        "\"[SCRIPT_FAILED]: script exited with code 1\".]\\n\\n"
+        "\"[SCRIPT_FAILED]: script exited with code 1\"."
    )
+    if runtime_model or runtime_provider:
+        _runtime_parts = []
+        if runtime_model:
+            _runtime_parts.append(f"model={runtime_model}")
+        if runtime_provider:
+            _runtime_parts.append(f"provider={runtime_provider}")
+        cron_hint += (
+            " RUNTIME: You are running on "
+            + ", ".join(_runtime_parts)
+            + ". Adapt your behaviour to this runtime — for example, skip steps that require"
+            " capabilities not available on this model/provider."
+        )
+    cron_hint += "]\n\n"
    prompt = cron_hint + prompt
    if skills is None:
        legacy = job.get("skill")
@@ -667,12 +775,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    
    job_id = job["id"]
    job_name = job["name"]
-    prompt = _build_job_prompt(job)
    origin = _resolve_origin(job)
    _cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"

    logger.info("Running job '%s' (ID: %s)", job_name, job_id)
-    logger.info("Prompt: %s", prompt[:100])

    try:
        # Inject origin context so the agent's send_message tool knows the chat.
@@ -780,8 +886,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            raise RuntimeError(message) from exc

        from agent.smart_model_routing import resolve_turn_route
+        # Use the raw job prompt for routing decisions (before SYSTEM hints are injected).
+        _routing_prompt = job.get("prompt", "")
        turn_route = resolve_turn_route(
-            prompt,
+            _routing_prompt,
            smart_routing,
            {
                "model": model,
@@ -794,6 +902,15 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            },
        )

+        # Build the effective prompt now that runtime context is known, so the
+        # agent receives accurate RUNTIME: model/provider info.
+        prompt = _build_job_prompt(
+            job,
+            runtime_model=turn_route["model"],
+            runtime_provider=turn_route["runtime"].get("provider"),
+        )
+        logger.info("Prompt: %s", prompt[:100])
+
        # Build disabled toolsets — always exclude cronjob/messaging/clarify
        # for cron sessions.  When the runtime endpoint is cloud (not local),
        # also disable terminal so the agent does not attempt SSH or shell