feat: expose cron runtime overrides for burn-loop pinning (#799)

Add cron CLI flags for per-job model, provider, and base URL overrides, forward them through hermes_cli.cron, and print pinned runtime overrides in create/edit/list output so Gemma burn-loop jobs are auditable.
2026-04-22 11:02:47 -04:00
4 changed files with 161 additions and 55 deletions
--- a/docs/issue-851-verification.md
+++ b/docs/issue-851-verification.md
@@ -1,55 +0,0 @@
-# Issue #851 Verification
-
-## Status: ✅ ALREADY IMPLEMENTED
-
-Issue #851 is a research/audit issue whose own conclusion is that prompt caching is already extensively implemented in hermes-agent and that the remaining work is operational, not a repo-side code change.
-
-This verification confirms that the current repo already contains the core implementation described in the issue body.
-
-## Acceptance Criteria Check
-
-1. ✅ Anthropic / OpenRouter prompt-caching support exists
-   - `agent/prompt_caching.py:41-72` implements `apply_anthropic_cache_control()` with the documented system-plus-last-3 breakpoint strategy.
-   - `run_agent.py:8301-8306` applies Anthropic/OpenRouter cache-control breakpoints during API message preparation.
-
-2. ✅ OpenAI/Codex prompt-cache key support exists
-   - `run_agent.py:6199-6213` sets `prompt_cache_key = self.session_id` on the responses path for non-GitHub responses.
-   - `run_agent.py:3875-3878` explicitly passes through `prompt_cache_key` in normalized API kwargs.
-
-3. ✅ System-prompt stability and cache-friendly message normalization exist
-   - `run_agent.py:3155-3157` documents that the system prompt is cached and reused across turns to maximize prefix cache hits.
-   - `run_agent.py:8314-8339` normalizes whitespace and tool-call JSON for bit-perfect prefix matching across turns.
-
-4. ✅ Cache hit/miss logging infrastructure exists
-   - `run_agent.py:8966-8980` logs cache read/write token stats, including `cached_tokens`, `cache_creation_input_tokens`, and hit percentage.
-
-## Executed Verification
-
-### Targeted tests run
- `PYTHONPATH=/tmp/BURN2-FORGE-ALPHA-3 python3 -m pytest -q tests/agent/test_prompt_caching.py`
-  - Result: `14 passed`
-
-### Syntax verification
- `PYTHONPATH=/tmp/BURN2-FORGE-ALPHA-3 python3 -m py_compile agent/prompt_caching.py run_agent.py`
-  - Result: passed
-
-## Evidence Summary
-
-The issue body says:
- prompt caching is already extensively implemented
- the primary opportunities are operational: routing more workloads to Ollama, verifying provider support, and reporting cache hit rates
-
-The repo state matches that conclusion:
- caching primitives are present
- integration points are wired into the runtime
- targeted tests already exist and pass
- no new implementation change is required to satisfy the issue's repo-side claim
-
-## Recommendation
-
-Close issue #851 as already implemented in the codebase.
-
-If desired, follow-on work should be opened as separate operational issues for:
- Ollama-heavy workload routing
- provider-specific cache verification
- nightly cache hit-rate reporting
--- a/hermes_cli/cron.py
+++ b/hermes_cli/cron.py
@@ -38,6 +38,18 @@ def _cron_api(**kwargs):
    return json.loads(cronjob_tool(**kwargs))


+def _print_runtime_overrides(job: dict) -> None:
+    model = job.get("model")
+    provider = job.get("provider")
+    base_url = job.get("base_url")
+    if model:
+        print(f"    Model:     {model}")
+    if provider:
+        print(f"    Provider:  {provider}")
+    if base_url:
+        print(f"    Base URL:  {base_url}")
+
+
 def cron_list(show_all: bool = False):
    """List all scheduled jobs."""
    from cron.jobs import list_jobs
@@ -93,6 +105,7 @@ def cron_list(show_all: bool = False):
        script = job.get("script")
        if script:
            print(f"    Script:    {script}")
+        _print_runtime_overrides(job)

        # Execution history
        last_status = job.get("last_status")
@@ -167,6 +180,9 @@ def cron_create(args):
        repeat=getattr(args, "repeat", None),
        skill=getattr(args, "skill", None),
        skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
+        model=getattr(args, "model", None),
+        provider=getattr(args, "provider", None),
+        base_url=getattr(args, "base_url", None),
        script=getattr(args, "script", None),
    )
    if not result.get("success"):
@@ -180,6 +196,8 @@ def cron_create(args):
    job_data = result.get("job", {})
    if job_data.get("script"):
        print(f"  Script: {job_data['script']}")
+    if job_data:
+        _print_runtime_overrides(job_data)
    print(f"  Next run: {result['next_run_at']}")
    return 0

@@ -217,6 +235,9 @@ def cron_edit(args):
        deliver=getattr(args, "deliver", None),
        repeat=getattr(args, "repeat", None),
        skills=final_skills,
+        model=getattr(args, "model", None),
+        provider=getattr(args, "provider", None),
+        base_url=getattr(args, "base_url", None),
        script=getattr(args, "script", None),
    )
    if not result.get("success"):
@@ -233,6 +254,7 @@ def cron_edit(args):
        print("  Skills: none")
    if updated.get("script"):
        print(f"  Script: {updated['script']}")
+    _print_runtime_overrides(updated)
    return 0


--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -4958,6 +4958,9 @@ For more help on a command:
    cron_create.add_argument("--deliver", help="Delivery target: origin, local, telegram, discord, signal, or platform:chat_id")
    cron_create.add_argument("--repeat", type=int, help="Optional repeat count")
    cron_create.add_argument("--skill", dest="skills", action="append", help="Attach a skill. Repeat to add multiple skills.")
+    cron_create.add_argument("--model", help="Pin this job to a specific model (for example: google/gemma-4-31b-it)")
+    cron_create.add_argument("--provider", help="Pin this job to a specific provider (for example: openrouter)")
+    cron_create.add_argument("--base-url", dest="base_url", help="Optional base URL override for the job's runtime provider")
    cron_create.add_argument("--script", help="Path to a Python script whose stdout is injected into the prompt each run")

    # cron edit
@@ -4972,6 +4975,9 @@ For more help on a command:
    cron_edit.add_argument("--add-skill", dest="add_skills", action="append", help="Append a skill without replacing the existing list. Repeatable.")
    cron_edit.add_argument("--remove-skill", dest="remove_skills", action="append", help="Remove a specific attached skill. Repeatable.")
    cron_edit.add_argument("--clear-skills", action="store_true", help="Remove all attached skills from the job")
+    cron_edit.add_argument("--model", help="Update the job's pinned model")
+    cron_edit.add_argument("--provider", help="Update the job's pinned provider")
+    cron_edit.add_argument("--base-url", dest="base_url", help="Update the job's pinned base URL. Pass an empty string to clear it.")
    cron_edit.add_argument("--script", help="Path to a Python script whose stdout is injected into the prompt each run. Pass empty string to clear.")

    # lifecycle actions
--- a/tests/hermes_cli/test_cron.py
+++ b/tests/hermes_cli/test_cron.py
@@ -1,6 +1,7 @@
 """Tests for hermes_cli.cron command handling."""

 from argparse import Namespace
+from unittest.mock import patch

 import pytest

@@ -105,3 +106,135 @@ class TestCronCommandLifecycle:
        assert len(jobs) == 1
        assert jobs[0]["skills"] == ["blogwatcher", "find-nearby"]
        assert jobs[0]["name"] == "Skill combo"
+
+    def test_create_can_pin_runtime_model_provider_and_base_url(self, tmp_cron_dir, capsys):
+        cron_command(
+            Namespace(
+                cron_command="create",
+                schedule="every 1h",
+                prompt="Run the burn loop",
+                name="Gemma burn",
+                deliver=None,
+                repeat=None,
+                skill=None,
+                skills=None,
+                script=None,
+                model="google/gemma-4-31b-it",
+                provider="openrouter",
+                base_url="https://openrouter.ai/api/v1",
+            )
+        )
+
+        job = list_jobs()[0]
+        assert job["model"] == "google/gemma-4-31b-it"
+        assert job["provider"] == "openrouter"
+        assert job["base_url"] == "https://openrouter.ai/api/v1"
+
+        out = capsys.readouterr().out
+        assert "Created job" in out
+        assert "Model:     google/gemma-4-31b-it" in out
+        assert "Provider:  openrouter" in out
+
+    def test_edit_can_update_runtime_model_provider_and_clear_base_url(self, tmp_cron_dir, capsys):
+        job = create_job(prompt="Check server status", schedule="every 1h")
+
+        cron_command(
+            Namespace(
+                cron_command="edit",
+                job_id=job["id"],
+                schedule=None,
+                prompt=None,
+                name=None,
+                deliver=None,
+                repeat=None,
+                skill=None,
+                skills=None,
+                add_skills=None,
+                remove_skills=None,
+                clear_skills=False,
+                script=None,
+                model="google/gemma-4-31b-it",
+                provider="openrouter",
+                base_url="",
+            )
+        )
+
+        updated = get_job(job["id"])
+        assert updated["model"] == "google/gemma-4-31b-it"
+        assert updated["provider"] == "openrouter"
+        assert updated["base_url"] is None
+
+        out = capsys.readouterr().out
+        assert "Updated job" in out
+        assert "Model:     google/gemma-4-31b-it" in out
+        assert "Provider:  openrouter" in out
+
+
+class TestCronParserRuntimeOverrideFlags:
+    def test_main_parses_create_runtime_override_flags(self, monkeypatch):
+        from hermes_cli import main as main_mod
+
+        captured = {}
+
+        def fake_cmd_cron(args):
+            captured["args"] = args
+
+        monkeypatch.setattr(main_mod, "cmd_cron", fake_cmd_cron)
+        monkeypatch.setattr(
+            "sys.argv",
+            [
+                "hermes",
+                "cron",
+                "create",
+                "every 1h",
+                "Run the burn loop",
+                "--model",
+                "google/gemma-4-31b-it",
+                "--provider",
+                "openrouter",
+                "--base-url",
+                "https://openrouter.ai/api/v1",
+            ],
+        )
+
+        main_mod.main()
+
+        args = captured["args"]
+        assert args.cron_command == "create"
+        assert args.model == "google/gemma-4-31b-it"
+        assert args.provider == "openrouter"
+        assert args.base_url == "https://openrouter.ai/api/v1"
+
+    def test_main_parses_edit_runtime_override_flags(self, monkeypatch):
+        from hermes_cli import main as main_mod
+
+        captured = {}
+
+        def fake_cmd_cron(args):
+            captured["args"] = args
+
+        monkeypatch.setattr(main_mod, "cmd_cron", fake_cmd_cron)
+        monkeypatch.setattr(
+            "sys.argv",
+            [
+                "hermes",
+                "cron",
+                "edit",
+                "job123",
+                "--model",
+                "google/gemma-4-31b-it",
+                "--provider",
+                "openrouter",
+                "--base-url",
+                "",
+            ],
+        )
+
+        main_mod.main()
+
+        args = captured["args"]
+        assert args.cron_command == "edit"
+        assert args.job_id == "job123"
+        assert args.model == "google/gemma-4-31b-it"
+        assert args.provider == "openrouter"
+        assert args.base_url == ""