Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
77f10fa611 feat: expose cron runtime overrides for burn-loop pinning (#799)
All checks were successful
Lint / lint (pull_request) Successful in 16s
Add cron CLI flags for per-job model, provider, and base URL overrides,
forward them through hermes_cli.cron, and print pinned runtime overrides
in create/edit/list output so Gemma burn-loop jobs are auditable.
2026-04-22 11:02:47 -04:00
4 changed files with 161 additions and 55 deletions

View File

@@ -1,55 +0,0 @@
# Issue #851 Verification
## Status: ✅ ALREADY IMPLEMENTED
Issue #851 is a research/audit issue whose own conclusion is that prompt caching is already extensively implemented in hermes-agent and that the remaining work is operational, not a repo-side code change.
This verification confirms that the current repo already contains the core implementation described in the issue body.
## Acceptance Criteria Check
1. ✅ Anthropic / OpenRouter prompt-caching support exists
- `agent/prompt_caching.py:41-72` implements `apply_anthropic_cache_control()` with the documented system-plus-last-3 breakpoint strategy.
- `run_agent.py:8301-8306` applies Anthropic/OpenRouter cache-control breakpoints during API message preparation.
2. ✅ OpenAI/Codex prompt-cache key support exists
- `run_agent.py:6199-6213` sets `prompt_cache_key = self.session_id` on the responses path for non-GitHub responses.
- `run_agent.py:3875-3878` explicitly passes through `prompt_cache_key` in normalized API kwargs.
3. ✅ System-prompt stability and cache-friendly message normalization exist
- `run_agent.py:3155-3157` documents that the system prompt is cached and reused across turns to maximize prefix cache hits.
- `run_agent.py:8314-8339` normalizes whitespace and tool-call JSON for bit-perfect prefix matching across turns.
4. ✅ Cache hit/miss logging infrastructure exists
- `run_agent.py:8966-8980` logs cache read/write token stats, including `cached_tokens`, `cache_creation_input_tokens`, and hit percentage.
## Executed Verification
### Targeted tests run
- `PYTHONPATH=/tmp/BURN2-FORGE-ALPHA-3 python3 -m pytest -q tests/agent/test_prompt_caching.py`
- Result: `14 passed`
### Syntax verification
- `PYTHONPATH=/tmp/BURN2-FORGE-ALPHA-3 python3 -m py_compile agent/prompt_caching.py run_agent.py`
- Result: passed
## Evidence Summary
The issue body says:
- prompt caching is already extensively implemented
- the primary opportunities are operational: routing more workloads to Ollama, verifying provider support, and reporting cache hit rates
The repo state matches that conclusion:
- caching primitives are present
- integration points are wired into the runtime
- targeted tests already exist and pass
- no new implementation change is required to satisfy the issue's repo-side claim
## Recommendation
Close issue #851 as already implemented in the codebase.
If desired, follow-on work should be opened as separate operational issues for:
- Ollama-heavy workload routing
- provider-specific cache verification
- nightly cache hit-rate reporting

View File

@@ -38,6 +38,18 @@ def _cron_api(**kwargs):
return json.loads(cronjob_tool(**kwargs))
def _print_runtime_overrides(job: dict) -> None:
    """Print the runtime overrides pinned on a cron job, one per line.

    Looks up the ``model``, ``provider`` and ``base_url`` keys of *job* and
    prints each one whose value is truthy, in that order. Keys that are
    missing, ``None`` or empty are skipped, so a job with no overrides
    prints nothing.

    Args:
        job: Mapping describing a cron job.
    """
    # (display label, job key) pairs in output order.
    overrides = (
        ("Model", "model"),
        ("Provider", "provider"),
        ("Base URL", "base_url"),
    )
    for label, key in overrides:
        value = job.get(key)
        if value:
            print(f" {label}: {value}")
def cron_list(show_all: bool = False):
"""List all scheduled jobs."""
from cron.jobs import list_jobs
@@ -93,6 +105,7 @@ def cron_list(show_all: bool = False):
script = job.get("script")
if script:
print(f" Script: {script}")
_print_runtime_overrides(job)
# Execution history
last_status = job.get("last_status")
@@ -167,6 +180,9 @@ def cron_create(args):
repeat=getattr(args, "repeat", None),
skill=getattr(args, "skill", None),
skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
model=getattr(args, "model", None),
provider=getattr(args, "provider", None),
base_url=getattr(args, "base_url", None),
script=getattr(args, "script", None),
)
if not result.get("success"):
@@ -180,6 +196,8 @@ def cron_create(args):
job_data = result.get("job", {})
if job_data.get("script"):
print(f" Script: {job_data['script']}")
if job_data:
_print_runtime_overrides(job_data)
print(f" Next run: {result['next_run_at']}")
return 0
@@ -217,6 +235,9 @@ def cron_edit(args):
deliver=getattr(args, "deliver", None),
repeat=getattr(args, "repeat", None),
skills=final_skills,
model=getattr(args, "model", None),
provider=getattr(args, "provider", None),
base_url=getattr(args, "base_url", None),
script=getattr(args, "script", None),
)
if not result.get("success"):
@@ -233,6 +254,7 @@ def cron_edit(args):
print(" Skills: none")
if updated.get("script"):
print(f" Script: {updated['script']}")
_print_runtime_overrides(updated)
return 0

View File

@@ -4958,6 +4958,9 @@ For more help on a command:
cron_create.add_argument("--deliver", help="Delivery target: origin, local, telegram, discord, signal, or platform:chat_id")
cron_create.add_argument("--repeat", type=int, help="Optional repeat count")
cron_create.add_argument("--skill", dest="skills", action="append", help="Attach a skill. Repeat to add multiple skills.")
cron_create.add_argument("--model", help="Pin this job to a specific model (for example: google/gemma-4-31b-it)")
cron_create.add_argument("--provider", help="Pin this job to a specific provider (for example: openrouter)")
cron_create.add_argument("--base-url", dest="base_url", help="Optional base URL override for the job's runtime provider")
cron_create.add_argument("--script", help="Path to a Python script whose stdout is injected into the prompt each run")
# cron edit
@@ -4972,6 +4975,9 @@ For more help on a command:
cron_edit.add_argument("--add-skill", dest="add_skills", action="append", help="Append a skill without replacing the existing list. Repeatable.")
cron_edit.add_argument("--remove-skill", dest="remove_skills", action="append", help="Remove a specific attached skill. Repeatable.")
cron_edit.add_argument("--clear-skills", action="store_true", help="Remove all attached skills from the job")
cron_edit.add_argument("--model", help="Update the job's pinned model")
cron_edit.add_argument("--provider", help="Update the job's pinned provider")
cron_edit.add_argument("--base-url", dest="base_url", help="Update the job's pinned base URL. Pass an empty string to clear it.")
cron_edit.add_argument("--script", help="Path to a Python script whose stdout is injected into the prompt each run. Pass empty string to clear.")
# lifecycle actions

View File

@@ -1,6 +1,7 @@
"""Tests for hermes_cli.cron command handling."""
from argparse import Namespace
from unittest.mock import patch
import pytest
@@ -105,3 +106,135 @@ class TestCronCommandLifecycle:
assert len(jobs) == 1
assert jobs[0]["skills"] == ["blogwatcher", "find-nearby"]
assert jobs[0]["name"] == "Skill combo"
def test_create_can_pin_runtime_model_provider_and_base_url(self, tmp_cron_dir, capsys):
    """`cron create` stores the pinned model/provider/base URL and prints them."""
    cron_command(
        Namespace(
            cron_command="create",
            schedule="every 1h",
            prompt="Run the burn loop",
            name="Gemma burn",
            deliver=None,
            repeat=None,
            skill=None,
            skills=None,
            script=None,
            model="google/gemma-4-31b-it",
            provider="openrouter",
            base_url="https://openrouter.ai/api/v1",
        )
    )

    # The overrides must be persisted on the stored job...
    job = list_jobs()[0]
    assert job["model"] == "google/gemma-4-31b-it"
    assert job["provider"] == "openrouter"
    assert job["base_url"] == "https://openrouter.ai/api/v1"

    # ...and echoed in the CLI output so the pinning is auditable.
    out = capsys.readouterr().out
    assert "Created job" in out
    assert "Model: google/gemma-4-31b-it" in out
    assert "Provider: openrouter" in out
    # NOTE(review): assumes the create result's "job" payload carries
    # base_url the same way it carries model/provider — confirm.
    assert "Base URL: https://openrouter.ai/api/v1" in out
def test_edit_can_update_runtime_model_provider_and_clear_base_url(self, tmp_cron_dir, capsys):
    """`cron edit` updates model/provider and clears base URL on an empty string."""
    job = create_job(prompt="Check server status", schedule="every 1h")

    cron_command(
        Namespace(
            cron_command="edit",
            job_id=job["id"],
            schedule=None,
            prompt=None,
            name=None,
            deliver=None,
            repeat=None,
            skill=None,
            skills=None,
            add_skills=None,
            remove_skills=None,
            clear_skills=False,
            script=None,
            model="google/gemma-4-31b-it",
            provider="openrouter",
            # An empty string is the CLI's way of asking to clear the value.
            base_url="",
        )
    )

    updated = get_job(job["id"])
    assert updated["model"] == "google/gemma-4-31b-it"
    assert updated["provider"] == "openrouter"
    assert updated["base_url"] is None

    out = capsys.readouterr().out
    assert "Updated job" in out
    assert "Model: google/gemma-4-31b-it" in out
    assert "Provider: openrouter" in out
    # A cleared base URL must not be reported as a pinned override.
    assert "Base URL:" not in out
class TestCronParserRuntimeOverrideFlags:
    """Check that the top-level parser accepts the cron runtime override flags."""

    @staticmethod
    def _parse_cron_args(monkeypatch, cron_argv):
        """Run ``main()`` for ``hermes cron <cron_argv>`` and return the parsed args.

        ``cmd_cron`` is replaced with a stub that records the namespace it
        receives, so only argument parsing is exercised and no job is touched.
        """
        from hermes_cli import main as main_mod

        captured = {}

        def fake_cmd_cron(args):
            captured["args"] = args

        monkeypatch.setattr(main_mod, "cmd_cron", fake_cmd_cron)
        monkeypatch.setattr("sys.argv", ["hermes", "cron", *cron_argv])

        main_mod.main()
        return captured["args"]

    def test_main_parses_create_runtime_override_flags(self, monkeypatch):
        args = self._parse_cron_args(
            monkeypatch,
            [
                "create",
                "every 1h",
                "Run the burn loop",
                "--model",
                "google/gemma-4-31b-it",
                "--provider",
                "openrouter",
                "--base-url",
                "https://openrouter.ai/api/v1",
            ],
        )

        assert args.cron_command == "create"
        assert args.model == "google/gemma-4-31b-it"
        assert args.provider == "openrouter"
        assert args.base_url == "https://openrouter.ai/api/v1"

    def test_main_parses_edit_runtime_override_flags(self, monkeypatch):
        args = self._parse_cron_args(
            monkeypatch,
            [
                "edit",
                "job123",
                "--model",
                "google/gemma-4-31b-it",
                "--provider",
                "openrouter",
                # Empty value: the documented way to clear the pinned base URL.
                "--base-url",
                "",
            ],
        )

        assert args.cron_command == "edit"
        assert args.job_id == "job123"
        assert args.model == "google/gemma-4-31b-it"
        assert args.provider == "openrouter"
        assert args.base_url == ""