Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
31195b2f7f |
@@ -1,55 +0,0 @@
|
||||
# Issue #851 Verification
|
||||
|
||||
## Status: ✅ ALREADY IMPLEMENTED
|
||||
|
||||
Issue #851 is a research/audit issue whose own conclusion is that prompt caching is already extensively implemented in hermes-agent and that the remaining work is operational, not a repo-side code change.
|
||||
|
||||
This verification confirms that the current repo already contains the core implementation described in the issue body.
|
||||
|
||||
## Acceptance Criteria Check
|
||||
|
||||
1. ✅ Anthropic / OpenRouter prompt-caching support exists
|
||||
- `agent/prompt_caching.py:41-72` implements `apply_anthropic_cache_control()` with the documented system-plus-last-3 breakpoint strategy.
|
||||
- `run_agent.py:8301-8306` applies Anthropic/OpenRouter cache-control breakpoints during API message preparation.
|
||||
|
||||
2. ✅ OpenAI/Codex prompt-cache key support exists
|
||||
- `run_agent.py:6199-6213` sets `prompt_cache_key = self.session_id` on the responses path for non-GitHub responses.
|
||||
- `run_agent.py:3875-3878` explicitly passes through `prompt_cache_key` in normalized API kwargs.
|
||||
|
||||
3. ✅ System-prompt stability and cache-friendly message normalization exist
|
||||
- `run_agent.py:3155-3157` documents that the system prompt is cached and reused across turns to maximize prefix cache hits.
|
||||
- `run_agent.py:8314-8339` normalizes whitespace and tool-call JSON for bit-perfect prefix matching across turns.
|
||||
|
||||
4. ✅ Cache hit/miss logging infrastructure exists
|
||||
- `run_agent.py:8966-8980` logs cache read/write token stats, including `cached_tokens`, `cache_creation_input_tokens`, and hit percentage.
|
||||
|
||||
## Executed Verification
|
||||
|
||||
### Targeted tests run
|
||||
- `PYTHONPATH=/tmp/BURN2-FORGE-ALPHA-3 python3 -m pytest -q tests/agent/test_prompt_caching.py`
|
||||
- Result: `14 passed`
|
||||
|
||||
### Syntax verification
|
||||
- `PYTHONPATH=/tmp/BURN2-FORGE-ALPHA-3 python3 -m py_compile agent/prompt_caching.py run_agent.py`
|
||||
- Result: passed
|
||||
|
||||
## Evidence Summary
|
||||
|
||||
The issue body says:
|
||||
- prompt caching is already extensively implemented
|
||||
- the primary opportunities are operational: routing more workloads to Ollama, verifying provider support, and reporting cache hit rates
|
||||
|
||||
The repo state matches that conclusion:
|
||||
- caching primitives are present
|
||||
- integration points are wired into the runtime
|
||||
- targeted tests already exist and pass
|
||||
- no new implementation change is required to satisfy the issue's repo-side claim
|
||||
|
||||
## Recommendation
|
||||
|
||||
Close issue #851 as already implemented in the codebase.
|
||||
|
||||
If desired, follow-on work should be opened as separate operational issues for:
|
||||
- Ollama-heavy workload routing
|
||||
- provider-specific cache verification
|
||||
- nightly cache hit-rate reporting
|
||||
@@ -250,12 +250,16 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"big-pickle",
|
||||
],
|
||||
"opencode-go": [
|
||||
"glm-5",
|
||||
"kimi-k2.6",
|
||||
"kimi-k2.5",
|
||||
"glm-5.1",
|
||||
"glm-5",
|
||||
"mimo-v2-pro",
|
||||
"mimo-v2-omni",
|
||||
"minimax-m2.7",
|
||||
"minimax-m2.5",
|
||||
"qwen3.6-plus",
|
||||
"qwen3.5-plus",
|
||||
],
|
||||
"ai-gateway": [
|
||||
"anthropic/claude-opus-4.6",
|
||||
|
||||
@@ -105,7 +105,7 @@ _DEFAULT_PROVIDER_MODELS = {
|
||||
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
|
||||
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
|
||||
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
|
||||
"opencode-go": ["glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
|
||||
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
|
||||
"huggingface": [
|
||||
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
||||
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
|
||||
|
||||
@@ -4,32 +4,62 @@ import os
|
||||
from unittest.mock import patch
|
||||
|
||||
from hermes_cli.model_switch import list_authenticated_providers
|
||||
from hermes_cli.models import curated_models_for_provider
|
||||
|
||||
|
||||
@patch.dict(os.environ, {"OPENCODE_GO_API_KEY": "test-key"}, clear=False)
|
||||
def test_opencode_go_appears_when_api_key_set():
|
||||
"""opencode-go should appear in list_authenticated_providers when OPENCODE_GO_API_KEY is set."""
|
||||
providers = list_authenticated_providers(current_provider="openrouter")
|
||||
|
||||
|
||||
# Find opencode-go in results
|
||||
opencode_go = next((p for p in providers if p["slug"] == "opencode-go"), None)
|
||||
|
||||
|
||||
assert opencode_go is not None, "opencode-go should appear when OPENCODE_GO_API_KEY is set"
|
||||
assert opencode_go["models"] == ["glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
|
||||
assert opencode_go["models"] == [
|
||||
"kimi-k2.6",
|
||||
"kimi-k2.5",
|
||||
"glm-5.1",
|
||||
"glm-5",
|
||||
"mimo-v2-pro",
|
||||
"mimo-v2-omni",
|
||||
"minimax-m2.7",
|
||||
"minimax-m2.5",
|
||||
]
|
||||
assert opencode_go["total_models"] == 10
|
||||
# opencode-go can appear as "built-in" (from PROVIDER_TO_MODELS_DEV when
|
||||
# models.dev is reachable) or "hermes" (from HERMES_OVERLAYS fallback when
|
||||
# the API is unavailable, e.g. in CI).
|
||||
assert opencode_go["source"] in ("built-in", "hermes")
|
||||
|
||||
|
||||
@patch("hermes_cli.models.provider_model_ids", return_value=[])
|
||||
def test_opencode_go_curated_fallback_includes_new_models(_mock_provider_model_ids):
|
||||
"""Fallback catalog should include Kimi K2.6 and both Qwen Plus models."""
|
||||
model_ids = [model_id for model_id, _ in curated_models_for_provider("opencode-go")]
|
||||
|
||||
assert model_ids == [
|
||||
"kimi-k2.6",
|
||||
"kimi-k2.5",
|
||||
"glm-5.1",
|
||||
"glm-5",
|
||||
"mimo-v2-pro",
|
||||
"mimo-v2-omni",
|
||||
"minimax-m2.7",
|
||||
"minimax-m2.5",
|
||||
"qwen3.6-plus",
|
||||
"qwen3.5-plus",
|
||||
]
|
||||
|
||||
|
||||
def test_opencode_go_not_appears_when_no_creds():
|
||||
"""opencode-go should NOT appear when no credentials are set."""
|
||||
# Ensure OPENCODE_GO_API_KEY is not set
|
||||
env_without_key = {k: v for k, v in os.environ.items() if k != "OPENCODE_GO_API_KEY"}
|
||||
|
||||
|
||||
with patch.dict(os.environ, env_without_key, clear=True):
|
||||
providers = list_authenticated_providers(current_provider="openrouter")
|
||||
|
||||
|
||||
# opencode-go should not be in results
|
||||
opencode_go = next((p for p in providers if p["slug"] == "opencode-go"), None)
|
||||
assert opencode_go is None, "opencode-go should not appear without credentials"
|
||||
|
||||
Reference in New Issue
Block a user