Persist structured exhaustion metadata from provider errors, use explicit reset timestamps when available, and expose label-based credential targeting in the auth CLI. This keeps long-lived Codex cooldowns from being misreported as one-hour waits and avoids forcing operators to manage entries by list position alone. Constraint: Existing credential pool JSON needs to remain backward compatible with stored entries that only record status code and timestamp Constraint: Runtime recovery must keep the existing retry-then-rotate semantics for 429s while enriching pool state with provider metadata Rejected: Add a separate credential scheduler subsystem | too large for the Hermes pool architecture and unnecessary for this fix Rejected: Only change CLI formatting | would leave runtime rotation blind to resets_at and preserve the serial-failure behavior Confidence: high Scope-risk: moderate Reversibility: clean Directive: Preserve structured rate-limit metadata when new providers expose reset hints; do not collapse back to status-code-only exhaustion tracking Tested: Focused pytest slice for auth commands, credential pool recovery, and routing (272 passed); py_compile on changed Python files; hermes -w auth list/remove smoke test with temporary HERMES_HOME Not-tested: Full repository pytest suite, broader gateway/integration flows outside the touched auth and pool paths
351 lines
13 KiB
Python
351 lines
13 KiB
Python
"""Tests for credential pool preservation through smart routing and 429 recovery.
|
|
|
|
Covers:
|
|
1. credential_pool flows through resolve_turn_route (no-route and fallback paths)
|
|
2. CLI _resolve_turn_agent_config passes credential_pool to primary dict
|
|
3. Gateway _resolve_turn_agent_config passes credential_pool to primary dict
|
|
4. Eager fallback deferred when credential pool has credentials
|
|
5. Eager fallback fires when no credential pool exists
|
|
6. Full 429 rotation cycle: retry-same → rotate → exhaust → fallback
|
|
"""
|
|
|
|
import os
|
|
import time
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock, patch, PropertyMock
|
|
|
|
import pytest
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 1. smart_model_routing: credential_pool preserved in no-route path
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSmartRoutingPoolPreservation:
|
|
def test_no_route_preserves_credential_pool(self):
|
|
from agent.smart_model_routing import resolve_turn_route
|
|
|
|
fake_pool = MagicMock(name="CredentialPool")
|
|
primary = {
|
|
"model": "gpt-5.4",
|
|
"api_key": "sk-test",
|
|
"base_url": None,
|
|
"provider": "openai-codex",
|
|
"api_mode": "codex_responses",
|
|
"command": None,
|
|
"args": [],
|
|
"credential_pool": fake_pool,
|
|
}
|
|
# routing disabled
|
|
result = resolve_turn_route("hello", None, primary)
|
|
assert result["runtime"]["credential_pool"] is fake_pool
|
|
|
|
def test_no_route_none_pool(self):
|
|
from agent.smart_model_routing import resolve_turn_route
|
|
|
|
primary = {
|
|
"model": "gpt-5.4",
|
|
"api_key": "sk-test",
|
|
"base_url": None,
|
|
"provider": "openai-codex",
|
|
"api_mode": "codex_responses",
|
|
"command": None,
|
|
"args": [],
|
|
}
|
|
result = resolve_turn_route("hello", None, primary)
|
|
assert result["runtime"]["credential_pool"] is None
|
|
|
|
def test_routing_disabled_preserves_pool(self):
|
|
from agent.smart_model_routing import resolve_turn_route
|
|
|
|
fake_pool = MagicMock(name="CredentialPool")
|
|
primary = {
|
|
"model": "gpt-5.4",
|
|
"api_key": "sk-test",
|
|
"base_url": None,
|
|
"provider": "openai-codex",
|
|
"api_mode": "codex_responses",
|
|
"command": None,
|
|
"args": [],
|
|
"credential_pool": fake_pool,
|
|
}
|
|
# routing explicitly disabled
|
|
result = resolve_turn_route("hello", {"enabled": False}, primary)
|
|
assert result["runtime"]["credential_pool"] is fake_pool
|
|
|
|
def test_route_fallback_on_resolve_error_preserves_pool(self, monkeypatch):
|
|
"""When smart routing picks a cheap model but resolve_runtime_provider
|
|
fails, the fallback to primary must still include credential_pool."""
|
|
from agent.smart_model_routing import resolve_turn_route
|
|
|
|
fake_pool = MagicMock(name="CredentialPool")
|
|
primary = {
|
|
"model": "gpt-5.4",
|
|
"api_key": "sk-test",
|
|
"base_url": None,
|
|
"provider": "openai-codex",
|
|
"api_mode": "codex_responses",
|
|
"command": None,
|
|
"args": [],
|
|
"credential_pool": fake_pool,
|
|
}
|
|
routing_config = {
|
|
"enabled": True,
|
|
"cheap_model": "openai/gpt-4.1-mini",
|
|
"cheap_provider": "openrouter",
|
|
"max_tokens": 200,
|
|
"patterns": ["^(hi|hello|hey)"],
|
|
}
|
|
# Force resolve_runtime_provider to fail so it falls back to primary
|
|
monkeypatch.setattr(
|
|
"hermes_cli.runtime_provider.resolve_runtime_provider",
|
|
MagicMock(side_effect=RuntimeError("no credentials")),
|
|
)
|
|
result = resolve_turn_route("hi", routing_config, primary)
|
|
assert result["runtime"]["credential_pool"] is fake_pool
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 2 & 3. CLI and Gateway _resolve_turn_agent_config include credential_pool
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestCliTurnRoutePool:
|
|
def test_resolve_turn_includes_pool(self, monkeypatch, tmp_path):
|
|
"""CLI's _resolve_turn_agent_config must pass credential_pool to primary."""
|
|
from agent.smart_model_routing import resolve_turn_route
|
|
captured = {}
|
|
|
|
def spy_resolve(user_message, routing_config, primary):
|
|
captured["primary"] = primary
|
|
return resolve_turn_route(user_message, routing_config, primary)
|
|
|
|
monkeypatch.setattr(
|
|
"agent.smart_model_routing.resolve_turn_route", spy_resolve
|
|
)
|
|
|
|
# Build a minimal HermesCLI-like object with the method
|
|
shell = SimpleNamespace(
|
|
model="gpt-5.4",
|
|
api_key="sk-test",
|
|
base_url=None,
|
|
provider="openai-codex",
|
|
api_mode="codex_responses",
|
|
acp_command=None,
|
|
acp_args=[],
|
|
_credential_pool=MagicMock(name="FakePool"),
|
|
_smart_model_routing={"enabled": False},
|
|
)
|
|
|
|
# Import and bind the real method
|
|
from cli import HermesCLI
|
|
bound = HermesCLI._resolve_turn_agent_config.__get__(shell)
|
|
bound("test message")
|
|
|
|
assert "credential_pool" in captured["primary"]
|
|
assert captured["primary"]["credential_pool"] is shell._credential_pool
|
|
|
|
|
|
class TestGatewayTurnRoutePool:
|
|
def test_resolve_turn_includes_pool(self, monkeypatch):
|
|
"""Gateway's _resolve_turn_agent_config must pass credential_pool."""
|
|
from agent.smart_model_routing import resolve_turn_route
|
|
captured = {}
|
|
|
|
def spy_resolve(user_message, routing_config, primary):
|
|
captured["primary"] = primary
|
|
return resolve_turn_route(user_message, routing_config, primary)
|
|
|
|
monkeypatch.setattr(
|
|
"agent.smart_model_routing.resolve_turn_route", spy_resolve
|
|
)
|
|
|
|
from gateway.run import GatewayRunner
|
|
|
|
runner = SimpleNamespace(
|
|
_smart_model_routing={"enabled": False},
|
|
)
|
|
|
|
runtime_kwargs = {
|
|
"api_key": "sk-test",
|
|
"base_url": None,
|
|
"provider": "openai-codex",
|
|
"api_mode": "codex_responses",
|
|
"command": None,
|
|
"args": [],
|
|
"credential_pool": MagicMock(name="FakePool"),
|
|
}
|
|
|
|
bound = GatewayRunner._resolve_turn_agent_config.__get__(runner)
|
|
bound("test message", "gpt-5.4", runtime_kwargs)
|
|
|
|
assert "credential_pool" in captured["primary"]
|
|
assert captured["primary"]["credential_pool"] is runtime_kwargs["credential_pool"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 4 & 5. Eager fallback deferred/fires based on credential pool
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestEagerFallbackWithPool:
|
|
"""Test the eager fallback guard in run_agent.py's error handling loop."""
|
|
|
|
def _make_agent(self, has_pool=True, pool_has_creds=True, has_fallback=True):
|
|
"""Create a minimal AIAgent mock with the fields needed."""
|
|
from run_agent import AIAgent
|
|
|
|
with patch.object(AIAgent, "__init__", lambda self, **kw: None):
|
|
agent = AIAgent()
|
|
|
|
agent._credential_pool = None
|
|
if has_pool:
|
|
pool = MagicMock()
|
|
pool.has_available.return_value = pool_has_creds
|
|
agent._credential_pool = pool
|
|
|
|
agent._fallback_chain = [{"model": "fallback/model"}] if has_fallback else []
|
|
agent._fallback_index = 0
|
|
agent._try_activate_fallback = MagicMock(return_value=True)
|
|
agent._emit_status = MagicMock()
|
|
|
|
return agent
|
|
|
|
def test_eager_fallback_deferred_when_pool_has_credentials(self):
|
|
"""429 with active pool should NOT trigger eager fallback."""
|
|
agent = self._make_agent(has_pool=True, pool_has_creds=True, has_fallback=True)
|
|
|
|
# Simulate the check from run_agent.py lines 7180-7191
|
|
is_rate_limited = True
|
|
if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
|
|
pool = agent._credential_pool
|
|
pool_may_recover = pool is not None and pool.has_available()
|
|
if not pool_may_recover:
|
|
agent._try_activate_fallback()
|
|
|
|
agent._try_activate_fallback.assert_not_called()
|
|
|
|
def test_eager_fallback_fires_when_no_pool(self):
|
|
"""429 without pool should trigger eager fallback."""
|
|
agent = self._make_agent(has_pool=False, has_fallback=True)
|
|
|
|
is_rate_limited = True
|
|
if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
|
|
pool = agent._credential_pool
|
|
pool_may_recover = pool is not None and pool.has_available()
|
|
if not pool_may_recover:
|
|
agent._try_activate_fallback()
|
|
|
|
agent._try_activate_fallback.assert_called_once()
|
|
|
|
def test_eager_fallback_fires_when_pool_exhausted(self):
|
|
"""429 with exhausted pool should trigger eager fallback."""
|
|
agent = self._make_agent(has_pool=True, pool_has_creds=False, has_fallback=True)
|
|
|
|
is_rate_limited = True
|
|
if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
|
|
pool = agent._credential_pool
|
|
pool_may_recover = pool is not None and pool.has_available()
|
|
if not pool_may_recover:
|
|
agent._try_activate_fallback()
|
|
|
|
agent._try_activate_fallback.assert_called_once()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 6. Full 429 rotation cycle via _recover_with_credential_pool
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestPoolRotationCycle:
|
|
"""Verify the retry-same → rotate → exhaust flow in _recover_with_credential_pool."""
|
|
|
|
def _make_agent_with_pool(self, pool_entries=3):
|
|
from run_agent import AIAgent
|
|
|
|
with patch.object(AIAgent, "__init__", lambda self, **kw: None):
|
|
agent = AIAgent()
|
|
|
|
entries = []
|
|
for i in range(pool_entries):
|
|
e = MagicMock(name=f"entry_{i}")
|
|
e.id = f"cred-{i}"
|
|
entries.append(e)
|
|
|
|
pool = MagicMock()
|
|
pool.has_credentials.return_value = True
|
|
|
|
# mark_exhausted_and_rotate returns next entry until exhausted
|
|
self._rotation_index = 0
|
|
|
|
def rotate(status_code=None, error_context=None):
|
|
self._rotation_index += 1
|
|
if self._rotation_index < pool_entries:
|
|
return entries[self._rotation_index]
|
|
pool.has_credentials.return_value = False
|
|
return None
|
|
|
|
pool.mark_exhausted_and_rotate = MagicMock(side_effect=rotate)
|
|
agent._credential_pool = pool
|
|
agent._swap_credential = MagicMock()
|
|
agent.log_prefix = ""
|
|
|
|
return agent, pool, entries
|
|
|
|
def test_first_429_sets_retry_flag_no_rotation(self):
|
|
"""First 429 should just set has_retried_429=True, no rotation."""
|
|
agent, pool, _ = self._make_agent_with_pool(3)
|
|
recovered, has_retried = agent._recover_with_credential_pool(
|
|
status_code=429, has_retried_429=False
|
|
)
|
|
assert recovered is False
|
|
assert has_retried is True
|
|
pool.mark_exhausted_and_rotate.assert_not_called()
|
|
|
|
def test_second_429_rotates_to_next(self):
|
|
"""Second consecutive 429 should rotate to next credential."""
|
|
agent, pool, entries = self._make_agent_with_pool(3)
|
|
recovered, has_retried = agent._recover_with_credential_pool(
|
|
status_code=429, has_retried_429=True
|
|
)
|
|
assert recovered is True
|
|
assert has_retried is False # reset after rotation
|
|
pool.mark_exhausted_and_rotate.assert_called_once_with(status_code=429, error_context=None)
|
|
agent._swap_credential.assert_called_once_with(entries[1])
|
|
|
|
def test_pool_exhaustion_returns_false(self):
|
|
"""When all credentials exhausted, recovery should return False."""
|
|
agent, pool, _ = self._make_agent_with_pool(1)
|
|
# First 429 sets flag
|
|
_, has_retried = agent._recover_with_credential_pool(
|
|
status_code=429, has_retried_429=False
|
|
)
|
|
assert has_retried is True
|
|
|
|
# Second 429 tries to rotate but pool is exhausted (only 1 entry)
|
|
recovered, _ = agent._recover_with_credential_pool(
|
|
status_code=429, has_retried_429=True
|
|
)
|
|
assert recovered is False
|
|
|
|
def test_402_immediate_rotation(self):
|
|
"""402 (billing) should immediately rotate, no retry-first."""
|
|
agent, pool, entries = self._make_agent_with_pool(3)
|
|
recovered, has_retried = agent._recover_with_credential_pool(
|
|
status_code=402, has_retried_429=False
|
|
)
|
|
assert recovered is True
|
|
assert has_retried is False
|
|
pool.mark_exhausted_and_rotate.assert_called_once_with(status_code=402, error_context=None)
|
|
|
|
def test_no_pool_returns_false(self):
|
|
"""No pool should return (False, unchanged)."""
|
|
from run_agent import AIAgent
|
|
|
|
with patch.object(AIAgent, "__init__", lambda self, **kw: None):
|
|
agent = AIAgent()
|
|
agent._credential_pool = None
|
|
|
|
recovered, has_retried = agent._recover_with_credential_pool(
|
|
status_code=429, has_retried_429=False
|
|
)
|
|
assert recovered is False
|
|
assert has_retried is False
|