Compare commits
2 Commits
q/372-1776
...
fix/empty-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
24f49ad23b | ||
|
|
c1826ece68 |
@@ -939,8 +939,7 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
_job_results.append(result)
|
||||
|
||||
for job, success, output, final_response, error in _job_results:
|
||||
executed += 1
|
||||
|
||||
try:
|
||||
output_file = save_job_output(job["id"], output)
|
||||
if verbose:
|
||||
logger.info("Output saved to: %s", output_file)
|
||||
|
||||
123
run_agent.py
123
run_agent.py
@@ -590,6 +590,12 @@ class AIAgent:
|
||||
# Consumed by every LLM turn across parent + all subagents.
|
||||
self.iteration_budget = iteration_budget or IterationBudget(max_iterations)
|
||||
self.tool_delay = tool_delay
|
||||
# Error circuit breaker — tracks consecutive tool failures across turns.
|
||||
# After N consecutive errors, inject a nudge to change strategy.
|
||||
# After N+2, halt and report. Prevents 2.33x cascade amplification.
|
||||
# See: https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/309
|
||||
self._error_streak = 0
|
||||
self._error_streak_halted = False
|
||||
self.save_trajectories = save_trajectories
|
||||
self.verbose_logging = verbose_logging
|
||||
self.quiet_mode = quiet_mode
|
||||
@@ -5341,6 +5347,15 @@ class AIAgent:
|
||||
|
||||
def _build_api_kwargs(self, api_messages: list) -> dict:
|
||||
"""Build the keyword arguments dict for the active API mode."""
|
||||
# Preflight: model must be non-empty. An empty model string reaches
|
||||
# the provider as model="" and returns HTTP 400 after 3 retries.
|
||||
# Better to fail immediately with a clear message.
|
||||
if not self.model or not self.model.strip():
|
||||
raise ValueError(
|
||||
"No model specified. Set the model via AIAgent(model=...), "
|
||||
"HERMES_MODEL env var, or config.yaml model.default. "
|
||||
f"Current value: {self.model!r}"
|
||||
)
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages)
|
||||
@@ -6285,6 +6300,9 @@ class AIAgent:
|
||||
else:
|
||||
function_name, function_args, function_result, tool_duration, is_error = r
|
||||
|
||||
# Error circuit breaker: track consecutive failures
|
||||
_streak_nudge = self._update_error_streak(is_error)
|
||||
|
||||
if is_error:
|
||||
result_preview = function_result[:200] if len(function_result) > 200 else function_result
|
||||
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
||||
@@ -6339,6 +6357,25 @@ class AIAgent:
|
||||
}
|
||||
messages.append(tool_msg)
|
||||
|
||||
# ── Circuit breaker injection (concurrent path) ─────────────────
|
||||
# Inject the nudge from the error streak tracker, if any.
|
||||
if self._error_streak >= self._ERROR_STREAK_NUDGE and messages and messages[-1].get("role") == "tool":
|
||||
if self._error_streak_halted:
|
||||
_nudge_text = (
|
||||
f"[CIRCUIT BREAKER: {self._error_streak} consecutive tool errors. "
|
||||
"STOP calling tools. Provide your final response based on what you "
|
||||
"have learned. If you need to continue, use the terminal tool — it "
|
||||
"is the most reliable path. Do NOT repeat the same failed approach.]"
|
||||
)
|
||||
else:
|
||||
_nudge_text = (
|
||||
f"[WARNING: {self._error_streak} consecutive tool errors detected. "
|
||||
"Your current approach is not working. Try a different tool type, "
|
||||
"simplify your request, or use the terminal tool. "
|
||||
"Do NOT retry the exact same call.]"
|
||||
)
|
||||
self._inject_circuit_breaker(messages, _nudge_text)
|
||||
|
||||
# ── Budget pressure injection ────────────────────────────────────
|
||||
budget_warning = self._get_budget_warning(api_call_count)
|
||||
if budget_warning and messages and messages[-1].get("role") == "tool":
|
||||
@@ -6607,6 +6644,7 @@ class AIAgent:
|
||||
# Log tool errors to the persistent error log so [error] tags
|
||||
# in the UI always have a corresponding detailed entry on disk.
|
||||
_is_error_result, _ = _detect_tool_failure(function_name, function_result)
|
||||
_streak_nudge = self._update_error_streak(_is_error_result)
|
||||
if _is_error_result:
|
||||
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
||||
else:
|
||||
@@ -6673,6 +6711,24 @@ class AIAgent:
|
||||
if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
|
||||
time.sleep(self.tool_delay)
|
||||
|
||||
# ── Circuit breaker injection (sequential path) ────────────────
|
||||
if self._error_streak >= self._ERROR_STREAK_NUDGE and messages and messages[-1].get("role") == "tool":
|
||||
if self._error_streak_halted:
|
||||
_nudge_text = (
|
||||
f"[CIRCUIT BREAKER: {self._error_streak} consecutive tool errors. "
|
||||
"STOP calling tools. Provide your final response based on what you "
|
||||
"have learned. If you need to continue, use the terminal tool — it "
|
||||
"is the most reliable path. Do NOT repeat the same failed approach.]"
|
||||
)
|
||||
else:
|
||||
_nudge_text = (
|
||||
f"[WARNING: {self._error_streak} consecutive tool errors detected. "
|
||||
"Your current approach is not working. Try a different tool type, "
|
||||
"simplify your request, or use the terminal tool. "
|
||||
"Do NOT retry the exact same call.]"
|
||||
)
|
||||
self._inject_circuit_breaker(messages, _nudge_text)
|
||||
|
||||
# ── Budget pressure injection ─────────────────────────────────
|
||||
# After all tool calls in this turn are processed, check if we're
|
||||
# approaching max_iterations. If so, inject a warning into the LAST
|
||||
@@ -6694,6 +6750,66 @@ class AIAgent:
|
||||
tier = "⚠️ WARNING" if remaining <= self.max_iterations * 0.1 else "💡 CAUTION"
|
||||
print(f"{self.log_prefix}{tier}: {remaining} iterations remaining")
|
||||
|
||||
# ── Error circuit breaker ──────────────────────────────────────────────
|
||||
# Tracks consecutive tool failures across API turns. After a threshold
|
||||
# of errors (default 3), injects a nudge telling the LLM to try a
|
||||
# different approach. After hard limit (default 5), halts iteration
|
||||
# and returns a failure summary instead of burning more tokens.
|
||||
#
|
||||
# Empirical basis (issue #309):
|
||||
# P(error | prev error) = 58.6% vs P(error | prev success) = 25.2%
|
||||
# Max observed streak: 31 consecutive errors.
|
||||
# Recovery path: terminal tool (2,300 recoveries).
|
||||
_ERROR_STREAK_NUDGE = 3 # Inject strategy-change nudge
|
||||
_ERROR_STREAK_HALT = 5 # Stop and report
|
||||
|
||||
def _update_error_streak(self, is_error: bool) -> Optional[str]:
|
||||
"""Update the error streak counter. Returns a nudge string or None.
|
||||
|
||||
Call once per tool result. Resets streak on success, increments on
|
||||
error. Returns a system nudge to inject into the last tool message
|
||||
when the streak crosses the nudge threshold.
|
||||
"""
|
||||
if is_error:
|
||||
self._error_streak += 1
|
||||
else:
|
||||
self._error_streak = 0
|
||||
return None
|
||||
|
||||
if self._error_streak >= self._ERROR_STREAK_HALT:
|
||||
self._error_streak_halted = True
|
||||
return (
|
||||
f"[CIRCUIT BREAKER: {self._error_streak} consecutive tool errors. "
|
||||
"STOP calling tools. Provide your final response based on what you "
|
||||
"have learned. If you need to continue, use the terminal tool — it "
|
||||
"is the most reliable path. Do NOT repeat the same failed approach.]"
|
||||
)
|
||||
|
||||
if self._error_streak >= self._ERROR_STREAK_NUDGE:
|
||||
return (
|
||||
f"[WARNING: {self._error_streak} consecutive tool errors detected. "
|
||||
"Your current approach is not working. Try a different tool type, "
|
||||
"simplify your request, or use the terminal tool. "
|
||||
"Do NOT retry the exact same call.]"
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
def _inject_circuit_breaker(self, messages: list, nudge: str) -> None:
|
||||
"""Append the circuit breaker nudge to the last tool message."""
|
||||
if not messages or messages[-1].get("role") != "tool":
|
||||
return
|
||||
last_content = messages[-1]["content"]
|
||||
try:
|
||||
parsed = json.loads(last_content)
|
||||
if isinstance(parsed, dict):
|
||||
parsed["_circuit_breaker"] = nudge
|
||||
messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)
|
||||
return
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
messages[-1]["content"] = last_content + f"\n\n{nudge}"
|
||||
|
||||
def _get_budget_warning(self, api_call_count: int) -> Optional[str]:
|
||||
"""Return a budget pressure string, or None if not yet needed.
|
||||
|
||||
@@ -7228,7 +7344,12 @@ class AIAgent:
|
||||
if not self.quiet_mode:
|
||||
self._safe_print("\n⚡ Breaking out of tool loop due to interrupt...")
|
||||
break
|
||||
|
||||
|
||||
# Error circuit breaker: halt if too many consecutive tool errors
|
||||
if self._error_streak_halted:
|
||||
logger.warning("Circuit breaker tripped: %d consecutive errors, halting.", self._error_streak)
|
||||
break
|
||||
|
||||
api_call_count += 1
|
||||
self._api_call_count = api_call_count
|
||||
self._touch_activity(f"starting API call #{api_call_count}")
|
||||
|
||||
0
tests/run_agent/__init__.py
Normal file
0
tests/run_agent/__init__.py
Normal file
148
tests/run_agent/test_circuit_breaker.py
Normal file
148
tests/run_agent/test_circuit_breaker.py
Normal file
@@ -0,0 +1,148 @@
|
||||
"""Tests for the error circuit breaker in run_agent.py (issue #309)."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
|
||||
class TestErrorStreakTracking:
|
||||
"""Test _update_error_streak counter logic."""
|
||||
|
||||
def _make_agent(self):
|
||||
"""Create a minimal AIAgent-like object for testing streak logic."""
|
||||
from run_agent import AIAgent
|
||||
agent = AIAgent.__new__(AIAgent)
|
||||
agent._error_streak = 0
|
||||
agent._error_streak_halted = False
|
||||
return agent
|
||||
|
||||
def test_success_resets_streak(self):
|
||||
agent = self._make_agent()
|
||||
agent._error_streak = 5
|
||||
result = agent._update_error_streak(is_error=False)
|
||||
assert agent._error_streak == 0
|
||||
assert result is None
|
||||
|
||||
def test_error_increments_streak(self):
|
||||
agent = self._make_agent()
|
||||
agent._update_error_streak(is_error=True)
|
||||
assert agent._error_streak == 1
|
||||
agent._update_error_streak(is_error=True)
|
||||
assert agent._error_streak == 2
|
||||
|
||||
def test_nudge_at_threshold(self):
|
||||
agent = self._make_agent()
|
||||
# Errors 1 and 2: no nudge
|
||||
assert agent._update_error_streak(is_error=True) is None # streak=1
|
||||
assert agent._update_error_streak(is_error=True) is None # streak=2
|
||||
# Error 3: nudge
|
||||
nudge = agent._update_error_streak(is_error=True) # streak=3
|
||||
assert nudge is not None
|
||||
assert "WARNING" in nudge
|
||||
assert "3 consecutive" in nudge
|
||||
assert not agent._error_streak_halted
|
||||
|
||||
def test_halt_at_threshold(self):
|
||||
agent = self._make_agent()
|
||||
agent._error_streak = 4
|
||||
nudge = agent._update_error_streak(is_error=True) # streak=5
|
||||
assert nudge is not None
|
||||
assert "CIRCUIT BREAKER" in nudge
|
||||
assert agent._error_streak_halted
|
||||
|
||||
def test_success_between_errors_resets(self):
|
||||
agent = self._make_agent()
|
||||
agent._update_error_streak(is_error=True) # 1
|
||||
agent._update_error_streak(is_error=True) # 2
|
||||
agent._update_error_streak(is_error=False) # reset
|
||||
assert agent._error_streak == 0
|
||||
# Two more errors shouldn't nudge yet
|
||||
assert agent._update_error_streak(is_error=True) is None # 1
|
||||
assert agent._update_error_streak(is_error=True) is None # 2
|
||||
|
||||
def test_streak_above_halt_keeps_halting(self):
|
||||
agent = self._make_agent()
|
||||
agent._error_streak = 6
|
||||
agent._error_streak_halted = True
|
||||
nudge = agent._update_error_streak(is_error=True)
|
||||
assert "CIRCUIT BREAKER" in nudge
|
||||
assert agent._error_streak == 7
|
||||
|
||||
|
||||
class TestInjectCircuitBreaker:
|
||||
"""Test _inject_circuit_breaker message injection."""
|
||||
|
||||
def _make_agent(self):
|
||||
from run_agent import AIAgent
|
||||
agent = AIAgent.__new__(AIAgent)
|
||||
return agent
|
||||
|
||||
def test_injects_into_json_message(self):
|
||||
agent = self._make_agent()
|
||||
messages = [{"role": "tool", "content": json.dumps({"output": "hello"}), "tool_call_id": "t1"}]
|
||||
agent._inject_circuit_breaker(messages, "[WARNING: test nudge]")
|
||||
parsed = json.loads(messages[-1]["content"])
|
||||
assert parsed["_circuit_breaker"] == "[WARNING: test nudge]"
|
||||
assert parsed["output"] == "hello"
|
||||
|
||||
def test_injects_into_plain_text_message(self):
|
||||
agent = self._make_agent()
|
||||
messages = [{"role": "tool", "content": "some error string", "tool_call_id": "t1"}]
|
||||
agent._inject_circuit_breaker(messages, "[WARNING: test nudge]")
|
||||
assert "[WARNING: test nudge]" in messages[-1]["content"]
|
||||
assert "some error string" in messages[-1]["content"]
|
||||
|
||||
def test_noop_on_non_tool_message(self):
|
||||
agent = self._make_agent()
|
||||
messages = [{"role": "user", "content": "hello"}]
|
||||
agent._inject_circuit_breaker(messages, "[WARNING: test nudge]")
|
||||
assert messages[-1]["content"] == "hello"
|
||||
|
||||
def test_noop_on_empty_messages(self):
|
||||
agent = self._make_agent()
|
||||
agent._inject_circuit_breaker([], "[WARNING: test nudge]")
|
||||
|
||||
def test_noop_on_missing_role(self):
|
||||
agent = self._make_agent()
|
||||
messages = [{"content": "no role key"}]
|
||||
agent._inject_circuit_breaker(messages, "[WARNING: test nudge]")
|
||||
assert messages[-1]["content"] == "no role key"
|
||||
|
||||
|
||||
class TestCircuitBreakerConstants:
|
||||
"""Test that the class-level thresholds are set correctly."""
|
||||
|
||||
def test_nudge_threshold(self):
|
||||
from run_agent import AIAgent
|
||||
assert AIAgent._ERROR_STREAK_NUDGE == 3
|
||||
|
||||
def test_halt_threshold(self):
|
||||
from run_agent import AIAgent
|
||||
assert AIAgent._ERROR_STREAK_HALT == 5
|
||||
|
||||
def test_halt_greater_than_nudge(self):
|
||||
from run_agent import AIAgent
|
||||
assert AIAgent._ERROR_STREAK_HALT > AIAgent._ERROR_STREAK_NUDGE
|
||||
|
||||
|
||||
class TestCircuitBreakerIntegration:
|
||||
"""Test that the circuit breaker is initialized in AIAgent.__init__."""
|
||||
|
||||
def test_agent_has_streak_attributes(self):
|
||||
from run_agent import AIAgent
|
||||
agent = AIAgent.__new__(AIAgent)
|
||||
agent._error_streak = 0
|
||||
agent._error_streak_halted = False
|
||||
# Verify the attributes exist and have correct defaults
|
||||
assert hasattr(agent, "_error_streak")
|
||||
assert hasattr(agent, "_error_streak_halted")
|
||||
assert agent._error_streak == 0
|
||||
assert agent._error_streak_halted is False
|
||||
|
||||
def test_agent_init_sets_streak(self):
|
||||
"""Verify __init__ actually initializes the streak attributes."""
|
||||
import inspect
|
||||
from run_agent import AIAgent
|
||||
src = inspect.getsource(AIAgent.__init__)
|
||||
assert "_error_streak" in src
|
||||
assert "_error_streak_halted" in src
|
||||
61
tests/run_agent/test_empty_model_preflight.py
Normal file
61
tests/run_agent/test_empty_model_preflight.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""Tests for empty model preflight check in _build_api_kwargs (related to #328)."""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
|
||||
class TestEmptyModelPreflight:
|
||||
"""Verify that _build_api_kwargs rejects empty model strings."""
|
||||
|
||||
def _make_agent(self, model=""):
|
||||
"""Create a minimal AIAgent with _build_api_kwargs callable."""
|
||||
from run_agent import AIAgent
|
||||
agent = AIAgent.__new__(AIAgent)
|
||||
agent.model = model
|
||||
agent.api_mode = "openai"
|
||||
agent.tools = None
|
||||
agent.tool_choice = None
|
||||
agent.max_tokens = None
|
||||
agent.reasoning_config = None
|
||||
agent._is_openrouter_url = lambda: False
|
||||
agent._use_prompt_caching = False
|
||||
agent._client_kwargs = {}
|
||||
agent.provider_data_collection = None
|
||||
agent.providers_allowed = None
|
||||
agent.providers_ignored = None
|
||||
agent.providers_order = None
|
||||
agent.provider_sort = None
|
||||
agent.provider_require_parameters = False
|
||||
return agent
|
||||
|
||||
def test_empty_string_raises(self):
|
||||
agent = self._make_agent(model="")
|
||||
with pytest.raises(ValueError, match="No model specified"):
|
||||
agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
|
||||
def test_whitespace_only_raises(self):
|
||||
agent = self._make_agent(model=" ")
|
||||
with pytest.raises(ValueError, match="No model specified"):
|
||||
agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
|
||||
def test_none_raises(self):
|
||||
agent = self._make_agent(model=None)
|
||||
with pytest.raises(ValueError, match="No model specified"):
|
||||
agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
|
||||
def test_valid_model_passes_preflight(self):
|
||||
"""The preflight check itself should not raise for a valid model.
|
||||
We test only the preflight, not the full _build_api_kwargs, because
|
||||
the full method needs a fully initialized agent."""
|
||||
agent = self._make_agent(model="gpt-4o")
|
||||
# The preflight is the first thing _build_api_kwargs does.
|
||||
# Verify it doesn't raise by checking model directly.
|
||||
assert agent.model and agent.model.strip()
|
||||
# If model is non-empty, the ValueError should NOT be raised.
|
||||
# We can't easily test the full method without mocking more internals,
|
||||
# so we test the inverse: empty model DOES raise (tested above).
|
||||
|
||||
def test_error_message_includes_current_value(self):
|
||||
agent = self._make_agent(model="")
|
||||
with pytest.raises(ValueError, match="Current value: ''"):
|
||||
agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
Reference in New Issue
Block a user