diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py
new file mode 100644
index 000000000..a07c52f84
--- /dev/null
+++ b/tests/test_run_agent.py
@@ -0,0 +1,743 @@
+"""Unit tests for run_agent.py (AIAgent).
+
+Tests cover pure functions, state/structure methods, and conversation loop
+pieces. The OpenAI client and tool loading are mocked so no network calls
+are made.
+"""
+
+import json
+import re
+import uuid
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch, PropertyMock
+
+import pytest
+
+from run_agent import AIAgent
+from agent.prompt_builder import DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+def _make_tool_defs(*names: str) -> list:
+ """Build minimal tool definition list accepted by AIAgent.__init__."""
+ return [
+ {
+ "type": "function",
+ "function": {
+ "name": n,
+ "description": f"{n} tool",
+ "parameters": {"type": "object", "properties": {}},
+ },
+ }
+ for n in names
+ ]
+
+
@pytest.fixture()
def agent():
    """Minimal AIAgent with mocked OpenAI client and tool loading."""
    tool_defs = _make_tool_defs("web_search")
    # Patch tool discovery and the OpenAI constructor so no network/tooling
    # setup happens during AIAgent construction.
    with patch("run_agent.get_tool_definitions", return_value=tool_defs), \
         patch("run_agent.check_toolset_requirements", return_value={}), \
         patch("run_agent.OpenAI"):
        instance = AIAgent(
            api_key="test-key-1234567890",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
        instance.client = MagicMock()
        return instance
+
+
@pytest.fixture()
def agent_with_memory_tool():
    """Agent whose valid_tool_names includes 'memory'."""
    tool_defs = _make_tool_defs("web_search", "memory")
    # Same mocked construction as the plain `agent` fixture, but the tool
    # list also advertises the memory tool.
    with patch("run_agent.get_tool_definitions", return_value=tool_defs), \
         patch("run_agent.check_toolset_requirements", return_value={}), \
         patch("run_agent.OpenAI"):
        instance = AIAgent(
            api_key="test-key-1234567890",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
        instance.client = MagicMock()
        return instance
+
+
+# ---------------------------------------------------------------------------
+# Helper to build mock assistant messages (API response objects)
+# ---------------------------------------------------------------------------
+
+def _mock_assistant_msg(
+ content="Hello",
+ tool_calls=None,
+ reasoning=None,
+ reasoning_content=None,
+ reasoning_details=None,
+):
+ """Return a SimpleNamespace mimicking an OpenAI ChatCompletionMessage."""
+ msg = SimpleNamespace(content=content, tool_calls=tool_calls)
+ if reasoning is not None:
+ msg.reasoning = reasoning
+ if reasoning_content is not None:
+ msg.reasoning_content = reasoning_content
+ if reasoning_details is not None:
+ msg.reasoning_details = reasoning_details
+ return msg
+
+
+def _mock_tool_call(name="web_search", arguments='{}', call_id=None):
+ """Return a SimpleNamespace mimicking a tool call object."""
+ return SimpleNamespace(
+ id=call_id or f"call_{uuid.uuid4().hex[:8]}",
+ type="function",
+ function=SimpleNamespace(name=name, arguments=arguments),
+ )
+
+
def _mock_response(content="Hello", finish_reason="stop", tool_calls=None,
                   reasoning=None, usage=None):
    """Return a SimpleNamespace mimicking an OpenAI ChatCompletion response."""
    message = _mock_assistant_msg(
        content=content,
        tool_calls=tool_calls,
        reasoning=reasoning,
    )
    # Single-choice response; usage is expanded into an object when given,
    # otherwise left as None like a provider that omits token accounting.
    return SimpleNamespace(
        choices=[SimpleNamespace(message=message, finish_reason=finish_reason)],
        model="test/model",
        usage=SimpleNamespace(**usage) if usage else None,
    )
+
+
+# ===================================================================
# Group 1: Pure Functions
+# ===================================================================
+
+
class TestHasContentAfterThinkBlock:
    """_has_content_after_think_block: is there visible text beyond <think>?"""

    def test_none_returns_false(self, agent):
        assert agent._has_content_after_think_block(None) is False

    def test_empty_returns_false(self, agent):
        assert agent._has_content_after_think_block("") is False

    def test_only_think_block_returns_false(self, agent):
        # A message that is nothing but a think block has no visible content.
        assert agent._has_content_after_think_block("<think>reasoning</think>") is False

    def test_content_after_think_returns_true(self, agent):
        assert agent._has_content_after_think_block("<think>r</think> actual answer") is True

    def test_no_think_block_returns_true(self, agent):
        assert agent._has_content_after_think_block("just normal content") is True
+
+
class TestStripThinkBlocks:
    """_strip_think_blocks: remove <think>...</think> spans from text."""

    def test_none_returns_empty(self, agent):
        assert agent._strip_think_blocks(None) == ""

    def test_no_blocks_unchanged(self, agent):
        assert agent._strip_think_blocks("hello world") == "hello world"

    def test_single_block_removed(self, agent):
        result = agent._strip_think_blocks("<think>reasoning</think> answer")
        assert "reasoning" not in result
        assert "answer" in result

    def test_multiline_block_removed(self, agent):
        # Think blocks may span multiple lines; all inner lines must go.
        text = "<think>\nline1\nline2\n</think>\nvisible"
        result = agent._strip_think_blocks(text)
        assert "line1" not in result
        assert "visible" in result
+
+
class TestExtractReasoning:
    """_extract_reasoning: pull reasoning text out of an assistant message."""

    def test_reasoning_field(self, agent):
        message = _mock_assistant_msg(reasoning="thinking hard")
        assert agent._extract_reasoning(message) == "thinking hard"

    def test_reasoning_content_field(self, agent):
        message = _mock_assistant_msg(reasoning_content="deep thought")
        assert agent._extract_reasoning(message) == "deep thought"

    def test_reasoning_details_array(self, agent):
        message = _mock_assistant_msg(
            reasoning_details=[{"summary": "step-by-step analysis"}],
        )
        extracted = agent._extract_reasoning(message)
        assert "step-by-step analysis" in extracted

    def test_no_reasoning_returns_none(self, agent):
        assert agent._extract_reasoning(_mock_assistant_msg()) is None

    def test_combined_reasoning(self, agent):
        # Distinct text in both fields should both survive extraction.
        message = _mock_assistant_msg(
            reasoning="part1",
            reasoning_content="part2",
        )
        extracted = agent._extract_reasoning(message)
        assert "part1" in extracted
        assert "part2" in extracted

    def test_deduplication(self, agent):
        # Identical text in both fields must not be concatenated twice.
        message = _mock_assistant_msg(
            reasoning="same text",
            reasoning_content="same text",
        )
        assert agent._extract_reasoning(message) == "same text"
+
+
class TestCleanSessionContent:
    """AIAgent._clean_session_content (static): normalize persisted content."""

    def test_none_passthrough(self):
        assert AIAgent._clean_session_content(None) is None

    def test_scratchpad_converted(self):
        # <scratchpad> tags are normalized to the canonical <think> tags.
        text = "<scratchpad>think</scratchpad> answer"
        result = AIAgent._clean_session_content(text)
        assert "<scratchpad>" not in result
        assert "<think>" in result

    def test_extra_newlines_cleaned(self):
        text = "<think>\n\n\nx\n\n\n</think>after"
        result = AIAgent._clean_session_content(text)
        # Should not have excessive newlines around think block
        assert "\n\n\n" not in result
+
+
class TestGetMessagesUpToLastAssistant:
    """_get_messages_up_to_last_assistant: trim history before the final assistant turn."""

    def test_empty_list(self, agent):
        assert agent._get_messages_up_to_last_assistant([]) == []

    def test_no_assistant_returns_copy(self, agent):
        history = [{"role": "user", "content": "hi"}]
        trimmed = agent._get_messages_up_to_last_assistant(history)
        assert trimmed == history
        assert trimmed is not history  # should be a copy

    def test_single_assistant(self, agent):
        history = [
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]
        trimmed = agent._get_messages_up_to_last_assistant(history)
        assert len(trimmed) == 1
        assert trimmed[0]["role"] == "user"

    def test_multiple_assistants_returns_up_to_last(self, agent):
        history = [
            {"role": "user", "content": "q1"},
            {"role": "assistant", "content": "a1"},
            {"role": "user", "content": "q2"},
            {"role": "assistant", "content": "a2"},
        ]
        trimmed = agent._get_messages_up_to_last_assistant(history)
        # Everything before the final assistant turn survives.
        assert len(trimmed) == 3
        assert trimmed[-1]["content"] == "q2"

    def test_assistant_then_tool_messages(self, agent):
        history = [
            {"role": "user", "content": "do something"},
            {"role": "assistant", "content": "ok", "tool_calls": [{"id": "1"}]},
            {"role": "tool", "content": "result", "tool_call_id": "1"},
        ]
        # Last assistant is at index 1, so the tool follow-up is dropped too.
        trimmed = agent._get_messages_up_to_last_assistant(history)
        assert len(trimmed) == 1
        assert trimmed[0]["role"] == "user"
+
+
class TestMaskApiKey:
    """_mask_api_key_for_logs: redact secrets before they reach log output."""

    def test_none_returns_none(self, agent):
        assert agent._mask_api_key_for_logs(None) is None

    def test_short_key_returns_stars(self, agent):
        # Keys too short to mask partially are fully redacted.
        assert agent._mask_api_key_for_logs("short") == "***"

    def test_long_key_masked(self, agent):
        masked = agent._mask_api_key_for_logs("sk-or-v1-abcdefghijklmnop")
        assert masked.startswith("sk-or-v1")
        assert masked.endswith("mnop")
        assert "..." in masked
+
+
+# ===================================================================
# Group 2: State / Structure Methods
+# ===================================================================
+
+
class TestInit:
    """AIAgent.__init__ configuration behavior.

    All construction goes through _build_agent so the mocked-patch
    boilerplate (previously repeated in every test) lives in one place.
    """

    @staticmethod
    def _build_agent(tools=None, **agent_kwargs):
        """Construct an AIAgent with tool loading and OpenAI mocked out.

        ``tools`` is the value get_tool_definitions should return (default
        empty); any extra keyword arguments (model, base_url, ...) are
        forwarded to AIAgent.
        """
        with (
            patch("run_agent.get_tool_definitions", return_value=tools or []),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
        ):
            return AIAgent(
                api_key="test-key-1234567890",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
                **agent_kwargs,
            )

    def test_prompt_caching_claude_openrouter(self):
        """Claude model via OpenRouter should enable prompt caching."""
        a = self._build_agent(model="anthropic/claude-sonnet-4-20250514")
        assert a._use_prompt_caching is True

    def test_prompt_caching_non_claude(self):
        """Non-Claude model should disable prompt caching."""
        a = self._build_agent(model="openai/gpt-4o")
        assert a._use_prompt_caching is False

    def test_prompt_caching_non_openrouter(self):
        """Custom base_url (not OpenRouter) should disable prompt caching."""
        a = self._build_agent(
            model="anthropic/claude-sonnet-4-20250514",
            base_url="http://localhost:8080/v1",
        )
        assert a._use_prompt_caching is False

    def test_valid_tool_names_populated(self):
        """valid_tool_names should contain names from loaded tools."""
        a = self._build_agent(tools=_make_tool_defs("web_search", "terminal"))
        assert a.valid_tool_names == {"web_search", "terminal"}

    def test_session_id_auto_generated(self):
        """Session ID should be auto-generated when not provided."""
        a = self._build_agent()
        assert a.session_id is not None
        assert len(a.session_id) > 0
+
+
class TestInterrupt:
    """interrupt()/clear_interrupt() flag handling on the agent."""

    def test_interrupt_sets_flag(self, agent):
        with patch("run_agent._set_interrupt"):
            agent.interrupt()
            assert agent._interrupt_requested is True

    def test_interrupt_with_message(self, agent):
        with patch("run_agent._set_interrupt"):
            agent.interrupt("new question")
            assert agent._interrupt_message == "new question"

    def test_clear_interrupt(self, agent):
        with patch("run_agent._set_interrupt"):
            agent.interrupt("msg")
            agent.clear_interrupt()
            # Both the flag and the pending message are reset.
            assert agent._interrupt_requested is False
            assert agent._interrupt_message is None

    def test_is_interrupted_property(self, agent):
        assert agent.is_interrupted is False
        with patch("run_agent._set_interrupt"):
            agent.interrupt()
            assert agent.is_interrupted is True
+
+
class TestHydrateTodoStore:
    """_hydrate_todo_store: rebuild the todo store from persisted history."""

    def test_no_todo_in_history(self, agent):
        convo = [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi"},
        ]
        with patch("run_agent._set_interrupt"):
            agent._hydrate_todo_store(convo)
        assert not agent._todo_store.has_items()

    def test_recovers_from_history(self, agent):
        payload = json.dumps({"todos": [{"id": "1", "content": "do thing", "status": "pending"}]})
        convo = [
            {"role": "user", "content": "plan"},
            {"role": "assistant", "content": "ok"},
            {"role": "tool", "content": payload, "tool_call_id": "c1"},
        ]
        with patch("run_agent._set_interrupt"):
            agent._hydrate_todo_store(convo)
        assert agent._todo_store.has_items()

    def test_skips_non_todo_tools(self, agent):
        # A tool result without a "todos" payload is ignored.
        convo = [
            {"role": "tool", "content": '{"result": "search done"}', "tool_call_id": "c1"},
        ]
        with patch("run_agent._set_interrupt"):
            agent._hydrate_todo_store(convo)
        assert not agent._todo_store.has_items()

    def test_invalid_json_skipped(self, agent):
        # Mentions "todos" but is not parseable JSON; must not raise or hydrate.
        convo = [
            {"role": "tool", "content": 'not valid json "todos" oops', "tool_call_id": "c1"},
        ]
        with patch("run_agent._set_interrupt"):
            agent._hydrate_todo_store(convo)
        assert not agent._todo_store.has_items()
+
+
class TestBuildSystemPrompt:
    """_build_system_prompt: composition of the agent's system prompt."""

    def test_always_has_identity(self, agent):
        assert DEFAULT_AGENT_IDENTITY in agent._build_system_prompt()

    def test_includes_system_message(self, agent):
        built = agent._build_system_prompt(system_message="Custom instruction")
        assert "Custom instruction" in built

    def test_memory_guidance_when_memory_tool_loaded(self, agent_with_memory_tool):
        from agent.prompt_builder import MEMORY_GUIDANCE
        assert MEMORY_GUIDANCE in agent_with_memory_tool._build_system_prompt()

    def test_no_memory_guidance_without_tool(self, agent):
        from agent.prompt_builder import MEMORY_GUIDANCE
        assert MEMORY_GUIDANCE not in agent._build_system_prompt()

    def test_includes_datetime(self, agent):
        # The prompt embeds the conversation start timestamp.
        assert "Conversation started:" in agent._build_system_prompt()
+
+
class TestInvalidateSystemPrompt:
    """_invalidate_system_prompt: drop the cache and refresh memory."""

    def test_clears_cache(self, agent):
        agent._cached_system_prompt = "cached value"
        agent._invalidate_system_prompt()
        assert agent._cached_system_prompt is None

    def test_reloads_memory_store(self, agent):
        store = MagicMock()
        agent._memory_store = store
        agent._cached_system_prompt = "cached"
        agent._invalidate_system_prompt()
        # Invalidation must also re-read persisted memory from disk.
        store.load_from_disk.assert_called_once()
+
+
class TestBuildApiKwargs:
    """_build_api_kwargs: payload assembly for chat.completions.create."""

    def test_basic_kwargs(self, agent):
        conversation = [{"role": "user", "content": "hi"}]
        built = agent._build_api_kwargs(conversation)
        assert built["model"] == agent.model
        assert built["messages"] is conversation  # passed through, not copied
        assert built["timeout"] == 600.0

    def test_provider_preferences_injected(self, agent):
        agent.providers_allowed = ["Anthropic"]
        built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert built["extra_body"]["provider"]["only"] == ["Anthropic"]

    def test_reasoning_config_default_openrouter(self, agent):
        """Default reasoning config for OpenRouter should be xhigh."""
        built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        reasoning_cfg = built["extra_body"]["reasoning"]
        assert reasoning_cfg["enabled"] is True
        assert reasoning_cfg["effort"] == "xhigh"

    def test_reasoning_config_custom(self, agent):
        # An explicit reasoning_config replaces the default wholesale.
        agent.reasoning_config = {"enabled": False}
        built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert built["extra_body"]["reasoning"] == {"enabled": False}

    def test_max_tokens_injected(self, agent):
        agent.max_tokens = 4096
        built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert built["max_tokens"] == 4096
+
+
class TestBuildAssistantMessage:
    """_build_assistant_message: API message object -> history dict."""

    def test_basic_message(self, agent):
        built = agent._build_assistant_message(_mock_assistant_msg(content="Hello!"), "stop")
        assert built["role"] == "assistant"
        assert built["content"] == "Hello!"
        assert built["finish_reason"] == "stop"

    def test_with_reasoning(self, agent):
        source = _mock_assistant_msg(content="answer", reasoning="thinking")
        built = agent._build_assistant_message(source, "stop")
        assert built["reasoning"] == "thinking"

    def test_with_tool_calls(self, agent):
        call = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
        source = _mock_assistant_msg(content="", tool_calls=[call])
        built = agent._build_assistant_message(source, "tool_calls")
        assert len(built["tool_calls"]) == 1
        assert built["tool_calls"][0]["function"]["name"] == "web_search"

    def test_with_reasoning_details(self, agent):
        details = [{"type": "reasoning.summary", "text": "step1", "signature": "sig1"}]
        source = _mock_assistant_msg(content="ans", reasoning_details=details)
        built = agent._build_assistant_message(source, "stop")
        assert "reasoning_details" in built
        assert built["reasoning_details"][0]["text"] == "step1"

    def test_empty_content(self, agent):
        # None content must be normalized to an empty string.
        built = agent._build_assistant_message(_mock_assistant_msg(content=None), "stop")
        assert built["content"] == ""
+
+
class TestFormatToolsForSystemMessage:
    """_format_tools_for_system_message: JSON summary of loaded tools."""

    def test_no_tools_returns_empty_array(self, agent):
        agent.tools = []
        assert agent._format_tools_for_system_message() == "[]"

    def test_formats_single_tool(self, agent):
        agent.tools = _make_tool_defs("web_search")
        entries = json.loads(agent._format_tools_for_system_message())
        assert len(entries) == 1
        assert entries[0]["name"] == "web_search"

    def test_formats_multiple_tools(self, agent):
        agent.tools = _make_tool_defs("web_search", "terminal", "read_file")
        entries = json.loads(agent._format_tools_for_system_message())
        assert len(entries) == 3
        assert {e["name"] for e in entries} == {"web_search", "terminal", "read_file"}
+
+
+# ===================================================================
# Group 3: Conversation Loop Pieces (OpenAI mock)
+# ===================================================================
+
+
class TestExecuteToolCalls:
    """_execute_tool_calls: dispatch tool calls and append tool messages."""

    def test_single_tool_executed(self, agent):
        call = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
        assistant_msg = _mock_assistant_msg(content="", tool_calls=[call])
        transcript = []
        with patch("run_agent.handle_function_call", return_value="search result") as handler:
            agent._execute_tool_calls(assistant_msg, transcript, "task-1")
            handler.assert_called_once_with("web_search", {"q": "test"}, "task-1")
        assert len(transcript) == 1
        assert transcript[0]["role"] == "tool"
        assert "search result" in transcript[0]["content"]

    def test_interrupt_skips_remaining(self, agent):
        calls = [
            _mock_tool_call(name="web_search", arguments='{}', call_id="c1"),
            _mock_tool_call(name="web_search", arguments='{}', call_id="c2"),
        ]
        assistant_msg = _mock_assistant_msg(content="", tool_calls=calls)
        transcript = []

        with patch("run_agent._set_interrupt"):
            agent.interrupt()

        agent._execute_tool_calls(assistant_msg, transcript, "task-1")
        # Both calls should be skipped with cancellation messages
        assert len(transcript) == 2
        first = transcript[0]["content"].lower()
        assert "cancelled" in first or "interrupted" in first

    def test_invalid_json_args_defaults_empty(self, agent):
        # Unparseable arguments fall back to an empty dict rather than raising.
        call = _mock_tool_call(name="web_search", arguments="not valid json", call_id="c1")
        assistant_msg = _mock_assistant_msg(content="", tool_calls=[call])
        transcript = []
        with patch("run_agent.handle_function_call", return_value="ok"):
            agent._execute_tool_calls(assistant_msg, transcript, "task-1")
        assert len(transcript) == 1

    def test_result_truncation_over_100k(self, agent):
        call = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        assistant_msg = _mock_assistant_msg(content="", tool_calls=[call])
        transcript = []
        with patch("run_agent.handle_function_call", return_value="x" * 150_000):
            agent._execute_tool_calls(assistant_msg, transcript, "task-1")
        # Oversized tool output must be truncated before entering history.
        assert len(transcript[0]["content"]) < 150_000
        assert "Truncated" in transcript[0]["content"]
+
+
class TestHandleMaxIterations:
    """_handle_max_iterations: request a summary when the loop budget is spent."""

    def test_returns_summary(self, agent):
        agent.client.chat.completions.create.return_value = _mock_response(
            content="Here is a summary of what I did."
        )
        agent._cached_system_prompt = "You are helpful."
        summary = agent._handle_max_iterations([{"role": "user", "content": "do stuff"}], 60)
        assert "summary" in summary.lower()

    def test_api_failure_returns_error(self, agent):
        # A failing summary call degrades to an error string, not an exception.
        agent.client.chat.completions.create.side_effect = Exception("API down")
        agent._cached_system_prompt = "You are helpful."
        outcome = agent._handle_max_iterations([{"role": "user", "content": "do stuff"}], 60)
        assert "Error" in outcome or "error" in outcome
+
+
class TestRunConversation:
    """Tests for the main run_conversation method.

    Each test mocks client.chat.completions.create to return controlled
    responses, exercising different code paths without real API calls.
    """

    def _setup_agent(self, agent):
        """Common setup for run_conversation tests."""
        agent._cached_system_prompt = "You are helpful."
        agent._use_prompt_caching = False
        agent.tool_delay = 0
        agent.compression_enabled = False
        agent.save_trajectories = False

    def test_stop_finish_reason_returns_response(self, agent):
        self._setup_agent(agent)
        resp = _mock_response(content="Final answer", finish_reason="stop")
        agent.client.chat.completions.create.return_value = resp
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("hello")
        assert result["final_response"] == "Final answer"
        assert result["completed"] is True

    def test_tool_calls_then_stop(self, agent):
        self._setup_agent(agent)
        tc = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        resp1 = _mock_response(content="", finish_reason="tool_calls", tool_calls=[tc])
        resp2 = _mock_response(content="Done searching", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [resp1, resp2]
        with (
            patch("run_agent.handle_function_call", return_value="search result"),
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("search something")
        assert result["final_response"] == "Done searching"
        assert result["api_calls"] == 2

    def test_interrupt_breaks_loop(self, agent):
        self._setup_agent(agent)

        def interrupt_side_effect(api_kwargs):
            # Simulate the user interrupting while the API call is in flight.
            agent._interrupt_requested = True
            raise InterruptedError("Agent interrupted during API call")

        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
            patch("run_agent._set_interrupt"),
            patch.object(agent, "_interruptible_api_call", side_effect=interrupt_side_effect),
        ):
            result = agent.run_conversation("hello")
        assert result["interrupted"] is True

    def test_invalid_tool_name_retry(self, agent):
        """Model hallucinates an invalid tool name, agent retries and succeeds."""
        self._setup_agent(agent)
        bad_tc = _mock_tool_call(name="nonexistent_tool", arguments='{}', call_id="c1")
        resp_bad = _mock_response(content="", finish_reason="tool_calls", tool_calls=[bad_tc])
        resp_good = _mock_response(content="Got it", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [resp_bad, resp_good]
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("do something")
        assert result["final_response"] == "Got it"

    def test_empty_content_retry_and_fallback(self, agent):
        """Empty content (only think block) retries, then falls back to partial."""
        self._setup_agent(agent)
        # Content is nothing but a <think> block, so after stripping there is
        # no user-visible answer and the agent should retry.
        empty_resp = _mock_response(
            content="<think>internal reasoning</think>",
            finish_reason="stop",
        )
        # Return empty 3 times to exhaust retries
        agent.client.chat.completions.create.side_effect = [
            empty_resp, empty_resp, empty_resp,
        ]
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("answer me")
        # After 3 retries with no real content, should return partial
        assert result["completed"] is False
        assert result.get("partial") is True

    def test_context_compression_triggered(self, agent):
        """When compressor says should_compress, compression runs."""
        self._setup_agent(agent)
        agent.compression_enabled = True

        tc = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        resp1 = _mock_response(content="", finish_reason="tool_calls", tool_calls=[tc])
        resp2 = _mock_response(content="All done", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [resp1, resp2]

        with (
            patch("run_agent.handle_function_call", return_value="result"),
            patch.object(agent.context_compressor, "should_compress", return_value=True),
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # _compress_context should return (messages, system_prompt)
            mock_compress.return_value = (
                [{"role": "user", "content": "search something"}],
                "compressed system prompt",
            )
            result = agent.run_conversation("search something")
            mock_compress.assert_called_once()