"""Unit tests for run_agent.py (AIAgent).

Tests cover pure functions, state/structure methods, and conversation loop
pieces. The OpenAI client and tool loading are mocked so no network calls
are made.
"""

import json
import re
import uuid
from contextlib import contextmanager
from types import SimpleNamespace
from unittest.mock import MagicMock, patch, PropertyMock

import pytest

from run_agent import AIAgent
from agent.prompt_builder import DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------

def _make_tool_defs(*names: str) -> list:
    """Build minimal tool definition list accepted by AIAgent.__init__."""
    return [
        {
            "type": "function",
            "function": {
                "name": n,
                "description": f"{n} tool",
                "parameters": {"type": "object", "properties": {}},
            },
        }
        for n in names
    ]


def _build_agent(*tool_names: str, **agent_kwargs) -> AIAgent:
    """Construct an AIAgent with tool loading and the OpenAI client patched.

    Args:
        *tool_names: Names of the tools ``get_tool_definitions`` should report.
            With no names, an empty tool list is reported.
        **agent_kwargs: Extra keyword arguments forwarded to ``AIAgent``
            (for example ``model`` or ``base_url``).

    Returns:
        The constructed agent. The patches are only active during
        construction.
    """
    with (
        patch(
            "run_agent.get_tool_definitions",
            return_value=_make_tool_defs(*tool_names),
        ),
        patch("run_agent.check_toolset_requirements", return_value={}),
        patch("run_agent.OpenAI"),
    ):
        return AIAgent(
            api_key="test-key-1234567890",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
            **agent_kwargs,
        )


@contextmanager
def _no_side_effects(agent):
    """Patch out the agent's persistence and cleanup hooks for one test.

    While active, ``_persist_session``, ``_save_trajectory`` and
    ``_cleanup_task_resources`` on *agent* are replaced by mocks.
    """
    with (
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        yield


@pytest.fixture()
def agent():
    """Minimal AIAgent with mocked OpenAI client and tool loading."""
    a = _build_agent("web_search")
    a.client = MagicMock()
    return a


@pytest.fixture()
def agent_with_memory_tool():
    """Agent whose valid_tool_names includes 'memory'."""
    a = _build_agent("web_search", "memory")
    a.client = MagicMock()
    return a


# ---------------------------------------------------------------------------
# Helper to build mock assistant messages (API response objects)
# ---------------------------------------------------------------------------

def _mock_assistant_msg(
    content="Hello",
    tool_calls=None,
    reasoning=None,
    reasoning_content=None,
    reasoning_details=None,
):
    """Return a SimpleNamespace mimicking an OpenAI ChatCompletionMessage.

    The reasoning attributes are only set when a value is supplied, so an
    omitted field is absent from the object rather than present as ``None``.
    """
    msg = SimpleNamespace(content=content, tool_calls=tool_calls)
    if reasoning is not None:
        msg.reasoning = reasoning
    if reasoning_content is not None:
        msg.reasoning_content = reasoning_content
    if reasoning_details is not None:
        msg.reasoning_details = reasoning_details
    return msg


def _mock_tool_call(name="web_search", arguments='{}', call_id=None):
    """Return a SimpleNamespace mimicking a tool call object.

    A random ``call_<hex>`` id is generated when *call_id* is not given.
    """
    return SimpleNamespace(
        id=call_id or f"call_{uuid.uuid4().hex[:8]}",
        type="function",
        function=SimpleNamespace(name=name, arguments=arguments),
    )


def _mock_response(content="Hello", finish_reason="stop", tool_calls=None,
                   reasoning=None, usage=None):
    """Return a SimpleNamespace mimicking an OpenAI ChatCompletion response.

    *usage*, when truthy, is a dict expanded into a namespace; otherwise
    ``resp.usage`` is ``None``.
    """
    msg = _mock_assistant_msg(
        content=content,
        tool_calls=tool_calls,
        reasoning=reasoning,
    )
    choice = SimpleNamespace(message=msg, finish_reason=finish_reason)
    resp = SimpleNamespace(choices=[choice], model="test/model")
    resp.usage = SimpleNamespace(**usage) if usage else None
    return resp


# ===================================================================
# Group 1: Pure Functions
# ===================================================================


class TestHasContentAfterThinkBlock:
    """Checks for visible content outside of ``<think>`` blocks."""

    def test_none_returns_false(self, agent):
        assert agent._has_content_after_think_block(None) is False

    def test_empty_returns_false(self, agent):
        assert agent._has_content_after_think_block("") is False

    def test_only_think_block_returns_false(self, agent):
        assert agent._has_content_after_think_block("<think>reasoning</think>") is False

    def test_content_after_think_returns_true(self, agent):
        assert agent._has_content_after_think_block("<think>r</think> actual answer") is True

    def test_no_think_block_returns_true(self, agent):
        assert agent._has_content_after_think_block("just normal content") is True


class TestStripThinkBlocks:
    """Removal of ``<think>`` blocks from response text."""

    def test_none_returns_empty(self, agent):
        assert agent._strip_think_blocks(None) == ""

    def test_no_blocks_unchanged(self, agent):
        assert agent._strip_think_blocks("hello world") == "hello world"

    def test_single_block_removed(self, agent):
        result = agent._strip_think_blocks("<think>reasoning</think> answer")
        assert "reasoning" not in result
        assert "answer" in result

    def test_multiline_block_removed(self, agent):
        text = "<think>\nline1\nline2\n</think>\nvisible"
        result = agent._strip_think_blocks(text)
        assert "line1" not in result
        assert "visible" in result


class TestExtractReasoning:
    """Reasoning extraction from the different message fields."""

    def test_reasoning_field(self, agent):
        msg = _mock_assistant_msg(reasoning="thinking hard")
        assert agent._extract_reasoning(msg) == "thinking hard"

    def test_reasoning_content_field(self, agent):
        msg = _mock_assistant_msg(reasoning_content="deep thought")
        assert agent._extract_reasoning(msg) == "deep thought"

    def test_reasoning_details_array(self, agent):
        msg = _mock_assistant_msg(
            reasoning_details=[{"summary": "step-by-step analysis"}],
        )
        assert "step-by-step analysis" in agent._extract_reasoning(msg)

    def test_no_reasoning_returns_none(self, agent):
        msg = _mock_assistant_msg()
        assert agent._extract_reasoning(msg) is None

    def test_combined_reasoning(self, agent):
        msg = _mock_assistant_msg(
            reasoning="part1",
            reasoning_content="part2",
        )
        result = agent._extract_reasoning(msg)
        assert "part1" in result
        assert "part2" in result

    def test_deduplication(self, agent):
        msg = _mock_assistant_msg(
            reasoning="same text",
            reasoning_content="same text",
        )
        result = agent._extract_reasoning(msg)
        assert result == "same text"


class TestCleanSessionContent:
    """Normalisation of stored session content."""

    def test_none_passthrough(self):
        assert AIAgent._clean_session_content(None) is None

    def test_scratchpad_converted(self):
        text = "<REASONING_SCRATCHPAD>think</REASONING_SCRATCHPAD> answer"
        result = AIAgent._clean_session_content(text)
        assert "<REASONING_SCRATCHPAD>" not in result
        assert "<think>" in result

    def test_extra_newlines_cleaned(self):
        text = "\n\n\n<think>x</think>\n\n\nafter"
        result = AIAgent._clean_session_content(text)
        # Should not have excessive newlines around think block
        assert "\n\n\n" not in result


class TestGetMessagesUpToLastAssistant:
    """Slicing of the message list before the last assistant turn."""

    def test_empty_list(self, agent):
        assert agent._get_messages_up_to_last_assistant([]) == []

    def test_no_assistant_returns_copy(self, agent):
        msgs = [{"role": "user", "content": "hi"}]
        result = agent._get_messages_up_to_last_assistant(msgs)
        assert result == msgs
        assert result is not msgs  # should be a copy

    def test_single_assistant(self, agent):
        msgs = [
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]
        result = agent._get_messages_up_to_last_assistant(msgs)
        assert len(result) == 1
        assert result[0]["role"] == "user"

    def test_multiple_assistants_returns_up_to_last(self, agent):
        msgs = [
            {"role": "user", "content": "q1"},
            {"role": "assistant", "content": "a1"},
            {"role": "user", "content": "q2"},
            {"role": "assistant", "content": "a2"},
        ]
        result = agent._get_messages_up_to_last_assistant(msgs)
        assert len(result) == 3
        assert result[-1]["content"] == "q2"

    def test_assistant_then_tool_messages(self, agent):
        msgs = [
            {"role": "user", "content": "do something"},
            {"role": "assistant", "content": "ok", "tool_calls": [{"id": "1"}]},
            {"role": "tool", "content": "result", "tool_call_id": "1"},
        ]
        # Last assistant is at index 1, so result = msgs[:1]
        result = agent._get_messages_up_to_last_assistant(msgs)
        assert len(result) == 1
        assert result[0]["role"] == "user"


class TestMaskApiKey:
    """Masking of API keys before they are logged."""

    def test_none_returns_none(self, agent):
        assert agent._mask_api_key_for_logs(None) is None

    def test_short_key_returns_stars(self, agent):
        assert agent._mask_api_key_for_logs("short") == "***"

    def test_long_key_masked(self, agent):
        key = "sk-or-v1-abcdefghijklmnop"
        result = agent._mask_api_key_for_logs(key)
        assert result.startswith("sk-or-v1")
        assert result.endswith("mnop")
        assert "..." in result


# ===================================================================
# Group 2: State / Structure Methods
# ===================================================================


class TestInit:
    """Attributes derived in ``AIAgent.__init__``."""

    def test_prompt_caching_claude_openrouter(self):
        """Claude model via OpenRouter should enable prompt caching."""
        a = _build_agent(model="anthropic/claude-sonnet-4-20250514")
        assert a._use_prompt_caching is True

    def test_prompt_caching_non_claude(self):
        """Non-Claude model should disable prompt caching."""
        a = _build_agent(model="openai/gpt-4o")
        assert a._use_prompt_caching is False

    def test_prompt_caching_non_openrouter(self):
        """Custom base_url (not OpenRouter) should disable prompt caching."""
        a = _build_agent(
            model="anthropic/claude-sonnet-4-20250514",
            base_url="http://localhost:8080/v1",
        )
        assert a._use_prompt_caching is False

    def test_valid_tool_names_populated(self):
        """valid_tool_names should contain names from loaded tools."""
        a = _build_agent("web_search", "terminal")
        assert a.valid_tool_names == {"web_search", "terminal"}

    def test_session_id_auto_generated(self):
        """Session ID should be auto-generated when not provided."""
        a = _build_agent()
        assert a.session_id is not None
        assert len(a.session_id) > 0


class TestInterrupt:
    """Interrupt flag and message handling."""

    def test_interrupt_sets_flag(self, agent):
        with patch("run_agent._set_interrupt"):
            agent.interrupt()
        assert agent._interrupt_requested is True

    def test_interrupt_with_message(self, agent):
        with patch("run_agent._set_interrupt"):
            agent.interrupt("new question")
        assert agent._interrupt_message == "new question"

    def test_clear_interrupt(self, agent):
        with patch("run_agent._set_interrupt"):
            agent.interrupt("msg")
            agent.clear_interrupt()
        assert agent._interrupt_requested is False
        assert agent._interrupt_message is None

    def test_is_interrupted_property(self, agent):
        assert agent.is_interrupted is False
        with patch("run_agent._set_interrupt"):
            agent.interrupt()
        assert agent.is_interrupted is True


class TestHydrateTodoStore:
    """Recovery of the todo store from tool messages in the history."""

    def test_no_todo_in_history(self, agent):
        history = [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi"},
        ]
        with patch("run_agent._set_interrupt"):
            agent._hydrate_todo_store(history)
        assert not agent._todo_store.has_items()

    def test_recovers_from_history(self, agent):
        todos = [{"id": "1", "content": "do thing", "status": "pending"}]
        history = [
            {"role": "user", "content": "plan"},
            {"role": "assistant", "content": "ok"},
            {"role": "tool", "content": json.dumps({"todos": todos}), "tool_call_id": "c1"},
        ]
        with patch("run_agent._set_interrupt"):
            agent._hydrate_todo_store(history)
        assert agent._todo_store.has_items()

    def test_skips_non_todo_tools(self, agent):
        history = [
            {"role": "tool", "content": '{"result": "search done"}', "tool_call_id": "c1"},
        ]
        with patch("run_agent._set_interrupt"):
            agent._hydrate_todo_store(history)
        assert not agent._todo_store.has_items()

    def test_invalid_json_skipped(self, agent):
        history = [
            {"role": "tool", "content": 'not valid json "todos" oops', "tool_call_id": "c1"},
        ]
        with patch("run_agent._set_interrupt"):
            agent._hydrate_todo_store(history)
        assert not agent._todo_store.has_items()


class TestBuildSystemPrompt:
    """Composition of the system prompt."""

    def test_always_has_identity(self, agent):
        prompt = agent._build_system_prompt()
        assert DEFAULT_AGENT_IDENTITY in prompt

    def test_includes_system_message(self, agent):
        prompt = agent._build_system_prompt(system_message="Custom instruction")
        assert "Custom instruction" in prompt

    def test_memory_guidance_when_memory_tool_loaded(self, agent_with_memory_tool):
        from agent.prompt_builder import MEMORY_GUIDANCE
        prompt = agent_with_memory_tool._build_system_prompt()
        assert MEMORY_GUIDANCE in prompt

    def test_no_memory_guidance_without_tool(self, agent):
        from agent.prompt_builder import MEMORY_GUIDANCE
        prompt = agent._build_system_prompt()
        assert MEMORY_GUIDANCE not in prompt

    def test_includes_datetime(self, agent):
        prompt = agent._build_system_prompt()
        # Should contain current date info like "Conversation started:"
        assert "Conversation started:" in prompt


class TestInvalidateSystemPrompt:
    """Cache invalidation for the system prompt."""

    def test_clears_cache(self, agent):
        agent._cached_system_prompt = "cached value"
        agent._invalidate_system_prompt()
        assert agent._cached_system_prompt is None

    def test_reloads_memory_store(self, agent):
        mock_store = MagicMock()
        agent._memory_store = mock_store
        agent._cached_system_prompt = "cached"
        agent._invalidate_system_prompt()
        mock_store.load_from_disk.assert_called_once()


class TestBuildApiKwargs:
    """Keyword arguments assembled for the chat-completions call."""

    def test_basic_kwargs(self, agent):
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert kwargs["model"] == agent.model
        assert kwargs["messages"] is messages
        assert kwargs["timeout"] == 600.0

    def test_provider_preferences_injected(self, agent):
        agent.providers_allowed = ["Anthropic"]
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert kwargs["extra_body"]["provider"]["only"] == ["Anthropic"]

    def test_reasoning_config_default_openrouter(self, agent):
        """Default reasoning config for OpenRouter should be xhigh."""
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        reasoning = kwargs["extra_body"]["reasoning"]
        assert reasoning["enabled"] is True
        assert reasoning["effort"] == "xhigh"

    def test_reasoning_config_custom(self, agent):
        agent.reasoning_config = {"enabled": False}
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert kwargs["extra_body"]["reasoning"] == {"enabled": False}

    def test_max_tokens_injected(self, agent):
        agent.max_tokens = 4096
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert kwargs["max_tokens"] == 4096


class TestBuildAssistantMessage:
    """Conversion of an API message object into a stored message dict."""

    def test_basic_message(self, agent):
        msg = _mock_assistant_msg(content="Hello!")
        result = agent._build_assistant_message(msg, "stop")
        assert result["role"] == "assistant"
        assert result["content"] == "Hello!"
        assert result["finish_reason"] == "stop"

    def test_with_reasoning(self, agent):
        msg = _mock_assistant_msg(content="answer", reasoning="thinking")
        result = agent._build_assistant_message(msg, "stop")
        assert result["reasoning"] == "thinking"

    def test_with_tool_calls(self, agent):
        tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
        msg = _mock_assistant_msg(content="", tool_calls=[tc])
        result = agent._build_assistant_message(msg, "tool_calls")
        assert len(result["tool_calls"]) == 1
        assert result["tool_calls"][0]["function"]["name"] == "web_search"

    def test_with_reasoning_details(self, agent):
        details = [{"type": "reasoning.summary", "text": "step1", "signature": "sig1"}]
        msg = _mock_assistant_msg(content="ans", reasoning_details=details)
        result = agent._build_assistant_message(msg, "stop")
        assert "reasoning_details" in result
        assert result["reasoning_details"][0]["text"] == "step1"

    def test_empty_content(self, agent):
        msg = _mock_assistant_msg(content=None)
        result = agent._build_assistant_message(msg, "stop")
        assert result["content"] == ""


class TestFormatToolsForSystemMessage:
    """JSON rendering of the tool list for the system message."""

    def test_no_tools_returns_empty_array(self, agent):
        agent.tools = []
        assert agent._format_tools_for_system_message() == "[]"

    def test_formats_single_tool(self, agent):
        agent.tools = _make_tool_defs("web_search")
        result = agent._format_tools_for_system_message()
        parsed = json.loads(result)
        assert len(parsed) == 1
        assert parsed[0]["name"] == "web_search"

    def test_formats_multiple_tools(self, agent):
        agent.tools = _make_tool_defs("web_search", "terminal", "read_file")
        result = agent._format_tools_for_system_message()
        parsed = json.loads(result)
        assert len(parsed) == 3
        names = {t["name"] for t in parsed}
        assert names == {"web_search", "terminal", "read_file"}


# ===================================================================
# Group 3: Conversation Loop Pieces (OpenAI mock)
# ===================================================================


class TestExecuteToolCalls:
    """Dispatch of tool calls and recording of their results."""

    def test_single_tool_executed(self, agent):
        tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
        messages = []
        with patch("run_agent.handle_function_call", return_value="search result") as mock_hfc:
            agent._execute_tool_calls(mock_msg, messages, "task-1")
        mock_hfc.assert_called_once_with("web_search", {"q": "test"}, "task-1")
        assert len(messages) == 1
        assert messages[0]["role"] == "tool"
        assert "search result" in messages[0]["content"]

    def test_interrupt_skips_remaining(self, agent):
        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        tc2 = _mock_tool_call(name="web_search", arguments='{}', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []

        with patch("run_agent._set_interrupt"):
            agent.interrupt()

        agent._execute_tool_calls(mock_msg, messages, "task-1")
        # Both calls should be skipped with cancellation messages
        assert len(messages) == 2
        first_content = messages[0]["content"].lower()
        assert "cancelled" in first_content or "interrupted" in first_content

    def test_invalid_json_args_defaults_empty(self, agent):
        tc = _mock_tool_call(name="web_search", arguments="not valid json", call_id="c1")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
        messages = []
        with patch("run_agent.handle_function_call", return_value="ok"):
            agent._execute_tool_calls(mock_msg, messages, "task-1")
        assert len(messages) == 1

    def test_result_truncation_over_100k(self, agent):
        tc = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
        messages = []
        big_result = "x" * 150_000
        with patch("run_agent.handle_function_call", return_value=big_result):
            agent._execute_tool_calls(mock_msg, messages, "task-1")
        # Content should be truncated
        assert len(messages[0]["content"]) < 150_000
        assert "Truncated" in messages[0]["content"]


class TestHandleMaxIterations:
    """Behaviour once the iteration limit has been reached."""

    def test_returns_summary(self, agent):
        resp = _mock_response(content="Here is a summary of what I did.")
        agent.client.chat.completions.create.return_value = resp
        agent._cached_system_prompt = "You are helpful."
        messages = [{"role": "user", "content": "do stuff"}]
        result = agent._handle_max_iterations(messages, 60)
        assert "summary" in result.lower()

    def test_api_failure_returns_error(self, agent):
        agent.client.chat.completions.create.side_effect = Exception("API down")
        agent._cached_system_prompt = "You are helpful."
        messages = [{"role": "user", "content": "do stuff"}]
        result = agent._handle_max_iterations(messages, 60)
        assert "Error" in result or "error" in result


class TestRunConversation:
    """Tests for the main run_conversation method.

    Each test mocks client.chat.completions.create to return controlled
    responses, exercising different code paths without real API calls.
    """

    def _setup_agent(self, agent):
        """Common setup for run_conversation tests."""
        agent._cached_system_prompt = "You are helpful."
        agent._use_prompt_caching = False
        agent.tool_delay = 0
        agent.compression_enabled = False
        agent.save_trajectories = False

    def test_stop_finish_reason_returns_response(self, agent):
        self._setup_agent(agent)
        resp = _mock_response(content="Final answer", finish_reason="stop")
        agent.client.chat.completions.create.return_value = resp
        with _no_side_effects(agent):
            result = agent.run_conversation("hello")
        assert result["final_response"] == "Final answer"
        assert result["completed"] is True

    def test_tool_calls_then_stop(self, agent):
        self._setup_agent(agent)
        tc = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        resp1 = _mock_response(content="", finish_reason="tool_calls", tool_calls=[tc])
        resp2 = _mock_response(content="Done searching", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [resp1, resp2]
        with (
            patch("run_agent.handle_function_call", return_value="search result"),
            _no_side_effects(agent),
        ):
            result = agent.run_conversation("search something")
        assert result["final_response"] == "Done searching"
        assert result["api_calls"] == 2

    def test_interrupt_breaks_loop(self, agent):
        self._setup_agent(agent)

        def interrupt_side_effect(api_kwargs):
            # Simulate an interrupt arriving while the API call is in flight.
            agent._interrupt_requested = True
            raise InterruptedError("Agent interrupted during API call")

        with (
            _no_side_effects(agent),
            patch("run_agent._set_interrupt"),
            patch.object(agent, "_interruptible_api_call", side_effect=interrupt_side_effect),
        ):
            result = agent.run_conversation("hello")
        assert result["interrupted"] is True

    def test_invalid_tool_name_retry(self, agent):
        """Model hallucinates an invalid tool name, agent retries and succeeds."""
        self._setup_agent(agent)
        bad_tc = _mock_tool_call(name="nonexistent_tool", arguments='{}', call_id="c1")
        resp_bad = _mock_response(content="", finish_reason="tool_calls", tool_calls=[bad_tc])
        resp_good = _mock_response(content="Got it", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [resp_bad, resp_good]
        with _no_side_effects(agent):
            result = agent.run_conversation("do something")
        assert result["final_response"] == "Got it"

    def test_empty_content_retry_and_fallback(self, agent):
        """Empty content (only think block) retries, then falls back to partial."""
        self._setup_agent(agent)
        empty_resp = _mock_response(
            content="<think>internal reasoning</think>",
            finish_reason="stop",
        )
        # Return empty 3 times to exhaust retries
        agent.client.chat.completions.create.side_effect = [
            empty_resp, empty_resp, empty_resp,
        ]
        with _no_side_effects(agent):
            result = agent.run_conversation("answer me")
        # After 3 retries with no real content, should return partial
        assert result["completed"] is False
        assert result.get("partial") is True

    def test_context_compression_triggered(self, agent):
        """When compressor says should_compress, compression runs."""
        self._setup_agent(agent)
        agent.compression_enabled = True

        tc = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        resp1 = _mock_response(content="", finish_reason="tool_calls", tool_calls=[tc])
        resp2 = _mock_response(content="All done", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [resp1, resp2]

        with (
            patch("run_agent.handle_function_call", return_value="result"),
            patch.object(agent.context_compressor, "should_compress", return_value=True),
            patch.object(agent, "_compress_context") as mock_compress,
            _no_side_effects(agent),
        ):
            # _compress_context should return (messages, system_prompt)
            mock_compress.return_value = (
                [{"role": "user", "content": "search something"}],
                "compressed system prompt",
            )
            agent.run_conversation("search something")
        mock_compress.assert_called_once()