import sys import types from types import SimpleNamespace import pytest sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) sys.modules.setdefault("fal_client", types.SimpleNamespace()) import run_agent def _patch_agent_bootstrap(monkeypatch): monkeypatch.setattr( run_agent, "get_tool_definitions", lambda **kwargs: [ { "type": "function", "function": { "name": "terminal", "description": "Run shell commands.", "parameters": {"type": "object", "properties": {}}, }, } ], ) monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {}) def _build_agent(monkeypatch): _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( model="gpt-5-codex", base_url="https://chatgpt.com/backend-api/codex", api_key="codex-token", quiet_mode=True, max_iterations=4, skip_context_files=True, skip_memory=True, ) agent._cleanup_task_resources = lambda task_id: None agent._persist_session = lambda messages, history=None: None agent._save_trajectory = lambda messages, user_message, completed: None agent._save_session_log = lambda messages: None return agent def _build_copilot_agent(monkeypatch, *, model="gpt-5.4"): _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( model=model, provider="copilot", api_mode="codex_responses", base_url="https://api.githubcopilot.com", api_key="gh-token", quiet_mode=True, max_iterations=4, skip_context_files=True, skip_memory=True, ) agent._cleanup_task_resources = lambda task_id: None agent._persist_session = lambda messages, history=None: None agent._save_trajectory = lambda messages, user_message, completed: None agent._save_session_log = lambda messages: None return agent def _codex_message_response(text: str): return SimpleNamespace( output=[ SimpleNamespace( type="message", content=[SimpleNamespace(type="output_text", text=text)], ) ], usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8), status="completed", model="gpt-5-codex", ) def _codex_tool_call_response(): return SimpleNamespace( output=[ SimpleNamespace( type="function_call", id="fc_1", call_id="call_1", name="terminal", arguments="{}", ) ], usage=SimpleNamespace(input_tokens=12, output_tokens=4, total_tokens=16), status="completed", model="gpt-5-codex", ) def _codex_incomplete_message_response(text: str): return SimpleNamespace( output=[ SimpleNamespace( type="message", status="in_progress", content=[SimpleNamespace(type="output_text", text=text)], ) ], usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), status="in_progress", model="gpt-5-codex", ) def _codex_commentary_message_response(text: str): return SimpleNamespace( output=[ SimpleNamespace( type="message", phase="commentary", status="completed", content=[SimpleNamespace(type="output_text", text=text)], ) ], usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), status="completed", model="gpt-5-codex", ) def _codex_ack_message_response(text: str): return SimpleNamespace( output=[ SimpleNamespace( type="message", status="completed", content=[SimpleNamespace(type="output_text", text=text)], ) ], usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), status="completed", model="gpt-5-codex", ) class _FakeResponsesStream: def __init__(self, *, final_response=None, final_error=None): self._final_response = final_response self._final_error = final_error def __enter__(self): return self def __exit__(self, exc_type, exc, tb): return False def __iter__(self): return iter(()) def get_final_response(self): if self._final_error is not None: raise self._final_error return self._final_response class _FakeCreateStream: def __init__(self, events): self._events = list(events) self.closed = False def __iter__(self): return iter(self._events) def close(self): self.closed = True def _codex_request_kwargs(): return { "model": "gpt-5-codex", "instructions": "You are Hermes.", "input": [{"role": "user", "content": "Ping"}], "tools": None, "store": False, } def test_api_mode_uses_explicit_provider_when_codex(monkeypatch): _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( model="gpt-5-codex", base_url="https://openrouter.ai/api/v1", provider="openai-codex", api_key="codex-token", quiet_mode=True, max_iterations=1, skip_context_files=True, skip_memory=True, ) assert agent.api_mode == "codex_responses" assert agent.provider == "openai-codex" def test_api_mode_normalizes_provider_case(monkeypatch): _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( model="gpt-5-codex", base_url="https://openrouter.ai/api/v1", provider="OpenAI-Codex", api_key="codex-token", quiet_mode=True, max_iterations=1, skip_context_files=True, skip_memory=True, ) assert agent.provider == "openai-codex" assert agent.api_mode == "codex_responses" def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypatch): _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( model="gpt-5-codex", base_url="https://chatgpt.com/backend-api/codex", provider="openrouter", api_key="test-token", quiet_mode=True, max_iterations=1, skip_context_files=True, skip_memory=True, ) assert agent.api_mode == "chat_completions" assert agent.provider == "openrouter" def test_build_api_kwargs_codex(monkeypatch): agent = _build_agent(monkeypatch) kwargs = agent._build_api_kwargs( [ {"role": "system", "content": "You are Hermes."}, {"role": "user", "content": "Ping"}, ] ) assert kwargs["model"] == "gpt-5-codex" assert kwargs["instructions"] == "You are Hermes." assert kwargs["store"] is False assert isinstance(kwargs["input"], list) assert kwargs["input"][0]["role"] == "user" assert kwargs["tools"][0]["type"] == "function" assert kwargs["tools"][0]["name"] == "terminal" assert kwargs["tools"][0]["strict"] is False assert "function" not in kwargs["tools"][0] assert kwargs["store"] is False assert kwargs["tool_choice"] == "auto" assert kwargs["parallel_tool_calls"] is True assert isinstance(kwargs["prompt_cache_key"], str) assert len(kwargs["prompt_cache_key"]) > 0 assert "timeout" not in kwargs assert "max_tokens" not in kwargs assert "extra_body" not in kwargs def test_build_api_kwargs_copilot_responses_omits_openai_only_fields(monkeypatch): agent = _build_copilot_agent(monkeypatch) kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) assert kwargs["model"] == "gpt-5.4" assert kwargs["store"] is False assert kwargs["tool_choice"] == "auto" assert kwargs["parallel_tool_calls"] is True assert kwargs["reasoning"] == {"effort": "medium"} assert "prompt_cache_key" not in kwargs assert "include" not in kwargs def test_build_api_kwargs_copilot_responses_omits_reasoning_for_non_reasoning_model(monkeypatch): agent = _build_copilot_agent(monkeypatch, model="gpt-4.1") kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) assert "reasoning" not in kwargs assert "include" not in kwargs assert "prompt_cache_key" not in kwargs def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch): agent = _build_agent(monkeypatch) calls = {"stream": 0} def _fake_stream(**kwargs): calls["stream"] += 1 if calls["stream"] == 1: return _FakeResponsesStream( final_error=RuntimeError("Didn't receive a `response.completed` event.") ) return _FakeResponsesStream(final_response=_codex_message_response("stream ok")) agent.client = SimpleNamespace( responses=SimpleNamespace( stream=_fake_stream, create=lambda **kwargs: _codex_message_response("fallback"), ) ) response = agent._run_codex_stream(_codex_request_kwargs()) assert calls["stream"] == 2 assert response.output[0].content[0].text == "stream ok" def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(monkeypatch): agent = _build_agent(monkeypatch) calls = {"stream": 0, "create": 0} def _fake_stream(**kwargs): calls["stream"] += 1 return _FakeResponsesStream( final_error=RuntimeError("Didn't receive a `response.completed` event.") ) def _fake_create(**kwargs): calls["create"] += 1 return _codex_message_response("create fallback ok") agent.client = SimpleNamespace( responses=SimpleNamespace( stream=_fake_stream, create=_fake_create, ) ) response = agent._run_codex_stream(_codex_request_kwargs()) assert calls["stream"] == 2 assert calls["create"] == 1 assert response.output[0].content[0].text == "create fallback ok" def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch): agent = _build_agent(monkeypatch) calls = {"stream": 0, "create": 0} create_stream = _FakeCreateStream( [ SimpleNamespace(type="response.created"), SimpleNamespace(type="response.in_progress"), SimpleNamespace(type="response.completed", response=_codex_message_response("streamed create ok")), ] ) def _fake_stream(**kwargs): calls["stream"] += 1 return _FakeResponsesStream( final_error=RuntimeError("Didn't receive a `response.completed` event.") ) def _fake_create(**kwargs): calls["create"] += 1 assert kwargs.get("stream") is True return create_stream agent.client = SimpleNamespace( responses=SimpleNamespace( stream=_fake_stream, create=_fake_create, ) ) response = agent._run_codex_stream(_codex_request_kwargs()) assert calls["stream"] == 2 assert calls["create"] == 1 assert create_stream.closed is True assert response.output[0].content[0].text == "streamed create ok" def test_run_conversation_codex_plain_text(monkeypatch): agent = _build_agent(monkeypatch) monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: _codex_message_response("OK")) result = agent.run_conversation("Say OK") assert result["completed"] is True assert result["final_response"] == "OK" assert result["messages"][-1]["role"] == "assistant" assert result["messages"][-1]["content"] == "OK" def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch): agent = _build_agent(monkeypatch) calls = {"api": 0, "refresh": 0} class _UnauthorizedError(RuntimeError): def __init__(self): super().__init__("Error code: 401 - unauthorized") self.status_code = 401 def _fake_api_call(api_kwargs): calls["api"] += 1 if calls["api"] == 1: raise _UnauthorizedError() return _codex_message_response("Recovered after refresh") def _fake_refresh(*, force=True): calls["refresh"] += 1 assert force is True return True monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh) result = agent.run_conversation("Say OK") assert calls["api"] == 2 assert calls["refresh"] == 1 assert result["completed"] is True assert result["final_response"] == "Recovered after refresh" def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch): agent = _build_agent(monkeypatch) closed = {"value": False} rebuilt = {"kwargs": None} class _ExistingClient: def close(self): closed["value"] = True class _RebuiltClient: pass def _fake_openai(**kwargs): rebuilt["kwargs"] = kwargs return _RebuiltClient() monkeypatch.setattr( "hermes_cli.auth.resolve_codex_runtime_credentials", lambda force_refresh=True: { "api_key": "new-codex-token", "base_url": "https://chatgpt.com/backend-api/codex", }, ) monkeypatch.setattr(run_agent, "OpenAI", _fake_openai) agent.client = _ExistingClient() ok = agent._try_refresh_codex_client_credentials(force=True) assert ok is True assert closed["value"] is True assert rebuilt["kwargs"]["api_key"] == "new-codex-token" assert rebuilt["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex" assert isinstance(agent.client, _RebuiltClient) def test_run_conversation_codex_tool_round_trip(monkeypatch): agent = _build_agent(monkeypatch) responses = [_codex_tool_call_response(), _codex_message_response("done")] monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): for call in assistant_message.tool_calls: messages.append( { "role": "tool", "tool_call_id": call.id, "content": '{"ok":true}', } ) monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) result = agent.run_conversation("run a command") assert result["completed"] is True assert result["final_response"] == "done" assert any(msg.get("tool_calls") for msg in result["messages"] if msg.get("role") == "assistant") assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch): agent = _build_agent(monkeypatch) items = agent._chat_messages_to_responses_input( [ {"role": "user", "content": "Run terminal"}, { "role": "assistant", "content": "", "tool_calls": [ { "id": "call_abc123", "type": "function", "function": {"name": "terminal", "arguments": "{}"}, } ], }, {"role": "tool", "tool_call_id": "call_abc123", "content": '{"ok":true}'}, ] ) function_call = next(item for item in items if item.get("type") == "function_call") function_output = next(item for item in items if item.get("type") == "function_call_output") assert function_call["call_id"] == "call_abc123" assert "id" not in function_call assert function_output["call_id"] == "call_abc123" def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch): agent = _build_agent(monkeypatch) items = agent._chat_messages_to_responses_input( [ {"role": "user", "content": "Run terminal"}, { "role": "assistant", "content": "", "tool_calls": [ { "id": "call_pair123|fc_pair123", "type": "function", "function": {"name": "terminal", "arguments": "{}"}, } ], }, {"role": "tool", "tool_call_id": "call_pair123|fc_pair123", "content": '{"ok":true}'}, ] ) function_call = next(item for item in items if item.get("type") == "function_call") function_output = next(item for item in items if item.get("type") == "function_call_output") assert function_call["call_id"] == "call_pair123" assert "id" not in function_call assert function_output["call_id"] == "call_pair123" def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch): agent = _build_agent(monkeypatch) preflight = agent._preflight_codex_api_kwargs( { "model": "gpt-5-codex", "instructions": "You are Hermes.", "input": [ {"role": "user", "content": "hi"}, { "type": "function_call", "id": "call_bad", "call_id": "call_good", "name": "terminal", "arguments": "{}", }, ], "tools": [], "store": False, } ) fn_call = next(item for item in preflight["input"] if item.get("type") == "function_call") assert fn_call["call_id"] == "call_good" assert "id" not in fn_call def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch): agent = _build_agent(monkeypatch) with pytest.raises(ValueError, match="function_call_output is missing call_id"): agent._preflight_codex_api_kwargs( { "model": "gpt-5-codex", "instructions": "You are Hermes.", "input": [{"type": "function_call_output", "output": "{}"}], "tools": [], "store": False, } ) def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch): agent = _build_agent(monkeypatch) kwargs = _codex_request_kwargs() kwargs["some_unknown_field"] = "value" with pytest.raises(ValueError, match="unsupported field"): agent._preflight_codex_api_kwargs(kwargs) def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch): agent = _build_agent(monkeypatch) kwargs = _codex_request_kwargs() kwargs["reasoning"] = {"effort": "high", "summary": "auto"} kwargs["include"] = ["reasoning.encrypted_content"] kwargs["temperature"] = 0.7 kwargs["max_output_tokens"] = 4096 result = agent._preflight_codex_api_kwargs(kwargs) assert result["reasoning"] == {"effort": "high", "summary": "auto"} assert result["include"] == ["reasoning.encrypted_content"] assert result["temperature"] == 0.7 assert result["max_output_tokens"] == 4096 def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch): agent = _build_agent(monkeypatch) responses = [_codex_tool_call_response(), _codex_message_response("done")] requests = [] def _fake_api_call(api_kwargs): requests.append(api_kwargs) return responses.pop(0) monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): for call in assistant_message.tool_calls: messages.append( { "role": "tool", "tool_call_id": call.id, "content": '{"ok":true}', } ) monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) result = agent.run_conversation("run a command") assert result["completed"] is True assert result["final_response"] == "done" assert len(requests) >= 2 replay_input = requests[1]["input"] function_call = next(item for item in replay_input if item.get("type") == "function_call") function_output = next(item for item in replay_input if item.get("type") == "function_call_output") assert function_call["call_id"] == "call_1" assert "id" not in function_call assert function_output["call_id"] == "call_1" def test_run_conversation_codex_continues_after_incomplete_interim_message(monkeypatch): agent = _build_agent(monkeypatch) responses = [ _codex_incomplete_message_response("I'll inspect the repo structure first."), _codex_tool_call_response(), _codex_message_response("Architecture summary complete."), ] monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): for call in assistant_message.tool_calls: messages.append( { "role": "tool", "tool_call_id": call.id, "content": '{"ok":true}', } ) monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) result = agent.run_conversation("analyze repo") assert result["completed"] is True assert result["final_response"] == "Architecture summary complete." assert any( msg.get("role") == "assistant" and msg.get("finish_reason") == "incomplete" and "inspect the repo structure" in (msg.get("content") or "") for msg in result["messages"] ) assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch): agent = _build_agent(monkeypatch) assistant_message, finish_reason = agent._normalize_codex_response( _codex_commentary_message_response("I'll inspect the repository first.") ) assert finish_reason == "incomplete" assert "inspect the repository" in (assistant_message.content or "") def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch): agent = _build_agent(monkeypatch) responses = [ _codex_commentary_message_response("I'll inspect the repo structure first."), _codex_tool_call_response(), _codex_message_response("Architecture summary complete."), ] monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): for call in assistant_message.tool_calls: messages.append( { "role": "tool", "tool_call_id": call.id, "content": '{"ok":true}', } ) monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) result = agent.run_conversation("analyze repo") assert result["completed"] is True assert result["final_response"] == "Architecture summary complete." assert any( msg.get("role") == "assistant" and msg.get("finish_reason") == "incomplete" and "inspect the repo structure" in (msg.get("content") or "") for msg in result["messages"] ) assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) def test_run_conversation_codex_continues_after_ack_stop_message(monkeypatch): agent = _build_agent(monkeypatch) responses = [ _codex_ack_message_response( "Absolutely — I can do that. I'll inspect ~/openclaw-studio and report back with a walkthrough." ), _codex_tool_call_response(), _codex_message_response("Architecture summary complete."), ] monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): for call in assistant_message.tool_calls: messages.append( { "role": "tool", "tool_call_id": call.id, "content": '{"ok":true}', } ) monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) result = agent.run_conversation("look into ~/openclaw-studio and tell me how it works") assert result["completed"] is True assert result["final_response"] == "Architecture summary complete." assert any( msg.get("role") == "assistant" and msg.get("finish_reason") == "incomplete" and "inspect ~/openclaw-studio" in (msg.get("content") or "") for msg in result["messages"] ) assert any( msg.get("role") == "user" and "Continue now. Execute the required tool calls" in (msg.get("content") or "") for msg in result["messages"] ) assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) def test_run_conversation_codex_continues_after_ack_for_directory_listing_prompt(monkeypatch): agent = _build_agent(monkeypatch) responses = [ _codex_ack_message_response( "I'll check what's in the current directory and call out 3 notable items." ), _codex_tool_call_response(), _codex_message_response("Directory summary complete."), ] monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): for call in assistant_message.tool_calls: messages.append( { "role": "tool", "tool_call_id": call.id, "content": '{"ok":true}', } ) monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) result = agent.run_conversation("look at current directory and list 3 notable things") assert result["completed"] is True assert result["final_response"] == "Directory summary complete." assert any( msg.get("role") == "assistant" and msg.get("finish_reason") == "incomplete" and "current directory" in (msg.get("content") or "") for msg in result["messages"] ) assert any( msg.get("role") == "user" and "Continue now. Execute the required tool calls" in (msg.get("content") or "") for msg in result["messages"] ) assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) def test_dump_api_request_debug_uses_responses_url(monkeypatch, tmp_path): """Debug dumps should show /responses URL when in codex_responses mode.""" import json agent = _build_agent(monkeypatch) agent.base_url = "http://127.0.0.1:9208/v1" agent.logs_dir = tmp_path dump_file = agent._dump_api_request_debug(_codex_request_kwargs(), reason="preflight") payload = json.loads(dump_file.read_text()) assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/responses" def test_dump_api_request_debug_uses_chat_completions_url(monkeypatch, tmp_path): """Debug dumps should show /chat/completions URL for chat_completions mode.""" import json _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( model="gpt-4o", base_url="http://127.0.0.1:9208/v1", api_key="test-key", quiet_mode=True, max_iterations=1, skip_context_files=True, skip_memory=True, ) agent.logs_dir = tmp_path dump_file = agent._dump_api_request_debug( {"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]}, reason="preflight", ) payload = json.loads(dump_file.read_text()) assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/chat/completions" # --- Reasoning-only response tests (fix for empty content retry loop) --- def _codex_reasoning_only_response(*, encrypted_content="enc_abc123", summary_text="Thinking..."): """Codex response containing only reasoning items — no message text, no tool calls.""" return SimpleNamespace( output=[ SimpleNamespace( type="reasoning", id="rs_001", encrypted_content=encrypted_content, summary=[SimpleNamespace(type="summary_text", text=summary_text)], status="completed", ) ], usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150), status="completed", model="gpt-5-codex", ) def test_normalize_codex_response_marks_reasoning_only_as_incomplete(monkeypatch): """A response with only reasoning items and no content should be 'incomplete', not 'stop'. Without this fix, reasoning-only responses get finish_reason='stop' which sends them into the empty-content retry loop (3 retries then failure). """ agent = _build_agent(monkeypatch) assistant_message, finish_reason = agent._normalize_codex_response( _codex_reasoning_only_response() ) assert finish_reason == "incomplete" assert assistant_message.content == "" assert assistant_message.codex_reasoning_items is not None assert len(assistant_message.codex_reasoning_items) == 1 assert assistant_message.codex_reasoning_items[0]["encrypted_content"] == "enc_abc123" def test_normalize_codex_response_reasoning_with_content_is_stop(monkeypatch): """If a response has both reasoning and message content, it should still be 'stop'.""" agent = _build_agent(monkeypatch) response = SimpleNamespace( output=[ SimpleNamespace( type="reasoning", id="rs_001", encrypted_content="enc_xyz", summary=[SimpleNamespace(type="summary_text", text="Thinking...")], status="completed", ), SimpleNamespace( type="message", content=[SimpleNamespace(type="output_text", text="Here is the answer.")], status="completed", ), ], usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150), status="completed", model="gpt-5-codex", ) assistant_message, finish_reason = agent._normalize_codex_response(response) assert finish_reason == "stop" assert "Here is the answer" in assistant_message.content def test_run_conversation_codex_continues_after_reasoning_only_response(monkeypatch): """End-to-end: reasoning-only → final message should succeed, not hit retry loop.""" agent = _build_agent(monkeypatch) responses = [ _codex_reasoning_only_response(), _codex_message_response("The final answer is 42."), ] monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) result = agent.run_conversation("what is the answer?") assert result["completed"] is True assert result["final_response"] == "The final answer is 42." # The reasoning-only turn should be in messages as an incomplete interim assert any( msg.get("role") == "assistant" and msg.get("finish_reason") == "incomplete" and msg.get("codex_reasoning_items") is not None for msg in result["messages"] ) def test_run_conversation_codex_preserves_encrypted_reasoning_in_interim(monkeypatch): """Encrypted codex_reasoning_items must be preserved in interim messages even when there is no visible reasoning text or content.""" agent = _build_agent(monkeypatch) # Response with encrypted reasoning but no human-readable summary reasoning_response = SimpleNamespace( output=[ SimpleNamespace( type="reasoning", id="rs_002", encrypted_content="enc_opaque_blob", summary=[], status="completed", ) ], usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150), status="completed", model="gpt-5-codex", ) responses = [ reasoning_response, _codex_message_response("Done thinking."), ] monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) result = agent.run_conversation("think hard") assert result["completed"] is True assert result["final_response"] == "Done thinking." # The interim message must have codex_reasoning_items preserved interim_msgs = [ msg for msg in result["messages"] if msg.get("role") == "assistant" and msg.get("finish_reason") == "incomplete" ] assert len(interim_msgs) >= 1 assert interim_msgs[0].get("codex_reasoning_items") is not None assert interim_msgs[0]["codex_reasoning_items"][0]["encrypted_content"] == "enc_opaque_blob" def test_chat_messages_to_responses_input_reasoning_only_has_following_item(monkeypatch): """When converting a reasoning-only interim message to Responses API input, the reasoning items must be followed by an assistant message (even if empty) to satisfy the API's 'required following item' constraint.""" agent = _build_agent(monkeypatch) messages = [ {"role": "user", "content": "think hard"}, { "role": "assistant", "content": "", "reasoning": None, "finish_reason": "incomplete", "codex_reasoning_items": [ {"type": "reasoning", "id": "rs_001", "encrypted_content": "enc_abc", "summary": []}, ], }, ] items = agent._chat_messages_to_responses_input(messages) # Find the reasoning item reasoning_indices = [i for i, it in enumerate(items) if it.get("type") == "reasoning"] assert len(reasoning_indices) == 1 ri_idx = reasoning_indices[0] # There must be a following item after the reasoning assert ri_idx < len(items) - 1, "Reasoning item must not be the last item (missing_following_item)" following = items[ri_idx + 1] assert following.get("role") == "assistant" def test_duplicate_detection_distinguishes_different_codex_reasoning(monkeypatch): """Two consecutive reasoning-only responses with different encrypted content must NOT be treated as duplicates.""" agent = _build_agent(monkeypatch) responses = [ # First reasoning-only response SimpleNamespace( output=[ SimpleNamespace( type="reasoning", id="rs_001", encrypted_content="enc_first", summary=[], status="completed", ) ], usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150), status="completed", model="gpt-5-codex", ), # Second reasoning-only response (different encrypted content) SimpleNamespace( output=[ SimpleNamespace( type="reasoning", id="rs_002", encrypted_content="enc_second", summary=[], status="completed", ) ], usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150), status="completed", model="gpt-5-codex", ), _codex_message_response("Final answer after thinking."), ] monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) result = agent.run_conversation("think very hard") assert result["completed"] is True assert result["final_response"] == "Final answer after thinking." # Both reasoning-only interim messages should be in history (not collapsed) interim_msgs = [ msg for msg in result["messages"] if msg.get("role") == "assistant" and msg.get("finish_reason") == "incomplete" ] assert len(interim_msgs) == 2 encrypted_contents = [ msg["codex_reasoning_items"][0]["encrypted_content"] for msg in interim_msgs ] assert "enc_first" in encrypted_contents assert "enc_second" in encrypted_contents