2026-02-25 18:20:38 -08:00
|
|
|
import sys
|
|
|
|
|
import types
|
|
|
|
|
from types import SimpleNamespace
|
|
|
|
|
|
2026-02-25 19:27:54 -08:00
|
|
|
import pytest
|
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
|
|
|
|
|
# Register lightweight stand-ins for optional third-party modules.
# ``setdefault`` leaves a module that is already imported untouched.
# NOTE(review): presumably these exist so that the ``import run_agent`` below
# succeeds without the real packages installed -- confirm against run_agent.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
|
|
|
|
|
|
|
|
|
|
import run_agent
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _patch_agent_bootstrap(monkeypatch):
    """Replace the tool-discovery hooks on ``run_agent`` with fixed stubs.

    ``run_agent.get_tool_definitions`` is swapped for a callable that accepts
    any keyword arguments and returns a single ``terminal`` function tool;
    ``run_agent.check_toolset_requirements`` is swapped for a callable that
    returns an empty dict.

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture used to apply the
            patches.
    """
    terminal_tool = {
        "type": "function",
        "function": {
            "name": "terminal",
            "description": "Run shell commands.",
            "parameters": {"type": "object", "properties": {}},
        },
    }
    monkeypatch.setattr(
        run_agent,
        "get_tool_definitions",
        # A fresh list per call, as in the original lambda.
        lambda **kwargs: [terminal_tool],
    )
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _build_agent(monkeypatch):
    """Build an ``AIAgent`` aimed at the Codex backend URL for use in tests.

    The bootstrap hooks are stubbed first, then the agent's cleanup and
    persistence methods are replaced with no-ops on the instance.

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture.

    Returns:
        The configured ``run_agent.AIAgent`` instance.
    """
    _patch_agent_bootstrap(monkeypatch)

    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://chatgpt.com/backend-api/codex",
        api_key="codex-token",
        quiet_mode=True,
        max_iterations=4,
        skip_context_files=True,
        skip_memory=True,
    )
    # No-op replacements for the cleanup/persistence methods.
    agent._cleanup_task_resources = lambda task_id: None
    agent._persist_session = lambda messages, history=None: None
    agent._save_trajectory = lambda messages, user_message, completed: None
    agent._save_session_log = lambda messages: None
    return agent
|
|
|
|
|
|
|
|
|
|
|
2026-03-17 23:40:22 -07:00
|
|
|
def _build_copilot_agent(monkeypatch, *, model="gpt-5.4"):
    """Build an ``AIAgent`` configured for the Copilot provider in tests.

    The agent is created with ``provider="copilot"`` and
    ``api_mode="codex_responses"``; its cleanup and persistence methods are
    replaced with no-ops on the instance.

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture.
        model: Model name passed to ``AIAgent`` (keyword-only).

    Returns:
        The configured ``run_agent.AIAgent`` instance.
    """
    _patch_agent_bootstrap(monkeypatch)

    agent = run_agent.AIAgent(
        model=model,
        provider="copilot",
        api_mode="codex_responses",
        base_url="https://api.githubcopilot.com",
        api_key="gh-token",
        quiet_mode=True,
        max_iterations=4,
        skip_context_files=True,
        skip_memory=True,
    )
    # No-op replacements for the cleanup/persistence methods.
    agent._cleanup_task_resources = lambda task_id: None
    agent._persist_session = lambda messages, history=None: None
    agent._save_trajectory = lambda messages, user_message, completed: None
    agent._save_session_log = lambda messages: None
    return agent
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
def _codex_message_response(text: str):
    """Return a fake completed response holding one ``message`` output item.

    Args:
        text: Text placed in the item's single ``output_text`` content part.

    Returns:
        A ``SimpleNamespace`` with ``output``, ``usage``, ``status`` and
        ``model`` attributes.
    """
    return SimpleNamespace(
        output=[
            SimpleNamespace(
                type="message",
                content=[SimpleNamespace(type="output_text", text=text)],
            )
        ],
        usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
        status="completed",
        model="gpt-5-codex",
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _codex_tool_call_response():
    """Return a fake completed response holding one ``function_call`` item.

    The item calls the ``terminal`` tool with empty JSON arguments and carries
    both an ``id`` (``fc_1``) and a ``call_id`` (``call_1``).

    Returns:
        A ``SimpleNamespace`` with ``output``, ``usage``, ``status`` and
        ``model`` attributes.
    """
    return SimpleNamespace(
        output=[
            SimpleNamespace(
                type="function_call",
                id="fc_1",
                call_id="call_1",
                name="terminal",
                arguments="{}",
            )
        ],
        usage=SimpleNamespace(input_tokens=12, output_tokens=4, total_tokens=16),
        status="completed",
        model="gpt-5-codex",
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _codex_incomplete_message_response(text: str):
    """Return a fake response whose message and overall status are ``in_progress``.

    Args:
        text: Text placed in the item's single ``output_text`` content part.

    Returns:
        A ``SimpleNamespace`` with ``output``, ``usage``, ``status`` and
        ``model`` attributes.
    """
    return SimpleNamespace(
        output=[
            SimpleNamespace(
                type="message",
                status="in_progress",
                content=[SimpleNamespace(type="output_text", text=text)],
            )
        ],
        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
        status="in_progress",
        model="gpt-5-codex",
    )
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 18:11:26 -08:00
|
|
|
def _codex_commentary_message_response(text: str):
    """Return a fake completed response whose message has ``phase="commentary"``.

    Args:
        text: Text placed in the item's single ``output_text`` content part.

    Returns:
        A ``SimpleNamespace`` with ``output``, ``usage``, ``status`` and
        ``model`` attributes.
    """
    return SimpleNamespace(
        output=[
            SimpleNamespace(
                type="message",
                phase="commentary",
                status="completed",
                content=[SimpleNamespace(type="output_text", text=text)],
            )
        ],
        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
        status="completed",
        model="gpt-5-codex",
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _codex_ack_message_response(text: str):
    """Return a fake completed response with one completed ``message`` item.

    Unlike ``_codex_message_response``, the output item itself also carries
    ``status="completed"``.

    Args:
        text: Text placed in the item's single ``output_text`` content part.

    Returns:
        A ``SimpleNamespace`` with ``output``, ``usage``, ``status`` and
        ``model`` attributes.
    """
    return SimpleNamespace(
        output=[
            SimpleNamespace(
                type="message",
                status="completed",
                content=[SimpleNamespace(type="output_text", text=text)],
            )
        ],
        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
        status="completed",
        model="gpt-5-codex",
    )
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 18:56:06 -08:00
|
|
|
class _FakeResponsesStream:
    """Context-manager test double that yields no events.

    ``get_final_response`` either returns the configured response or raises
    the configured error.
    """

    def __init__(self, *, final_response=None, final_error=None):
        """Store the outcome to produce from ``get_final_response``.

        Args:
            final_response: Object returned by ``get_final_response``.
            final_error: Exception raised by ``get_final_response``; takes
                precedence over ``final_response`` when not ``None``.
        """
        self._final_response = final_response
        self._final_error = final_error

    def __enter__(self):
        """Return the stream itself."""
        return self

    def __exit__(self, exc_type, exc, tb):
        """Return ``False`` so exceptions raised in the ``with`` body propagate."""
        return False

    def __iter__(self):
        """Return an iterator over zero events."""
        return iter(())

    def get_final_response(self):
        """Return the configured response, or raise the configured error."""
        if self._final_error is not None:
            raise self._final_error
        return self._final_response
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 19:08:11 -08:00
|
|
|
class _FakeCreateStream:
    """Iterable test double that replays fixed events and records ``close()``."""

    def __init__(self, events):
        """Snapshot ``events`` into a list and start in the not-closed state.

        Args:
            events: Iterable of event objects to replay on iteration.
        """
        self._events = list(events)
        # Flipped to True by close(); read by tests.
        self.closed = False

    def __iter__(self):
        """Return a fresh iterator over the stored events."""
        return iter(self._events)

    def close(self):
        """Mark the stream as closed."""
        self.closed = True
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 19:27:54 -08:00
|
|
|
def _codex_request_kwargs():
    """Return a fresh minimal request-kwargs dict used by the Codex tests.

    Returns:
        A new dict on every call with ``model``, ``instructions``, ``input``,
        ``tools`` (``None``) and ``store`` (``False``) keys, so callers may
        mutate it freely.
    """
    return {
        "model": "gpt-5-codex",
        "instructions": "You are Hermes.",
        "input": [{"role": "user", "content": "Ping"}],
        "tools": None,
        "store": False,
    }
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
def test_api_mode_uses_explicit_provider_when_codex(monkeypatch):
    """An explicit ``openai-codex`` provider selects ``codex_responses`` mode.

    The base URL here is an OpenRouter URL, so the mode must come from the
    provider argument.
    """
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://openrouter.ai/api/v1",
        provider="openai-codex",
        api_key="codex-token",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    assert agent.api_mode == "codex_responses"
    assert agent.provider == "openai-codex"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_api_mode_normalizes_provider_case(monkeypatch):
    """A mixed-case provider name is lower-cased and still selects Codex mode."""
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://openrouter.ai/api/v1",
        provider="OpenAI-Codex",
        api_key="codex-token",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    assert agent.provider == "openai-codex"
    assert agent.api_mode == "codex_responses"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypatch):
    """An explicit ``openrouter`` provider wins over a Codex-looking base URL."""
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://chatgpt.com/backend-api/codex",
        provider="openrouter",
        api_key="test-token",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    assert agent.api_mode == "chat_completions"
    assert agent.provider == "openrouter"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_build_api_kwargs_codex(monkeypatch):
    """``_build_api_kwargs`` produces a Responses-style payload for Codex.

    Checks that the system message becomes ``instructions``, that tools are
    flattened (no nested ``function`` key), and that chat-completions-only
    fields are absent.
    """
    agent = _build_agent(monkeypatch)
    kwargs = agent._build_api_kwargs(
        [
            {"role": "system", "content": "You are Hermes."},
            {"role": "user", "content": "Ping"},
        ]
    )

    assert kwargs["model"] == "gpt-5-codex"
    assert kwargs["instructions"] == "You are Hermes."
    assert kwargs["store"] is False
    assert isinstance(kwargs["input"], list)
    assert kwargs["input"][0]["role"] == "user"

    # Tool definitions are flattened to the Responses tool shape.
    assert kwargs["tools"][0]["type"] == "function"
    assert kwargs["tools"][0]["name"] == "terminal"
    assert kwargs["tools"][0]["strict"] is False
    assert "function" not in kwargs["tools"][0]

    assert kwargs["tool_choice"] == "auto"
    assert kwargs["parallel_tool_calls"] is True
    assert isinstance(kwargs["prompt_cache_key"], str)
    assert len(kwargs["prompt_cache_key"]) > 0

    # Fields that must not be sent in this mode.
    assert "timeout" not in kwargs
    assert "max_tokens" not in kwargs
    assert "extra_body" not in kwargs
|
2026-02-25 18:20:38 -08:00
|
|
|
|
|
|
|
|
|
2026-03-17 23:40:22 -07:00
|
|
|
def test_build_api_kwargs_copilot_responses_omits_openai_only_fields(monkeypatch):
    """Copilot payloads keep ``reasoning`` but omit ``prompt_cache_key``/``include``."""
    agent = _build_copilot_agent(monkeypatch)
    kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])

    assert kwargs["model"] == "gpt-5.4"
    assert kwargs["store"] is False
    assert kwargs["tool_choice"] == "auto"
    assert kwargs["parallel_tool_calls"] is True
    assert kwargs["reasoning"] == {"effort": "medium"}
    assert "prompt_cache_key" not in kwargs
    assert "include" not in kwargs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_build_api_kwargs_copilot_responses_omits_reasoning_for_non_reasoning_model(monkeypatch):
    """With model ``gpt-4.1`` the Copilot payload carries no ``reasoning`` key."""
    agent = _build_copilot_agent(monkeypatch, model="gpt-4.1")
    kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])

    assert "reasoning" not in kwargs
    assert "include" not in kwargs
    assert "prompt_cache_key" not in kwargs
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 18:56:06 -08:00
|
|
|
def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
    """A missing ``response.completed`` event triggers one stream retry.

    The first stream raises from ``get_final_response``; the second succeeds
    and its response is returned.
    """
    agent = _build_agent(monkeypatch)
    calls = {"stream": 0}

    def _fake_stream(**kwargs):
        calls["stream"] += 1
        if calls["stream"] == 1:
            return _FakeResponsesStream(
                final_error=RuntimeError("Didn't receive a `response.completed` event.")
            )
        return _FakeResponsesStream(final_response=_codex_message_response("stream ok"))

    agent.client = SimpleNamespace(
        responses=SimpleNamespace(
            stream=_fake_stream,
            create=lambda **kwargs: _codex_message_response("fallback"),
        )
    )

    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert response.output[0].content[0].text == "stream ok"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(monkeypatch):
    """After two failing streams, ``responses.create`` is used exactly once."""
    agent = _build_agent(monkeypatch)
    calls = {"stream": 0, "create": 0}

    def _fake_stream(**kwargs):
        # Every stream attempt fails the same way.
        calls["stream"] += 1
        return _FakeResponsesStream(
            final_error=RuntimeError("Didn't receive a `response.completed` event.")
        )

    def _fake_create(**kwargs):
        calls["create"] += 1
        return _codex_message_response("create fallback ok")

    agent.client = SimpleNamespace(
        responses=SimpleNamespace(
            stream=_fake_stream,
            create=_fake_create,
        )
    )

    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert calls["create"] == 1
    assert response.output[0].content[0].text == "create fallback ok"
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 19:08:11 -08:00
|
|
|
def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
    """The ``create`` fallback is streamed, parsed for the final response, and closed.

    ``create`` must be called with ``stream=True``; the response attached to
    the ``response.completed`` event is returned and the stream is closed.
    """
    agent = _build_agent(monkeypatch)
    calls = {"stream": 0, "create": 0}
    create_stream = _FakeCreateStream(
        [
            SimpleNamespace(type="response.created"),
            SimpleNamespace(type="response.in_progress"),
            SimpleNamespace(
                type="response.completed",
                response=_codex_message_response("streamed create ok"),
            ),
        ]
    )

    def _fake_stream(**kwargs):
        calls["stream"] += 1
        return _FakeResponsesStream(
            final_error=RuntimeError("Didn't receive a `response.completed` event.")
        )

    def _fake_create(**kwargs):
        calls["create"] += 1
        assert kwargs.get("stream") is True
        return create_stream

    agent.client = SimpleNamespace(
        responses=SimpleNamespace(
            stream=_fake_stream,
            create=_fake_create,
        )
    )

    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert calls["create"] == 1
    assert create_stream.closed is True
    assert response.output[0].content[0].text == "streamed create ok"
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
def test_run_conversation_codex_plain_text(monkeypatch):
    """A single plain-text Codex response completes the conversation."""
    agent = _build_agent(monkeypatch)
    monkeypatch.setattr(
        agent,
        "_interruptible_api_call",
        lambda api_kwargs: _codex_message_response("OK"),
    )

    result = agent.run_conversation("Say OK")

    assert result["completed"] is True
    assert result["final_response"] == "OK"
    assert result["messages"][-1]["role"] == "assistant"
    assert result["messages"][-1]["content"] == "OK"
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 19:27:54 -08:00
|
|
|
def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
    """A 401 on the first API call triggers one forced refresh and a retry."""
    agent = _build_agent(monkeypatch)
    calls = {"api": 0, "refresh": 0}

    class _UnauthorizedError(RuntimeError):
        """Error carrying ``status_code = 401``."""

        def __init__(self):
            super().__init__("Error code: 401 - unauthorized")
            self.status_code = 401

    def _fake_api_call(api_kwargs):
        # Fail only on the first attempt.
        calls["api"] += 1
        if calls["api"] == 1:
            raise _UnauthorizedError()
        return _codex_message_response("Recovered after refresh")

    def _fake_refresh(*, force=True):
        calls["refresh"] += 1
        assert force is True
        return True

    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
    monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh)

    result = agent.run_conversation("Say OK")

    assert calls["api"] == 2
    assert calls["refresh"] == 1
    assert result["completed"] is True
    assert result["final_response"] == "Recovered after refresh"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
    """Refreshing credentials closes the old client and builds a new one.

    The new client must be constructed with the freshly resolved ``api_key``
    and ``base_url``.
    """
    agent = _build_agent(monkeypatch)
    closed = {"value": False}
    rebuilt = {"kwargs": None}

    class _ExistingClient:
        """Client stand-in that records ``close()``."""

        def close(self):
            closed["value"] = True

    class _RebuiltClient:
        """Marker type for the replacement client."""

    def _fake_openai(**kwargs):
        # Capture the constructor kwargs for the assertions below.
        rebuilt["kwargs"] = kwargs
        return _RebuiltClient()

    monkeypatch.setattr(
        "hermes_cli.auth.resolve_codex_runtime_credentials",
        lambda force_refresh=True: {
            "api_key": "new-codex-token",
            "base_url": "https://chatgpt.com/backend-api/codex",
        },
    )
    monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)

    agent.client = _ExistingClient()
    ok = agent._try_refresh_codex_client_credentials(force=True)

    assert ok is True
    assert closed["value"] is True
    assert rebuilt["kwargs"]["api_key"] == "new-codex-token"
    assert rebuilt["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
    assert isinstance(agent.client, _RebuiltClient)
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
def test_run_conversation_codex_tool_round_trip(monkeypatch):
    """A tool call followed by a final message completes the conversation.

    The resulting history must contain an assistant message with tool calls
    and a tool message answering ``call_1``.
    """
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        # Answer every requested tool call with a canned JSON result.
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("run a command")

    assert result["completed"] is True
    assert result["final_response"] == "done"
    assert any(msg.get("tool_calls") for msg in result["messages"] if msg.get("role") == "assistant")
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 19:00:11 -08:00
|
|
|
def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch):
    """A chat tool-call id is emitted as ``call_id`` (no ``id``) on both items."""
    agent = _build_agent(monkeypatch)
    items = agent._chat_messages_to_responses_input(
        [
            {"role": "user", "content": "Run terminal"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    {
                        "id": "call_abc123",
                        "type": "function",
                        "function": {"name": "terminal", "arguments": "{}"},
                    }
                ],
            },
            {"role": "tool", "tool_call_id": "call_abc123", "content": '{"ok":true}'},
        ]
    )

    function_call = next(item for item in items if item.get("type") == "function_call")
    function_output = next(item for item in items if item.get("type") == "function_call_output")

    assert function_call["call_id"] == "call_abc123"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_abc123"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
    """A combined ``call_x|fc_x`` id is reduced to its ``call_x`` part."""
    agent = _build_agent(monkeypatch)
    items = agent._chat_messages_to_responses_input(
        [
            {"role": "user", "content": "Run terminal"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    {
                        "id": "call_pair123|fc_pair123",
                        "type": "function",
                        "function": {"name": "terminal", "arguments": "{}"},
                    }
                ],
            },
            {"role": "tool", "tool_call_id": "call_pair123|fc_pair123", "content": '{"ok":true}'},
        ]
    )

    function_call = next(item for item in items if item.get("type") == "function_call")
    function_output = next(item for item in items if item.get("type") == "function_call_output")

    assert function_call["call_id"] == "call_pair123"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_pair123"
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 19:27:54 -08:00
|
|
|
def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch):
    """Preflight drops ``id`` from ``function_call`` items but keeps ``call_id``."""
    agent = _build_agent(monkeypatch)
    preflight = agent._preflight_codex_api_kwargs(
        {
            "model": "gpt-5-codex",
            "instructions": "You are Hermes.",
            "input": [
                {"role": "user", "content": "hi"},
                {
                    "type": "function_call",
                    "id": "call_bad",
                    "call_id": "call_good",
                    "name": "terminal",
                    "arguments": "{}",
                },
            ],
            "tools": [],
            "store": False,
        }
    )

    fn_call = next(item for item in preflight["input"] if item.get("type") == "function_call")
    assert fn_call["call_id"] == "call_good"
    assert "id" not in fn_call
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch):
    """A ``function_call_output`` item lacking ``call_id`` raises ``ValueError``."""
    agent = _build_agent(monkeypatch)

    with pytest.raises(ValueError, match="function_call_output is missing call_id"):
        agent._preflight_codex_api_kwargs(
            {
                "model": "gpt-5-codex",
                "instructions": "You are Hermes.",
                "input": [{"type": "function_call_output", "output": "{}"}],
                "tools": [],
                "store": False,
            }
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
    """An unknown top-level request field raises ``ValueError``."""
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs["some_unknown_field"] = "value"

    with pytest.raises(ValueError, match="unsupported field"):
        agent._preflight_codex_api_kwargs(kwargs)
|
|
|
|
|
|
|
|
|
|
|
2026-02-28 21:47:51 -08:00
|
|
|
def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch):
    """Preflight passes ``reasoning``, ``include``, ``temperature`` and ``max_output_tokens`` through unchanged."""
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs["reasoning"] = {"effort": "high", "summary": "auto"}
    kwargs["include"] = ["reasoning.encrypted_content"]
    kwargs["temperature"] = 0.7
    kwargs["max_output_tokens"] = 4096

    result = agent._preflight_codex_api_kwargs(kwargs)
    assert result["reasoning"] == {"effort": "high", "summary": "auto"}
    assert result["include"] == ["reasoning.encrypted_content"]
    assert result["temperature"] == 0.7
    assert result["max_output_tokens"] == 4096
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 19:00:11 -08:00
|
|
|
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
    """The second request replays the tool call with ``call_id`` and no ``id``."""
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]
    requests = []

    def _fake_api_call(api_kwargs):
        # Record each outgoing payload so the replayed input can be inspected.
        requests.append(api_kwargs)
        return responses.pop(0)

    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("run a command")

    assert result["completed"] is True
    assert result["final_response"] == "done"
    assert len(requests) >= 2

    replay_input = requests[1]["input"]
    function_call = next(item for item in replay_input if item.get("type") == "function_call")
    function_output = next(item for item in replay_input if item.get("type") == "function_call_output")
    assert function_call["call_id"] == "call_1"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_1"
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
def test_run_conversation_codex_continues_after_incomplete_interim_message(monkeypatch):
    """An ``in_progress`` interim message is kept and the run continues.

    The sequence is: incomplete message, tool call, final message. The interim
    text must be preserved in history with ``finish_reason == "incomplete"``.
    """
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_incomplete_message_response("I'll inspect the repo structure first."),
        _codex_tool_call_response(),
        _codex_message_response("Architecture summary complete."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("analyze repo")

    assert result["completed"] is True
    assert result["final_response"] == "Architecture summary complete."
    assert any(
        msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
        and "inspect the repo structure" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
2026-02-25 18:11:26 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch):
    """A commentary-phase-only response normalizes to ``finish_reason == "incomplete"``."""
    agent = _build_agent(monkeypatch)
    assistant_message, finish_reason = agent._normalize_codex_response(
        _codex_commentary_message_response("I'll inspect the repository first.")
    )

    assert finish_reason == "incomplete"
    assert "inspect the repository" in (assistant_message.content or "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch):
    """A commentary-phase message is kept as incomplete and the run continues.

    The sequence is: commentary message, tool call, final message.
    """
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_commentary_message_response("I'll inspect the repo structure first."),
        _codex_tool_call_response(),
        _codex_message_response("Architecture summary complete."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("analyze repo")

    assert result["completed"] is True
    assert result["final_response"] == "Architecture summary complete."
    assert any(
        msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
        and "inspect the repo structure" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_run_conversation_codex_continues_after_ack_stop_message(monkeypatch):
    """A completed acknowledgement-only reply is treated as incomplete.

    The history must contain the acknowledgement marked ``incomplete``, a user
    message telling the model to continue, and the subsequent tool result.
    """
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_ack_message_response(
            "Absolutely — I can do that. I'll inspect ~/openclaw-studio and report back with a walkthrough."
        ),
        _codex_tool_call_response(),
        _codex_message_response("Architecture summary complete."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("look into ~/openclaw-studio and tell me how it works")

    assert result["completed"] is True
    assert result["final_response"] == "Architecture summary complete."
    assert any(
        msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
        and "inspect ~/openclaw-studio" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    # The continuation nudge arrives as a user message.
    assert any(
        msg.get("role") == "user"
        and "Continue now. Execute the required tool calls" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
2026-02-25 18:56:06 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_run_conversation_codex_continues_after_ack_for_directory_listing_prompt(monkeypatch):
    """An acknowledgement to a directory-listing prompt also triggers continuation.

    Same flow as the other acknowledgement test, with a different prompt and
    acknowledgement wording.
    """
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_ack_message_response(
            "I'll check what's in the current directory and call out 3 notable items."
        ),
        _codex_tool_call_response(),
        _codex_message_response("Directory summary complete."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("look at current directory and list 3 notable things")

    assert result["completed"] is True
    assert result["final_response"] == "Directory summary complete."
    assert any(
        msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
        and "current directory" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    # The continuation nudge arrives as a user message.
    assert any(
        msg.get("role") == "user"
        and "Continue now. Execute the required tool calls" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
2026-03-17 02:04:36 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_dump_api_request_debug_uses_responses_url(monkeypatch, tmp_path):
    """Debug dumps should show /responses URL when in codex_responses mode."""
    import json

    agent = _build_agent(monkeypatch)
    agent.base_url = "http://127.0.0.1:9208/v1"
    # Redirect the dump into pytest's per-test temporary directory.
    agent.logs_dir = tmp_path

    dump_file = agent._dump_api_request_debug(_codex_request_kwargs(), reason="preflight")

    payload = json.loads(dump_file.read_text())
    assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/responses"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_dump_api_request_debug_uses_chat_completions_url(monkeypatch, tmp_path):
    """The debug dump of a chat-style request records ``/chat/completions``.

    An agent is constructed directly for model ``gpt-4o`` against a local
    ``/v1`` base URL, with ``logs_dir`` set to pytest's ``tmp_path``. The JSON
    file returned by ``_dump_api_request_debug`` must report that base URL with
    ``/chat/completions`` appended as the request URL.
    """
    import json

    _patch_agent_bootstrap(monkeypatch)
    chat_agent = run_agent.AIAgent(
        model="gpt-4o",
        base_url="http://127.0.0.1:9208/v1",
        api_key="test-key",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    chat_agent.logs_dir = tmp_path

    request_kwargs = {"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]}
    dump_path = chat_agent._dump_api_request_debug(request_kwargs, reason="preflight")

    recorded_request = json.loads(dump_path.read_text())["request"]
    assert recorded_request["url"] == "http://127.0.0.1:9208/v1/chat/completions"
|
fix(codex): handle reasoning-only responses and replay path (#2070)
* fix(codex): treat reasoning-only responses as incomplete, not stop
When a Codex Responses API response contains only reasoning items
(encrypted thinking state) with no message text or tool calls, the
_normalize_codex_response method was setting finish_reason='stop'.
This sent the response into the empty-content retry loop, which
burned 3 retries and then failed — exactly the pattern Nester
reported in Discord.
Two fixes:
1. _normalize_codex_response: reasoning-only responses (reasoning_items_raw
non-empty but no final_text) now get finish_reason='incomplete', routing
them to the Codex continuation path instead of the retry loop.
2. Incomplete handling: also checks for codex_reasoning_items when deciding
whether to preserve an interim message, so encrypted reasoning state is
not silently dropped when there is no visible reasoning text.
Adds 4 regression tests covering:
- Unit: reasoning-only → incomplete, reasoning+content → stop
- E2E: reasoning-only → continuation → final answer succeeds
- E2E: encrypted reasoning items preserved in interim messages
* fix(codex): ensure reasoning items have required following item in API input
Follow-up to the reasoning-only response fix. Three additional issues
found by tracing the full replay path:
1. _chat_messages_to_responses_input: when a reasoning-only interim
message was converted to Responses API input, the reasoning items
were emitted as the last items with no following item. The Responses
API requires a following item after each reasoning item (otherwise:
'missing_following_item' error, as seen in OpenHands #11406). Now
emits an empty assistant message as the required following item when
content is empty but reasoning items were added.
2. Duplicate detection: two consecutive reasoning-only incomplete
messages with identical empty content/reasoning but different
encrypted codex_reasoning_items were incorrectly treated as
duplicates, silently dropping the second response's reasoning state.
Now includes codex_reasoning_items in the duplicate comparison.
3. Added tests for both the API input conversion path and the duplicate
detection edge case.
Research context: verified against OpenCode (uses Vercel AI SDK, no
retry loop so avoids the issue), Clawdbot (drops orphaned reasoning
blocks entirely), and OpenHands (hit the missing_following_item error).
Our approach preserves reasoning continuity while satisfying the API
constraint.
---------
Co-authored-by: Test <test@test.com>
2026-03-19 10:34:44 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Reasoning-only response tests (fix for empty content retry loop) ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _codex_reasoning_only_response(
    *,
    encrypted_content="enc_abc123",
    summary_text="Thinking...",
    item_id="rs_001",
):
    """Build a fake Codex response whose output holds only one reasoning item.

    The returned object carries no message text and no tool calls, only a
    single ``type="reasoning"`` output item plus fixed usage/status/model
    fields.

    Args:
        encrypted_content: Value stored on the reasoning item's
            ``encrypted_content`` attribute.
        summary_text: Text of the single ``summary_text`` part. Pass ``None``
            to produce a reasoning item with an empty ``summary`` list (no
            human-readable summary at all).
        item_id: Value stored on the reasoning item's ``id`` attribute, so a
            test can build several distinguishable reasoning-only responses.

    Returns:
        A ``SimpleNamespace`` with ``output``, ``usage``, ``status`` and
        ``model`` attributes.
    """
    # A summary part with ``text=None`` would be meaningless, so ``None`` is
    # interpreted as "no summary parts".
    if summary_text is None:
        summary_parts = []
    else:
        summary_parts = [SimpleNamespace(type="summary_text", text=summary_text)]

    reasoning_item = SimpleNamespace(
        type="reasoning",
        id=item_id,
        encrypted_content=encrypted_content,
        summary=summary_parts,
        status="completed",
    )
    return SimpleNamespace(
        output=[reasoning_item],
        usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
        status="completed",
        model="gpt-5-codex",
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_normalize_codex_response_marks_reasoning_only_as_incomplete(monkeypatch):
    """Reasoning-only output normalizes to 'incomplete' rather than 'stop'.

    Regression guard: a 'stop' finish reason on a reasoning-only response would
    route it into the empty-content retry loop (3 retries then failure). The
    normalized message must have empty content and keep the single reasoning
    item, including its encrypted payload.
    """
    agent = _build_agent(monkeypatch)
    reasoning_only = _codex_reasoning_only_response()

    message, reason = agent._normalize_codex_response(reasoning_only)

    assert reason == "incomplete"
    assert message.content == ""
    assert message.codex_reasoning_items is not None
    assert len(message.codex_reasoning_items) == 1
    only_item = message.codex_reasoning_items[0]
    assert only_item["encrypted_content"] == "enc_abc123"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_normalize_codex_response_reasoning_with_content_is_stop(monkeypatch):
    """Reasoning accompanied by visible message text still normalizes to 'stop'.

    Counterpart to the reasoning-only case: once a ``message`` output item with
    text is present next to the reasoning item, the finish reason must be
    'stop' and the text must appear in the normalized content.
    """
    agent = _build_agent(monkeypatch)

    thinking_item = SimpleNamespace(
        type="reasoning",
        id="rs_001",
        encrypted_content="enc_xyz",
        summary=[SimpleNamespace(type="summary_text", text="Thinking...")],
        status="completed",
    )
    answer_item = SimpleNamespace(
        type="message",
        content=[SimpleNamespace(type="output_text", text="Here is the answer.")],
        status="completed",
    )
    response = SimpleNamespace(
        output=[thinking_item, answer_item],
        usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
        status="completed",
        model="gpt-5-codex",
    )

    message, reason = agent._normalize_codex_response(response)

    assert reason == "stop"
    assert "Here is the answer" in message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_run_conversation_codex_continues_after_reasoning_only_response(monkeypatch):
    """End-to-end: a reasoning-only turn followed by a final message succeeds.

    The stubbed API call serves a reasoning-only response first and a plain
    message second. The conversation must complete with the second response's
    text, and the history must contain an incomplete assistant entry that
    carries ``codex_reasoning_items``.
    """
    agent = _build_agent(monkeypatch)
    scripted = [
        _codex_reasoning_only_response(),
        _codex_message_response("The final answer is 42."),
    ]

    def _serve_next(api_kwargs):
        # Hand out the scripted responses in order, one per API call.
        return scripted.pop(0)

    monkeypatch.setattr(agent, "_interruptible_api_call", _serve_next)

    outcome = agent.run_conversation("what is the answer?")

    assert outcome["completed"] is True
    assert outcome["final_response"] == "The final answer is 42."

    def _is_reasoning_interim(entry):
        return (
            entry.get("role") == "assistant"
            and entry.get("finish_reason") == "incomplete"
            and entry.get("codex_reasoning_items") is not None
        )

    # The reasoning-only turn should be in messages as an incomplete interim.
    assert any(_is_reasoning_interim(entry) for entry in outcome["messages"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_run_conversation_codex_preserves_encrypted_reasoning_in_interim(monkeypatch):
    """Encrypted reasoning items survive in the interim message.

    The first scripted response has an encrypted reasoning item with an empty
    summary (no visible reasoning text) and no content. After the conversation
    finishes on the second response, the first incomplete assistant entry in
    the history must still hold that encrypted payload.
    """
    agent = _build_agent(monkeypatch)

    # Encrypted reasoning only: the summary list is deliberately empty.
    opaque_item = SimpleNamespace(
        type="reasoning",
        id="rs_002",
        encrypted_content="enc_opaque_blob",
        summary=[],
        status="completed",
    )
    reasoning_response = SimpleNamespace(
        output=[opaque_item],
        usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
        status="completed",
        model="gpt-5-codex",
    )
    scripted = [reasoning_response, _codex_message_response("Done thinking.")]

    def _serve_next(api_kwargs):
        # Hand out the scripted responses in order, one per API call.
        return scripted.pop(0)

    monkeypatch.setattr(agent, "_interruptible_api_call", _serve_next)

    outcome = agent.run_conversation("think hard")

    assert outcome["completed"] is True
    assert outcome["final_response"] == "Done thinking."

    # The interim message must have codex_reasoning_items preserved.
    interim = []
    for entry in outcome["messages"]:
        if entry.get("role") == "assistant" and entry.get("finish_reason") == "incomplete":
            interim.append(entry)

    assert len(interim) >= 1
    first_interim = interim[0]
    assert first_interim.get("codex_reasoning_items") is not None
    assert first_interim["codex_reasoning_items"][0]["encrypted_content"] == "enc_opaque_blob"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_chat_messages_to_responses_input_reasoning_only_has_following_item(monkeypatch):
    """A converted reasoning item is always followed by an assistant item.

    Input is a user turn plus a reasoning-only interim assistant turn (empty
    content, one encrypted reasoning item). In the converted Responses API
    input, exactly one reasoning item must appear, it must not be last, and
    the item right after it must have role ``assistant`` so that the API's
    'required following item' constraint is met.
    """
    agent = _build_agent(monkeypatch)

    reasoning_only_turn = {
        "role": "assistant",
        "content": "",
        "reasoning": None,
        "finish_reason": "incomplete",
        "codex_reasoning_items": [
            {"type": "reasoning", "id": "rs_001", "encrypted_content": "enc_abc", "summary": []},
        ],
    }
    history = [{"role": "user", "content": "think hard"}, reasoning_only_turn]

    converted = agent._chat_messages_to_responses_input(history)

    # Locate the reasoning item in the converted input.
    reasoning_positions = [
        pos for pos, entry in enumerate(converted) if entry.get("type") == "reasoning"
    ]
    assert len(reasoning_positions) == 1
    reasoning_pos = reasoning_positions[0]

    # Something must come after it, and that something must be an assistant item.
    assert reasoning_pos < len(converted) - 1, "Reasoning item must not be the last item (missing_following_item)"
    successor = converted[reasoning_pos + 1]
    assert successor.get("role") == "assistant"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_duplicate_detection_distinguishes_different_codex_reasoning(monkeypatch):
    """Back-to-back reasoning-only turns with different payloads are both kept.

    Two scripted reasoning-only responses differ only in item id and encrypted
    content; a final message response follows. The finished conversation must
    contain exactly two incomplete assistant entries, one per encrypted
    payload, i.e. the second must not have been collapsed as a duplicate.
    """
    agent = _build_agent(monkeypatch)

    def _reasoning_only(item_id, blob):
        # Reasoning-only response with an empty summary and the given payload.
        return SimpleNamespace(
            output=[
                SimpleNamespace(
                    type="reasoning",
                    id=item_id,
                    encrypted_content=blob,
                    summary=[],
                    status="completed",
                )
            ],
            usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
            status="completed",
            model="gpt-5-codex",
        )

    scripted = [
        _reasoning_only("rs_001", "enc_first"),
        _reasoning_only("rs_002", "enc_second"),
        _codex_message_response("Final answer after thinking."),
    ]

    def _serve_next(api_kwargs):
        # Hand out the scripted responses in order, one per API call.
        return scripted.pop(0)

    monkeypatch.setattr(agent, "_interruptible_api_call", _serve_next)

    outcome = agent.run_conversation("think very hard")

    assert outcome["completed"] is True
    assert outcome["final_response"] == "Final answer after thinking."

    # Both reasoning-only interim messages should be in history (not collapsed).
    interim = []
    for entry in outcome["messages"]:
        if entry.get("role") == "assistant" and entry.get("finish_reason") == "incomplete":
            interim.append(entry)
    assert len(interim) == 2

    seen_payloads = [
        entry["codex_reasoning_items"][0]["encrypted_content"] for entry in interim
    ]
    assert "enc_first" in seen_payloads
    assert "enc_second" in seen_payloads
|