From 3628ccc8c435e2d2cc697d9f0fafcca1f20e9db5 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 1 Apr 2026 14:49:32 -0700 Subject: [PATCH] feat: use 'developer' role for GPT-5 and Codex models (#4498) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenAI's newer models (GPT-5, Codex) give stronger instruction-following weight to the 'developer' role vs 'system'. Swap the role at the API boundary in _build_api_kwargs() for the chat_completions path so internal message representation stays consistent ('system' everywhere). Applies regardless of provider — OpenRouter, Nous portal, direct, etc. The codex_responses path (direct OpenAI) uses 'instructions' instead of message roles, so it's unaffected. DEVELOPER_ROLE_MODELS constant in prompt_builder.py defines the matching model name substrings: ('gpt-5', 'codex'). --- agent/prompt_builder.py | 7 ++++ run_agent.py | 15 +++++++- tests/test_provider_parity.py | 70 +++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 1 deletion(-) diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 8bc01251b..b8a044965 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -189,6 +189,13 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = ( # Add new patterns here when a model family needs explicit steering. TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex") +# Model name substrings that should use the 'developer' role instead of +# 'system' for the system prompt. OpenAI's newer models (GPT-5, Codex) +# give stronger instruction-following weight to the 'developer' role. +# The swap happens at the API boundary in _build_api_kwargs() so internal +# message representation stays consistent ("system" everywhere). +DEVELOPER_ROLE_MODELS = ("gpt-5", "codex") + PLATFORM_HINTS = { "whatsapp": ( "You are on a text messaging communication platform, WhatsApp. " diff --git a/run_agent.py b/run_agent.py index 92ab62fde..14721b811 100644 --- a/run_agent.py +++ b/run_agent.py @@ -88,7 +88,7 @@ from agent.model_metadata import ( ) from agent.context_compressor import ContextCompressor from agent.prompt_caching import apply_anthropic_cache_control -from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS +from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS from agent.usage_pricing import estimate_usage_cost, normalize_usage from agent.display import ( KawaiiSpinner, build_tool_preview as _build_tool_preview, @@ -5024,6 +5024,19 @@ class AIAgent: tool_call.pop("call_id", None) tool_call.pop("response_item_id", None) + # GPT-5 and Codex models respond better to 'developer' than 'system' + # for instruction-following. Swap the role at the API boundary so + # internal message representation stays uniform ("system"). + _model_lower = (self.model or "").lower() + if ( + sanitized_messages + and sanitized_messages[0].get("role") == "system" + and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS) + ): + # Shallow-copy the list + first message only — rest stays shared. + sanitized_messages = list(sanitized_messages) + sanitized_messages[0] = {**sanitized_messages[0], "role": "developer"} + provider_preferences = {} if self.providers_allowed: provider_preferences["only"] = self.providers_allowed diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py index deb657340..3c96a164e 100644 --- a/tests/test_provider_parity.py +++ b/tests/test_provider_parity.py @@ -137,6 +137,76 @@ class TestBuildApiKwargsOpenRouter: assert "codex_reasoning_items" in messages[1] +class TestDeveloperRoleSwap: + """GPT-5 and Codex models should get 'developer' instead of 'system' role.""" + + @pytest.mark.parametrize("model", [ + "openai/gpt-5", + "openai/gpt-5-turbo", + "openai/gpt-5.4", + "gpt-5-mini", + "openai/codex-mini", + "codex-mini-latest", + "openai/codex-pro", + ]) + def test_gpt5_codex_get_developer_role(self, monkeypatch, model): + agent = _make_agent(monkeypatch, "openrouter") + agent.model = model + messages = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "hi"}, + ] + kwargs = agent._build_api_kwargs(messages) + assert kwargs["messages"][0]["role"] == "developer" + assert kwargs["messages"][0]["content"] == "You are helpful." + assert kwargs["messages"][1]["role"] == "user" + + @pytest.mark.parametrize("model", [ + "anthropic/claude-opus-4.6", + "openai/gpt-4o", + "google/gemini-2.5-pro", + "deepseek/deepseek-chat", + "openai/o3-mini", + ]) + def test_non_matching_models_keep_system_role(self, monkeypatch, model): + agent = _make_agent(monkeypatch, "openrouter") + agent.model = model + messages = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "hi"}, + ] + kwargs = agent._build_api_kwargs(messages) + assert kwargs["messages"][0]["role"] == "system" + + def test_no_system_message_no_crash(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + agent.model = "openai/gpt-5" + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert kwargs["messages"][0]["role"] == "user" + + def test_original_messages_not_mutated(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + agent.model = "openai/gpt-5" + messages = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "hi"}, + ] + agent._build_api_kwargs(messages) + # Original messages must be untouched (internal representation stays "system") + assert messages[0]["role"] == "system" + + def test_developer_role_via_nous_portal(self, monkeypatch): + agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") + agent.model = "gpt-5" + messages = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "hi"}, + ] + kwargs = agent._build_api_kwargs(messages) + assert kwargs["messages"][0]["role"] == "developer" + + class TestBuildApiKwargsAIGateway: def test_uses_chat_completions_format(self, monkeypatch): agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")