feat: use 'developer' role for GPT-5 and Codex models (#4498)

OpenAI's newer models (GPT-5, Codex) give stronger instruction-following weight to the 'developer' role than to 'system'. On the chat_completions path, _build_api_kwargs() now swaps the role of the leading system message to 'developer' at the API boundary, so the internal message representation stays consistent ('system' everywhere) and the caller's message list is not mutated. The swap applies regardless of provider (OpenRouter, Nous portal, direct, etc.). The codex_responses path (direct OpenAI) uses 'instructions' instead of message roles, so it is unaffected. The DEVELOPER_ROLE_MODELS constant in prompt_builder.py defines the model-name substrings that trigger the swap, matched case-insensitively: ('gpt-5', 'codex').
2026-04-01 14:49:32 -07:00
parent c59ab8b0da
commit 3628ccc8c4
3 changed files with 91 additions and 1 deletions
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -189,6 +189,13 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
 # Add new patterns here when a model family needs explicit steering.
 TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex")

+# Lowercase model-name substrings that should use the 'developer' role
+# instead of 'system' for the system prompt.  OpenAI's newer models (GPT-5,
+# Codex) give stronger instruction-following weight to 'developer'.
+# The swap happens at the API boundary in _build_api_kwargs() so internal
+# message representation stays consistent ("system" everywhere).
+DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")
+
 PLATFORM_HINTS = {
    "whatsapp": (
        "You are on a text messaging communication platform, WhatsApp. "
--- a/run_agent.py
+++ b/run_agent.py
@@ -88,7 +88,7 @@ from agent.model_metadata import (
 )
 from agent.context_compressor import ContextCompressor
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS
+from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.display import (
    KawaiiSpinner, build_tool_preview as _build_tool_preview,
@@ -5024,6 +5024,19 @@ class AIAgent:
                            tool_call.pop("call_id", None)
                            tool_call.pop("response_item_id", None)

+        # GPT-5 and Codex models respond better to 'developer' than 'system'
+        # for instruction-following.  Swap the role at the API boundary so
+        # internal message representation stays uniform ("system").
+        _model_lower = (self.model or "").lower()
+        if (
+            sanitized_messages
+            and sanitized_messages[0].get("role") == "system"
+            and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
+        ):
+            # Shallow-copy the list + first message only — rest stays shared.
+            sanitized_messages = list(sanitized_messages)
+            sanitized_messages[0] = {**sanitized_messages[0], "role": "developer"}
+
        provider_preferences = {}
        if self.providers_allowed:
            provider_preferences["only"] = self.providers_allowed
--- a/tests/test_provider_parity.py
+++ b/tests/test_provider_parity.py
@@ -137,6 +137,76 @@ class TestBuildApiKwargsOpenRouter:
        assert "codex_reasoning_items" in messages[1]


+class TestDeveloperRoleSwap:
+    """GPT-5 and Codex models should get 'developer' instead of 'system' role."""
+
+    @pytest.mark.parametrize("model", [
+        "openai/gpt-5",
+        "openai/gpt-5-turbo",
+        "openai/gpt-5.4",
+        "gpt-5-mini",
+        "openai/codex-mini",
+        "codex-mini-latest",
+        "openai/codex-pro",
+    ])
+    def test_gpt5_codex_get_developer_role(self, monkeypatch, model):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.model = model
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "hi"},
+        ]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["messages"][0]["role"] == "developer"
+        assert kwargs["messages"][0]["content"] == "You are helpful."
+        assert kwargs["messages"][1]["role"] == "user"
+
+    @pytest.mark.parametrize("model", [
+        "anthropic/claude-opus-4.6",
+        "openai/gpt-4o",
+        "google/gemini-2.5-pro",
+        "deepseek/deepseek-chat",
+        "openai/o3-mini",
+    ])
+    def test_non_matching_models_keep_system_role(self, monkeypatch, model):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.model = model
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "hi"},
+        ]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["messages"][0]["role"] == "system"
+
+    def test_no_system_message_no_crash(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.model = "openai/gpt-5"
+        messages = [{"role": "user", "content": "hi"}]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["messages"][0]["role"] == "user"
+
+    def test_original_messages_not_mutated(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.model = "openai/gpt-5"
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "hi"},
+        ]
+        agent._build_api_kwargs(messages)
+        # Original messages must be untouched (internal representation stays "system")
+        assert messages[0]["role"] == "system"
+
+    def test_developer_role_via_nous_portal(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
+        agent.model = "gpt-5"
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "hi"},
+        ]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["messages"][0]["role"] == "developer"
+
+
 class TestBuildApiKwargsAIGateway:
    def test_uses_chat_completions_format(self, monkeypatch):
        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")