fix(openai): route api.openai.com to Responses API for GPT-5.x
Based on PR #1859 by @magi-morph (too stale to cherry-pick, so reimplemented).

GPT-5.x models reject tool calls combined with reasoning_effort on /v1/chat/completions, returning a 400 error that directs clients to /v1/responses. This change auto-detects api.openai.com in the base URL and switches to codex_responses mode in three places:

- AIAgent.__init__: upgrades chat_completions → codex_responses
- _try_activate_fallback(): applies the same routing to the fallback model
- runtime_provider.py: _detect_api_mode_for_url() covers both the custom-provider and OpenRouter runtime resolution paths

Also extracts an _is_direct_openai_url() helper to replace the inline check in _max_tokens_param().
This commit is contained in:
@@ -24,6 +24,18 @@ def _normalize_custom_provider_name(value: str) -> str:
|
||||
return value.strip().lower().replace(" ", "-")
|
||||
|
||||
|
||||
def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
|
||||
"""Auto-detect api_mode from the resolved base URL.
|
||||
|
||||
Direct api.openai.com endpoints need the Responses API for GPT-5.x
|
||||
tool calls with reasoning (chat/completions returns 400).
|
||||
"""
|
||||
normalized = (base_url or "").strip().lower().rstrip("/")
|
||||
if "api.openai.com" in normalized and "openrouter" not in normalized:
|
||||
return "codex_responses"
|
||||
return None
|
||||
|
||||
|
||||
def _auto_detect_local_model(base_url: str) -> str:
|
||||
"""Query a local server for its model name when only one model is loaded."""
|
||||
if not base_url:
|
||||
@@ -185,7 +197,9 @@ def _resolve_named_custom_runtime(
|
||||
|
||||
return {
|
||||
"provider": "openrouter",
|
||||
"api_mode": custom_provider.get("api_mode", "chat_completions"),
|
||||
"api_mode": custom_provider.get("api_mode")
|
||||
or _detect_api_mode_for_url(base_url)
|
||||
or "chat_completions",
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"source": f"custom_provider:{custom_provider.get('name', requested_provider)}",
|
||||
@@ -263,7 +277,9 @@ def _resolve_openrouter_runtime(
|
||||
|
||||
return {
|
||||
"provider": "openrouter",
|
||||
"api_mode": _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions",
|
||||
"api_mode": _parse_api_mode(model_cfg.get("api_mode"))
|
||||
or _detect_api_mode_for_url(base_url)
|
||||
or "chat_completions",
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"source": source,
|
||||
|
||||
21
run_agent.py
21
run_agent.py
@@ -501,6 +501,12 @@ class AIAgent:
|
||||
else:
|
||||
self.api_mode = "chat_completions"
|
||||
|
||||
# Direct OpenAI sessions use the Responses API path. GPT-5.x tool
|
||||
# calls with reasoning are rejected on /v1/chat/completions, and
|
||||
# Hermes is a tool-using client by default.
|
||||
if self.api_mode == "chat_completions" and self._is_direct_openai_url():
|
||||
self.api_mode = "codex_responses"
|
||||
|
||||
# Pre-warm OpenRouter model metadata cache in a background thread.
|
||||
# fetch_model_metadata() is cached for 1 hour; this avoids a blocking
|
||||
# HTTP request on the first API response when pricing is estimated.
|
||||
@@ -1080,6 +1086,11 @@ class AIAgent:
|
||||
return
|
||||
self._safe_print(*args, **kwargs)
|
||||
|
||||
def _is_direct_openai_url(self, base_url: str = None) -> bool:
|
||||
"""Return True when a base URL targets OpenAI's native API."""
|
||||
url = (base_url or self._base_url_lower).lower()
|
||||
return "api.openai.com" in url and "openrouter" not in url
|
||||
|
||||
def _max_tokens_param(self, value: int) -> dict:
|
||||
"""Return the correct max tokens kwarg for the current provider.
|
||||
|
||||
@@ -1087,11 +1098,7 @@ class AIAgent:
|
||||
'max_completion_tokens'. OpenRouter, local models, and older
|
||||
OpenAI models use 'max_tokens'.
|
||||
"""
|
||||
_is_direct_openai = (
|
||||
"api.openai.com" in self._base_url_lower
|
||||
and "openrouter" not in self._base_url_lower
|
||||
)
|
||||
if _is_direct_openai:
|
||||
if self._is_direct_openai_url():
|
||||
return {"max_completion_tokens": value}
|
||||
return {"max_tokens": value}
|
||||
|
||||
@@ -3553,13 +3560,15 @@ class AIAgent:
|
||||
fb_provider)
|
||||
return False
|
||||
|
||||
# Determine api_mode from provider
|
||||
# Determine api_mode from provider / base URL
|
||||
fb_api_mode = "chat_completions"
|
||||
fb_base_url = str(fb_client.base_url)
|
||||
if fb_provider == "openai-codex":
|
||||
fb_api_mode = "codex_responses"
|
||||
elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
|
||||
fb_api_mode = "anthropic_messages"
|
||||
elif self._is_direct_openai_url(fb_base_url):
|
||||
fb_api_mode = "codex_responses"
|
||||
|
||||
old_model = self.model
|
||||
self.model = fb_model
|
||||
|
||||
Reference in New Issue
Block a user