Updated the authentication mechanism to store Codex OAuth tokens in the Hermes auth store located at ~/.hermes/auth.json instead of the previous ~/.codex/auth.json. This change includes refactoring related functions for reading and saving tokens, ensuring better management of authentication states and preventing conflicts between different applications. Adjusted tests to reflect the new storage structure and improved error handling for missing or malformed tokens.
408 lines
15 KiB
Python
408 lines
15 KiB
Python
"""Shared auxiliary OpenAI client for cheap/fast side tasks.
|
|
|
|
Provides a single resolution chain so every consumer (context compression,
|
|
session search, web extraction, vision analysis, browser vision) picks up
|
|
the best available backend without duplicating fallback logic.
|
|
|
|
Resolution order for text tasks:
|
|
1. OpenRouter (OPENROUTER_API_KEY)
|
|
2. Nous Portal (~/.hermes/auth.json active provider)
|
|
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
|
|
4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
|
|
wrapped to look like a chat.completions client)
|
|
5. None
|
|
|
|
Resolution order for vision/multimodal tasks:
|
|
1. OpenRouter
|
|
2. Nous Portal
|
|
3. None (custom endpoints can't substitute for Gemini multimodal)
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
from pathlib import Path
|
|
from types import SimpleNamespace
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
from openai import OpenAI
|
|
|
|
from hermes_constants import OPENROUTER_BASE_URL
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# OpenRouter app attribution headers
|
|
_OR_HEADERS = {
|
|
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
|
"X-OpenRouter-Title": "Hermes Agent",
|
|
"X-OpenRouter-Categories": "productivity,cli-agent",
|
|
}
|
|
|
|
# Nous Portal extra_body for product attribution.
|
|
# Callers should pass this as extra_body in chat.completions.create()
|
|
# when the auxiliary client is backed by Nous Portal.
|
|
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent"]}
|
|
|
|
# Set at resolve time — True if the auxiliary client points to Nous Portal
|
|
auxiliary_is_nous: bool = False
|
|
|
|
# Default auxiliary models per provider
|
|
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
|
|
_NOUS_MODEL = "gemini-3-flash"
|
|
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
|
|
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
|
|
|
|
# Codex fallback: uses the Responses API (the only endpoint the Codex
|
|
# OAuth token can access) with a fast model for auxiliary tasks.
|
|
_CODEX_AUX_MODEL = "gpt-5.3-codex"
|
|
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
|
|
|
|
|
# ── Codex Responses → chat.completions adapter ─────────────────────────────
|
|
# All auxiliary consumers call client.chat.completions.create(**kwargs) and
|
|
# read response.choices[0].message.content. This adapter translates those
|
|
# calls to the Codex Responses API so callers don't need any changes.
|
|
|
|
class _CodexCompletionsAdapter:
|
|
"""Drop-in shim that accepts chat.completions.create() kwargs and
|
|
routes them through the Codex Responses streaming API."""
|
|
|
|
def __init__(self, real_client: OpenAI, model: str):
|
|
self._client = real_client
|
|
self._model = model
|
|
|
|
def create(self, **kwargs) -> Any:
|
|
messages = kwargs.get("messages", [])
|
|
model = kwargs.get("model", self._model)
|
|
temperature = kwargs.get("temperature")
|
|
|
|
# Separate system/instructions from conversation messages
|
|
instructions = "You are a helpful assistant."
|
|
input_msgs: List[Dict[str, Any]] = []
|
|
for msg in messages:
|
|
role = msg.get("role", "user")
|
|
content = msg.get("content", "")
|
|
if role == "system":
|
|
instructions = content
|
|
else:
|
|
input_msgs.append({"role": role, "content": content})
|
|
|
|
resp_kwargs: Dict[str, Any] = {
|
|
"model": model,
|
|
"instructions": instructions,
|
|
"input": input_msgs or [{"role": "user", "content": ""}],
|
|
"stream": True,
|
|
"store": False,
|
|
}
|
|
|
|
max_tokens = kwargs.get("max_output_tokens") or kwargs.get("max_completion_tokens") or kwargs.get("max_tokens")
|
|
if max_tokens is not None:
|
|
resp_kwargs["max_output_tokens"] = int(max_tokens)
|
|
if temperature is not None:
|
|
resp_kwargs["temperature"] = temperature
|
|
|
|
# Tools support for flush_memories and similar callers
|
|
tools = kwargs.get("tools")
|
|
if tools:
|
|
converted = []
|
|
for t in tools:
|
|
fn = t.get("function", {}) if isinstance(t, dict) else {}
|
|
name = fn.get("name")
|
|
if not name:
|
|
continue
|
|
converted.append({
|
|
"type": "function",
|
|
"name": name,
|
|
"description": fn.get("description", ""),
|
|
"parameters": fn.get("parameters", {}),
|
|
})
|
|
if converted:
|
|
resp_kwargs["tools"] = converted
|
|
|
|
# Stream and collect the response
|
|
text_parts: List[str] = []
|
|
tool_calls_raw: List[Any] = []
|
|
usage = None
|
|
|
|
try:
|
|
with self._client.responses.stream(**resp_kwargs) as stream:
|
|
for _event in stream:
|
|
pass
|
|
final = stream.get_final_response()
|
|
|
|
# Extract text and tool calls from the Responses output
|
|
for item in getattr(final, "output", []):
|
|
item_type = getattr(item, "type", None)
|
|
if item_type == "message":
|
|
for part in getattr(item, "content", []):
|
|
ptype = getattr(part, "type", None)
|
|
if ptype in ("output_text", "text"):
|
|
text_parts.append(getattr(part, "text", ""))
|
|
elif item_type == "function_call":
|
|
tool_calls_raw.append(SimpleNamespace(
|
|
id=getattr(item, "call_id", ""),
|
|
type="function",
|
|
function=SimpleNamespace(
|
|
name=getattr(item, "name", ""),
|
|
arguments=getattr(item, "arguments", "{}"),
|
|
),
|
|
))
|
|
|
|
resp_usage = getattr(final, "usage", None)
|
|
if resp_usage:
|
|
usage = SimpleNamespace(
|
|
prompt_tokens=getattr(resp_usage, "input_tokens", 0),
|
|
completion_tokens=getattr(resp_usage, "output_tokens", 0),
|
|
total_tokens=getattr(resp_usage, "total_tokens", 0),
|
|
)
|
|
except Exception as exc:
|
|
logger.debug("Codex auxiliary Responses API call failed: %s", exc)
|
|
raise
|
|
|
|
content = "".join(text_parts).strip() or None
|
|
|
|
# Build a response that looks like chat.completions
|
|
message = SimpleNamespace(
|
|
role="assistant",
|
|
content=content,
|
|
tool_calls=tool_calls_raw or None,
|
|
)
|
|
choice = SimpleNamespace(
|
|
index=0,
|
|
message=message,
|
|
finish_reason="stop" if not tool_calls_raw else "tool_calls",
|
|
)
|
|
return SimpleNamespace(
|
|
choices=[choice],
|
|
model=model,
|
|
usage=usage,
|
|
)
|
|
|
|
|
|
class _CodexChatShim:
|
|
"""Wraps the adapter to provide client.chat.completions.create()."""
|
|
|
|
def __init__(self, adapter: _CodexCompletionsAdapter):
|
|
self.completions = adapter
|
|
|
|
|
|
class CodexAuxiliaryClient:
|
|
"""OpenAI-client-compatible wrapper that routes through Codex Responses API.
|
|
|
|
Consumers can call client.chat.completions.create(**kwargs) as normal.
|
|
Also exposes .api_key and .base_url for introspection by async wrappers.
|
|
"""
|
|
|
|
def __init__(self, real_client: OpenAI, model: str):
|
|
self._real_client = real_client
|
|
adapter = _CodexCompletionsAdapter(real_client, model)
|
|
self.chat = _CodexChatShim(adapter)
|
|
self.api_key = real_client.api_key
|
|
self.base_url = real_client.base_url
|
|
|
|
def close(self):
|
|
self._real_client.close()
|
|
|
|
|
|
class _AsyncCodexCompletionsAdapter:
|
|
"""Async version of the Codex Responses adapter.
|
|
|
|
Wraps the sync adapter via asyncio.to_thread() so async consumers
|
|
(web_tools, session_search) can await it as normal.
|
|
"""
|
|
|
|
def __init__(self, sync_adapter: _CodexCompletionsAdapter):
|
|
self._sync = sync_adapter
|
|
|
|
async def create(self, **kwargs) -> Any:
|
|
import asyncio
|
|
return await asyncio.to_thread(self._sync.create, **kwargs)
|
|
|
|
|
|
class _AsyncCodexChatShim:
|
|
def __init__(self, adapter: _AsyncCodexCompletionsAdapter):
|
|
self.completions = adapter
|
|
|
|
|
|
class AsyncCodexAuxiliaryClient:
|
|
"""Async-compatible wrapper matching AsyncOpenAI.chat.completions.create()."""
|
|
|
|
def __init__(self, sync_wrapper: "CodexAuxiliaryClient"):
|
|
sync_adapter = sync_wrapper.chat.completions
|
|
async_adapter = _AsyncCodexCompletionsAdapter(sync_adapter)
|
|
self.chat = _AsyncCodexChatShim(async_adapter)
|
|
self.api_key = sync_wrapper.api_key
|
|
self.base_url = sync_wrapper.base_url
|
|
|
|
|
|
def _read_nous_auth() -> Optional[dict]:
|
|
"""Read and validate ~/.hermes/auth.json for an active Nous provider.
|
|
|
|
Returns the provider state dict if Nous is active with tokens,
|
|
otherwise None.
|
|
"""
|
|
try:
|
|
if not _AUTH_JSON_PATH.is_file():
|
|
return None
|
|
data = json.loads(_AUTH_JSON_PATH.read_text())
|
|
if data.get("active_provider") != "nous":
|
|
return None
|
|
provider = data.get("providers", {}).get("nous", {})
|
|
# Must have at least an access_token or agent_key
|
|
if not provider.get("agent_key") and not provider.get("access_token"):
|
|
return None
|
|
return provider
|
|
except Exception as exc:
|
|
logger.debug("Could not read Nous auth: %s", exc)
|
|
return None
|
|
|
|
|
|
def _nous_api_key(provider: dict) -> str:
|
|
"""Extract the best API key from a Nous provider state dict."""
|
|
return provider.get("agent_key") or provider.get("access_token", "")
|
|
|
|
|
|
def _nous_base_url() -> str:
|
|
"""Resolve the Nous inference base URL from env or default."""
|
|
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
|
|
|
|
|
|
def _read_codex_access_token() -> Optional[str]:
|
|
"""Read a valid Codex OAuth access token from Hermes auth store (~/.hermes/auth.json)."""
|
|
try:
|
|
from hermes_cli.auth import _read_codex_tokens
|
|
data = _read_codex_tokens()
|
|
tokens = data.get("tokens", {})
|
|
access_token = tokens.get("access_token")
|
|
if isinstance(access_token, str) and access_token.strip():
|
|
return access_token.strip()
|
|
return None
|
|
except Exception as exc:
|
|
logger.debug("Could not read Codex auth for auxiliary client: %s", exc)
|
|
return None
|
|
|
|
|
|
# ── Public API ──────────────────────────────────────────────────────────────
|
|
|
|
def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|
"""Return (client, model_slug) for text-only auxiliary tasks.
|
|
|
|
Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth -> (None, None).
|
|
"""
|
|
# 1. OpenRouter
|
|
or_key = os.getenv("OPENROUTER_API_KEY")
|
|
if or_key:
|
|
logger.debug("Auxiliary text client: OpenRouter")
|
|
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
|
|
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
|
|
|
# 2. Nous Portal
|
|
nous = _read_nous_auth()
|
|
if nous:
|
|
global auxiliary_is_nous
|
|
auxiliary_is_nous = True
|
|
logger.debug("Auxiliary text client: Nous Portal")
|
|
return (
|
|
OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()),
|
|
_NOUS_MODEL,
|
|
)
|
|
|
|
# 3. Custom endpoint (both base URL and key must be set)
|
|
custom_base = os.getenv("OPENAI_BASE_URL")
|
|
custom_key = os.getenv("OPENAI_API_KEY")
|
|
if custom_base and custom_key:
|
|
model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
|
|
logger.debug("Auxiliary text client: custom endpoint (%s)", model)
|
|
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
|
|
|
# 4. Codex OAuth -- uses the Responses API (only endpoint the token
|
|
# can access), wrapped to look like a chat.completions client.
|
|
codex_token = _read_codex_access_token()
|
|
if codex_token:
|
|
logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
|
|
real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
|
|
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
|
|
|
|
# 5. Nothing available
|
|
logger.debug("Auxiliary text client: none available")
|
|
return None, None
|
|
|
|
|
|
def get_async_text_auxiliary_client():
|
|
"""Return (async_client, model_slug) for async consumers.
|
|
|
|
For standard providers returns (AsyncOpenAI, model). For Codex returns
|
|
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
|
Returns (None, None) when no provider is available.
|
|
"""
|
|
from openai import AsyncOpenAI
|
|
|
|
sync_client, model = get_text_auxiliary_client()
|
|
if sync_client is None:
|
|
return None, None
|
|
|
|
if isinstance(sync_client, CodexAuxiliaryClient):
|
|
return AsyncCodexAuxiliaryClient(sync_client), model
|
|
|
|
async_kwargs = {
|
|
"api_key": sync_client.api_key,
|
|
"base_url": str(sync_client.base_url),
|
|
}
|
|
if "openrouter" in str(sync_client.base_url).lower():
|
|
async_kwargs["default_headers"] = dict(_OR_HEADERS)
|
|
return AsyncOpenAI(**async_kwargs), model
|
|
|
|
|
|
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|
"""Return (client, model_slug) for vision/multimodal auxiliary tasks.
|
|
|
|
Only OpenRouter and Nous Portal qualify — custom endpoints cannot
|
|
substitute for Gemini multimodal.
|
|
"""
|
|
# 1. OpenRouter
|
|
or_key = os.getenv("OPENROUTER_API_KEY")
|
|
if or_key:
|
|
logger.debug("Auxiliary vision client: OpenRouter")
|
|
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
|
|
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
|
|
|
# 2. Nous Portal
|
|
nous = _read_nous_auth()
|
|
if nous:
|
|
logger.debug("Auxiliary vision client: Nous Portal")
|
|
return (
|
|
OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()),
|
|
_NOUS_MODEL,
|
|
)
|
|
|
|
# 3. Nothing suitable
|
|
logger.debug("Auxiliary vision client: none available")
|
|
return None, None
|
|
|
|
|
|
def get_auxiliary_extra_body() -> dict:
|
|
"""Return extra_body kwargs for auxiliary API calls.
|
|
|
|
Includes Nous Portal product tags when the auxiliary client is backed
|
|
by Nous Portal. Returns empty dict otherwise.
|
|
"""
|
|
return dict(NOUS_EXTRA_BODY) if auxiliary_is_nous else {}
|
|
|
|
|
|
def auxiliary_max_tokens_param(value: int) -> dict:
|
|
"""Return the correct max tokens kwarg for the auxiliary client's provider.
|
|
|
|
OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer
|
|
models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'.
|
|
The Codex adapter translates max_tokens internally, so we use max_tokens
|
|
for it as well.
|
|
"""
|
|
custom_base = os.getenv("OPENAI_BASE_URL", "")
|
|
or_key = os.getenv("OPENROUTER_API_KEY")
|
|
# Only use max_completion_tokens for direct OpenAI custom endpoints
|
|
if (not or_key
|
|
and _read_nous_auth() is None
|
|
and "api.openai.com" in custom_base.lower()):
|
|
return {"max_completion_tokens": value}
|
|
return {"max_tokens": value}
|