refactor: route ad-hoc LLM consumers through centralized provider router

Route all remaining ad-hoc auxiliary LLM call sites through
resolve_provider_client() so auth, headers, and API format (Chat
Completions vs Responses API) are handled consistently in one place.

Files changed:

- tools/openrouter_client.py: Replace manual AsyncOpenAI construction
  with resolve_provider_client('openrouter', async_mode=True). The
  shared client module now delegates entirely to the router.

- tools/skills_guard.py: Replace inline OpenAI client construction
  (hardcoded OpenRouter base_url, manual api_key lookup, manual
  headers) with resolve_provider_client('openrouter'). Remove unused
  OPENROUTER_BASE_URL import.

- trajectory_compressor.py: Add _detect_provider() to map config
  base_url to a provider name, then route through
  resolve_provider_client. Falls back to raw construction for
  unrecognized custom endpoints.

- mini_swe_runner.py: Route default case (no explicit api_key/base_url)
  through resolve_provider_client('openrouter') with auto-detection
  fallback. Preserves direct construction when explicit creds are
  passed via CLI args.

- agent/auxiliary_client.py: Fix stale module docstring — vision auto
  mode now correctly documents that Codex and custom endpoints are
  tried (not skipped).
This commit is contained in:
teknium1
2026-03-11 20:02:36 -07:00
parent 8805e705a7
commit 07f09ecd83
5 changed files with 97 additions and 89 deletions

View File

@@ -17,7 +17,10 @@ Resolution order for text tasks (auto mode):
Resolution order for vision/multimodal tasks (auto mode):
1. OpenRouter
2. Nous Portal
3. None (steps 3-5 are skipped — they may not support multimodal)
3. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
4. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
5. None (API-key providers like z.ai/Kimi/MiniMax are skipped —
they may not support multimodal)
Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task:

View File

@@ -189,29 +189,30 @@ class MiniSWERunner:
)
self.logger = logging.getLogger(__name__)
# Initialize OpenAI client - defaults to OpenRouter
from openai import OpenAI
client_kwargs = {}
# Default to OpenRouter if no base_url provided
if base_url:
client_kwargs["base_url"] = base_url
# Initialize LLM client via centralized provider router.
# If explicit api_key/base_url are provided (e.g. from CLI args),
# construct directly. Otherwise use the router for OpenRouter.
if api_key or base_url:
from openai import OpenAI
client_kwargs = {
"base_url": base_url or "https://openrouter.ai/api/v1",
"api_key": api_key or os.getenv(
"OPENROUTER_API_KEY",
os.getenv("ANTHROPIC_API_KEY",
os.getenv("OPENAI_API_KEY", ""))),
}
self.client = OpenAI(**client_kwargs)
else:
client_kwargs["base_url"] = "https://openrouter.ai/api/v1"
# Handle API key - OpenRouter is the primary provider
if api_key:
client_kwargs["api_key"] = api_key
else:
client_kwargs["api_key"] = os.getenv(
"OPENROUTER_API_KEY",
os.getenv("ANTHROPIC_API_KEY", os.getenv("OPENAI_API_KEY", ""))
)
self.client = OpenAI(**client_kwargs)
from agent.auxiliary_client import resolve_provider_client
self.client, _ = resolve_provider_client("openrouter", model=model)
if self.client is None:
# Fallback: try auto-detection
self.client, _ = resolve_provider_client("auto", model=model)
if self.client is None:
from openai import OpenAI
self.client = OpenAI(
base_url="https://openrouter.ai/api/v1",
api_key=os.getenv("OPENROUTER_API_KEY", ""))
# Environment will be created per-task
self.env = None

View File

@@ -1,39 +1,30 @@
"""Shared OpenRouter API client for Hermes tools.
Provides a single lazy-initialized AsyncOpenAI client that all tool modules
can share, eliminating the duplicated _get_openrouter_client() /
_get_summarizer_client() pattern previously copy-pasted across web_tools,
vision_tools, mixture_of_agents_tool, and session_search_tool.
can share. Routes through the centralized provider router in
agent/auxiliary_client.py so auth, headers, and API format are handled
consistently.
"""
import os
from openai import AsyncOpenAI
from hermes_constants import OPENROUTER_BASE_URL
_client: AsyncOpenAI | None = None
_client = None
def get_async_client() -> AsyncOpenAI:
"""Return a shared AsyncOpenAI client pointed at OpenRouter.
def get_async_client():
"""Return a shared async OpenAI-compatible client for OpenRouter.
The client is created lazily on first call and reused thereafter.
Uses the centralized provider router for auth and client construction.
Raises ValueError if OPENROUTER_API_KEY is not set.
"""
global _client
if _client is None:
api_key = os.getenv("OPENROUTER_API_KEY")
if not api_key:
from agent.auxiliary_client import resolve_provider_client
client, _model = resolve_provider_client("openrouter", async_mode=True)
if client is None:
raise ValueError("OPENROUTER_API_KEY environment variable not set")
_client = AsyncOpenAI(
api_key=api_key,
base_url=OPENROUTER_BASE_URL,
default_headers={
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
},
)
_client = client
return _client

View File

@@ -29,7 +29,7 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import List, Tuple
from hermes_constants import OPENROUTER_BASE_URL
# ---------------------------------------------------------------------------
@@ -934,24 +934,14 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
if not model:
return static_result
# Call the LLM via the OpenAI SDK (same pattern as run_agent.py)
# Call the LLM via the centralized provider router
try:
from openai import OpenAI
import os
from agent.auxiliary_client import resolve_provider_client
api_key = os.getenv("OPENROUTER_API_KEY", "")
if not api_key:
client, _default_model = resolve_provider_client("openrouter")
if client is None:
return static_result
client = OpenAI(
base_url=OPENROUTER_BASE_URL,
api_key=api_key,
default_headers={
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
},
)
response = client.chat.completions.create(
model=model,
messages=[{

View File

@@ -344,38 +344,61 @@ class TrajectoryCompressor:
raise RuntimeError(f"Failed to load tokenizer '{self.config.tokenizer_name}': {e}")
def _init_summarizer(self):
"""Initialize OpenRouter client for summarization (sync and async)."""
api_key = os.getenv(self.config.api_key_env)
if not api_key:
raise RuntimeError(f"Missing API key. Set {self.config.api_key_env} environment variable.")
from openai import OpenAI, AsyncOpenAI
# OpenRouter app attribution headers (only for OpenRouter endpoints)
extra = {}
if "openrouter" in self.config.base_url.lower():
extra["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
# Sync client (for backwards compatibility)
self.client = OpenAI(
api_key=api_key,
base_url=self.config.base_url,
**extra,
)
# Async client for parallel processing
self.async_client = AsyncOpenAI(
api_key=api_key,
base_url=self.config.base_url,
**extra,
)
print(f"✅ Initialized OpenRouter client: {self.config.summarization_model}")
"""Initialize LLM client for summarization (sync and async).
Routes through the centralized provider router for known providers
(OpenRouter, Nous, Codex, etc.) so auth and headers are handled
consistently. Falls back to raw construction for custom endpoints.
"""
from agent.auxiliary_client import resolve_provider_client
provider = self._detect_provider()
if provider:
# Use centralized router — handles auth, headers, Codex adapter
self.client, _ = resolve_provider_client(
provider, model=self.config.summarization_model)
self.async_client, _ = resolve_provider_client(
provider, model=self.config.summarization_model,
async_mode=True)
if self.client is None:
raise RuntimeError(
f"Provider '{provider}' is not configured. "
f"Check your API key or run: hermes setup")
else:
# Custom endpoint — use config's raw base_url + api_key_env
api_key = os.getenv(self.config.api_key_env)
if not api_key:
raise RuntimeError(
f"Missing API key. Set {self.config.api_key_env} "
f"environment variable.")
from openai import OpenAI, AsyncOpenAI
self.client = OpenAI(
api_key=api_key, base_url=self.config.base_url)
self.async_client = AsyncOpenAI(
api_key=api_key, base_url=self.config.base_url)
print(f"✅ Initialized summarizer client: {self.config.summarization_model}")
print(f" Max concurrent requests: {self.config.max_concurrent_requests}")
def _detect_provider(self) -> str:
"""Detect the provider name from the configured base_url."""
url = self.config.base_url.lower()
if "openrouter" in url:
return "openrouter"
if "nousresearch.com" in url:
return "nous"
if "chatgpt.com/backend-api/codex" in url:
return "codex"
if "api.z.ai" in url:
return "zai"
if "moonshot.ai" in url or "api.kimi.com" in url:
return "kimi-coding"
if "minimaxi.com" in url:
return "minimax-cn"
if "minimax.io" in url:
return "minimax"
# Unknown base_url — not a known provider
return ""
def count_tokens(self, text: str) -> int:
"""Count tokens in text using the configured tokenizer."""