From d9f373654b4a9cc7ecfcb46d54a5a42b6c8baca4 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 7 Mar 2026 08:52:06 -0800 Subject: [PATCH] feat: enhance auxiliary model configuration and environment variable handling - Added support for auxiliary model overrides in the configuration, allowing users to specify providers and models for vision and web extraction tasks. - Updated the CLI configuration example to include new auxiliary model settings. - Enhanced the environment variable mapping in the CLI to accommodate auxiliary model configurations. - Improved the resolution logic for auxiliary clients to support task-specific provider overrides. - Updated relevant documentation and comments for clarity on the new features and their usage. --- agent/auxiliary_client.py | 179 +++++++++++++++++---------- agent/context_compressor.py | 2 +- cli-config.yaml.example | 52 +++++++- cli.py | 24 ++++ hermes_cli/config.py | 39 ++++++ tests/agent/test_auxiliary_client.py | 6 +- tools/browser_tool.py | 38 ++++-- tools/vision_tools.py | 4 +- tools/web_tools.py | 8 +- 9 files changed, 271 insertions(+), 81 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 841bb6166..f5a11e1ce 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -4,7 +4,7 @@ Provides a single resolution chain so every consumer (context compression, session search, web extraction, vision analysis, browser vision) picks up the best available backend without duplicating fallback logic. -Resolution order for text tasks: +Resolution order (same for text and vision tasks): 1. OpenRouter (OPENROUTER_API_KEY) 2. Nous Portal (~/.hermes/auth.json active provider) 3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) @@ -14,10 +14,10 @@ Resolution order for text tasks: — checked via PROVIDER_REGISTRY entries with auth_type='api_key' 6. None -Resolution order for vision/multimodal tasks: - 1. OpenRouter - 2. Nous Portal - 3. 
None (custom endpoints can't substitute for Gemini multimodal) +Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER, +CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task: +"openrouter", "nous", or "main" (= steps 3-5). +Default "auto" follows the full chain above. """ import json @@ -337,59 +337,122 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: return None, None -# ── Public API ────────────────────────────────────────────────────────────── +# ── Provider resolution helpers ───────────────────────────────────────────── -def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: - """Return (client, model_slug) for text-only auxiliary tasks. +def _get_auxiliary_provider(task: str = "") -> str: + """Read the provider override for a specific auxiliary task. - Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth - -> direct API-key providers -> (None, None). + Checks AUXILIARY_{TASK}_PROVIDER first (e.g. AUXILIARY_VISION_PROVIDER), + then CONTEXT_{TASK}_PROVIDER (for the compression section's summary_provider), + then falls back to "auto". Returns one of: "auto", "openrouter", "nous", "main". """ - # 1. OpenRouter + if task: + for prefix in ("AUXILIARY_", "CONTEXT_"): + val = os.getenv(f"{prefix}{task.upper()}_PROVIDER", "").strip().lower() + if val and val != "auto": + return val + return "auto" + + +def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]: or_key = os.getenv("OPENROUTER_API_KEY") - if or_key: - logger.debug("Auxiliary text client: OpenRouter") - return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL, - default_headers=_OR_HEADERS), _OPENROUTER_MODEL + if not or_key: + return None, None + logger.debug("Auxiliary client: OpenRouter") + return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL, + default_headers=_OR_HEADERS), _OPENROUTER_MODEL - # 2. 
Nous Portal + +def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]: nous = _read_nous_auth() - if nous: - global auxiliary_is_nous - auxiliary_is_nous = True - logger.debug("Auxiliary text client: Nous Portal") - return ( - OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()), - _NOUS_MODEL, - ) + if not nous: + return None, None + global auxiliary_is_nous + auxiliary_is_nous = True + logger.debug("Auxiliary client: Nous Portal") + return ( + OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()), + _NOUS_MODEL, + ) - # 3. Custom endpoint (both base URL and key must be set) + +def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: custom_base = os.getenv("OPENAI_BASE_URL") custom_key = os.getenv("OPENAI_API_KEY") - if custom_base and custom_key: - model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini" - logger.debug("Auxiliary text client: custom endpoint (%s)", model) - return OpenAI(api_key=custom_key, base_url=custom_base), model + if not custom_base or not custom_key: + return None, None + model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini" + logger.debug("Auxiliary client: custom endpoint (%s)", model) + return OpenAI(api_key=custom_key, base_url=custom_base), model - # 4. Codex OAuth -- uses the Responses API (only endpoint the token - # can access), wrapped to look like a chat.completions client. 
+ +def _try_codex() -> Tuple[Optional[Any], Optional[str]]: codex_token = _read_codex_access_token() - if codex_token: - logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) - real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) - return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL + if not codex_token: + return None, None + logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) + real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL - # 5. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, etc.) - api_client, api_model = _resolve_api_key_provider() - if api_client is not None: - return api_client, api_model - # 6. Nothing available - logger.debug("Auxiliary text client: none available") +def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]: + """Resolve a specific forced provider. Returns (None, None) if creds missing.""" + if forced == "openrouter": + client, model = _try_openrouter() + if client is None: + logger.warning("auxiliary.provider=openrouter but OPENROUTER_API_KEY not set") + return client, model + + if forced == "nous": + client, model = _try_nous() + if client is None: + logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes login)") + return client, model + + if forced == "main": + # "main" = skip OpenRouter/Nous, use the main chat model's credentials. 
+ for try_fn in (_try_custom_endpoint, _try_codex, _resolve_api_key_provider): + client, model = try_fn() + if client is not None: + return client, model + logger.warning("auxiliary.provider=main but no main endpoint credentials found") + return None, None + + # Unknown provider name — NOTE(review): despite the warning text below, no auto fallback happens here; (None, None) is returned + logger.warning("Unknown auxiliary.provider=%r, falling back to auto", forced) return None, None -def get_async_text_auxiliary_client(): +def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]: + """Full auto-detection chain: OpenRouter → Nous → custom → Codex → API-key → None.""" + for try_fn in (_try_openrouter, _try_nous, _try_custom_endpoint, + _try_codex, _resolve_api_key_provider): + client, model = try_fn() + if client is not None: + return client, model + logger.debug("Auxiliary client: none available") + return None, None + + +# ── Public API ────────────────────────────────────────────────────────────── + +def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]: + """Return (client, default_model_slug) for text-only auxiliary tasks. + + Args: + task: Optional task name ("compression", "web_extract") to check + for a task-specific provider override. + + Callers may override the returned model with a per-task env var + (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL). + """ + forced = _get_auxiliary_provider(task) + if forced != "auto": + return _resolve_forced_provider(forced) + return _resolve_auto() + + +def get_async_text_auxiliary_client(task: str = ""): """Return (async_client, model_slug) for async consumers. For standard providers returns (AsyncOpenAI, model). 
For Codex returns @@ -398,7 +461,7 @@ def get_async_text_auxiliary_client(): """ from openai import AsyncOpenAI - sync_client, model = get_text_auxiliary_client() + sync_client, model = get_text_auxiliary_client(task) if sync_client is None: return None, None @@ -417,30 +480,16 @@ def get_async_text_auxiliary_client(): def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: - """Return (client, model_slug) for vision/multimodal auxiliary tasks. + """Return (client, default_model_slug) for vision/multimodal auxiliary tasks. - Only OpenRouter and Nous Portal qualify — custom endpoints cannot - substitute for Gemini multimodal. + Checks AUXILIARY_VISION_PROVIDER for a forced provider, otherwise + auto-detects. Callers may override the returned model with + AUXILIARY_VISION_MODEL. """ - # 1. OpenRouter - or_key = os.getenv("OPENROUTER_API_KEY") - if or_key: - logger.debug("Auxiliary vision client: OpenRouter") - return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL, - default_headers=_OR_HEADERS), _OPENROUTER_MODEL - - # 2. Nous Portal - nous = _read_nous_auth() - if nous: - logger.debug("Auxiliary vision client: Nous Portal") - return ( - OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()), - _NOUS_MODEL, - ) - - # 3. 
Nothing suitable - logger.debug("Auxiliary vision client: none available") - return None, None + forced = _get_auxiliary_provider("vision") + if forced != "auto": + return _resolve_forced_provider(forced) + return _resolve_auto() def get_auxiliary_extra_body() -> dict: diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 35897cccd..7a01d796a 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -53,7 +53,7 @@ class ContextCompressor: self.last_completion_tokens = 0 self.last_total_tokens = 0 - self.client, default_model = get_text_auxiliary_client() + self.client, default_model = get_text_auxiliary_client("compression") self.summary_model = summary_model_override or default_model def update_from_response(self, usage: Dict[str, Any]): diff --git a/cli-config.yaml.example b/cli-config.yaml.example index dfbaeee6b..66c15af0c 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -209,8 +209,58 @@ compression: threshold: 0.85 # Model to use for generating summaries (fast/cheap recommended) - # This model compresses the middle turns into a concise summary + # This model compresses the middle turns into a concise summary. + # IMPORTANT: it receives the full middle section of the conversation, so it + # MUST support a context length at least as large as your main model's. summary_model: "google/gemini-3-flash-preview" + + # Provider for the summary model (default: "auto") + # Options: "auto", "openrouter", "nous", "main" + # summary_provider: "auto" + +# ============================================================================= +# Auxiliary Models (Advanced — Experimental) +# ============================================================================= +# Hermes uses lightweight "auxiliary" models for side tasks: image analysis, +# browser screenshot analysis, web page summarization, and context compression. 
+# +# By default these use Gemini Flash via OpenRouter or Nous Portal and are +# auto-detected from your credentials. You do NOT need to change anything +# here for normal usage. +# +# WARNING: Overriding these with providers other than OpenRouter or Nous Portal +# is EXPERIMENTAL and may not work. Not all models/providers support vision, +# produce usable summaries, or accept the same API format. Change at your own +# risk — if things break, reset to "auto" / empty values. +# +# Each task has its own provider + model pair so you can mix providers. +# For example: OpenRouter for vision (needs multimodal), but your main +# local endpoint for compression (just needs text). +# +# Provider options: +# "auto" - Best available: OpenRouter → Nous Portal → main endpoint (default) +# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY) +# "nous" - Force Nous Portal (requires: hermes login) +# "main" - Use the same provider & credentials as your main chat model. +# Skips OpenRouter/Nous and uses your custom endpoint +# (OPENAI_BASE_URL), Codex OAuth, or API-key provider directly. +# Useful if you run a local model and want auxiliary tasks to +# use it too. +# +# Model: leave empty to use the provider's default. When empty, OpenRouter +# uses "google/gemini-3-flash-preview" and Nous uses "gemini-3-flash". +# Other providers pick a sensible default automatically. +# +# auxiliary: +# # Image analysis: vision_analyze tool + browser screenshots +# vision: +# provider: "auto" +# model: "" # e.g. 
"google/gemini-2.5-flash", "openai/gpt-4o" +# +# # Web page scraping / summarization + browser page text extraction +# web_extract: +# provider: "auto" +# model: "" # ============================================================================= # Persistent Memory diff --git a/cli.py b/cli.py index cd13b820c..21453620f 100755 --- a/cli.py +++ b/cli.py @@ -333,12 +333,36 @@ def load_cli_config() -> Dict[str, Any]: "enabled": "CONTEXT_COMPRESSION_ENABLED", "threshold": "CONTEXT_COMPRESSION_THRESHOLD", "summary_model": "CONTEXT_COMPRESSION_MODEL", + "summary_provider": "CONTEXT_COMPRESSION_PROVIDER", } for config_key, env_var in compression_env_mappings.items(): if config_key in compression_config: os.environ[env_var] = str(compression_config[config_key]) + # Apply auxiliary model overrides to environment variables. + # Vision and web_extract each have their own provider + model pair. + # (Compression is handled in the compression section above.) + # Only set env vars for non-empty / non-default values so auto-detection + # still works. 
+ auxiliary_config = defaults.get("auxiliary", {}) + auxiliary_task_env = { + # config key → (provider env var, model env var) + "vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), + "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), + } + + for task_key, (prov_env, model_env) in auxiliary_task_env.items(): + task_cfg = auxiliary_config.get(task_key, {}) + if not isinstance(task_cfg, dict): + continue + prov = str(task_cfg.get("provider", "")).strip() + model = str(task_cfg.get("model", "")).strip() + if prov and prov != "auto": + os.environ[prov_env] = prov + if model: + os.environ[model_env] = model + return defaults # Load configuration at module startup diff --git a/hermes_cli/config.py b/hermes_cli/config.py index ed782e6a9..208b95cb7 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -87,6 +87,20 @@ DEFAULT_CONFIG = { "enabled": True, "threshold": 0.85, "summary_model": "google/gemini-3-flash-preview", + "summary_provider": "auto", + }, + + # Auxiliary model overrides (advanced). By default Hermes auto-selects + # the provider and model for each side task. Set these to override. + "auxiliary": { + "vision": { + "provider": "auto", # auto | openrouter | nous | main + "model": "", # e.g. 
"google/gemini-2.5-flash", "gpt-4o" + }, + "web_extract": { + "provider": "auto", + "model": "", + }, }, "display": { @@ -913,6 +927,31 @@ def show_config(): if enabled: print(f" Threshold: {compression.get('threshold', 0.85) * 100:.0f}%") print(f" Model: {compression.get('summary_model', 'google/gemini-3-flash-preview')}") + comp_provider = compression.get('summary_provider', 'auto') + if comp_provider != 'auto': + print(f" Provider: {comp_provider}") + + # Auxiliary models + auxiliary = config.get('auxiliary', {}) + aux_tasks = { + "Vision": auxiliary.get('vision', {}), + "Web extract": auxiliary.get('web_extract', {}), + } + has_overrides = any( + t.get('provider', 'auto') != 'auto' or t.get('model', '') + for t in aux_tasks.values() + ) + if has_overrides: + print() + print(color("◆ Auxiliary Models (overrides)", Colors.CYAN, Colors.BOLD)) + for label, task_cfg in aux_tasks.items(): + prov = task_cfg.get('provider', 'auto') + mdl = task_cfg.get('model', '') + if prov != 'auto' or mdl: + parts = [f"provider={prov}"] + if mdl: + parts.append(f"model={mdl}") + print(f" {label:12s} {', '.join(parts)}") # Messaging print() diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index a8f797fe2..b8f872e59 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -151,10 +151,10 @@ class TestGetTextAuxiliaryClient: assert model is None -class TestCodexNotInVisionClient: - """Codex fallback should NOT apply to vision tasks.""" +class TestVisionClientFallback: + """Vision client uses the same full fallback chain as text.""" - def test_vision_returns_none_without_openrouter_nous(self): + def test_vision_returns_none_without_any_credentials(self): with patch("agent.auxiliary_client._read_nous_auth", return_value=None): client, model = get_vision_auxiliary_client() assert client is None diff --git a/tools/browser_tool.py b/tools/browser_tool.py index e1bd32239..eea884bd7 100644 --- 
a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -63,7 +63,7 @@ import time import requests from typing import Dict, Any, Optional, List from pathlib import Path -from agent.auxiliary_client import get_vision_auxiliary_client +from agent.auxiliary_client import get_vision_auxiliary_client, get_text_auxiliary_client logger = logging.getLogger(__name__) @@ -80,8 +80,28 @@ DEFAULT_SESSION_TIMEOUT = 300 # Max tokens for snapshot content before summarization SNAPSHOT_SUMMARIZE_THRESHOLD = 8000 -# Resolve vision auxiliary client for extraction/vision tasks -_aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client() +# Vision client — for browser_vision (screenshot analysis) +_aux_vision_client, _DEFAULT_VISION_MODEL = get_vision_auxiliary_client() + +# Text client — for page snapshot summarization (same config as web_extract) +_aux_text_client, _DEFAULT_TEXT_MODEL = get_text_auxiliary_client("web_extract") + +# Module-level alias for availability checks +EXTRACTION_MODEL = _DEFAULT_TEXT_MODEL or _DEFAULT_VISION_MODEL + + +def _get_vision_model() -> str: + """Model for browser_vision (screenshot analysis — multimodal).""" + return (os.getenv("AUXILIARY_VISION_MODEL", "").strip() + or _DEFAULT_VISION_MODEL + or "google/gemini-3-flash-preview") + + +def _get_extraction_model() -> str: + """Model for page snapshot text summarization — same as web_extract.""" + return (os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() + or _DEFAULT_TEXT_MODEL + or "google/gemini-3-flash-preview") def _is_local_mode() -> bool: @@ -860,9 +880,9 @@ def _extract_relevant_content( ) -> str: """Use LLM to extract relevant content from a snapshot based on the user's task. - Falls back to simple truncation when no auxiliary vision model is configured. + Falls back to simple truncation when no auxiliary text model is configured. 
""" - if _aux_vision_client is None or EXTRACTION_MODEL is None: + if _aux_text_client is None: return _truncate_snapshot(snapshot_text) if user_task: @@ -890,8 +910,8 @@ def _extract_relevant_content( try: from agent.auxiliary_client import auxiliary_max_tokens_param - response = _aux_vision_client.chat.completions.create( - model=EXTRACTION_MODEL, + response = _aux_text_client.chat.completions.create( + model=_get_extraction_model(), messages=[{"role": "user", "content": extraction_prompt}], **auxiliary_max_tokens_param(4000), temperature=0.1, @@ -1316,7 +1336,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: effective_task_id = task_id or "default" # Check auxiliary vision client - if _aux_vision_client is None or EXTRACTION_MODEL is None: + if _aux_vision_client is None or _DEFAULT_VISION_MODEL is None: return json.dumps({ "success": False, "error": "Browser vision unavailable: no auxiliary vision model configured. " @@ -1372,7 +1392,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: # Use the sync auxiliary vision client directly from agent.auxiliary_client import auxiliary_max_tokens_param response = _aux_vision_client.chat.completions.create( - model=EXTRACTION_MODEL, + model=_get_vision_model(), messages=[ { "role": "user", diff --git a/tools/vision_tools.py b/tools/vision_tools.py index f3744e95f..718e17363 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -468,7 +468,9 @@ def _handle_vision_analyze(args, **kw): image_url = args.get("image_url", "") question = args.get("question", "") full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}" - model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview" + model = (os.getenv("AUXILIARY_VISION_MODEL", "").strip() + or DEFAULT_VISION_MODEL + or "google/gemini-3-flash-preview") return vision_analyze_tool(image_url, full_prompt, model) diff --git a/tools/web_tools.py 
b/tools/web_tools.py index 0fd0f4107..e99d94fb0 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -85,7 +85,13 @@ DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000 # Resolve async auxiliary client at module level. # Handles Codex Responses API adapter transparently. -_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client() +_aux_async_client, _DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client("web_extract") + +# Allow per-task override via config.yaml auxiliary.web_extract.model +DEFAULT_SUMMARIZER_MODEL = ( + os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() + or _DEFAULT_SUMMARIZER_MODEL +) _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")