hermes_cli/runtime_provider.py

"""Shared runtime provider resolution for CLI, gateway, cron, and helpers."""

from __future__ import annotations

import os
from typing import Any, Dict, Optional

from hermes_cli.auth import (
    AuthError,
    PROVIDER_REGISTRY,
    format_auth_error,
    resolve_provider,
    resolve_nous_runtime_credentials,
    resolve_codex_runtime_credentials,
    resolve_api_key_provider_credentials,
)
from hermes_cli.config import load_config
from hermes_constants import OPENROUTER_BASE_URL


def _get_model_config() -> Dict[str, Any]:
    """Return the ``model`` entry of the loaded config as a new dict.

    The ``model`` value may be stored either as a mapping or as a plain
    string. A mapping is shallow-copied; a non-blank string is wrapped as
    ``{"default": <stripped string>}``; anything else yields ``{}``.
    """
    raw_model = load_config().get("model")

    if isinstance(raw_model, dict):
        # dict() builds a new mapping object rather than returning the loaded one.
        return dict(raw_model)

    if isinstance(raw_model, str):
        model_name = raw_model.strip()
        if model_name:
            return {"default": model_name}

    return {}


def resolve_requested_provider(requested: Optional[str] = None) -> str:
    """Pick the provider name to resolve, normalised to stripped lowercase.

    Sources are consulted in priority order and the first non-blank one wins:

    1. the ``requested`` argument,
    2. the ``HERMES_INFERENCE_PROVIDER`` environment variable,
    3. the ``provider`` key of the model config.

    Returns ``"auto"`` when none of them supplies a value.
    """
    explicit = (requested or "").strip()
    if explicit:
        return explicit.lower()

    from_env = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip()
    if from_env:
        return from_env.lower()

    # The config is only loaded when neither the argument nor the env decided.
    configured = _get_model_config().get("provider")
    if isinstance(configured, str):
        configured = configured.strip()
        if configured:
            return configured.lower()

    return "auto"


def _resolve_openrouter_runtime(
    *,
    requested_provider: str,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Dict[str, Any]:
    """Build the runtime settings for the OpenRouter / OpenAI-compatible path.

    Args:
        requested_provider: Provider name the caller asked for (compared
            after stripping and lowercasing).
        explicit_api_key: API key passed directly by the caller, if any.
        explicit_base_url: Base URL passed directly by the caller, if any.

    Returns:
        A dict with keys ``provider`` (always ``"openrouter"``), ``api_mode``
        (always ``"chat_completions"``), ``base_url`` (trailing ``/``
        removed), ``api_key`` and ``source`` (``"explicit"`` when either
        explicit argument is truthy, otherwise ``"env/config"``).
    """
    model_cfg = _get_model_config()

    raw_cfg_url = model_cfg.get("base_url")
    config_url = raw_cfg_url.strip() if isinstance(raw_cfg_url, str) else ""

    raw_cfg_provider = model_cfg.get("provider")
    config_provider = (
        raw_cfg_provider.strip().lower() if isinstance(raw_cfg_provider, str) else ""
    )

    wanted = (requested_provider or "").strip().lower()

    openai_env_url = os.getenv("OPENAI_BASE_URL", "").strip()
    openrouter_env_url = os.getenv("OPENROUTER_BASE_URL", "").strip()

    # The config-file base_url only participates in "auto" mode, and only when
    # nothing more specific (explicit argument, OPENAI_BASE_URL, or a concrete
    # provider named in the config) is present.
    config_url_allowed = (
        wanted == "auto"
        and bool(config_url)
        and not explicit_base_url
        and not openai_env_url
        and config_provider in ("", "auto")
    )

    # When the user explicitly requested the openrouter provider, skip
    # OPENAI_BASE_URL — it typically points to a custom / non-OpenRouter
    # endpoint and would prevent switching back to OpenRouter (#874).
    candidates = (
        (explicit_base_url or "").strip(),
        "" if wanted == "openrouter" else openai_env_url,
        config_url if config_url_allowed else "",
        openrouter_env_url,
        OPENROUTER_BASE_URL,
    )
    # First non-empty candidate wins; the constant is the final fallback.
    base_url = next((url for url in candidates if url), OPENROUTER_BASE_URL).rstrip("/")

    # Choose API key based on whether the resolved base_url targets OpenRouter.
    # When hitting OpenRouter, prefer OPENROUTER_API_KEY (issue #289).
    # When hitting a custom endpoint (e.g. Z.ai, local LLM), prefer
    # OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
    # provider (issues #420, #560).
    if "openrouter.ai" in base_url:
        primary_env, fallback_env = "OPENROUTER_API_KEY", "OPENAI_API_KEY"
    else:
        primary_env, fallback_env = "OPENAI_API_KEY", "OPENROUTER_API_KEY"
    api_key = explicit_api_key or os.getenv(primary_env) or os.getenv(fallback_env) or ""

    if explicit_api_key or explicit_base_url:
        source = "explicit"
    else:
        source = "env/config"

    return {
        "provider": "openrouter",
        "api_mode": "chat_completions",
        "base_url": base_url,
        "api_key": api_key,
        "source": source,
    }


def _env_int(name: str, default: int) -> int:
    """Return env var *name* parsed as an int, or *default* if unset, blank, or malformed."""
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        # A typo in an optional tuning knob should not abort provider resolution.
        return default


def _env_float(name: str, default: float) -> float:
    """Return env var *name* parsed as a float, or *default* if unset, blank, or malformed."""
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return float(raw)
    except ValueError:
        # Same policy as _env_int: fall back rather than crash on bad input.
        return default


def resolve_runtime_provider(
    *,
    requested: Optional[str] = None,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Dict[str, Any]:
    """Resolve runtime provider credentials for agent execution.

    Args:
        requested: Provider name requested by the caller. When blank, the
            ``HERMES_INFERENCE_PROVIDER`` env var and then the model config
            are consulted via ``resolve_requested_provider``.
        explicit_api_key: API key supplied directly by the caller, if any.
        explicit_base_url: Base URL supplied directly by the caller, if any.

    Returns:
        A dict containing ``provider``, ``api_mode``, ``base_url``,
        ``api_key``, ``source`` and ``requested_provider``. The ``nous``
        branch additionally sets ``expires_at`` and the ``openai-codex``
        branch additionally sets ``last_refresh``.

    Exceptions raised by the ``hermes_cli.auth`` resolvers are not caught
    here and propagate to the caller.
    """
    requested_provider = resolve_requested_provider(requested)

    provider = resolve_provider(
        requested_provider,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )

    if provider == "nous":
        # Both knobs are optional overrides; blank or unparsable values fall
        # back to the defaults instead of raising ValueError. The TTL is
        # floored at 60 seconds.
        creds = resolve_nous_runtime_credentials(
            min_key_ttl_seconds=max(60, _env_int("HERMES_NOUS_MIN_KEY_TTL_SECONDS", 1800)),
            timeout_seconds=_env_float("HERMES_NOUS_TIMEOUT_SECONDS", 15.0),
        )
        return {
            "provider": "nous",
            "api_mode": "chat_completions",
            "base_url": creds.get("base_url", "").rstrip("/"),
            "api_key": creds.get("api_key", ""),
            "source": creds.get("source", "portal"),
            "expires_at": creds.get("expires_at"),
            "requested_provider": requested_provider,
        }

    if provider == "openai-codex":
        creds = resolve_codex_runtime_credentials()
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": creds.get("base_url", "").rstrip("/"),
            "api_key": creds.get("api_key", ""),
            "source": creds.get("source", "hermes-auth-store"),
            "last_refresh": creds.get("last_refresh"),
            "requested_provider": requested_provider,
        }

    # API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
        creds = resolve_api_key_provider_credentials(provider)
        return {
            "provider": provider,
            "api_mode": "chat_completions",
            "base_url": creds.get("base_url", "").rstrip("/"),
            "api_key": creds.get("api_key", ""),
            "source": creds.get("source", "env"),
            "requested_provider": requested_provider,
        }

    # Anything not handled above goes through the OpenRouter /
    # OpenAI-compatible resolution path.
    runtime = _resolve_openrouter_runtime(
        requested_provider=requested_provider,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
    runtime["requested_provider"] = requested_provider
    return runtime


def format_runtime_provider_error(error: Exception) -> str:
    """Turn a provider-resolution exception into a message string.

    ``AuthError`` instances are rendered by ``format_auth_error``; any other
    exception is rendered with ``str()``.
    """
    return format_auth_error(error) if isinstance(error, AuthError) else str(error)
Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md) 2026-02-25 18:20:38 -08:00			`"""Shared runtime provider resolution for CLI, gateway, cron, and helpers."""`

			`from __future__ import annotations`

			`import os`
			`from typing import Any, Dict, Optional`

			`from hermes_cli.auth import (`
			`AuthError,`
feat: add z.ai/GLM, Kimi/Moonshot, MiniMax as first-class providers Adds 4 new direct API-key providers (zai, kimi-coding, minimax, minimax-cn) to the inference provider system. All use standard OpenAI-compatible chat/completions endpoints with Bearer token auth. Core changes: - auth.py: Extended ProviderConfig with api_key_env_vars and base_url_env_var fields. Added providers to PROVIDER_REGISTRY. Added provider aliases (glm, z-ai, zhipu, kimi, moonshot). Added auto-detection of API-key providers in resolve_provider(). Added resolve_api_key_provider_credentials() and get_api_key_provider_status() helpers. - runtime_provider.py: Added generic API-key provider branch in resolve_runtime_provider() — any provider with auth_type='api_key' is automatically handled. - main.py: Added providers to hermes model menu with generic _model_flow_api_key_provider() flow. Updated _has_any_provider_configured() to check all provider env vars. Updated argparse --provider choices. - setup.py: Added providers to setup wizard with API key prompts and curated model lists. - config.py: Added env vars (GLM_API_KEY, KIMI_API_KEY, MINIMAX_API_KEY, etc.) to OPTIONAL_ENV_VARS. - status.py: Added API key display and provider status section. - doctor.py: Added connectivity checks for each provider endpoint. - cli.py: Updated provider docstrings. Docs: Updated README.md, .env.example, cli-config.yaml.example, cli-commands.md, environment-variables.md, configuration.md. Tests: 50 new tests covering registry, aliases, resolution, auto-detection, credential resolution, and runtime provider dispatch. Inspired by PR #33 (numman-ali) which proposed a provider registry approach. Credit to tars90percent (PR #473) and manuelschipper (PR #420) for related provider improvements merged earlier in this changeset. 2026-03-06 18:55:12 -08:00			`PROVIDER_REGISTRY,`
Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md) 2026-02-25 18:20:38 -08:00			`format_auth_error,`
			`resolve_provider,`
			`resolve_nous_runtime_credentials,`
			`resolve_codex_runtime_credentials,`
feat: add z.ai/GLM, Kimi/Moonshot, MiniMax as first-class providers Adds 4 new direct API-key providers (zai, kimi-coding, minimax, minimax-cn) to the inference provider system. All use standard OpenAI-compatible chat/completions endpoints with Bearer token auth. Core changes: - auth.py: Extended ProviderConfig with api_key_env_vars and base_url_env_var fields. Added providers to PROVIDER_REGISTRY. Added provider aliases (glm, z-ai, zhipu, kimi, moonshot). Added auto-detection of API-key providers in resolve_provider(). Added resolve_api_key_provider_credentials() and get_api_key_provider_status() helpers. - runtime_provider.py: Added generic API-key provider branch in resolve_runtime_provider() — any provider with auth_type='api_key' is automatically handled. - main.py: Added providers to hermes model menu with generic _model_flow_api_key_provider() flow. Updated _has_any_provider_configured() to check all provider env vars. Updated argparse --provider choices. - setup.py: Added providers to setup wizard with API key prompts and curated model lists. - config.py: Added env vars (GLM_API_KEY, KIMI_API_KEY, MINIMAX_API_KEY, etc.) to OPTIONAL_ENV_VARS. - status.py: Added API key display and provider status section. - doctor.py: Added connectivity checks for each provider endpoint. - cli.py: Updated provider docstrings. Docs: Updated README.md, .env.example, cli-config.yaml.example, cli-commands.md, environment-variables.md, configuration.md. Tests: 50 new tests covering registry, aliases, resolution, auto-detection, credential resolution, and runtime provider dispatch. Inspired by PR #33 (numman-ali) which proposed a provider registry approach. Credit to tars90percent (PR #473) and manuelschipper (PR #420) for related provider improvements merged earlier in this changeset. 2026-03-06 18:55:12 -08:00			`resolve_api_key_provider_credentials,`
Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md) 2026-02-25 18:20:38 -08:00			`)`
			`from hermes_cli.config import load_config`
			`from hermes_constants import OPENROUTER_BASE_URL`


			`def _get_model_config() -> Dict[str, Any]:`
			`config = load_config()`
			`model_cfg = config.get("model")`
			`if isinstance(model_cfg, dict):`
			`return dict(model_cfg)`
			`if isinstance(model_cfg, str) and model_cfg.strip():`
			`return {"default": model_cfg.strip()}`
			`return {}`


			`def resolve_requested_provider(requested: Optional[str] = None) -> str:`
			`"""Resolve provider request from explicit arg, env, then config."""`
			`if requested and requested.strip():`
			`return requested.strip().lower()`

			`env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()`
			`if env_provider:`
			`return env_provider`

			`model_cfg = _get_model_config()`
			`cfg_provider = model_cfg.get("provider")`
			`if isinstance(cfg_provider, str) and cfg_provider.strip():`
			`return cfg_provider.strip().lower()`

			`return "auto"`


			`def _resolve_openrouter_runtime(`
			`*,`
			`requested_provider: str,`
			`explicit_api_key: Optional[str] = None,`
			`explicit_base_url: Optional[str] = None,`
			`) -> Dict[str, Any]:`
			`model_cfg = _get_model_config()`
			`cfg_base_url = model_cfg.get("base_url") if isinstance(model_cfg.get("base_url"), str) else ""`
			`cfg_provider = model_cfg.get("provider") if isinstance(model_cfg.get("provider"), str) else ""`
			`requested_norm = (requested_provider or "").strip().lower()`
			`cfg_provider = cfg_provider.strip().lower()`

			`env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()`
			`env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()`

			`use_config_base_url = False`
			`if requested_norm == "auto":`
			`if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url:`
			`if not cfg_provider or cfg_provider == "auto":`
			`use_config_base_url = True`

fix: provider selection not persisting when switching via hermes model Two related bugs prevented users from reliably switching providers: 1. OPENAI_BASE_URL poisoning OpenRouter resolution: When a user with a custom endpoint ran /model openrouter:model, _resolve_openrouter_runtime picked up OPENAI_BASE_URL instead of the OpenRouter URL, causing model validation to probe the wrong API and reject valid models. Fix: skip OPENAI_BASE_URL when requested_provider is explicitly 'openrouter'. 2. Provider never saved to config: _save_model_choice() could save config.model as a plain string. All five _model_flow_* functions then checked isinstance(model, dict) before writing the provider — which silently failed on strings. With no provider in config, auto-detection would pick up stale credentials (e.g. Codex desktop app) instead of the user's explicit choice. Fix: _save_model_choice() now always saves as dict format. All flow functions also normalize string->dict as a safety net before writing provider. Adds 4 regression tests. 2873 tests pass. 2026-03-10 17:12:34 -07:00			`# When the user explicitly requested the openrouter provider, skip`
			`# OPENAI_BASE_URL — it typically points to a custom / non-OpenRouter`
			`# endpoint and would prevent switching back to OpenRouter (#874).`
			`skip_openai_base = requested_norm == "openrouter"`

Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md) 2026-02-25 18:20:38 -08:00			`base_url = (`
			`(explicit_base_url or "").strip()`
fix: provider selection not persisting when switching via hermes model Two related bugs prevented users from reliably switching providers: 1. OPENAI_BASE_URL poisoning OpenRouter resolution: When a user with a custom endpoint ran /model openrouter:model, _resolve_openrouter_runtime picked up OPENAI_BASE_URL instead of the OpenRouter URL, causing model validation to probe the wrong API and reject valid models. Fix: skip OPENAI_BASE_URL when requested_provider is explicitly 'openrouter'. 2. Provider never saved to config: _save_model_choice() could save config.model as a plain string. All five _model_flow_* functions then checked isinstance(model, dict) before writing the provider — which silently failed on strings. With no provider in config, auto-detection would pick up stale credentials (e.g. Codex desktop app) instead of the user's explicit choice. Fix: _save_model_choice() now always saves as dict format. All flow functions also normalize string->dict as a safety net before writing provider. Adds 4 regression tests. 2873 tests pass. 2026-03-10 17:12:34 -07:00			`or ("" if skip_openai_base else env_openai_base_url)`
Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md) 2026-02-25 18:20:38 -08:00			`or (cfg_base_url.strip() if use_config_base_url else "")`
			`or env_openrouter_base_url`
			`or OPENROUTER_BASE_URL`
			`).rstrip("/")`

fix: custom endpoint no longer leaks OPENROUTER_API_KEY (#560) API key selection is now base_url-aware: when the resolved base_url targets OpenRouter, OPENROUTER_API_KEY takes priority (preserving the #289 fix). When hitting any other endpoint (Z.ai, vLLM, custom, etc.), OPENAI_API_KEY takes priority so the OpenRouter key doesn't leak. Applied in both the runtime provider resolver (the real code path) and the CLI initial default (for consistency). Fixes #560. 2026-03-06 17:16:14 -08:00			`# Choose API key based on whether the resolved base_url targets OpenRouter.`
			`# When hitting OpenRouter, prefer OPENROUTER_API_KEY (issue #289).`
Merge PR #420: fix: respect OPENAI_BASE_URL when resolving API key priority Authored by manuelschipper. Adds GLM-4.7 and GLM-5 context lengths (202752) to model_metadata.py. The key priority fix (prefer OPENAI_API_KEY for non-OpenRouter endpoints) was already applied in PR #295; merged the Z.ai mention into the comment. 2026-03-06 18:43:13 -08:00			`# When hitting a custom endpoint (e.g. Z.ai, local LLM), prefer`
			`# OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated`
			`# provider (issues #420, #560).`
fix: custom endpoint no longer leaks OPENROUTER_API_KEY (#560) API key selection is now base_url-aware: when the resolved base_url targets OpenRouter, OPENROUTER_API_KEY takes priority (preserving the #289 fix). When hitting any other endpoint (Z.ai, vLLM, custom, etc.), OPENAI_API_KEY takes priority so the OpenRouter key doesn't leak. Applied in both the runtime provider resolver (the real code path) and the CLI initial default (for consistency). Fixes #560. 2026-03-06 17:16:14 -08:00			`_is_openrouter_url = "openrouter.ai" in base_url`
			`if _is_openrouter_url:`
			`api_key = (`
			`explicit_api_key`
			`or os.getenv("OPENROUTER_API_KEY")`
			`or os.getenv("OPENAI_API_KEY")`
			`or ""`
			`)`
			`else:`
			`api_key = (`
			`explicit_api_key`
			`or os.getenv("OPENAI_API_KEY")`
			`or os.getenv("OPENROUTER_API_KEY")`
			`or ""`
			`)`
Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md) 2026-02-25 18:20:38 -08:00
			`source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config"`

			`return {`
			`"provider": "openrouter",`
			`"api_mode": "chat_completions",`
			`"base_url": base_url,`
			`"api_key": api_key,`
			`"source": source,`
			`}`


			`def resolve_runtime_provider(`
			`*,`
			`requested: Optional[str] = None,`
			`explicit_api_key: Optional[str] = None,`
			`explicit_base_url: Optional[str] = None,`
			`) -> Dict[str, Any]:`
			`"""Resolve runtime provider credentials for agent execution."""`
			`requested_provider = resolve_requested_provider(requested)`

			`provider = resolve_provider(`
			`requested_provider,`
			`explicit_api_key=explicit_api_key,`
			`explicit_base_url=explicit_base_url,`
			`)`

			`if provider == "nous":`
			`creds = resolve_nous_runtime_credentials(`
			`min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),`
			`timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),`
			`)`
			`return {`
			`"provider": "nous",`
			`"api_mode": "chat_completions",`
			`"base_url": creds.get("base_url", "").rstrip("/"),`
			`"api_key": creds.get("api_key", ""),`
			`"source": creds.get("source", "portal"),`
			`"expires_at": creds.get("expires_at"),`
			`"requested_provider": requested_provider,`
			`}`

			`if provider == "openai-codex":`
			`creds = resolve_codex_runtime_credentials()`
			`return {`
			`"provider": "openai-codex",`
			`"api_mode": "codex_responses",`
			`"base_url": creds.get("base_url", "").rstrip("/"),`
			`"api_key": creds.get("api_key", ""),`
refactor(auth): transition Codex OAuth tokens to Hermes auth store Updated the authentication mechanism to store Codex OAuth tokens in the Hermes auth store located at ~/.hermes/auth.json instead of the previous ~/.codex/auth.json. This change includes refactoring related functions for reading and saving tokens, ensuring better management of authentication states and preventing conflicts between different applications. Adjusted tests to reflect the new storage structure and improved error handling for missing or malformed tokens. 2026-03-01 19:59:24 -08:00			`"source": creds.get("source", "hermes-auth-store"),`
Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md) 2026-02-25 18:20:38 -08:00			`"last_refresh": creds.get("last_refresh"),`
			`"requested_provider": requested_provider,`
			`}`

feat: add z.ai/GLM, Kimi/Moonshot, MiniMax as first-class providers Adds 4 new direct API-key providers (zai, kimi-coding, minimax, minimax-cn) to the inference provider system. All use standard OpenAI-compatible chat/completions endpoints with Bearer token auth. Core changes: - auth.py: Extended ProviderConfig with api_key_env_vars and base_url_env_var fields. Added providers to PROVIDER_REGISTRY. Added provider aliases (glm, z-ai, zhipu, kimi, moonshot). Added auto-detection of API-key providers in resolve_provider(). Added resolve_api_key_provider_credentials() and get_api_key_provider_status() helpers. - runtime_provider.py: Added generic API-key provider branch in resolve_runtime_provider() — any provider with auth_type='api_key' is automatically handled. - main.py: Added providers to hermes model menu with generic _model_flow_api_key_provider() flow. Updated _has_any_provider_configured() to check all provider env vars. Updated argparse --provider choices. - setup.py: Added providers to setup wizard with API key prompts and curated model lists. - config.py: Added env vars (GLM_API_KEY, KIMI_API_KEY, MINIMAX_API_KEY, etc.) to OPTIONAL_ENV_VARS. - status.py: Added API key display and provider status section. - doctor.py: Added connectivity checks for each provider endpoint. - cli.py: Updated provider docstrings. Docs: Updated README.md, .env.example, cli-config.yaml.example, cli-commands.md, environment-variables.md, configuration.md. Tests: 50 new tests covering registry, aliases, resolution, auto-detection, credential resolution, and runtime provider dispatch. Inspired by PR #33 (numman-ali) which proposed a provider registry approach. Credit to tars90percent (PR #473) and manuelschipper (PR #420) for related provider improvements merged earlier in this changeset. 2026-03-06 18:55:12 -08:00			`# API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)`
			`pconfig = PROVIDER_REGISTRY.get(provider)`
			`if pconfig and pconfig.auth_type == "api_key":`
			`creds = resolve_api_key_provider_credentials(provider)`
			`return {`
			`"provider": provider,`
			`"api_mode": "chat_completions",`
			`"base_url": creds.get("base_url", "").rstrip("/"),`
			`"api_key": creds.get("api_key", ""),`
			`"source": creds.get("source", "env"),`
			`"requested_provider": requested_provider,`
			`}`

Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md) 2026-02-25 18:20:38 -08:00			`runtime = _resolve_openrouter_runtime(`
			`requested_provider=requested_provider,`
			`explicit_api_key=explicit_api_key,`
			`explicit_base_url=explicit_base_url,`
			`)`
			`runtime["requested_provider"] = requested_provider`
			`return runtime`


			`def format_runtime_provider_error(error: Exception) -> str:`
			`if isinstance(error, AuthError):`
			`return format_auth_error(error)`
			`return str(error)`