From 8bc2de4ab696b46864f08b78754f2053452ec189 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sun, 1 Mar 2026 18:24:27 -0800 Subject: [PATCH] feat(provider-routing): add OpenRouter provider routing configuration Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection. --- CONTRIBUTING.md | 1 + README.md | 30 +++++ cli-config.yaml.example | 26 +++++ cli.py | 15 +++ gateway/run.py | 22 ++++ run_agent.py | 22 ++++ tests/test_codex_execution_paths.py | 1 + tests/test_provider_parity.py | 171 +++++++++++++++++++++++++++- 8 files changed, 287 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 289605319..fab230de4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -218,6 +218,7 @@ User message → AIAgent._run_agent_loop() - **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search. JSON logs go to `~/.hermes/sessions/`. - **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs. - **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint). +- **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests. 
--- diff --git a/README.md b/README.md index 531a3049e..0ef3cfb4e 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,24 @@ The `hermes config set` command automatically routes values to the right file | RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` | | Cross-session user modeling | [Honcho](https://honcho.dev/) | `HONCHO_API_KEY` | +### OpenRouter Provider Routing + +When using OpenRouter, you can control how requests are routed across providers. Add a `provider_routing` section to `~/.hermes/config.yaml`: + +```yaml +provider_routing: + sort: "throughput" # "price" (default), "throughput", or "latency" + # only: ["anthropic"] # Only use these providers + # ignore: ["deepinfra"] # Skip these providers + # order: ["anthropic", "google"] # Try providers in this order + # require_parameters: true # Only use providers that support all request params + # data_collection: "deny" # Exclude providers that may store/train on data +``` + +**Shortcuts:** Append `:nitro` to any model name for throughput sorting (e.g., `anthropic/claude-sonnet-4:nitro`), or `:floor` for price sorting. + +See [OpenRouter provider routing docs](https://openrouter.ai/docs/guides/routing/provider-selection) for all available options including quantization filtering, performance thresholds, and zero data retention. + --- ## Messaging Gateway @@ -1634,6 +1652,18 @@ All variables go in `~/.hermes/.env`. 
Run `hermes config set VAR value` to set t | Variable | Description | |----------|-------------| | `HERMES_MAX_ITERATIONS` | Max tool-calling iterations per conversation (default: 60) | +| `HERMES_TOOL_PROGRESS` | Send progress messages when using tools (`true`/`false`) | +| `HERMES_TOOL_PROGRESS_MODE` | `all` (every call, default) or `new` (only when tool changes) | + +**Provider Routing (config.yaml only — `provider_routing` section):** +| Key | Description | +|-----|-------------| +| `sort` | Sort providers: `"price"` (default), `"throughput"`, or `"latency"` | +| `only` | List of provider slugs to allow (e.g., `["anthropic", "google"]`) | +| `ignore` | List of provider slugs to skip (e.g., `["deepinfra"]`) | +| `order` | List of provider slugs to try in order | +| `require_parameters` | Only use providers supporting all request params (`true`/`false`) | +| `data_collection` | `"allow"` (default) or `"deny"` to exclude data-storing providers | **Context Compression:** | Variable | Description | diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 72b2f572b..f7f112548 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -20,6 +20,32 @@ model: # api_key: "your-key-here" # Uncomment to set here instead of .env base_url: "https://openrouter.ai/api/v1" +# ============================================================================= +# OpenRouter Provider Routing (only applies when using OpenRouter) +# ============================================================================= +# Control how requests are routed across providers on OpenRouter. +# See: https://openrouter.ai/docs/guides/routing/provider-selection +# +# provider_routing: +# # Sort strategy: "price" (default), "throughput", or "latency" +# # Append :nitro to model name for a shortcut to throughput sorting. 
+# sort: "throughput" +# +# # Only allow these providers (provider slugs from OpenRouter) +# # only: ["anthropic", "google"] +# +# # Skip these providers entirely +# # ignore: ["deepinfra", "fireworks"] +# +# # Try providers in this order (overrides default load balancing) +# # order: ["anthropic", "google", "together"] +# +# # Require providers to support all parameters in your request +# # require_parameters: true +# +# # Data policy: "allow" (default) or "deny" to exclude providers that may store data +# # data_collection: "deny" + # ============================================================================= # Terminal Tool Configuration # ============================================================================= diff --git a/cli.py b/cli.py index 59fc904e0..09ec28ebf 100755 --- a/cli.py +++ b/cli.py @@ -880,6 +880,15 @@ class HermesCLI: CLI_CONFIG["agent"].get("reasoning_effort", "") ) + # OpenRouter provider routing preferences + pr = CLI_CONFIG.get("provider_routing", {}) or {} + self._provider_sort = pr.get("sort") + self._providers_only = pr.get("only") + self._providers_ignore = pr.get("ignore") + self._providers_order = pr.get("order") + self._provider_require_params = pr.get("require_parameters", False) + self._provider_data_collection = pr.get("data_collection") + # Agent will be initialized on first use self.agent: Optional[AIAgent] = None self._app = None # prompt_toolkit Application (set in run()) @@ -1016,6 +1025,12 @@ class HermesCLI: ephemeral_system_prompt=self.system_prompt if self.system_prompt else None, prefill_messages=self.prefill_messages or None, reasoning_config=self.reasoning_config, + providers_allowed=self._providers_only, + providers_ignored=self._providers_ignore, + providers_order=self._providers_order, + provider_sort=self._provider_sort, + provider_require_parameters=self._provider_require_params, + provider_data_collection=self._provider_data_collection, session_id=self.session_id, platform="cli", 
session_db=self._session_db,
diff --git a/gateway/run.py b/gateway/run.py
index bc778f103..6f043d448 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -164,6 +164,7 @@ class GatewayRunner:
         self._prefill_messages = self._load_prefill_messages()
         self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
         self._reasoning_config = self._load_reasoning_config()
+        self._provider_routing = self._load_provider_routing()
 
         # Wire process registry into session store for reset protection
         from tools.process_registry import process_registry
@@ -346,6 +347,31 @@ class GatewayRunner:
         logger.warning("Unknown reasoning_effort '%s', using default (xhigh)", effort)
         return None
 
+    @staticmethod
+    def _load_provider_routing() -> dict:
+        """Load OpenRouter provider routing preferences from config.yaml.
+
+        Best-effort: returns the ``provider_routing`` mapping, or ``{}`` when
+        the file is missing, cannot be read/parsed, or the section is absent
+        or not a mapping. Failures are logged rather than raised.
+        """
+        try:
+            import yaml as _y
+            cfg_path = _hermes_home / "config.yaml"
+            if cfg_path.exists():
+                with open(cfg_path, encoding="utf-8") as _f:
+                    cfg = _y.safe_load(_f) or {}
+                routing = cfg.get("provider_routing") if isinstance(cfg, dict) else None
+                if isinstance(routing, dict):
+                    return routing
+                if routing is not None:
+                    # The result is consumed with .get(); a list/scalar would crash there.
+                    logger.warning("Ignoring provider_routing in config.yaml: expected a mapping")
+        except Exception as exc:
+            # Best-effort: a broken config should degrade to defaults, not raise.
+            logger.warning("Could not load provider_routing from config.yaml: %s", exc)
+        return {}
+
     async def start(self) -> bool:
         """
         Start the gateway and all configured platform adapters.
@@ -1824,6 +1839,7 @@ class GatewayRunner:
                     "tools": [],
                 }
 
+            pr = self._provider_routing
             agent = AIAgent(
                 model=model,
                 **runtime_kwargs,
@@ -1834,6 +1850,12 @@ class GatewayRunner:
                 ephemeral_system_prompt=combined_ephemeral or None,
                 prefill_messages=self._prefill_messages or None,
                 reasoning_config=self._reasoning_config,
+                providers_allowed=pr.get("only"),
+                providers_ignored=pr.get("ignore"),
+                providers_order=pr.get("order"),
+                provider_sort=pr.get("sort"),
+                provider_require_parameters=pr.get("require_parameters", False),
+                provider_data_collection=pr.get("data_collection"),
                 session_id=session_id,
                 tool_progress_callback=progress_callback if tool_progress_enabled else None,
                 step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None,
diff --git a/run_agent.py b/run_agent.py
index f30b65af5..7d9d5a2c4 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -126,6 +126,8 @@ class AIAgent:
         providers_ignored: List[str] = None,
         providers_order: List[str] = None,
         provider_sort: str = None,
+        provider_require_parameters: bool = False,
+        provider_data_collection: str = None,
         session_id: str = None,
         tool_progress_callback: callable = None,
         clarify_callback: callable = None,
@@ -230,6 +232,8 @@ class AIAgent:
         self.providers_ignored = providers_ignored
         self.providers_order = providers_order
         self.provider_sort = provider_sort
+        self.provider_require_parameters = provider_require_parameters
+        self.provider_data_collection = provider_data_collection
 
         # Store toolset filtering options
         self.enabled_toolsets = enabled_toolsets
@@ -2083,6 +2087,10 @@ class AIAgent:
             provider_preferences["order"] = self.providers_order
         if self.provider_sort:
             provider_preferences["sort"] = self.provider_sort
+        if self.provider_require_parameters:
+            provider_preferences["require_parameters"] = True
+        if self.provider_data_collection:
+            provider_preferences["data_collection"] = self.provider_data_collection
 
         api_kwargs = {
             "model": self.model,
@@ -2651,6 +2659,25 @@ class AIAgent:
                 }
                 if self.max_tokens is not None:
                     summary_kwargs.update(self._max_tokens_param(self.max_tokens))
+
+                # Include provider routing preferences. Mirror every key the main
+                # request path sends so summary calls obey the same data policy.
+                provider_preferences = {}
+                if self.providers_allowed:
+                    provider_preferences["only"] = self.providers_allowed
+                if self.providers_ignored:
+                    provider_preferences["ignore"] = self.providers_ignored
+                if self.providers_order:
+                    provider_preferences["order"] = self.providers_order
+                if self.provider_sort:
+                    provider_preferences["sort"] = self.provider_sort
+                if self.provider_require_parameters:
+                    provider_preferences["require_parameters"] = True
+                if self.provider_data_collection:
+                    provider_preferences["data_collection"] = self.provider_data_collection
+                if provider_preferences:
+                    summary_extra_body["provider"] = provider_preferences
+
                 if summary_extra_body:
                     summary_kwargs["extra_body"] = summary_extra_body
 
diff --git a/tests/test_codex_execution_paths.py b/tests/test_codex_execution_paths.py
index ef24f02b5..892053831 100644
--- a/tests/test_codex_execution_paths.py
+++ b/tests/test_codex_execution_paths.py
@@ -148,6 +148,7 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
     runner._ephemeral_system_prompt = ""
     runner._prefill_messages = []
     runner._reasoning_config = None
+    runner._provider_routing = {}
     runner._running_agents = {}
     from unittest.mock import MagicMock, AsyncMock
     runner.hooks = MagicMock()
diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py
index 82199ac4c..5b8508e64 100644
--- a/tests/test_provider_parity.py
+++ b/tests/test_provider_parity.py
@@ -145,7 +145,7 @@ class TestBuildApiKwargsCodex:
         messages = [{"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
         assert "reasoning" in kwargs
-        assert kwargs["reasoning"]["effort"] == "medium"
+        assert kwargs["reasoning"]["effort"] == "xhigh"
 
     def test_includes_encrypted_content_in_include(self, monkeypatch):
         agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
@@ -458,3 +458,172 @@ class TestAuxiliaryClientProviderPriority:
         client, model = get_text_auxiliary_client()
         assert model == "gpt-5.3-codex"
         assert isinstance(client, CodexAuxiliaryClient)
+
+
+# ── Provider routing tests
───────────────────────────────────────────────────
+
+class TestProviderRouting:
+    """Verify provider_routing config flows into extra_body.provider."""
+
+    def test_sort_throughput(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.provider_sort = "throughput"
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert kwargs["extra_body"]["provider"]["sort"] == "throughput"
+
+    def test_only_providers(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.providers_allowed = ["anthropic", "google"]
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert kwargs["extra_body"]["provider"]["only"] == ["anthropic", "google"]
+
+    def test_ignore_providers(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.providers_ignored = ["deepinfra"]
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert kwargs["extra_body"]["provider"]["ignore"] == ["deepinfra"]
+
+    def test_order_providers(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.providers_order = ["anthropic", "together"]
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert kwargs["extra_body"]["provider"]["order"] == ["anthropic", "together"]
+
+    def test_require_parameters(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.provider_require_parameters = True
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert kwargs["extra_body"]["provider"]["require_parameters"] is True
+
+    def test_data_collection_deny(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.provider_data_collection = "deny"
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert kwargs["extra_body"]["provider"]["data_collection"] == "deny"
+
+    def test_no_routing_when_unset(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        # An absent or empty provider block both mean "no routing sent"; any
+        # populated block here means unset preferences leaked into the request.
+        assert not (kwargs.get("extra_body") or {}).get("provider")
+
+    def test_combined_routing(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.provider_sort = "latency"
+        agent.providers_ignored = ["deepinfra"]
+        agent.provider_data_collection = "deny"
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        prov = kwargs["extra_body"]["provider"]
+        assert prov["sort"] == "latency"
+        assert prov["ignore"] == ["deepinfra"]
+        assert prov["data_collection"] == "deny"
+
+    def test_routing_not_injected_for_codex(self, monkeypatch):
+        """Codex Responses API doesn't use extra_body.provider."""
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        agent.provider_sort = "throughput"
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert "extra_body" not in kwargs
+        assert "provider" not in kwargs or kwargs.get("provider") is None
+
+
+# ── Codex reasoning items preflight tests ────────────────────────────────────
+
+class TestCodexReasoningPreflight:
+    """Verify reasoning items pass through preflight normalization."""
+
+    def test_reasoning_item_passes_through(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        raw_input = [
+            {"role": "user", "content": "hello"},
+            {"type": "reasoning", "encrypted_content": "abc123encrypted", "id": "r_001"},
+            {"role": "assistant", "content": "hi there"},
+        ]
+        normalized = agent._preflight_codex_input_items(raw_input)
+        reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
+        assert len(reasoning_items) == 1
+        assert reasoning_items[0]["encrypted_content"] == "abc123encrypted"
+        assert reasoning_items[0]["id"] == "r_001"
+
+    def test_reasoning_item_without_id(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        raw_input = [
+            {"type": "reasoning", "encrypted_content": "abc123"},
+        ]
+        normalized = agent._preflight_codex_input_items(raw_input)
+        assert len(normalized) == 1
+        assert "id" not in normalized[0]
+
+    def test_reasoning_item_empty_encrypted_skipped(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        raw_input = [
+            {"type": "reasoning", "encrypted_content": ""},
+            {"role": "user", "content": "hello"},
+        ]
+        normalized = agent._preflight_codex_input_items(raw_input)
+        reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
+        assert len(reasoning_items) == 0
+
+    def test_reasoning_items_replayed_from_history(self, monkeypatch):
+        """Reasoning items stored in codex_reasoning_items get replayed."""
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        messages = [
+            {"role": "user", "content": "hello"},
+            {
+                "role": "assistant",
+                "content": "hi",
+                "codex_reasoning_items": [
+                    {"type": "reasoning", "encrypted_content": "enc123", "id": "r_1"},
+                ],
+            },
+            {"role": "user", "content": "follow up"},
+        ]
+        items = agent._chat_messages_to_responses_input(messages)
+        reasoning_items = [i for i in items if isinstance(i, dict) and i.get("type") == "reasoning"]
+        assert len(reasoning_items) == 1
+        assert reasoning_items[0]["encrypted_content"] == "enc123"
+
+
+# ── Reasoning effort consistency tests ───────────────────────────────────────
+
+class TestReasoningEffortDefaults:
+    """Verify reasoning effort defaults to xhigh across all provider paths."""
+
+    def test_openrouter_default_xhigh(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        reasoning = kwargs["extra_body"]["reasoning"]
+        assert reasoning["effort"] == "xhigh"
+
+    def test_codex_default_xhigh(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert kwargs["reasoning"]["effort"] == "xhigh"
+
+    def test_codex_reasoning_disabled(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        agent.reasoning_config = {"enabled": False}
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert "reasoning" not in kwargs
+        assert kwargs["include"] == []
+
+    def test_codex_reasoning_low(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        agent.reasoning_config = {"enabled": True, "effort": "low"}
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert kwargs["reasoning"]["effort"] == "low"
+
+    def test_openrouter_reasoning_config_override(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.reasoning_config = {"enabled": True, "effort": "medium"}
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"