diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 289605319..fab230de4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -218,6 +218,7 @@ User message → AIAgent._run_agent_loop() - **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search. JSON logs go to `~/.hermes/sessions/`. - **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs. - **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint). +- **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests. --- diff --git a/README.md b/README.md index 531a3049e..0ef3cfb4e 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,24 @@ The `hermes config set` command automatically routes values to the right file | RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` | | Cross-session user modeling | [Honcho](https://honcho.dev/) | `HONCHO_API_KEY` | +### OpenRouter Provider Routing + +When using OpenRouter, you can control how requests are routed across providers. 
Add a `provider_routing` section to `~/.hermes/config.yaml`: + +```yaml +provider_routing: + sort: "throughput" # "price" (default), "throughput", or "latency" + # only: ["anthropic"] # Only use these providers + # ignore: ["deepinfra"] # Skip these providers + # order: ["anthropic", "google"] # Try providers in this order + # require_parameters: true # Only use providers that support all request params + # data_collection: "deny" # Exclude providers that may store/train on data +``` + +**Shortcuts:** Append `:nitro` to any model name for throughput sorting (e.g., `anthropic/claude-sonnet-4:nitro`), or `:floor` for price sorting. + +See [OpenRouter provider routing docs](https://openrouter.ai/docs/guides/routing/provider-selection) for all available options including quantization filtering, performance thresholds, and zero data retention. + --- ## Messaging Gateway @@ -1634,6 +1652,18 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t | Variable | Description | |----------|-------------| | `HERMES_MAX_ITERATIONS` | Max tool-calling iterations per conversation (default: 60) | +| `HERMES_TOOL_PROGRESS` | Send progress messages when using tools (`true`/`false`) | +| `HERMES_TOOL_PROGRESS_MODE` | `all` (every call, default) or `new` (only when tool changes) | + +**Provider Routing (config.yaml only — `provider_routing` section):** +| Key | Description | +|-----|-------------| +| `sort` | Sort providers: `"price"` (default), `"throughput"`, or `"latency"` | +| `only` | List of provider slugs to allow (e.g., `["anthropic", "google"]`) | +| `ignore` | List of provider slugs to skip (e.g., `["deepinfra"]`) | +| `order` | List of provider slugs to try in order | +| `require_parameters` | Only use providers supporting all request params (`true`/`false`) | +| `data_collection` | `"allow"` (default) or `"deny"` to exclude data-storing providers | **Context Compression:** | Variable | Description | diff --git a/cli-config.yaml.example 
b/cli-config.yaml.example index 72b2f572b..f7f112548 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -20,6 +20,32 @@ model: # api_key: "your-key-here" # Uncomment to set here instead of .env base_url: "https://openrouter.ai/api/v1" +# ============================================================================= +# OpenRouter Provider Routing (only applies when using OpenRouter) +# ============================================================================= +# Control how requests are routed across providers on OpenRouter. +# See: https://openrouter.ai/docs/guides/routing/provider-selection +# +# provider_routing: +# # Sort strategy: "price" (default), "throughput", or "latency" +# # Append :nitro to model name for a shortcut to throughput sorting. +# sort: "throughput" +# +# # Only allow these providers (provider slugs from OpenRouter) +# # only: ["anthropic", "google"] +# +# # Skip these providers entirely +# # ignore: ["deepinfra", "fireworks"] +# +# # Try providers in this order (overrides default load balancing) +# # order: ["anthropic", "google", "together"] +# +# # Require providers to support all parameters in your request +# # require_parameters: true +# +# # Data policy: "allow" (default) or "deny" to exclude providers that may store data +# # data_collection: "deny" + # ============================================================================= # Terminal Tool Configuration # ============================================================================= diff --git a/cli.py b/cli.py index 59fc904e0..09ec28ebf 100755 --- a/cli.py +++ b/cli.py @@ -880,6 +880,15 @@ class HermesCLI: CLI_CONFIG["agent"].get("reasoning_effort", "") ) + # OpenRouter provider routing preferences + pr = CLI_CONFIG.get("provider_routing", {}) or {} + self._provider_sort = pr.get("sort") + self._providers_only = pr.get("only") + self._providers_ignore = pr.get("ignore") + self._providers_order = pr.get("order") + self._provider_require_params = 
pr.get("require_parameters", False) + self._provider_data_collection = pr.get("data_collection") + # Agent will be initialized on first use self.agent: Optional[AIAgent] = None self._app = None # prompt_toolkit Application (set in run()) @@ -1016,6 +1025,12 @@ class HermesCLI: ephemeral_system_prompt=self.system_prompt if self.system_prompt else None, prefill_messages=self.prefill_messages or None, reasoning_config=self.reasoning_config, + providers_allowed=self._providers_only, + providers_ignored=self._providers_ignore, + providers_order=self._providers_order, + provider_sort=self._provider_sort, + provider_require_parameters=self._provider_require_params, + provider_data_collection=self._provider_data_collection, session_id=self.session_id, platform="cli", session_db=self._session_db, diff --git a/gateway/run.py b/gateway/run.py index bc778f103..6f043d448 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -164,6 +164,7 @@ class GatewayRunner: self._prefill_messages = self._load_prefill_messages() self._ephemeral_system_prompt = self._load_ephemeral_system_prompt() self._reasoning_config = self._load_reasoning_config() + self._provider_routing = self._load_provider_routing() # Wire process registry into session store for reset protection from tools.process_registry import process_registry @@ -346,6 +347,20 @@ class GatewayRunner: logger.warning("Unknown reasoning_effort '%s', using default (xhigh)", effort) return None + @staticmethod + def _load_provider_routing() -> dict: + """Load OpenRouter provider routing preferences from config.yaml.""" + try: + import yaml as _y + cfg_path = _hermes_home / "config.yaml" + if cfg_path.exists(): + with open(cfg_path) as _f: + cfg = _y.safe_load(_f) or {} + return cfg.get("provider_routing", {}) or {} + except Exception: + pass + return {} + async def start(self) -> bool: """ Start the gateway and all configured platform adapters. 
@@ -1824,6 +1839,7 @@ class GatewayRunner: "tools": [], } + pr = self._provider_routing agent = AIAgent( model=model, **runtime_kwargs, @@ -1834,6 +1850,12 @@ class GatewayRunner: ephemeral_system_prompt=combined_ephemeral or None, prefill_messages=self._prefill_messages or None, reasoning_config=self._reasoning_config, + providers_allowed=pr.get("only"), + providers_ignored=pr.get("ignore"), + providers_order=pr.get("order"), + provider_sort=pr.get("sort"), + provider_require_parameters=pr.get("require_parameters", False), + provider_data_collection=pr.get("data_collection"), session_id=session_id, tool_progress_callback=progress_callback if tool_progress_enabled else None, step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None, diff --git a/run_agent.py b/run_agent.py index f30b65af5..7d9d5a2c4 100644 --- a/run_agent.py +++ b/run_agent.py @@ -126,6 +126,8 @@ class AIAgent: providers_ignored: List[str] = None, providers_order: List[str] = None, provider_sort: str = None, + provider_require_parameters: bool = False, + provider_data_collection: str = None, session_id: str = None, tool_progress_callback: callable = None, clarify_callback: callable = None, @@ -230,6 +232,8 @@ class AIAgent: self.providers_ignored = providers_ignored self.providers_order = providers_order self.provider_sort = provider_sort + self.provider_require_parameters = provider_require_parameters + self.provider_data_collection = provider_data_collection # Store toolset filtering options self.enabled_toolsets = enabled_toolsets @@ -2083,6 +2087,10 @@ class AIAgent: provider_preferences["order"] = self.providers_order if self.provider_sort: provider_preferences["sort"] = self.provider_sort + if self.provider_require_parameters: + provider_preferences["require_parameters"] = True + if self.provider_data_collection: + provider_preferences["data_collection"] = self.provider_data_collection api_kwargs = { "model": self.model, @@ -2651,6 +2659,20 @@ class AIAgent: } if self.max_tokens 
is not None: summary_kwargs.update(self._max_tokens_param(self.max_tokens)) + + # Include provider routing preferences + provider_preferences = {} + if self.providers_allowed: + provider_preferences["only"] = self.providers_allowed + if self.providers_ignored: + provider_preferences["ignore"] = self.providers_ignored + if self.providers_order: + provider_preferences["order"] = self.providers_order + if self.provider_sort: + provider_preferences["sort"] = self.provider_sort + if provider_preferences: + summary_extra_body["provider"] = provider_preferences + if summary_extra_body: summary_kwargs["extra_body"] = summary_extra_body diff --git a/tests/test_codex_execution_paths.py b/tests/test_codex_execution_paths.py index ef24f02b5..892053831 100644 --- a/tests/test_codex_execution_paths.py +++ b/tests/test_codex_execution_paths.py @@ -148,6 +148,7 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch): runner._ephemeral_system_prompt = "" runner._prefill_messages = [] runner._reasoning_config = None + runner._provider_routing = {} runner._running_agents = {} from unittest.mock import MagicMock, AsyncMock runner.hooks = MagicMock() diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py index 82199ac4c..5b8508e64 100644 --- a/tests/test_provider_parity.py +++ b/tests/test_provider_parity.py @@ -145,7 +145,7 @@ class TestBuildApiKwargsCodex: messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) assert "reasoning" in kwargs - assert kwargs["reasoning"]["effort"] == "medium" + assert kwargs["reasoning"]["effort"] == "xhigh" def test_includes_encrypted_content_in_include(self, monkeypatch): agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", @@ -458,3 +458,172 @@ class TestAuxiliaryClientProviderPriority: client, model = get_text_auxiliary_client() assert model == "gpt-5.3-codex" assert isinstance(client, CodexAuxiliaryClient) + + +# ── Provider routing tests 
─────────────────────────────────────────────────── + +class TestProviderRouting: + """Verify provider_routing config flows into extra_body.provider.""" + + def test_sort_throughput(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + agent.provider_sort = "throughput" + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["extra_body"]["provider"]["sort"] == "throughput" + + def test_only_providers(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + agent.providers_allowed = ["anthropic", "google"] + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["extra_body"]["provider"]["only"] == ["anthropic", "google"] + + def test_ignore_providers(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + agent.providers_ignored = ["deepinfra"] + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["extra_body"]["provider"]["ignore"] == ["deepinfra"] + + def test_order_providers(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + agent.providers_order = ["anthropic", "together"] + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["extra_body"]["provider"]["order"] == ["anthropic", "together"] + + def test_require_parameters(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + agent.provider_require_parameters = True + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["extra_body"]["provider"]["require_parameters"] is True + + def test_data_collection_deny(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + agent.provider_data_collection = "deny" + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["extra_body"]["provider"]["data_collection"] == "deny" + + def test_no_routing_when_unset(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + kwargs = 
agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        # No routing options configured, so no provider block should be injected.
+        provider_prefs = kwargs.get("extra_body", {}).get("provider")
+        assert not provider_prefs
+
+    def test_combined_routing(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.provider_sort = "latency"
+        agent.providers_ignored = ["deepinfra"]
+        agent.provider_data_collection = "deny"
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        prov = kwargs["extra_body"]["provider"]
+        assert prov["sort"] == "latency"
+        assert prov["ignore"] == ["deepinfra"]
+        assert prov["data_collection"] == "deny"
+
+    def test_routing_not_injected_for_codex(self, monkeypatch):
+        """Codex Responses API doesn't use extra_body.provider."""
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        agent.provider_sort = "throughput"
+        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+        assert "extra_body" not in kwargs
+        assert "provider" not in kwargs or kwargs.get("provider") is None
+
+
+# ── Codex reasoning items preflight tests ────────────────────────────────────
+
+class TestCodexReasoningPreflight:
+    """Verify reasoning items pass through preflight normalization."""
+
+    def test_reasoning_item_passes_through(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        raw_input = [
+            {"role": "user", "content": "hello"},
+            {"type": "reasoning", "encrypted_content": "abc123encrypted", "id": "r_001"},
+            {"role": "assistant", "content": "hi there"},
+        ]
+        normalized = agent._preflight_codex_input_items(raw_input)
+        reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
+        assert len(reasoning_items) == 1
+        assert reasoning_items[0]["encrypted_content"] == 
"abc123encrypted" + assert reasoning_items[0]["id"] == "r_001" + + def test_reasoning_item_without_id(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + raw_input = [ + {"type": "reasoning", "encrypted_content": "abc123"}, + ] + normalized = agent._preflight_codex_input_items(raw_input) + assert len(normalized) == 1 + assert "id" not in normalized[0] + + def test_reasoning_item_empty_encrypted_skipped(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + raw_input = [ + {"type": "reasoning", "encrypted_content": ""}, + {"role": "user", "content": "hello"}, + ] + normalized = agent._preflight_codex_input_items(raw_input) + reasoning_items = [i for i in normalized if i.get("type") == "reasoning"] + assert len(reasoning_items) == 0 + + def test_reasoning_items_replayed_from_history(self, monkeypatch): + """Reasoning items stored in codex_reasoning_items get replayed.""" + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "user", "content": "hello"}, + { + "role": "assistant", + "content": "hi", + "codex_reasoning_items": [ + {"type": "reasoning", "encrypted_content": "enc123", "id": "r_1"}, + ], + }, + {"role": "user", "content": "follow up"}, + ] + items = agent._chat_messages_to_responses_input(messages) + reasoning_items = [i for i in items if isinstance(i, dict) and i.get("type") == "reasoning"] + assert len(reasoning_items) == 1 + assert reasoning_items[0]["encrypted_content"] == "enc123" + + +# ── Reasoning effort consistency tests ─────────────────────────────────────── + +class TestReasoningEffortDefaults: + """Verify reasoning effort defaults to xhigh across all provider paths.""" + + def test_openrouter_default_xhigh(self, monkeypatch): + agent = 
_make_agent(monkeypatch, "openrouter") + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + reasoning = kwargs["extra_body"]["reasoning"] + assert reasoning["effort"] == "xhigh" + + def test_codex_default_xhigh(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["reasoning"]["effort"] == "xhigh" + + def test_codex_reasoning_disabled(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + agent.reasoning_config = {"enabled": False} + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert "reasoning" not in kwargs + assert kwargs["include"] == [] + + def test_codex_reasoning_low(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + agent.reasoning_config = {"enabled": True, "effort": "low"} + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["reasoning"]["effort"] == "low" + + def test_openrouter_reasoning_config_override(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + agent.reasoning_config = {"enabled": True, "effort": "medium"} + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"