From 9423fda5cb573ef6b1a7876fc01157433eb7d785 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Wed, 11 Mar 2026 06:12:21 -0700 Subject: [PATCH] feat: configurable subagent provider:model with full credential resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds delegation.model and delegation.provider config fields so subagents can run on a completely different provider:model pair than the parent agent. When delegation.provider is set, the system resolves the full credential bundle (base_url, api_key, api_mode) via resolve_runtime_provider() — the same path used by CLI/gateway startup. This means all configured providers work out of the box: openrouter, nous, zai, kimi-coding, minimax, minimax-cn. Key design decisions: - Provider resolution uses hermes_cli.runtime_provider (single source of truth for credential resolution across CLI, gateway, cron, and now delegation) - When only delegation.model is set (no provider), the model name changes but parent credentials are inherited (for switching models within the same provider like OpenRouter) - When delegation.provider is set, full credentials are resolved independently — enabling cross-provider delegation (e.g. parent on Nous Portal, subagents on OpenRouter) - Clear error messages if provider resolution fails (missing API key, unknown provider name) - _load_config() now falls back to hermes_cli.config.load_config() for gateway/cron contexts where CLI_CONFIG is unavailable Based on PR #791 by 0xbyt4 (closes #609), reworked to use proper provider credential resolution instead of passing provider as metadata. 
Co-authored-by: 0xbyt4 <0xbyt4@users.noreply.github.com> --- cli-config.yaml.example | 4 + cli.py | 2 + hermes_cli/config.py | 11 +- tests/tools/test_delegate.py | 283 +++++++++++++++++++++++ tools/delegate_tool.py | 120 +++++++++- website/docs/user-guide/configuration.md | 8 + 6 files changed, 418 insertions(+), 10 deletions(-) diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 33f3702c5..fd39e9834 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -626,6 +626,10 @@ code_execution: delegation: max_iterations: 50 # Max tool-calling turns per child (default: 50) default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents + # model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent) + # provider: "openrouter" # Override provider for subagents (empty = inherit parent) + # # Resolves full credentials (base_url, api_key) automatically. + # # Supported: openrouter, nous, zai, kimi-coding, minimax, minimax-cn # ============================================================================= # Honcho Integration (Cross-Session User Modeling) diff --git a/cli.py b/cli.py index 5eb9577bb..feb0052d4 100755 --- a/cli.py +++ b/cli.py @@ -217,6 +217,8 @@ def load_cli_config() -> Dict[str, Any]: "delegation": { "max_iterations": 45, # Max tool-calling turns per child agent "default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents + "model": "", # Subagent model override (empty = inherit parent model) + "provider": "", # Subagent provider override (empty = inherit parent provider) }, } diff --git a/hermes_cli/config.py b/hermes_cli/config.py index e8df6f3f4..0a3c0e4eb 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -182,7 +182,16 @@ DEFAULT_CONFIG = { "memory_char_limit": 2200, # ~800 tokens at 2.75 chars/token "user_char_limit": 1375, # ~500 tokens at 2.75 chars/token }, - + + # Subagent delegation — override the provider:model used by delegate_task + # so child 
agents can run on a different (cheaper/faster) provider and model. + # Uses the same runtime provider resolution as CLI/gateway startup, so all + # configured providers (OpenRouter, Nous, Z.ai, Kimi, etc.) are supported. + "delegation": { + "model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model) + "provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials) + }, + # Ephemeral prefill messages file — JSON list of {role, content} dicts # injected at the start of every API call for few-shot priming. # Never saved to sessions, logs, or trajectories. diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index aea7b127c..113fe3dd7 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -23,6 +23,7 @@ from tools.delegate_tool import ( delegate_task, _build_child_system_prompt, _strip_blocked_tools, + _resolve_delegation_credentials, ) @@ -255,5 +256,287 @@ class TestBlockedTools(unittest.TestCase): self.assertEqual(MAX_DEPTH, 2) +class TestDelegationCredentialResolution(unittest.TestCase): + """Tests for provider:model credential resolution in delegation config.""" + + def test_no_provider_returns_none_credentials(self): + """When delegation.provider is empty, all credentials are None (inherit parent).""" + parent = _make_mock_parent(depth=0) + cfg = {"model": "", "provider": ""} + creds = _resolve_delegation_credentials(cfg, parent) + self.assertIsNone(creds["provider"]) + self.assertIsNone(creds["base_url"]) + self.assertIsNone(creds["api_key"]) + self.assertIsNone(creds["api_mode"]) + self.assertIsNone(creds["model"]) + + def test_model_only_no_provider(self): + """When only model is set (no provider), model is returned but credentials are None.""" + parent = _make_mock_parent(depth=0) + cfg = {"model": "google/gemini-3-flash-preview", "provider": ""} + creds = _resolve_delegation_credentials(cfg, parent) + self.assertEqual(creds["model"], "google/gemini-3-flash-preview") + 
self.assertIsNone(creds["provider"]) + self.assertIsNone(creds["base_url"]) + self.assertIsNone(creds["api_key"]) + + @patch("hermes_cli.runtime_provider.resolve_runtime_provider") + def test_provider_resolves_full_credentials(self, mock_resolve): + """When delegation.provider is set, full credentials are resolved.""" + mock_resolve.return_value = { + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-or-test-key", + "api_mode": "chat_completions", + } + parent = _make_mock_parent(depth=0) + cfg = {"model": "google/gemini-3-flash-preview", "provider": "openrouter"} + creds = _resolve_delegation_credentials(cfg, parent) + self.assertEqual(creds["model"], "google/gemini-3-flash-preview") + self.assertEqual(creds["provider"], "openrouter") + self.assertEqual(creds["base_url"], "https://openrouter.ai/api/v1") + self.assertEqual(creds["api_key"], "sk-or-test-key") + self.assertEqual(creds["api_mode"], "chat_completions") + mock_resolve.assert_called_once_with(requested="openrouter") + + @patch("hermes_cli.runtime_provider.resolve_runtime_provider") + def test_nous_provider_resolves_nous_credentials(self, mock_resolve): + """Nous provider resolves Nous Portal base_url and api_key.""" + mock_resolve.return_value = { + "provider": "nous", + "base_url": "https://inference-api.nousresearch.com/v1", + "api_key": "nous-agent-key-xyz", + "api_mode": "chat_completions", + } + parent = _make_mock_parent(depth=0) + cfg = {"model": "hermes-3-llama-3.1-8b", "provider": "nous"} + creds = _resolve_delegation_credentials(cfg, parent) + self.assertEqual(creds["provider"], "nous") + self.assertEqual(creds["base_url"], "https://inference-api.nousresearch.com/v1") + self.assertEqual(creds["api_key"], "nous-agent-key-xyz") + mock_resolve.assert_called_once_with(requested="nous") + + @patch("hermes_cli.runtime_provider.resolve_runtime_provider") + def test_provider_resolution_failure_raises_valueerror(self, mock_resolve): + """When provider resolution 
fails, ValueError is raised with helpful message.""" + mock_resolve.side_effect = RuntimeError("OPENROUTER_API_KEY not set") + parent = _make_mock_parent(depth=0) + cfg = {"model": "some-model", "provider": "openrouter"} + with self.assertRaises(ValueError) as ctx: + _resolve_delegation_credentials(cfg, parent) + self.assertIn("openrouter", str(ctx.exception).lower()) + self.assertIn("Cannot resolve", str(ctx.exception)) + + @patch("hermes_cli.runtime_provider.resolve_runtime_provider") + def test_provider_resolves_but_no_api_key_raises(self, mock_resolve): + """When provider resolves but has no API key, ValueError is raised.""" + mock_resolve.return_value = { + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "", + "api_mode": "chat_completions", + } + parent = _make_mock_parent(depth=0) + cfg = {"model": "some-model", "provider": "openrouter"} + with self.assertRaises(ValueError) as ctx: + _resolve_delegation_credentials(cfg, parent) + self.assertIn("no API key", str(ctx.exception)) + + def test_missing_config_keys_inherit_parent(self): + """When config dict has no model/provider keys at all, inherits parent.""" + parent = _make_mock_parent(depth=0) + cfg = {"max_iterations": 45} + creds = _resolve_delegation_credentials(cfg, parent) + self.assertIsNone(creds["model"]) + self.assertIsNone(creds["provider"]) + + +class TestDelegationProviderIntegration(unittest.TestCase): + """Integration tests: delegation config → _run_single_child → AIAgent construction.""" + + @patch("tools.delegate_tool._load_config") + @patch("tools.delegate_tool._resolve_delegation_credentials") + def test_config_provider_credentials_reach_child_agent(self, mock_creds, mock_cfg): + """When delegation.provider is configured, child agent gets resolved credentials.""" + mock_cfg.return_value = { + "max_iterations": 45, + "model": "google/gemini-3-flash-preview", + "provider": "openrouter", + } + mock_creds.return_value = { + "model": 
"google/gemini-3-flash-preview", + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-or-delegation-key", + "api_mode": "chat_completions", + } + parent = _make_mock_parent(depth=0) + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.run_conversation.return_value = { + "final_response": "done", "completed": True, "api_calls": 1 + } + MockAgent.return_value = mock_child + + delegate_task(goal="Test provider routing", parent_agent=parent) + + _, kwargs = MockAgent.call_args + self.assertEqual(kwargs["model"], "google/gemini-3-flash-preview") + self.assertEqual(kwargs["provider"], "openrouter") + self.assertEqual(kwargs["base_url"], "https://openrouter.ai/api/v1") + self.assertEqual(kwargs["api_key"], "sk-or-delegation-key") + self.assertEqual(kwargs["api_mode"], "chat_completions") + + @patch("tools.delegate_tool._load_config") + @patch("tools.delegate_tool._resolve_delegation_credentials") + def test_cross_provider_delegation(self, mock_creds, mock_cfg): + """Parent on Nous, subagent on OpenRouter — full credential switch.""" + mock_cfg.return_value = { + "max_iterations": 45, + "model": "google/gemini-3-flash-preview", + "provider": "openrouter", + } + mock_creds.return_value = { + "model": "google/gemini-3-flash-preview", + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-or-key", + "api_mode": "chat_completions", + } + parent = _make_mock_parent(depth=0) + parent.provider = "nous" + parent.base_url = "https://inference-api.nousresearch.com/v1" + parent.api_key = "nous-key-abc" + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.run_conversation.return_value = { + "final_response": "done", "completed": True, "api_calls": 1 + } + MockAgent.return_value = mock_child + + delegate_task(goal="Cross-provider test", parent_agent=parent) + + _, kwargs = MockAgent.call_args + # Child should use OpenRouter, NOT Nous + 
self.assertEqual(kwargs["provider"], "openrouter") + self.assertEqual(kwargs["base_url"], "https://openrouter.ai/api/v1") + self.assertEqual(kwargs["api_key"], "sk-or-key") + self.assertNotEqual(kwargs["base_url"], parent.base_url) + self.assertNotEqual(kwargs["api_key"], parent.api_key) + + @patch("tools.delegate_tool._load_config") + @patch("tools.delegate_tool._resolve_delegation_credentials") + def test_empty_config_inherits_parent(self, mock_creds, mock_cfg): + """When delegation config is empty, child inherits parent credentials.""" + mock_cfg.return_value = {"max_iterations": 45, "model": "", "provider": ""} + mock_creds.return_value = { + "model": None, + "provider": None, + "base_url": None, + "api_key": None, + "api_mode": None, + } + parent = _make_mock_parent(depth=0) + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.run_conversation.return_value = { + "final_response": "done", "completed": True, "api_calls": 1 + } + MockAgent.return_value = mock_child + + delegate_task(goal="Test inherit", parent_agent=parent) + + _, kwargs = MockAgent.call_args + self.assertEqual(kwargs["model"], parent.model) + self.assertEqual(kwargs["provider"], parent.provider) + self.assertEqual(kwargs["base_url"], parent.base_url) + + @patch("tools.delegate_tool._load_config") + @patch("tools.delegate_tool._resolve_delegation_credentials") + def test_credential_error_returns_json_error(self, mock_creds, mock_cfg): + """When credential resolution fails, delegate_task returns a JSON error.""" + mock_cfg.return_value = {"model": "bad-model", "provider": "nonexistent"} + mock_creds.side_effect = ValueError( + "Cannot resolve delegation provider 'nonexistent': Unknown provider" + ) + parent = _make_mock_parent(depth=0) + + result = json.loads(delegate_task(goal="Should fail", parent_agent=parent)) + self.assertIn("error", result) + self.assertIn("Cannot resolve", result["error"]) + self.assertIn("nonexistent", result["error"]) + + 
@patch("tools.delegate_tool._load_config") + @patch("tools.delegate_tool._resolve_delegation_credentials") + def test_batch_mode_all_children_get_credentials(self, mock_creds, mock_cfg): + """In batch mode, all children receive the resolved credentials.""" + mock_cfg.return_value = { + "max_iterations": 45, + "model": "meta-llama/llama-4-scout", + "provider": "openrouter", + } + mock_creds.return_value = { + "model": "meta-llama/llama-4-scout", + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-or-batch", + "api_mode": "chat_completions", + } + parent = _make_mock_parent(depth=0) + + with patch("tools.delegate_tool._run_single_child") as mock_run: + mock_run.return_value = { + "task_index": 0, "status": "completed", + "summary": "Done", "api_calls": 1, "duration_seconds": 1.0 + } + + tasks = [{"goal": "Task A"}, {"goal": "Task B"}] + delegate_task(tasks=tasks, parent_agent=parent) + + for call in mock_run.call_args_list: + self.assertEqual(call.kwargs.get("model"), "meta-llama/llama-4-scout") + self.assertEqual(call.kwargs.get("override_provider"), "openrouter") + self.assertEqual(call.kwargs.get("override_base_url"), "https://openrouter.ai/api/v1") + self.assertEqual(call.kwargs.get("override_api_key"), "sk-or-batch") + self.assertEqual(call.kwargs.get("override_api_mode"), "chat_completions") + + @patch("tools.delegate_tool._load_config") + @patch("tools.delegate_tool._resolve_delegation_credentials") + def test_model_only_no_provider_inherits_parent_credentials(self, mock_creds, mock_cfg): + """Setting only model (no provider) changes model but keeps parent credentials.""" + mock_cfg.return_value = { + "max_iterations": 45, + "model": "google/gemini-3-flash-preview", + "provider": "", + } + mock_creds.return_value = { + "model": "google/gemini-3-flash-preview", + "provider": None, + "base_url": None, + "api_key": None, + "api_mode": None, + } + parent = _make_mock_parent(depth=0) + + with patch("run_agent.AIAgent") as 
MockAgent: + mock_child = MagicMock() + mock_child.run_conversation.return_value = { + "final_response": "done", "completed": True, "api_calls": 1 + } + MockAgent.return_value = mock_child + + delegate_task(goal="Model only test", parent_agent=parent) + + _, kwargs = MockAgent.call_args + # Model should be overridden + self.assertEqual(kwargs["model"], "google/gemini-3-flash-preview") + # But provider/base_url/api_key should inherit from parent + self.assertEqual(kwargs["provider"], parent.provider) + self.assertEqual(kwargs["base_url"], parent.base_url) + + if __name__ == "__main__": unittest.main() diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 835b46afe..8ade49fe0 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -166,10 +166,20 @@ def _run_single_child( max_iterations: int, parent_agent, task_count: int = 1, + # Credential overrides from delegation config (provider:model resolution) + override_provider: Optional[str] = None, + override_base_url: Optional[str] = None, + override_api_key: Optional[str] = None, + override_api_mode: Optional[str] = None, ) -> Dict[str, Any]: """ Spawn and run a single child agent. Called from within a thread. Returns a structured result dict. + + When override_* params are set (from delegation config), the child uses + those credentials instead of inheriting from the parent. This enables + routing subagents to a different provider:model pair (e.g. cheap/fast + model on OpenRouter while the parent runs on Nous Portal). """ from run_agent import AIAgent @@ -199,12 +209,19 @@ def _run_single_child( # count toward the session-wide limit. 
shared_budget = getattr(parent_agent, "iteration_budget", None) + # Resolve effective credentials: config override > parent inherit + effective_model = model or parent_agent.model + effective_provider = override_provider or getattr(parent_agent, "provider", None) + effective_base_url = override_base_url or parent_agent.base_url + effective_api_key = override_api_key or parent_api_key + effective_api_mode = override_api_mode or getattr(parent_agent, "api_mode", None) + child = AIAgent( - base_url=parent_agent.base_url, - api_key=parent_api_key, - model=model or parent_agent.model, - provider=getattr(parent_agent, "provider", None), - api_mode=getattr(parent_agent, "api_mode", None), + base_url=effective_base_url, + api_key=effective_api_key, + model=effective_model, + provider=effective_provider, + api_mode=effective_api_mode, max_iterations=max_iterations, max_tokens=getattr(parent_agent, "max_tokens", None), reasoning_config=getattr(parent_agent, "reasoning_config", None), @@ -327,6 +344,16 @@ def delegate_task( default_max_iter = cfg.get("max_iterations", DEFAULT_MAX_ITERATIONS) effective_max_iter = max_iterations or default_max_iter + # Resolve delegation credentials (provider:model pair). + # When delegation.provider is configured, this resolves the full credential + # bundle (base_url, api_key, api_mode) via the same runtime provider system + # used by CLI/gateway startup. When unconfigured, returns None values so + # children inherit from the parent. 
+ try: + creds = _resolve_delegation_credentials(cfg, parent_agent) + except ValueError as exc: + return json.dumps({"error": str(exc)}) + # Normalize to task list if tasks and isinstance(tasks, list): task_list = tasks[:MAX_CONCURRENT_CHILDREN] @@ -358,10 +385,14 @@ def delegate_task( goal=t["goal"], context=t.get("context"), toolsets=t.get("toolsets") or toolsets, - model=None, + model=creds["model"], max_iterations=effective_max_iter, parent_agent=parent_agent, task_count=1, + override_provider=creds["provider"], + override_base_url=creds["base_url"], + override_api_key=creds["api_key"], + override_api_mode=creds["api_mode"], ) results.append(result) else: @@ -383,10 +414,14 @@ def delegate_task( goal=t["goal"], context=t.get("context"), toolsets=t.get("toolsets") or toolsets, - model=None, + model=creds["model"], max_iterations=effective_max_iter, parent_agent=parent_agent, task_count=n_tasks, + override_provider=creds["provider"], + override_base_url=creds["base_url"], + override_api_key=creds["api_key"], + override_api_mode=creds["api_mode"], ) futures[future] = i @@ -444,11 +479,78 @@ def delegate_task( }, ensure_ascii=False) +def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: + """Resolve credentials for subagent delegation. + + If ``delegation.provider`` is configured, resolves the full credential + bundle (base_url, api_key, api_mode, provider) via the runtime provider + system — the same path used by CLI/gateway startup. This lets subagents + run on a completely different provider:model pair. + + If no provider is configured, returns None values so the child inherits + everything from the parent agent. + + Raises ValueError with a user-friendly message on credential failure. 
+ """ + configured_model = cfg.get("model") or None + configured_provider = cfg.get("provider") or None + + if not configured_provider: + # No provider override — child inherits everything from parent + return { + "model": configured_model, + "provider": None, + "base_url": None, + "api_key": None, + "api_mode": None, + } + + # Provider is configured — resolve full credentials + try: + from hermes_cli.runtime_provider import resolve_runtime_provider + runtime = resolve_runtime_provider(requested=configured_provider) + except Exception as exc: + raise ValueError( + f"Cannot resolve delegation provider '{configured_provider}': {exc}. " + f"Check that the provider is configured (API key set, valid provider name). " + f"Available providers: openrouter, nous, zai, kimi-coding, minimax." + ) from exc + + api_key = runtime.get("api_key", "") + if not api_key: + raise ValueError( + f"Delegation provider '{configured_provider}' resolved but has no API key. " + f"Set the appropriate environment variable or run 'hermes login'." + ) + + return { + "model": configured_model, + "provider": runtime.get("provider"), + "base_url": runtime.get("base_url"), + "api_key": api_key, + "api_mode": runtime.get("api_mode"), + } + + def _load_config() -> dict: - """Load delegation config from CLI_CONFIG if available.""" + """Load delegation config from CLI_CONFIG or persistent config. + + Checks the runtime config (cli.py CLI_CONFIG) first, then falls back + to the persistent config (hermes_cli/config.py load_config()) so that + ``delegation.model`` / ``delegation.provider`` are picked up regardless + of the entry point (CLI, gateway, cron). 
+ """ try: from cli import CLI_CONFIG - return CLI_CONFIG.get("delegation", {}) + cfg = CLI_CONFIG.get("delegation", {}) + if cfg: + return cfg + except Exception: + pass + try: + from hermes_cli.config import load_config + full = load_config() + return full.get("delegation", {}) except Exception: return {} diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index f9e72ea70..83921c2f2 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -729,8 +729,16 @@ delegation: - terminal - file - web + # model: "google/gemini-3-flash-preview" # Override model (empty = inherit parent) + # provider: "openrouter" # Override provider (empty = inherit parent) ``` +**Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model. + +The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed. + +**Precedence:** `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter). + ## Clarify Configure the clarification prompt behavior: