diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index c576b55c1..b86be15a4 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -66,7 +66,8 @@ DEFAULT_AGENT_IDENTITY = ( "range of tasks including answering questions, writing and editing code, " "analyzing information, creative work, and executing actions via your tools. " "You communicate clearly, admit uncertainty when appropriate, and prioritize " - "being genuinely useful over being verbose unless otherwise directed below." + "being genuinely useful over being verbose unless otherwise directed below. " + "Be targeted and efficient in your exploration and investigations." ) MEMORY_GUIDANCE = ( diff --git a/batch_runner.py b/batch_runner.py index 1bd6745b9..a4c402ffd 100644 --- a/batch_runner.py +++ b/batch_runner.py @@ -1155,7 +1155,7 @@ def main( providers_order (str): Comma-separated list of OpenRouter providers to try in order (e.g. "anthropic,openai,google") provider_sort (str): Sort providers by "price", "throughput", or "latency" (OpenRouter only) max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set) - reasoning_effort (str): OpenRouter reasoning effort level: "xhigh", "high", "medium", "low", "minimal", "none" (default: "xhigh") + reasoning_effort (str): OpenRouter reasoning effort level: "xhigh", "high", "medium", "low", "minimal", "none" (default: "medium") reasoning_disabled (bool): Completely disable reasoning/thinking tokens (default: False) prefill_messages_file (str): Path to JSON file containing prefill messages (list of {role, content} dicts) max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set) @@ -1216,7 +1216,7 @@ def main( providers_order_list = [p.strip() for p in providers_order.split(",")] if providers_order else None # Build reasoning_config from CLI flags - # --reasoning_disabled takes priority, then --reasoning_effort, then default (xhigh) + # --reasoning_disabled 
takes priority, then --reasoning_effort, then default (medium) reasoning_config = None if reasoning_disabled: # Completely disable reasoning/thinking tokens diff --git a/cli-config.yaml.example b/cli-config.yaml.example index d8489d95b..f0d5a95bd 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -285,7 +285,7 @@ agent: # Reasoning effort level (OpenRouter and Nous Portal) # Controls how much "thinking" the model does before responding. # Options: "xhigh" (max), "high", "medium", "low", "minimal", "none" (disable) - reasoning_effort: "xhigh" + reasoning_effort: "medium" # Predefined personalities (use with /personality command) personalities: diff --git a/cli.py b/cli.py index 4d1941f81..7dd74b0b2 100755 --- a/cli.py +++ b/cli.py @@ -108,7 +108,7 @@ def _parse_reasoning_config(effort: str) -> dict | None: """Parse a reasoning effort level into an OpenRouter reasoning config dict. Valid levels: "xhigh", "high", "medium", "low", "minimal", "none". - Returns None to use the default (xhigh), or a config dict to override. + Returns None to use the default (medium), or a config dict to override. """ if not effort or not effort.strip(): return None @@ -118,7 +118,7 @@ def _parse_reasoning_config(effort: str) -> dict | None: valid = ("xhigh", "high", "medium", "low", "minimal") if effort in valid: return {"enabled": True, "effort": effort} - logger.warning("Unknown reasoning_effort '%s', using default (xhigh)", effort) + logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort) return None diff --git a/gateway/run.py b/gateway/run.py index 99fd2443f..3ed81379a 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -330,7 +330,7 @@ class GatewayRunner: Checks HERMES_REASONING_EFFORT env var first, then agent.reasoning_effort in config.yaml. Valid: "xhigh", "high", "medium", "low", "minimal", "none". - Returns None to use default (xhigh). + Returns None to use default (medium). 
""" effort = os.getenv("HERMES_REASONING_EFFORT", "") if not effort: @@ -351,7 +351,7 @@ class GatewayRunner: valid = ("xhigh", "high", "medium", "low", "minimal") if effort in valid: return {"enabled": True, "effort": effort} - logger.warning("Unknown reasoning_effort '%s', using default (xhigh)", effort) + logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort) return None @staticmethod diff --git a/run_agent.py b/run_agent.py index 00c43657b..2fce80a9a 100644 --- a/run_agent.py +++ b/run_agent.py @@ -213,7 +213,7 @@ class AIAgent: Provided by the platform layer (CLI or gateway). If None, the clarify tool returns an error. max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set) reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking). - If None, defaults to {"enabled": True, "effort": "xhigh"} for OpenRouter. Set to disable/customize reasoning. + If None, defaults to {"enabled": True, "effort": "medium"} for OpenRouter. Set to disable/customize reasoning. prefill_messages (List[Dict]): Messages to prepend to conversation history as prefilled context. Useful for injecting a few-shot example or priming the model's response style. Example: [{"role": "user", "content": "Hi!"}, {"role": "assistant", "content": "Hello!"}] @@ -287,7 +287,7 @@ class AIAgent: # Model response configuration self.max_tokens = max_tokens # None = use model default - self.reasoning_config = reasoning_config # None = use default (xhigh for OpenRouter) + self.reasoning_config = reasoning_config # None = use default (medium for OpenRouter) self.prefill_messages = prefill_messages or [] # Prefilled conversation turns # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter. 
@@ -2157,8 +2157,8 @@ class AIAgent: if not instructions: instructions = DEFAULT_AGENT_IDENTITY - # Resolve reasoning effort: config > default (xhigh) - reasoning_effort = "xhigh" + # Resolve reasoning effort: config > default (medium) + reasoning_effort = "medium" reasoning_enabled = True if self.reasoning_config and isinstance(self.reasoning_config, dict): if self.reasoning_config.get("enabled") is False: @@ -2224,7 +2224,7 @@ class AIAgent: else: extra_body["reasoning"] = { "enabled": True, - "effort": "xhigh" + "effort": "medium" } # Nous Portal product attribution @@ -2767,7 +2767,7 @@ class AIAgent: else: summary_extra_body["reasoning"] = { "enabled": True, - "effort": "xhigh" + "effort": "medium" } if _is_nous: summary_extra_body["tags"] = ["product=hermes-agent"] diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py index 00fc4dd9b..2ee313144 100644 --- a/tests/test_provider_parity.py +++ b/tests/test_provider_parity.py @@ -145,7 +145,7 @@ class TestBuildApiKwargsCodex: messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) assert "reasoning" in kwargs - assert kwargs["reasoning"]["effort"] == "xhigh" + assert kwargs["reasoning"]["effort"] == "medium" def test_includes_encrypted_content_in_include(self, monkeypatch): agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", @@ -596,19 +596,19 @@ class TestCodexReasoningPreflight: # ── Reasoning effort consistency tests ─────────────────────────────────────── class TestReasoningEffortDefaults: - """Verify reasoning effort defaults to xhigh across all provider paths.""" + """Verify reasoning effort defaults to medium across all provider paths.""" - def test_openrouter_default_xhigh(self, monkeypatch): + def test_openrouter_default_medium(self, monkeypatch): agent = _make_agent(monkeypatch, "openrouter") kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) reasoning = kwargs["extra_body"]["reasoning"] - assert 
reasoning["effort"] == "xhigh" + assert reasoning["effort"] == "medium" - def test_codex_default_xhigh(self, monkeypatch): + def test_codex_default_medium(self, monkeypatch): agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", base_url="https://chatgpt.com/backend-api/codex") kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) - assert kwargs["reasoning"]["effort"] == "xhigh" + assert kwargs["reasoning"]["effort"] == "medium" def test_codex_reasoning_disabled(self, monkeypatch): agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index ae7924d45..226b29a6d 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -498,12 +498,12 @@ class TestBuildApiKwargs: assert kwargs["extra_body"]["provider"]["only"] == ["Anthropic"] def test_reasoning_config_default_openrouter(self, agent): - """Default reasoning config for OpenRouter should be xhigh.""" + """Default reasoning config for OpenRouter should be medium.""" messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) reasoning = kwargs["extra_body"]["reasoning"] assert reasoning["enabled"] is True - assert reasoning["effort"] == "xhigh" + assert reasoning["effort"] == "medium" def test_reasoning_config_custom(self, agent): agent.reasoning_config = {"enabled": False} diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 6d6897794..33193619c 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -421,10 +421,10 @@ Control how much "thinking" the model does before responding: ```yaml agent: - reasoning_effort: "" # empty = use model default. Options: xhigh (max), high, medium, low, minimal, none + reasoning_effort: "" # empty = medium (default). 
Options: xhigh (max), high, medium, low, minimal, none ``` -When unset (default), the model's own default reasoning level is used. Setting a value overrides it — higher reasoning effort gives better results on complex tasks at the cost of more tokens and latency. +When unset (the default), reasoning effort is "medium" — a balanced level that works well for most tasks. Setting a value overrides it — higher reasoning effort gives better results on complex tasks at the cost of more tokens and latency. ## TTS Configuration