diff --git a/cli-config.yaml.example b/cli-config.yaml.example index d3ab513ce..0b49368dc 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -210,9 +210,10 @@ agent: # Enable verbose logging verbose: false - # Custom system prompt (personality, instructions, etc.) - # Leave empty or remove to use default agent behavior - system_prompt: "" + # Reasoning effort level (OpenRouter and Nous Portal) + # Controls how much "thinking" the model does before responding. + # Options: "xhigh" (max), "high", "medium", "low", "minimal", "none" (disable) + reasoning_effort: "xhigh" # Predefined personalities (use with /personality command) personalities: diff --git a/cli.py b/cli.py index 42394efa1..d711da172 100755 --- a/cli.py +++ b/cli.py @@ -90,6 +90,24 @@ def _load_prefill_messages(file_path: str) -> List[Dict[str, Any]]: return [] +def _parse_reasoning_config(effort: str) -> dict | None: + """Parse a reasoning effort level into an OpenRouter reasoning config dict. + + Valid levels: "xhigh", "high", "medium", "low", "minimal", "none". + Returns None to use the default (xhigh), or a config dict to override. + """ + if not effort or not effort.strip(): + return None + effort = effort.strip().lower() + if effort == "none": + return {"enabled": False} + valid = ("xhigh", "high", "medium", "low", "minimal") + if effort in valid: + return {"enabled": True, "effort": effort} + logger.warning("Unknown reasoning_effort '%s', using default (xhigh)", effort) + return None + + def load_cli_config() -> Dict[str, Any]: """ Load CLI configuration from config files. @@ -146,6 +164,7 @@ def load_cli_config() -> Dict[str, Any]: "verbose": False, "system_prompt": "", "prefill_messages_file": "", + "reasoning_effort": "", "personalities": { "helpful": "You are a helpful, friendly AI assistant.", "concise": "You are a concise assistant. Keep responses brief and to the point.", @@ -795,6 +814,11 @@ class HermesCLI: CLI_CONFIG["agent"].get("prefill_messages_file", "") ) + # Reasoning config (OpenRouter reasoning effort level) + self.reasoning_config = _parse_reasoning_config( + CLI_CONFIG["agent"].get("reasoning_effort", "") + ) + # Agent will be initialized on first use self.agent: Optional[AIAgent] = None self._app = None # prompt_toolkit Application (set in run()) @@ -889,6 +913,7 @@ class HermesCLI: quiet_mode=True, ephemeral_system_prompt=self.system_prompt if self.system_prompt else None, prefill_messages=self.prefill_messages or None, + reasoning_config=self.reasoning_config, session_id=self.session_id, platform="cli", session_db=self._session_db, diff --git a/gateway/run.py b/gateway/run.py index 9c07d28e5..ebc59be35 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -96,6 +96,7 @@ class GatewayRunner: # Both are injected at API-call time only and never persisted. self._prefill_messages = self._load_prefill_messages() self._ephemeral_system_prompt = self._load_ephemeral_system_prompt() + self._reasoning_config = self._load_reasoning_config() # Wire process registry into session store for reset protection from tools.process_registry import process_registry @@ -184,6 +185,36 @@ class GatewayRunner: pass return "" + @staticmethod + def _load_reasoning_config() -> dict | None: + """Load reasoning effort from config or env var. + + Checks HERMES_REASONING_EFFORT env var first, then agent.reasoning_effort + in config.yaml. Valid: "xhigh", "high", "medium", "low", "minimal", "none". + Returns None to use default (xhigh). + """ + effort = os.getenv("HERMES_REASONING_EFFORT", "") + if not effort: + try: + import yaml as _y + cfg_path = Path.home() / ".hermes" / "config.yaml" + if cfg_path.exists(): + with open(cfg_path) as _f: + cfg = _y.safe_load(_f) or {} + effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip() + except Exception: + pass + if not effort: + return None + effort = effort.lower().strip() + if effort == "none": + return {"enabled": False} + valid = ("xhigh", "high", "medium", "low", "minimal") + if effort in valid: + return {"enabled": True, "effort": effort} + logger.warning("Unknown reasoning_effort '%s', using default (xhigh)", effort) + return None + async def start(self) -> bool: """ Start the gateway and all configured platform adapters. @@ -1352,6 +1383,7 @@ class GatewayRunner: enabled_toolsets=enabled_toolsets, ephemeral_system_prompt=combined_ephemeral or None, prefill_messages=self._prefill_messages or None, + reasoning_config=self._reasoning_config, session_id=session_id, tool_progress_callback=progress_callback if tool_progress_enabled else None, platform=platform_key, diff --git a/run_agent.py b/run_agent.py index 3b31b9947..04202c6af 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1152,7 +1152,11 @@ class AIAgent: if provider_preferences: extra_body["provider"] = provider_preferences - if "openrouter" in self.base_url.lower(): + _supports_reasoning = ( + "openrouter" in self.base_url.lower() + or "nousresearch" in self.base_url.lower() + ) + if _supports_reasoning: if self.reasoning_config is not None: extra_body["reasoning"] = self.reasoning_config else: @@ -1574,7 +1578,11 @@ class AIAgent: api_messages.insert(sys_offset + idx, pfm.copy()) summary_extra_body = {} - if "openrouter" in self.base_url.lower(): + _supports_reasoning = ( + "openrouter" in self.base_url.lower() + or "nousresearch" in self.base_url.lower() + ) + if _supports_reasoning: if self.reasoning_config is not None: summary_extra_body["reasoning"] = self.reasoning_config else: