diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index b955a5399..dfa95aa54 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -138,6 +138,12 @@ DEFAULT_CONFIG = {
     "toolsets": ["hermes-cli"],
     "agent": {
         "max_turns": 90,
+        # Tool-use enforcement: injects system prompt guidance that tells the
+        # model to actually call tools instead of describing intended actions.
+        # Values: "auto" (default — applies to gpt/codex models), true/false
+        # (force on/off for all models), or a list of model-name substrings
+        # to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
+        "tool_use_enforcement": "auto",
     },
 
     "terminal": {
diff --git a/run_agent.py b/run_agent.py
index 65f67f8d9..9d778b3d7 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1080,6 +1080,13 @@ class AIAgent:
         except Exception:
             pass
 
+        # Tool-use enforcement config: "auto" (default — matches hardcoded
+        # model list), true (always), false (never), or list of substrings.
+        _agent_section = _agent_cfg.get("agent", {})
+        if not isinstance(_agent_section, dict):
+            _agent_section = {}
+        self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
+
         # Initialize context compressor for automatic context management
         # Compresses conversation when approaching model's context limit
         # Configuration via config.yaml (compression section)
@@ -2510,14 +2517,35 @@ class AIAgent:
         if tool_guidance:
             prompt_parts.append(" ".join(tool_guidance))
 
-        # Some model families benefit from explicit tool-use enforcement.
-        # Without this, they tend to describe intended actions as text
-        # ("I will run the tests") instead of actually making tool calls.
-        # TOOL_USE_ENFORCEMENT_MODELS is a tuple of substrings to match.
-        # Inject only when the model has tools available.
+        # Tool-use enforcement: tells the model to actually call tools instead
+        # of describing intended actions. Controlled by config.yaml
+        # agent.tool_use_enforcement:
+        #   "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
+        #   true  — always inject (all models); also "true"/"always"/"yes"/"on"
+        #   false — never inject; also "false"/"never"/"no"/"off"
+        #   list  — custom model-name substrings to match (case-insensitive)
         if self.valid_tool_names:
-            model_lower = (self.model or "").lower()
-            if any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS):
+            _enforce = self._tool_use_enforcement
+            if isinstance(_enforce, str):
+                # Normalise so " On " / "TRUE" from hand-edited YAML still match.
+                _enforce = _enforce.strip().lower()
+            model_lower = (self.model or "").lower()
+            if _enforce is True or _enforce in ("true", "always", "yes", "on"):
+                _inject = True
+            elif _enforce is False or _enforce in ("false", "never", "no", "off"):
+                _inject = False
+            elif isinstance(_enforce, (list, tuple)):
+                # Skip non-string and blank entries: "" is a substring of
+                # every model name and would silently force enforcement on.
+                _inject = any(
+                    p.strip().lower() in model_lower
+                    for p in _enforce
+                    if isinstance(p, str) and p.strip()
+                )
+            else:
+                # "auto" or any unrecognised value — use hardcoded defaults
+                _inject = any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
+            if _inject:
                 prompt_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
 
         # Honcho CLI awareness: tell Hermes about its own management commands
diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py
index 4a3537e9b..7ad5ee9a3 100644
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -617,6 +617,142 @@ class TestBuildSystemPrompt:
         assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}
 
 
+class TestToolUseEnforcementConfig:
+    """Tests for the agent.tool_use_enforcement config option."""
+
+    def _make_agent(self, model="openai/gpt-4.1", tool_use_enforcement="auto"):
+        """Create an agent with tools and a specific enforcement config."""
+        with (
+            patch(
+                "run_agent.get_tool_definitions",
+                return_value=_make_tool_defs("terminal", "web_search"),
+            ),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("run_agent.OpenAI"),
+            patch(
+                "hermes_cli.config.load_config",
+                return_value={"agent": {"tool_use_enforcement": tool_use_enforcement}},
+            ),
+        ):
+            a = AIAgent(
+                model=model,
+                api_key="test-key-1234567890",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+            a.client = MagicMock()
+            return a
+
+    def test_auto_injects_for_gpt(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_auto_injects_for_codex(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="openai/codex-mini", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_auto_skips_for_claude(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
+
+    def test_true_forces_for_all_models(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement=True)
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_string_true_forces_for_all_models(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="true")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_always_forces_for_all_models(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="deepseek/deepseek-r1", tool_use_enforcement="always")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_false_disables_for_gpt(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement=False)
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
+
+    def test_string_false_disables(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="off")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
+
+    def test_custom_list_matches(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(
+            model="deepseek/deepseek-r1",
+            tool_use_enforcement=["deepseek", "gemini"],
+        )
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_custom_list_no_match(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(
+            model="anthropic/claude-sonnet-4",
+            tool_use_enforcement=["deepseek", "gemini"],
+        )
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
+
+    def test_custom_list_case_insensitive(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(
+            model="openai/GPT-4.1",
+            tool_use_enforcement=["GPT", "Codex"],
+        )
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_custom_list_ignores_empty_entries(self):
+        """An empty entry must not match every model ("" is in any string)."""
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(
+            model="anthropic/claude-sonnet-4",
+            tool_use_enforcement=["", "gemini"],
+        )
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
+
+    def test_no_tools_never_injects(self):
+        """Even with enforcement=true, no injection when agent has no tools."""
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        with (
+            patch("run_agent.get_tool_definitions", return_value=[]),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("run_agent.OpenAI"),
+            patch(
+                "hermes_cli.config.load_config",
+                return_value={"agent": {"tool_use_enforcement": True}},
+            ),
+        ):
+            a = AIAgent(
+                api_key="test-key-1234567890",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+                enabled_toolsets=[],
+            )
+            a.client = MagicMock()
+        prompt = a._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
+
+
 class TestInvalidateSystemPrompt:
     def test_clears_cache(self, agent):
         agent._cached_system_prompt = "cached value"