feat: make tool-use enforcement configurable via agent.tool_use_enforcement (#3551)

The TOOL_USE_ENFORCEMENT_GUIDANCE injection (added in #3528) was
hardcoded to only match gpt/codex model names. This makes it a
config option so users can turn it on for any model family.

New config key: agent.tool_use_enforcement
  - "auto" (default): matches gpt/codex (existing behavior)
  - true: inject for all models
  - false: never inject
  - list of strings: custom model-name substrings to match
    e.g. ["gpt", "codex", "gemini", "qwen"]

No version bump needed — deep merge provides the default
automatically for existing installs.

12 new tests covering all config modes.
This commit is contained in:
Teknium
2026-03-28 12:31:22 -07:00
committed by GitHub
parent d26ee20659
commit 901494d728
3 changed files with 160 additions and 7 deletions

View File

@@ -138,6 +138,12 @@ DEFAULT_CONFIG = {
"toolsets": ["hermes-cli"], "toolsets": ["hermes-cli"],
"agent": { "agent": {
"max_turns": 90, "max_turns": 90,
# Tool-use enforcement: injects system prompt guidance that tells the
# model to actually call tools instead of describing intended actions.
# Values: "auto" (default — applies to gpt/codex models), true/false
# (force on/off for all models), or a list of model-name substrings
# to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
"tool_use_enforcement": "auto",
}, },
"terminal": { "terminal": {

View File

@@ -1080,6 +1080,13 @@ class AIAgent:
except Exception: except Exception:
pass pass
# Tool-use enforcement config: "auto" (default — matches hardcoded
# model list), true (always), false (never), or list of substrings.
_agent_section = _agent_cfg.get("agent", {})
if not isinstance(_agent_section, dict):
_agent_section = {}
self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
# Initialize context compressor for automatic context management # Initialize context compressor for automatic context management
# Compresses conversation when approaching model's context limit # Compresses conversation when approaching model's context limit
# Configuration via config.yaml (compression section) # Configuration via config.yaml (compression section)
@@ -2510,14 +2517,28 @@ class AIAgent:
if tool_guidance: if tool_guidance:
prompt_parts.append(" ".join(tool_guidance)) prompt_parts.append(" ".join(tool_guidance))
# Some model families benefit from explicit tool-use enforcement. # Tool-use enforcement: tells the model to actually call tools instead
# Without this, they tend to describe intended actions as text # of describing intended actions. Controlled by config.yaml
# ("I will run the tests") instead of actually making tool calls. # agent.tool_use_enforcement:
# TOOL_USE_ENFORCEMENT_MODELS is a tuple of substrings to match. # "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
# Inject only when the model has tools available. # true — always inject (all models)
# false — never inject
# list — custom model-name substrings to match
if self.valid_tool_names: if self.valid_tool_names:
model_lower = (self.model or "").lower() _enforce = self._tool_use_enforcement
if any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS): _inject = False
if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in ("true", "always", "yes", "on")):
_inject = True
elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in ("false", "never", "no", "off")):
_inject = False
elif isinstance(_enforce, list):
model_lower = (self.model or "").lower()
_inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str))
else:
# "auto" or any unrecognised value — use hardcoded defaults
model_lower = (self.model or "").lower()
_inject = any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
if _inject:
prompt_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE) prompt_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
# Honcho CLI awareness: tell Hermes about its own management commands # Honcho CLI awareness: tell Hermes about its own management commands

View File

@@ -617,6 +617,132 @@ class TestBuildSystemPrompt:
assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"} assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}
class TestToolUseEnforcementConfig:
    """Tests for the agent.tool_use_enforcement config option.

    Covers every config mode: "auto" (hardcoded gpt/codex substring
    matching), boolean True/False, their string spellings ("true",
    "always", "off", ...), and a custom list of model-name substrings.
    Also verifies that nothing is injected when the agent has no tools.
    """

    def _make_agent(self, model="openai/gpt-4.1", tool_use_enforcement="auto"):
        """Create an agent with tools and a specific enforcement config."""
        with (
            patch(
                "run_agent.get_tool_definitions",
                return_value=_make_tool_defs("terminal", "web_search"),
            ),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
            patch(
                "hermes_cli.config.load_config",
                return_value={"agent": {"tool_use_enforcement": tool_use_enforcement}},
            ),
        ):
            agent = AIAgent(
                model=model,
                api_key="test-key-1234567890",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
            )
            agent.client = MagicMock()
            return agent

    def _guidance_injected(self, model, tool_use_enforcement):
        """Build the system prompt; return True if guidance was injected.

        Shared driver for all the mode tests below — each test differs
        only in the (model, config value) pair and the expected outcome.
        """
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        agent = self._make_agent(model=model, tool_use_enforcement=tool_use_enforcement)
        return TOOL_USE_ENFORCEMENT_GUIDANCE in agent._build_system_prompt()

    def test_auto_injects_for_gpt(self):
        assert self._guidance_injected("openai/gpt-4.1", "auto")

    def test_auto_injects_for_codex(self):
        assert self._guidance_injected("openai/codex-mini", "auto")

    def test_auto_skips_for_claude(self):
        assert not self._guidance_injected("anthropic/claude-sonnet-4", "auto")

    def test_true_forces_for_all_models(self):
        assert self._guidance_injected("anthropic/claude-sonnet-4", True)

    def test_string_true_forces_for_all_models(self):
        assert self._guidance_injected("anthropic/claude-sonnet-4", "true")

    def test_always_forces_for_all_models(self):
        assert self._guidance_injected("deepseek/deepseek-r1", "always")

    def test_false_disables_for_gpt(self):
        assert not self._guidance_injected("openai/gpt-4.1", False)

    def test_string_false_disables(self):
        assert not self._guidance_injected("openai/gpt-4.1", "off")

    def test_custom_list_matches(self):
        assert self._guidance_injected("deepseek/deepseek-r1", ["deepseek", "gemini"])

    def test_custom_list_no_match(self):
        assert not self._guidance_injected(
            "anthropic/claude-sonnet-4", ["deepseek", "gemini"]
        )

    def test_custom_list_case_insensitive(self):
        # Both the configured substrings and the model name are lowercased
        # before matching, so mixed-case values on either side still match.
        assert self._guidance_injected("openai/GPT-4.1", ["GPT", "Codex"])

    def test_no_tools_never_injects(self):
        """Even with enforcement=true, no injection when agent has no tools."""
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        with (
            patch("run_agent.get_tool_definitions", return_value=[]),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
            patch(
                "hermes_cli.config.load_config",
                return_value={"agent": {"tool_use_enforcement": True}},
            ),
        ):
            agent = AIAgent(
                api_key="test-key-1234567890",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
                enabled_toolsets=[],
            )
            agent.client = MagicMock()
            prompt = agent._build_system_prompt()
        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
class TestInvalidateSystemPrompt: class TestInvalidateSystemPrompt:
def test_clears_cache(self, agent): def test_clears_cache(self, agent):
agent._cached_system_prompt = "cached value" agent._cached_system_prompt = "cached value"