feat: make tool-use enforcement configurable via agent.tool_use_enforcement (#3551)
The TOOL_USE_ENFORCEMENT_GUIDANCE injection (added in #3528) was hardcoded to match only gpt/codex model names. This commit makes it a config option so users can turn it on for any model family.

New config key: agent.tool_use_enforcement
- "auto" (default): matches gpt/codex (existing behavior)
- true: inject for all models
- false: never inject
- list of strings: custom model-name substrings to match, e.g. ["gpt", "codex", "deepseek", "qwen"]

No version bump needed — deep merge provides the default automatically for existing installs.

12 new tests covering all config modes.
This commit is contained in:
@@ -138,6 +138,12 @@ DEFAULT_CONFIG = {
|
|||||||
"toolsets": ["hermes-cli"],
|
"toolsets": ["hermes-cli"],
|
||||||
"agent": {
|
"agent": {
|
||||||
"max_turns": 90,
|
"max_turns": 90,
|
||||||
|
# Tool-use enforcement: injects system prompt guidance that tells the
|
||||||
|
# model to actually call tools instead of describing intended actions.
|
||||||
|
# Values: "auto" (default — applies to gpt/codex models), true/false
|
||||||
|
# (force on/off for all models), or a list of model-name substrings
|
||||||
|
# to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
|
||||||
|
"tool_use_enforcement": "auto",
|
||||||
},
|
},
|
||||||
|
|
||||||
"terminal": {
|
"terminal": {
|
||||||
|
|||||||
35
run_agent.py
35
run_agent.py
@@ -1080,6 +1080,13 @@ class AIAgent:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Tool-use enforcement config: "auto" (default — matches hardcoded
|
||||||
|
# model list), true (always), false (never), or list of substrings.
|
||||||
|
_agent_section = _agent_cfg.get("agent", {})
|
||||||
|
if not isinstance(_agent_section, dict):
|
||||||
|
_agent_section = {}
|
||||||
|
self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
|
||||||
|
|
||||||
# Initialize context compressor for automatic context management
|
# Initialize context compressor for automatic context management
|
||||||
# Compresses conversation when approaching model's context limit
|
# Compresses conversation when approaching model's context limit
|
||||||
# Configuration via config.yaml (compression section)
|
# Configuration via config.yaml (compression section)
|
||||||
@@ -2510,14 +2517,28 @@ class AIAgent:
|
|||||||
if tool_guidance:
|
if tool_guidance:
|
||||||
prompt_parts.append(" ".join(tool_guidance))
|
prompt_parts.append(" ".join(tool_guidance))
|
||||||
|
|
||||||
# Some model families benefit from explicit tool-use enforcement.
|
# Tool-use enforcement: tells the model to actually call tools instead
|
||||||
# Without this, they tend to describe intended actions as text
|
# of describing intended actions. Controlled by config.yaml
|
||||||
# ("I will run the tests") instead of actually making tool calls.
|
# agent.tool_use_enforcement:
|
||||||
# TOOL_USE_ENFORCEMENT_MODELS is a tuple of substrings to match.
|
# "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
|
||||||
# Inject only when the model has tools available.
|
# true — always inject (all models)
|
||||||
|
# false — never inject
|
||||||
|
# list — custom model-name substrings to match
|
||||||
if self.valid_tool_names:
|
if self.valid_tool_names:
|
||||||
model_lower = (self.model or "").lower()
|
_enforce = self._tool_use_enforcement
|
||||||
if any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS):
|
_inject = False
|
||||||
|
if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in ("true", "always", "yes", "on")):
|
||||||
|
_inject = True
|
||||||
|
elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in ("false", "never", "no", "off")):
|
||||||
|
_inject = False
|
||||||
|
elif isinstance(_enforce, list):
|
||||||
|
model_lower = (self.model or "").lower()
|
||||||
|
_inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str))
|
||||||
|
else:
|
||||||
|
# "auto" or any unrecognised value — use hardcoded defaults
|
||||||
|
model_lower = (self.model or "").lower()
|
||||||
|
_inject = any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
|
||||||
|
if _inject:
|
||||||
prompt_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
|
prompt_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
|
||||||
|
|
||||||
# Honcho CLI awareness: tell Hermes about its own management commands
|
# Honcho CLI awareness: tell Hermes about its own management commands
|
||||||
|
|||||||
@@ -617,6 +617,132 @@ class TestBuildSystemPrompt:
|
|||||||
assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}
|
assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}
|
||||||
|
|
||||||
|
|
||||||
|
class TestToolUseEnforcementConfig:
    """Exercise each accepted value of the agent.tool_use_enforcement setting."""

    def _make_agent(self, model="openai/gpt-4.1", tool_use_enforcement="auto"):
        """Build an agent that has tools and sees the given enforcement setting.

        ``load_config`` is patched so the only configuration the agent reads
        is the supplied ``tool_use_enforcement`` value in the ``agent`` section.
        """
        fake_tools = _make_tool_defs("terminal", "web_search")
        with (
            patch("run_agent.get_tool_definitions", return_value=fake_tools),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
            patch(
                "hermes_cli.config.load_config",
                return_value={"agent": {"tool_use_enforcement": tool_use_enforcement}},
            ),
        ):
            built = AIAgent(
                model=model,
                api_key="test-key-1234567890",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
            )
            built.client = MagicMock()
            return built

    def _prompt_for(self, model, tool_use_enforcement):
        """Return the system prompt of a fresh agent for the given model and setting."""
        subject = self._make_agent(
            model=model,
            tool_use_enforcement=tool_use_enforcement,
        )
        return subject._build_system_prompt()

    # --- "auto": falls back to the built-in model-name list -----------------

    def test_auto_injects_for_gpt(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        prompt = self._prompt_for("openai/gpt-4.1", "auto")
        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt

    def test_auto_injects_for_codex(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        prompt = self._prompt_for("openai/codex-mini", "auto")
        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt

    def test_auto_skips_for_claude(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        prompt = self._prompt_for("anthropic/claude-sonnet-4", "auto")
        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt

    # --- truthy values: inject regardless of model --------------------------

    def test_true_forces_for_all_models(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        prompt = self._prompt_for("anthropic/claude-sonnet-4", True)
        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt

    def test_string_true_forces_for_all_models(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        prompt = self._prompt_for("anthropic/claude-sonnet-4", "true")
        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt

    def test_always_forces_for_all_models(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        prompt = self._prompt_for("deepseek/deepseek-r1", "always")
        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt

    # --- falsy values: never inject -----------------------------------------

    def test_false_disables_for_gpt(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        prompt = self._prompt_for("openai/gpt-4.1", False)
        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt

    def test_string_false_disables(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        prompt = self._prompt_for("openai/gpt-4.1", "off")
        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt

    # --- list values: custom substrings matched against the model name ------

    def test_custom_list_matches(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        prompt = self._prompt_for("deepseek/deepseek-r1", ["deepseek", "gemini"])
        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt

    def test_custom_list_no_match(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        prompt = self._prompt_for("anthropic/claude-sonnet-4", ["deepseek", "gemini"])
        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt

    def test_custom_list_case_insensitive(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        prompt = self._prompt_for("openai/GPT-4.1", ["GPT", "Codex"])
        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt

    # --- no tools: the setting is irrelevant --------------------------------

    def test_no_tools_never_injects(self):
        """Even with enforcement=true, no injection when agent has no tools."""
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        forced_on = {"agent": {"tool_use_enforcement": True}}
        with (
            patch("run_agent.get_tool_definitions", return_value=[]),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
            patch("hermes_cli.config.load_config", return_value=forced_on),
        ):
            toolless = AIAgent(
                api_key="test-key-1234567890",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
                enabled_toolsets=[],
            )
            toolless.client = MagicMock()
        prompt = toolless._build_system_prompt()
        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
|
||||||
|
|
||||||
|
|
||||||
class TestInvalidateSystemPrompt:
|
class TestInvalidateSystemPrompt:
|
||||||
def test_clears_cache(self, agent):
|
def test_clears_cache(self, agent):
|
||||||
agent._cached_system_prompt = "cached value"
|
agent._cached_system_prompt = "cached value"
|
||||||
|
|||||||
Reference in New Issue
Block a user