feat: make tool-use enforcement configurable via agent.tool_use_enforcement (#3551)

The TOOL_USE_ENFORCEMENT_GUIDANCE injection (added in #3528) was
hardcoded to only match gpt/codex model names. This makes it a
config option so users can turn it on for any model family.

New config key: agent.tool_use_enforcement
  - "auto" (default): matches gpt/codex (existing behavior)
  - true: inject for all models
  - false: never inject
  - list of strings: custom model-name substrings to match
    e.g. ["gpt", "codex", "gemini", "qwen"]

No version bump needed — deep merge provides the default
automatically for existing installs.

12 new tests covering all config modes.
This commit is contained in:
Teknium
2026-03-28 12:31:22 -07:00
committed by GitHub
parent d26ee20659
commit 901494d728
3 changed files with 160 additions and 7 deletions

View File

@@ -138,6 +138,12 @@ DEFAULT_CONFIG = {
"toolsets": ["hermes-cli"], "toolsets": ["hermes-cli"],
"agent": { "agent": {
"max_turns": 90, "max_turns": 90,
# Tool-use enforcement: injects system prompt guidance that tells the
# model to actually call tools instead of describing intended actions.
# Values: "auto" (default — applies to gpt/codex models), true/false
# (force on/off for all models), or a list of model-name substrings
# to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
"tool_use_enforcement": "auto",
}, },
"terminal": { "terminal": {

View File

@@ -1080,6 +1080,13 @@ class AIAgent:
except Exception: except Exception:
pass pass
# Tool-use enforcement config: "auto" (default — matches hardcoded
# model list), true (always), false (never), or list of substrings.
_agent_section = _agent_cfg.get("agent", {})
if not isinstance(_agent_section, dict):
_agent_section = {}
self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
# Initialize context compressor for automatic context management # Initialize context compressor for automatic context management
# Compresses conversation when approaching model's context limit # Compresses conversation when approaching model's context limit
# Configuration via config.yaml (compression section) # Configuration via config.yaml (compression section)
@@ -2510,14 +2517,28 @@ class AIAgent:
if tool_guidance: if tool_guidance:
prompt_parts.append(" ".join(tool_guidance)) prompt_parts.append(" ".join(tool_guidance))
# Some model families benefit from explicit tool-use enforcement. # Tool-use enforcement: tells the model to actually call tools instead
# Without this, they tend to describe intended actions as text # of describing intended actions. Controlled by config.yaml
# ("I will run the tests") instead of actually making tool calls. # agent.tool_use_enforcement:
# TOOL_USE_ENFORCEMENT_MODELS is a tuple of substrings to match. # "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
# Inject only when the model has tools available. # true — always inject (all models)
# false — never inject
# list — custom model-name substrings to match
if self.valid_tool_names: if self.valid_tool_names:
model_lower = (self.model or "").lower() _enforce = self._tool_use_enforcement
if any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS): _inject = False
if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in ("true", "always", "yes", "on")):
_inject = True
elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in ("false", "never", "no", "off")):
_inject = False
elif isinstance(_enforce, list):
model_lower = (self.model or "").lower()
_inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str))
else:
# "auto" or any unrecognised value — use hardcoded defaults
model_lower = (self.model or "").lower()
_inject = any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
if _inject:
prompt_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE) prompt_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
# Honcho CLI awareness: tell Hermes about its own management commands # Honcho CLI awareness: tell Hermes about its own management commands

View File

@@ -617,6 +617,132 @@ class TestBuildSystemPrompt:
assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"} assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}
class TestToolUseEnforcementConfig:
    """Tests for the agent.tool_use_enforcement config option.

    Covers every config mode: "auto" (hardcoded gpt/codex substring
    matching), boolean True/False, their string spellings ("true",
    "always", "off", ...), and a custom list of model-name substrings.
    Also verifies that nothing is injected when the agent has no tools.
    """

    def _make_agent(self, model="openai/gpt-4.1", tool_use_enforcement="auto"):
        """Create an agent with tools and a specific enforcement config."""
        with (
            patch(
                "run_agent.get_tool_definitions",
                return_value=_make_tool_defs("terminal", "web_search"),
            ),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
            patch(
                "hermes_cli.config.load_config",
                return_value={"agent": {"tool_use_enforcement": tool_use_enforcement}},
            ),
        ):
            agent = AIAgent(
                model=model,
                api_key="test-key-1234567890",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
            )
            agent.client = MagicMock()
            return agent

    def _guidance_injected(self, model, tool_use_enforcement):
        """Build the system prompt; return True if guidance was injected.

        Shared driver for all the mode tests below — each test differs
        only in the (model, config value) pair and the expected outcome.
        """
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        agent = self._make_agent(model=model, tool_use_enforcement=tool_use_enforcement)
        return TOOL_USE_ENFORCEMENT_GUIDANCE in agent._build_system_prompt()

    def test_auto_injects_for_gpt(self):
        assert self._guidance_injected("openai/gpt-4.1", "auto")

    def test_auto_injects_for_codex(self):
        assert self._guidance_injected("openai/codex-mini", "auto")

    def test_auto_skips_for_claude(self):
        assert not self._guidance_injected("anthropic/claude-sonnet-4", "auto")

    def test_true_forces_for_all_models(self):
        assert self._guidance_injected("anthropic/claude-sonnet-4", True)

    def test_string_true_forces_for_all_models(self):
        assert self._guidance_injected("anthropic/claude-sonnet-4", "true")

    def test_always_forces_for_all_models(self):
        assert self._guidance_injected("deepseek/deepseek-r1", "always")

    def test_false_disables_for_gpt(self):
        assert not self._guidance_injected("openai/gpt-4.1", False)

    def test_string_false_disables(self):
        assert not self._guidance_injected("openai/gpt-4.1", "off")

    def test_custom_list_matches(self):
        assert self._guidance_injected("deepseek/deepseek-r1", ["deepseek", "gemini"])

    def test_custom_list_no_match(self):
        assert not self._guidance_injected(
            "anthropic/claude-sonnet-4", ["deepseek", "gemini"]
        )

    def test_custom_list_case_insensitive(self):
        # Both the configured substrings and the model name are lowercased
        # before matching, so mixed-case values on either side still match.
        assert self._guidance_injected("openai/GPT-4.1", ["GPT", "Codex"])

    def test_no_tools_never_injects(self):
        """Even with enforcement=true, no injection when agent has no tools."""
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        with (
            patch("run_agent.get_tool_definitions", return_value=[]),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
            patch(
                "hermes_cli.config.load_config",
                return_value={"agent": {"tool_use_enforcement": True}},
            ),
        ):
            agent = AIAgent(
                api_key="test-key-1234567890",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
                enabled_toolsets=[],
            )
            agent.client = MagicMock()
            prompt = agent._build_system_prompt()
        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
class TestInvalidateSystemPrompt: class TestInvalidateSystemPrompt:
def test_clears_cache(self, agent): def test_clears_cache(self, agent):
agent._cached_system_prompt = "cached value" agent._cached_system_prompt = "cached value"