Files
hermes-agent/tests/test_model_tools.py
Alexander Whitestone 8c712866c4
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 22s
fix(tools): validate handler return types at dispatch boundary
Fixes #297

Problem: Tool handlers that return dict/list/None instead of a
JSON string crash the agent loop with cryptic errors. No error
proofing at the boundary.
Fix: In handle_function_call(), after dispatch returns:
1. If result is not str → wrap in JSON with _type_warning
2. If result is str but not valid JSON → wrap in {"output": ...}
3. Log type violations for analysis
4. Valid JSON strings pass through unchanged

Tests: 4 new tests (dict, None, non-JSON string, valid JSON).
All 16 tests in test_model_tools.py pass.
2026-04-13 15:47:52 -04:00

215 lines
8.0 KiB
Python

"""Tests for model_tools.py — function call dispatch, agent-loop interception, legacy toolsets."""
import json
from unittest.mock import call, patch
import pytest
from model_tools import (
handle_function_call,
get_all_tool_names,
get_toolset_for_tool,
_AGENT_LOOP_TOOLS,
_LEGACY_TOOLSET_MAP,
TOOL_TO_TOOLSET_MAP,
)
# =========================================================================
# handle_function_call
# =========================================================================
class TestHandleFunctionCall:
def test_agent_loop_tool_returns_error(self):
for tool_name in _AGENT_LOOP_TOOLS:
result = json.loads(handle_function_call(tool_name, {}))
assert "error" in result
assert "agent loop" in result["error"].lower()
def test_unknown_tool_returns_error(self):
result = json.loads(handle_function_call("totally_fake_tool_xyz", {}))
assert "error" in result
assert "totally_fake_tool_xyz" in result["error"]
def test_exception_returns_json_error(self):
# Even if something goes wrong, should return valid JSON
result = handle_function_call("web_search", None) # None args may cause issues
parsed = json.loads(result)
assert isinstance(parsed, dict)
assert "error" in parsed
assert len(parsed["error"]) > 0
assert "error" in parsed["error"].lower() or "failed" in parsed["error"].lower()
def test_tool_hooks_receive_session_and_tool_call_ids(self):
with (
patch("model_tools.registry.dispatch", return_value='{"ok":true}'),
patch("hermes_cli.plugins.invoke_hook") as mock_invoke_hook,
):
result = handle_function_call(
"web_search",
{"q": "test"},
task_id="task-1",
tool_call_id="call-1",
session_id="session-1",
)
assert result == '{"ok":true}'
assert mock_invoke_hook.call_args_list == [
call(
"pre_tool_call",
tool_name="web_search",
args={"q": "test"},
task_id="task-1",
session_id="session-1",
tool_call_id="call-1",
),
call(
"post_tool_call",
tool_name="web_search",
args={"q": "test"},
result='{"ok":true}',
task_id="task-1",
session_id="session-1",
tool_call_id="call-1",
),
]
# =========================================================================
# Agent loop tools
# =========================================================================
class TestAgentLoopTools:
def test_expected_tools_in_set(self):
assert "todo" in _AGENT_LOOP_TOOLS
assert "memory" in _AGENT_LOOP_TOOLS
assert "session_search" in _AGENT_LOOP_TOOLS
assert "delegate_task" in _AGENT_LOOP_TOOLS
def test_no_regular_tools_in_set(self):
assert "web_search" not in _AGENT_LOOP_TOOLS
assert "terminal" not in _AGENT_LOOP_TOOLS
# =========================================================================
# Legacy toolset map
# =========================================================================
class TestLegacyToolsetMap:
def test_expected_legacy_names(self):
expected = [
"web_tools", "terminal_tools", "vision_tools", "moa_tools",
"image_tools", "skills_tools", "browser_tools", "cronjob_tools",
"rl_tools", "file_tools", "tts_tools",
]
for name in expected:
assert name in _LEGACY_TOOLSET_MAP, f"Missing legacy toolset: {name}"
def test_values_are_lists_of_strings(self):
for name, tools in _LEGACY_TOOLSET_MAP.items():
assert isinstance(tools, list), f"{name} is not a list"
for tool in tools:
assert isinstance(tool, str), f"{name} contains non-string: {tool}"
# =========================================================================
# Backward-compat wrappers
# =========================================================================
class TestBackwardCompat:
def test_get_all_tool_names_returns_list(self):
names = get_all_tool_names()
assert isinstance(names, list)
assert len(names) > 0
# Should contain well-known tools
assert "web_search" in names
assert "terminal" in names
def test_get_toolset_for_tool(self):
result = get_toolset_for_tool("web_search")
assert result is not None
assert isinstance(result, str)
def test_get_toolset_for_unknown_tool(self):
result = get_toolset_for_tool("totally_nonexistent_tool")
assert result is None
def test_tool_to_toolset_map(self):
assert isinstance(TOOL_TO_TOOLSET_MAP, dict)
assert len(TOOL_TO_TOOLSET_MAP) > 0
class TestToolReturnTypeValidation:
"""Poka-yoke: tool handlers must return JSON strings."""
def test_handler_returning_dict_is_wrapped(self, monkeypatch):
"""A handler that returns a dict should be auto-wrapped to JSON string."""
from tools.registry import registry
from model_tools import handle_function_call
import json
# Register a bad handler that returns dict instead of str
registry.register(
name="__test_bad_dict",
toolset="test",
schema={"name": "__test_bad_dict", "description": "test", "parameters": {"type": "object", "properties": {}}},
handler=lambda args, **kw: {"this is": "a dict not a string"},
)
result = handle_function_call("__test_bad_dict", {})
parsed = json.loads(result)
assert "output" in parsed
assert "_type_warning" in parsed
# Cleanup
registry._tools.pop("__test_bad_dict", None)
def test_handler_returning_none_is_wrapped(self, monkeypatch):
"""A handler that returns None should be auto-wrapped."""
from tools.registry import registry
from model_tools import handle_function_call
import json
registry.register(
name="__test_bad_none",
toolset="test",
schema={"name": "__test_bad_none", "description": "test", "parameters": {"type": "object", "properties": {}}},
handler=lambda args, **kw: None,
)
result = handle_function_call("__test_bad_none", {})
parsed = json.loads(result)
assert "_type_warning" in parsed
registry._tools.pop("__test_bad_none", None)
def test_handler_returning_non_json_string_is_wrapped(self):
"""A handler returning a plain string (not JSON) should be wrapped."""
from tools.registry import registry
from model_tools import handle_function_call
import json
registry.register(
name="__test_bad_plain",
toolset="test",
schema={"name": "__test_bad_plain", "description": "test", "parameters": {"type": "object", "properties": {}}},
handler=lambda args, **kw: "just a plain string, not json",
)
result = handle_function_call("__test_bad_plain", {})
parsed = json.loads(result)
assert "output" in parsed
registry._tools.pop("__test_bad_plain", None)
def test_handler_returning_valid_json_passes_through(self):
"""A handler returning valid JSON string passes through unchanged."""
from tools.registry import registry
from model_tools import handle_function_call
import json
registry.register(
name="__test_good",
toolset="test",
schema={"name": "__test_good", "description": "test", "parameters": {"type": "object", "properties": {}}},
handler=lambda args, **kw: json.dumps({"status": "ok", "data": [1, 2, 3]}),
)
result = handle_function_call("__test_good", {})
parsed = json.loads(result)
assert parsed == {"status": "ok", "data": [1, 2, 3]}
registry._tools.pop("__test_good", None)