fix: sync execute_code sandbox stubs with real tool schemas
The _TOOL_STUBS dict in code_execution_tool.py was out of sync with the actual tool schemas, causing TypeErrors when the LLM used parameters it sees in its system prompt but the sandbox stubs didn't accept: search_files: - Added missing params: context, offset, output_mode - Fixed target default: 'grep' → 'content' (old value was obsolete) patch: - Added missing params: mode, patch (V4A multi-file patch support) Also added 4 drift-detection tests (TestStubSchemaDrift) that will catch future divergence between stubs and real schemas: - test_stubs_cover_all_schema_params: every schema param in stub - test_stubs_pass_all_params_to_rpc: every stub param sent over RPC - test_search_files_target_uses_current_values: no obsolete values - test_generated_module_accepts_all_params: generated code compiles All 28 tests pass.
This commit is contained in:
@@ -298,5 +298,100 @@ except ValueError as e:
|
||||
self.assertIn("caught: nope", result["output"])
|
||||
|
||||
|
||||
class TestStubSchemaDrift(unittest.TestCase):
|
||||
"""Verify that _TOOL_STUBS in code_execution_tool.py stay in sync with
|
||||
the real tool schemas registered in tools/registry.py.
|
||||
|
||||
If a tool gains a new parameter but the sandbox stub isn't updated,
|
||||
the LLM will try to use the parameter (it sees it in the system prompt)
|
||||
and get a TypeError. This test catches that drift.
|
||||
"""
|
||||
|
||||
# Parameters that are internal (injected by the handler, not user-facing)
|
||||
_INTERNAL_PARAMS = {"task_id", "user_task"}
|
||||
# Parameters intentionally blocked in the sandbox
|
||||
_BLOCKED_TERMINAL_PARAMS = {"background", "check_interval", "pty"}
|
||||
|
||||
def test_stubs_cover_all_schema_params(self):
|
||||
"""Every user-facing parameter in the real schema must appear in the
|
||||
corresponding _TOOL_STUBS entry."""
|
||||
import re
|
||||
from tools.code_execution_tool import _TOOL_STUBS
|
||||
|
||||
# Import the registry and trigger tool registration
|
||||
from tools.registry import registry
|
||||
import tools.file_tools # noqa: F401 - registers read_file, write_file, patch, search_files
|
||||
import tools.web_tools # noqa: F401 - registers web_search, web_extract
|
||||
|
||||
for tool_name, (func_name, sig, doc, args_expr) in _TOOL_STUBS.items():
|
||||
entry = registry._tools.get(tool_name)
|
||||
if not entry:
|
||||
# Tool might not be registered yet (e.g., terminal uses a
|
||||
# different registration path). Skip gracefully.
|
||||
continue
|
||||
|
||||
schema_props = entry.schema.get("parameters", {}).get("properties", {})
|
||||
schema_params = set(schema_props.keys()) - self._INTERNAL_PARAMS
|
||||
if tool_name == "terminal":
|
||||
schema_params -= self._BLOCKED_TERMINAL_PARAMS
|
||||
|
||||
# Extract parameter names from the stub signature string
|
||||
# Match word before colon: "pattern: str, target: str = ..."
|
||||
stub_params = set(re.findall(r'(\w+)\s*:', sig))
|
||||
|
||||
missing = schema_params - stub_params
|
||||
self.assertEqual(
|
||||
missing, set(),
|
||||
f"Stub for '{tool_name}' is missing parameters that exist in "
|
||||
f"the real schema: {missing}. Update _TOOL_STUBS in "
|
||||
f"code_execution_tool.py to include them."
|
||||
)
|
||||
|
||||
def test_stubs_pass_all_params_to_rpc(self):
|
||||
"""The args_dict_expr in each stub must include every parameter from
|
||||
the signature, so that all params are actually sent over RPC."""
|
||||
import re
|
||||
from tools.code_execution_tool import _TOOL_STUBS
|
||||
|
||||
for tool_name, (func_name, sig, doc, args_expr) in _TOOL_STUBS.items():
|
||||
stub_params = set(re.findall(r'(\w+)\s*:', sig))
|
||||
# Check that each param name appears in the args dict expression
|
||||
for param in stub_params:
|
||||
self.assertIn(
|
||||
f'"{param}"',
|
||||
args_expr,
|
||||
f"Stub for '{tool_name}' has parameter '{param}' in its "
|
||||
f"signature but doesn't pass it in the args dict: {args_expr}"
|
||||
)
|
||||
|
||||
def test_search_files_target_uses_current_values(self):
|
||||
"""search_files stub should use 'content'/'files', not old 'grep'/'find'."""
|
||||
from tools.code_execution_tool import _TOOL_STUBS
|
||||
_, sig, doc, _ = _TOOL_STUBS["search_files"]
|
||||
self.assertIn('"content"', sig,
|
||||
"search_files stub should default target to 'content', not 'grep'")
|
||||
self.assertNotIn('"grep"', sig,
|
||||
"search_files stub still uses obsolete 'grep' target value")
|
||||
self.assertNotIn('"find"', doc,
|
||||
"search_files stub docstring still uses obsolete 'find' target value")
|
||||
|
||||
def test_generated_module_accepts_all_params(self):
|
||||
"""The generated hermes_tools.py module should accept all current params
|
||||
without TypeError when called with keyword arguments."""
|
||||
src = generate_hermes_tools_module(list(SANDBOX_ALLOWED_TOOLS))
|
||||
|
||||
# Compile the generated module to check for syntax errors
|
||||
compile(src, "hermes_tools.py", "exec")
|
||||
|
||||
# Verify specific parameter signatures are in the source
|
||||
# search_files must accept context, offset, output_mode
|
||||
self.assertIn("context", src)
|
||||
self.assertIn("offset", src)
|
||||
self.assertIn("output_mode", src)
|
||||
|
||||
# patch must accept mode and patch params
|
||||
self.assertIn("mode", src)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -95,15 +95,15 @@ _TOOL_STUBS = {
|
||||
),
|
||||
"search_files": (
|
||||
"search_files",
|
||||
'pattern: str, target: str = "grep", path: str = ".", file_glob: str = None, limit: int = 50',
|
||||
'"""Search file contents (target="grep") or find files by name (target="find"). Returns dict with "matches"."""',
|
||||
'{"pattern": pattern, "target": target, "path": path, "file_glob": file_glob, "limit": limit}',
|
||||
'pattern: str, target: str = "content", path: str = ".", file_glob: str = None, limit: int = 50, offset: int = 0, output_mode: str = "content", context: int = 0',
|
||||
'"""Search file contents (target="content") or find files by name (target="files"). Returns dict with "matches"."""',
|
||||
'{"pattern": pattern, "target": target, "path": path, "file_glob": file_glob, "limit": limit, "offset": offset, "output_mode": output_mode, "context": context}',
|
||||
),
|
||||
"patch": (
|
||||
"patch",
|
||||
"path: str, old_string: str, new_string: str, replace_all: bool = False",
|
||||
'"""Replace old_string with new_string in a file. Returns dict with status."""',
|
||||
'{"path": path, "old_string": old_string, "new_string": new_string, "replace_all": replace_all}',
|
||||
'path: str = None, old_string: str = None, new_string: str = None, replace_all: bool = False, mode: str = "replace", patch: str = None',
|
||||
'"""Targeted find-and-replace (mode="replace") or V4A multi-file patches (mode="patch"). Returns dict with status."""',
|
||||
'{"path": path, "old_string": old_string, "new_string": new_string, "replace_all": replace_all, "mode": mode, "patch": patch}',
|
||||
),
|
||||
"terminal": (
|
||||
"terminal",
|
||||
|
||||
Reference in New Issue
Block a user