From 3982fcf0951719b0a17fab3f139755758a99bf4e Mon Sep 17 00:00:00 2001 From: teknium1 Date: Fri, 6 Mar 2026 03:40:06 -0800 Subject: [PATCH] fix: sync execute_code sandbox stubs with real tool schemas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _TOOL_STUBS dict in code_execution_tool.py was out of sync with the actual tool schemas, causing TypeErrors when the LLM used parameters it sees in its system prompt but the sandbox stubs didn't accept: search_files: - Added missing params: context, offset, output_mode - Fixed target default: 'grep' → 'content' (old value was obsolete) patch: - Added missing params: mode, patch (V4A multi-file patch support) Also added 4 drift-detection tests (TestStubSchemaDrift) that will catch future divergence between stubs and real schemas: - test_stubs_cover_all_schema_params: every schema param in stub - test_stubs_pass_all_params_to_rpc: every stub param sent over RPC - test_search_files_target_uses_current_values: no obsolete values - test_generated_module_accepts_all_params: generated code compiles All 28 tests pass. --- tests/tools/test_code_execution.py | 95 ++++++++++++++++++++++++++++++ tools/code_execution_tool.py | 12 ++-- 2 files changed, 101 insertions(+), 6 deletions(-) diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py index 84f9db180..70ecbe904 100644 --- a/tests/tools/test_code_execution.py +++ b/tests/tools/test_code_execution.py @@ -298,5 +298,100 @@ except ValueError as e: self.assertIn("caught: nope", result["output"]) +class TestStubSchemaDrift(unittest.TestCase): + """Verify that _TOOL_STUBS in code_execution_tool.py stay in sync with + the real tool schemas registered in tools/registry.py. + + If a tool gains a new parameter but the sandbox stub isn't updated, + the LLM will try to use the parameter (it sees it in the system prompt) + and get a TypeError. This test catches that drift. + """ + + # Parameters that are internal (injected by the handler, not user-facing) + _INTERNAL_PARAMS = {"task_id", "user_task"} + # Parameters intentionally blocked in the sandbox + _BLOCKED_TERMINAL_PARAMS = {"background", "check_interval", "pty"} + + def test_stubs_cover_all_schema_params(self): + """Every user-facing parameter in the real schema must appear in the + corresponding _TOOL_STUBS entry.""" + import re + from tools.code_execution_tool import _TOOL_STUBS + + # Import the registry and trigger tool registration + from tools.registry import registry + import tools.file_tools # noqa: F401 - registers read_file, write_file, patch, search_files + import tools.web_tools # noqa: F401 - registers web_search, web_extract + + for tool_name, (func_name, sig, doc, args_expr) in _TOOL_STUBS.items(): + entry = registry._tools.get(tool_name) + if not entry: + # Tool might not be registered yet (e.g., terminal uses a + # different registration path). Skip gracefully. + continue + + schema_props = entry.schema.get("parameters", {}).get("properties", {}) + schema_params = set(schema_props.keys()) - self._INTERNAL_PARAMS + if tool_name == "terminal": + schema_params -= self._BLOCKED_TERMINAL_PARAMS + + # Extract parameter names from the stub signature string + # Match word before colon: "pattern: str, target: str = ..." + stub_params = set(re.findall(r'(\w+)\s*:', sig)) + + missing = schema_params - stub_params + self.assertEqual( + missing, set(), + f"Stub for '{tool_name}' is missing parameters that exist in " + f"the real schema: {missing}. Update _TOOL_STUBS in " + f"code_execution_tool.py to include them." + ) + + def test_stubs_pass_all_params_to_rpc(self): + """The args_dict_expr in each stub must include every parameter from + the signature, so that all params are actually sent over RPC.""" + import re + from tools.code_execution_tool import _TOOL_STUBS + + for tool_name, (func_name, sig, doc, args_expr) in _TOOL_STUBS.items(): + stub_params = set(re.findall(r'(\w+)\s*:', sig)) + # Check that each param name appears in the args dict expression + for param in stub_params: + self.assertIn( + f'"{param}"', + args_expr, + f"Stub for '{tool_name}' has parameter '{param}' in its " + f"signature but doesn't pass it in the args dict: {args_expr}" + ) + + def test_search_files_target_uses_current_values(self): + """search_files stub should use 'content'/'files', not old 'grep'/'find'.""" + from tools.code_execution_tool import _TOOL_STUBS + _, sig, doc, _ = _TOOL_STUBS["search_files"] + self.assertIn('"content"', sig, + "search_files stub should default target to 'content', not 'grep'") + self.assertNotIn('"grep"', sig, + "search_files stub still uses obsolete 'grep' target value") + self.assertNotIn('"find"', doc, + "search_files stub docstring still uses obsolete 'find' target value") + + def test_generated_module_accepts_all_params(self): + """The generated hermes_tools.py module should accept all current params + without TypeError when called with keyword arguments.""" + src = generate_hermes_tools_module(list(SANDBOX_ALLOWED_TOOLS)) + + # Compile the generated module to check for syntax errors + compile(src, "hermes_tools.py", "exec") + + # Verify specific parameter signatures are in the source + # search_files must accept context, offset, output_mode + self.assertIn("context", src) + self.assertIn("offset", src) + self.assertIn("output_mode", src) + + # patch must accept mode and patch params + self.assertIn("mode", src) + + if __name__ == "__main__": unittest.main() diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 442ec9402..572d0a89d 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -95,15 +95,15 @@ _TOOL_STUBS = { ), "search_files": ( "search_files", - 'pattern: str, target: str = "grep", path: str = ".", file_glob: str = None, limit: int = 50', - '"""Search file contents (target="grep") or find files by name (target="find"). Returns dict with "matches"."""', - '{"pattern": pattern, "target": target, "path": path, "file_glob": file_glob, "limit": limit}', + 'pattern: str, target: str = "content", path: str = ".", file_glob: str = None, limit: int = 50, offset: int = 0, output_mode: str = "content", context: int = 0', + '"""Search file contents (target="content") or find files by name (target="files"). Returns dict with "matches"."""', + '{"pattern": pattern, "target": target, "path": path, "file_glob": file_glob, "limit": limit, "offset": offset, "output_mode": output_mode, "context": context}', ), "patch": ( "patch", - "path: str, old_string: str, new_string: str, replace_all: bool = False", - '"""Replace old_string with new_string in a file. Returns dict with status."""', - '{"path": path, "old_string": old_string, "new_string": new_string, "replace_all": replace_all}', + 'path: str = None, old_string: str = None, new_string: str = None, replace_all: bool = False, mode: str = "replace", patch: str = None', + '"""Targeted find-and-replace (mode="replace") or V4A multi-file patches (mode="patch"). Returns dict with status."""', + '{"path": path, "old_string": old_string, "new_string": new_string, "replace_all": replace_all, "mode": mode, "patch": patch}', ), "terminal": ( "terminal",