fix: sync execute_code sandbox stubs with real tool schemas

The _TOOL_STUBS dict in code_execution_tool.py was out of sync with the actual tool schemas, causing TypeErrors when the LLM used parameters it sees in its system prompt but the sandbox stubs didn't accept: search_files: - Added missing params: context, offset, output_mode - Fixed target default: 'grep' → 'content' (old value was obsolete) patch: - Added missing params: mode, patch (V4A multi-file patch support) Also added 4 drift-detection tests (TestStubSchemaDrift) that will catch future divergence between stubs and real schemas: - test_stubs_cover_all_schema_params: every schema param in stub - test_stubs_pass_all_params_to_rpc: every stub param sent over RPC - test_search_files_target_uses_current_values: no obsolete values - test_generated_module_accepts_all_params: generated code compiles All 28 tests pass.
2026-03-06 03:40:06 -08:00
parent 8481fdcf08
commit 3982fcf095
2 changed files with 101 additions and 6 deletions
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -298,5 +298,100 @@ except ValueError as e:
        self.assertIn("caught: nope", result["output"])


+class TestStubSchemaDrift(unittest.TestCase):
+    """Verify that _TOOL_STUBS in code_execution_tool.py stay in sync with
+    the real tool schemas registered in tools/registry.py.
+
+    If a tool gains a new parameter but the sandbox stub isn't updated,
+    the LLM will try to use the parameter (it sees it in the system prompt)
+    and get a TypeError.  This test catches that drift.
+    """
+
+    # Parameters that are internal (injected by the handler, not user-facing)
+    _INTERNAL_PARAMS = {"task_id", "user_task"}
+    # Parameters intentionally blocked in the sandbox
+    _BLOCKED_TERMINAL_PARAMS = {"background", "check_interval", "pty"}
+
+    def test_stubs_cover_all_schema_params(self):
+        """Every user-facing parameter in the real schema must appear in the
+        corresponding _TOOL_STUBS entry."""
+        import re
+        from tools.code_execution_tool import _TOOL_STUBS
+
+        # Import the registry and trigger tool registration
+        from tools.registry import registry
+        import tools.file_tools  # noqa: F401 - registers read_file, write_file, patch, search_files
+        import tools.web_tools  # noqa: F401 - registers web_search, web_extract
+
+        for tool_name, (func_name, sig, doc, args_expr) in _TOOL_STUBS.items():
+            entry = registry._tools.get(tool_name)
+            if not entry:
+                # Tool might not be registered yet (e.g., terminal uses a
+                # different registration path).  Skip gracefully.
+                continue
+
+            schema_props = entry.schema.get("parameters", {}).get("properties", {})
+            schema_params = set(schema_props.keys()) - self._INTERNAL_PARAMS
+            if tool_name == "terminal":
+                schema_params -= self._BLOCKED_TERMINAL_PARAMS
+
+            # Extract parameter names from the stub signature string
+            # Match word before colon: "pattern: str, target: str = ..."
+            stub_params = set(re.findall(r'(\w+)\s*:', sig))
+
+            missing = schema_params - stub_params
+            self.assertEqual(
+                missing, set(),
+                f"Stub for '{tool_name}' is missing parameters that exist in "
+                f"the real schema: {missing}. Update _TOOL_STUBS in "
+                f"code_execution_tool.py to include them."
+            )
+
+    def test_stubs_pass_all_params_to_rpc(self):
+        """The args_dict_expr in each stub must include every parameter from
+        the signature, so that all params are actually sent over RPC."""
+        import re
+        from tools.code_execution_tool import _TOOL_STUBS
+
+        for tool_name, (func_name, sig, doc, args_expr) in _TOOL_STUBS.items():
+            stub_params = set(re.findall(r'(\w+)\s*:', sig))
+            # Check that each param name appears in the args dict expression
+            for param in stub_params:
+                self.assertIn(
+                    f'"{param}"',
+                    args_expr,
+                    f"Stub for '{tool_name}' has parameter '{param}' in its "
+                    f"signature but doesn't pass it in the args dict: {args_expr}"
+                )
+
+    def test_search_files_target_uses_current_values(self):
+        """search_files stub should use 'content'/'files', not old 'grep'/'find'."""
+        from tools.code_execution_tool import _TOOL_STUBS
+        _, sig, doc, _ = _TOOL_STUBS["search_files"]
+        self.assertIn('"content"', sig,
+                      "search_files stub should default target to 'content', not 'grep'")
+        self.assertNotIn('"grep"', sig,
+                         "search_files stub still uses obsolete 'grep' target value")
+        self.assertNotIn('"find"', doc,
+                         "search_files stub docstring still uses obsolete 'find' target value")
+
+    def test_generated_module_accepts_all_params(self):
+        """The generated hermes_tools.py module should accept all current params
+        without TypeError when called with keyword arguments."""
+        src = generate_hermes_tools_module(list(SANDBOX_ALLOWED_TOOLS))
+
+        # Compile the generated module to check for syntax errors
+        compile(src, "hermes_tools.py", "exec")
+
+        # Verify specific parameter signatures are in the source
+        # search_files must accept context, offset, output_mode
+        self.assertIn("context", src)
+        self.assertIn("offset", src)
+        self.assertIn("output_mode", src)
+
+        # patch must accept mode and patch params
+        self.assertIn("mode", src)
+
+
 if __name__ == "__main__":
    unittest.main()
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -95,15 +95,15 @@ _TOOL_STUBS = {
    ),
    "search_files": (
        "search_files",
-        'pattern: str, target: str = "grep", path: str = ".", file_glob: str = None, limit: int = 50',
-        '"""Search file contents (target="grep") or find files by name (target="find"). Returns dict with "matches"."""',
-        '{"pattern": pattern, "target": target, "path": path, "file_glob": file_glob, "limit": limit}',
+        'pattern: str, target: str = "content", path: str = ".", file_glob: str = None, limit: int = 50, offset: int = 0, output_mode: str = "content", context: int = 0',
+        '"""Search file contents (target="content") or find files by name (target="files"). Returns dict with "matches"."""',
+        '{"pattern": pattern, "target": target, "path": path, "file_glob": file_glob, "limit": limit, "offset": offset, "output_mode": output_mode, "context": context}',
    ),
    "patch": (
        "patch",
-        "path: str, old_string: str, new_string: str, replace_all: bool = False",
-        '"""Replace old_string with new_string in a file. Returns dict with status."""',
-        '{"path": path, "old_string": old_string, "new_string": new_string, "replace_all": replace_all}',
+        'path: str = None, old_string: str = None, new_string: str = None, replace_all: bool = False, mode: str = "replace", patch: str = None',
+        '"""Targeted find-and-replace (mode="replace") or V4A multi-file patches (mode="patch"). Returns dict with status."""',
+        '{"path": path, "old_string": old_string, "new_string": new_string, "replace_all": replace_all, "mode": mode, "patch": patch}',
    ),
    "terminal": (
        "terminal",