fix: sync execute_code sandbox stubs with real tool schemas

The _TOOL_STUBS dict in code_execution_tool.py was out of sync with the
actual tool schemas, causing TypeErrors when the LLM used parameters it
sees in its system prompt but the sandbox stubs didn't accept:

search_files:
  - Added missing params: context, offset, output_mode
  - Fixed target default: 'grep' → 'content' (old value was obsolete)

patch:
  - Added missing params: mode, patch (V4A multi-file patch support)

Also added 4 drift-detection tests (TestStubSchemaDrift) that will
catch future divergence between stubs and real schemas:
  - test_stubs_cover_all_schema_params: every schema param in stub
  - test_stubs_pass_all_params_to_rpc: every stub param sent over RPC
  - test_search_files_target_uses_current_values: no obsolete values
  - test_generated_module_accepts_all_params: generated code compiles

All 28 tests pass.
This commit is contained in:
teknium1
2026-03-06 03:40:06 -08:00
parent 8481fdcf08
commit 3982fcf095
2 changed files with 101 additions and 6 deletions

View File

@@ -298,5 +298,100 @@ except ValueError as e:
self.assertIn("caught: nope", result["output"])
class TestStubSchemaDrift(unittest.TestCase):
"""Verify that _TOOL_STUBS in code_execution_tool.py stay in sync with
the real tool schemas registered in tools/registry.py.
If a tool gains a new parameter but the sandbox stub isn't updated,
the LLM will try to use the parameter (it sees it in the system prompt)
and get a TypeError. This test catches that drift.
"""
# Parameters that are internal (injected by the handler, not user-facing)
_INTERNAL_PARAMS = {"task_id", "user_task"}
# Parameters intentionally blocked in the sandbox
_BLOCKED_TERMINAL_PARAMS = {"background", "check_interval", "pty"}
def test_stubs_cover_all_schema_params(self):
"""Every user-facing parameter in the real schema must appear in the
corresponding _TOOL_STUBS entry."""
import re
from tools.code_execution_tool import _TOOL_STUBS
# Import the registry and trigger tool registration
from tools.registry import registry
import tools.file_tools # noqa: F401 - registers read_file, write_file, patch, search_files
import tools.web_tools # noqa: F401 - registers web_search, web_extract
for tool_name, (func_name, sig, doc, args_expr) in _TOOL_STUBS.items():
entry = registry._tools.get(tool_name)
if not entry:
# Tool might not be registered yet (e.g., terminal uses a
# different registration path). Skip gracefully.
continue
schema_props = entry.schema.get("parameters", {}).get("properties", {})
schema_params = set(schema_props.keys()) - self._INTERNAL_PARAMS
if tool_name == "terminal":
schema_params -= self._BLOCKED_TERMINAL_PARAMS
# Extract parameter names from the stub signature string
# Match word before colon: "pattern: str, target: str = ..."
stub_params = set(re.findall(r'(\w+)\s*:', sig))
missing = schema_params - stub_params
self.assertEqual(
missing, set(),
f"Stub for '{tool_name}' is missing parameters that exist in "
f"the real schema: {missing}. Update _TOOL_STUBS in "
f"code_execution_tool.py to include them."
)
def test_stubs_pass_all_params_to_rpc(self):
"""The args_dict_expr in each stub must include every parameter from
the signature, so that all params are actually sent over RPC."""
import re
from tools.code_execution_tool import _TOOL_STUBS
for tool_name, (func_name, sig, doc, args_expr) in _TOOL_STUBS.items():
stub_params = set(re.findall(r'(\w+)\s*:', sig))
# Check that each param name appears in the args dict expression
for param in stub_params:
self.assertIn(
f'"{param}"',
args_expr,
f"Stub for '{tool_name}' has parameter '{param}' in its "
f"signature but doesn't pass it in the args dict: {args_expr}"
)
def test_search_files_target_uses_current_values(self):
"""search_files stub should use 'content'/'files', not old 'grep'/'find'."""
from tools.code_execution_tool import _TOOL_STUBS
_, sig, doc, _ = _TOOL_STUBS["search_files"]
self.assertIn('"content"', sig,
"search_files stub should default target to 'content', not 'grep'")
self.assertNotIn('"grep"', sig,
"search_files stub still uses obsolete 'grep' target value")
self.assertNotIn('"find"', doc,
"search_files stub docstring still uses obsolete 'find' target value")
def test_generated_module_accepts_all_params(self):
"""The generated hermes_tools.py module should accept all current params
without TypeError when called with keyword arguments."""
src = generate_hermes_tools_module(list(SANDBOX_ALLOWED_TOOLS))
# Compile the generated module to check for syntax errors
compile(src, "hermes_tools.py", "exec")
# Verify specific parameter signatures are in the source
# search_files must accept context, offset, output_mode
self.assertIn("context", src)
self.assertIn("offset", src)
self.assertIn("output_mode", src)
# patch must accept mode and patch params
self.assertIn("mode", src)
if __name__ == "__main__":
unittest.main()