#!/usr/bin/env python3
"""
Tests for build_execute_code_schema, environment variable filtering,
and other untested code paths in code_execution_tool.py.

Run with: python -m pytest tests/tools/test_code_execution_schema.py -v
"""

import json
import os
import sys
import unittest
from unittest.mock import patch, MagicMock

from tools.code_execution_tool import (
    SANDBOX_ALLOWED_TOOLS,
    build_execute_code_schema,
    execute_code,
    check_sandbox_requirements,
    _TOOL_DOC_LINES,
    DEFAULT_TIMEOUT,
    DEFAULT_MAX_TOOL_CALLS,
)


# ---------------------------------------------------------------------------
# build_execute_code_schema
# ---------------------------------------------------------------------------

class TestBuildExecuteCodeSchema(unittest.TestCase):
    """Checks on the schema dict returned by build_execute_code_schema."""

    @staticmethod
    def _tool_description(*schema_args):
        """Build a schema from *schema_args* and return its top-level description."""
        return build_execute_code_schema(*schema_args)["description"]

    @staticmethod
    def _code_description(enabled_tools):
        """Build a schema for *enabled_tools* and return the ``code`` property description."""
        built = build_execute_code_schema(enabled_tools)
        return built["parameters"]["properties"]["code"]["description"]

    def test_default_includes_all_tools(self):
        description = self._tool_description()
        for tool_name, _doc in _TOOL_DOC_LINES:
            self.assertIn(
                tool_name,
                description,
                f"Default schema should mention '{tool_name}'",
            )

    def test_schema_structure(self):
        built = build_execute_code_schema()
        self.assertEqual(built["name"], "execute_code")
        self.assertIn("parameters", built)
        parameters = built["parameters"]
        self.assertIn("code", parameters["properties"])
        self.assertEqual(parameters["required"], ["code"])

    def test_subset_only_lists_enabled_tools(self):
        description = self._tool_description({"terminal", "read_file"})
        for listed in ("terminal(", "read_file("):
            self.assertIn(listed, description)
        for omitted in ("web_search(", "web_extract(", "write_file("):
            self.assertNotIn(omitted, description)

    def test_single_tool(self):
        description = self._tool_description({"terminal"})
        self.assertIn("terminal(", description)
        self.assertNotIn("web_search(", description)

    def test_import_examples_prefer_web_search_and_terminal(self):
        code_text = self._code_description({"web_search", "terminal", "read_file"})
        for preferred in ("web_search", "terminal"):
            self.assertIn(preferred, code_text)

    def test_import_examples_fallback_when_no_preferred(self):
        """With neither web_search nor terminal enabled, the example uses
        the first two tools in sorted order."""
        code_text = self._code_description({"read_file", "write_file", "patch"})
        # Sorted first two of the enabled set: patch, read_file
        for fallback in ("patch", "read_file"):
            self.assertIn(fallback, code_text)

    def test_empty_set_produces_valid_description(self):
        """Regression guard: an empty enabled set used to yield
        'import , ...' in the code property description."""
        code_text = self._code_description(set())
        # No bare comma before the ellipsis, i.e. not
        # "from hermes_tools import , ..."
        self.assertNotIn(
            "import , ...",
            code_text,
            "Empty enabled set produces broken import syntax in description",
        )

    def test_real_scenario_all_sandbox_tools_disabled(self):
        """Reproduce the code path from model_tools.py:231-234.

        Scenario: user runs `hermes tools code_execution` (only the
        code_execution toolset enabled), so tools_to_include is
        {"execute_code"}. model_tools.py then does:

            sandbox_enabled = SANDBOX_ALLOWED_TOOLS & tools_to_include
            dynamic_schema = build_execute_code_schema(sandbox_enabled)

        "execute_code" is not one of the sandbox tools, so the
        intersection is the empty set. Passing set() to
        build_execute_code_schema produced
        "from hermes_tools import , ..." before the fix.
        """
        # Simulate model_tools.py:233
        requested = {"execute_code"}
        overlap = SANDBOX_ALLOWED_TOOLS & requested
        self.assertEqual(
            overlap,
            set(),
            "Intersection should be empty when only execute_code is enabled",
        )

        code_text = self._code_description(overlap)
        self.assertNotIn(
            "import , ...",
            code_text,
            "Bug: broken import syntax sent to the model",
        )

    def test_real_scenario_only_vision_enabled(self):
        """Another real path: user runs `hermes tools code_execution,vision`.

        tools_to_include = {"execute_code", "vision_analyze"}; neither is a
        sandbox tool, so the intersection is empty again.
        """
        requested = {"execute_code", "vision_analyze"}
        overlap = SANDBOX_ALLOWED_TOOLS & requested
        self.assertEqual(overlap, set())

        code_text = self._code_description(overlap)
        self.assertNotIn("import , ...", code_text)

    def test_description_mentions_limits(self):
        description = self._tool_description()
        for limit_text in ("5-minute timeout", "50KB", "50 tool calls"):
            self.assertIn(limit_text, description)

    def test_description_mentions_helpers(self):
        description = self._tool_description()
        for helper_name in ("json_parse", "shell_quote", "retry"):
            self.assertIn(helper_name, description)

    def test_none_defaults_to_all_tools(self):
        from_none = self._tool_description(None)
        from_all = self._tool_description(SANDBOX_ALLOWED_TOOLS)
        self.assertEqual(from_none, from_all)


# ---------------------------------------------------------------------------
# Environment variable filtering (security critical)
# ---------------------------------------------------------------------------

@unittest.skipIf(sys.platform == "win32", "UDS not available on Windows")
class TestEnvVarFiltering(unittest.TestCase):
    """Verify that execute_code filters environment variables correctly.

    The child process should NOT receive API keys, tokens, or secrets.
    It should receive safe vars like PATH, HOME, LANG, etc.
    """

    def _get_child_env(self, extra_env=None):
        """Run a script that dumps its environment and return the env dict.

        Args:
            extra_env: Optional mapping of variables added to ``os.environ``
                for the duration of the ``execute_code`` call.

        Returns:
            The child's environment, decoded from the JSON the script prints.
        """
        code = (
            "import os, json\n"
            "print(json.dumps(dict(os.environ)))\n"
        )
        # patch.dict snapshots os.environ and restores it on exit, even when
        # execute_code raises, so no variable leaks into other tests.
        with patch.dict(os.environ, extra_env or {}), \
             patch("model_tools.handle_function_call", return_value='{}'), \
             patch("tools.code_execution_tool._load_config",
                   return_value={"timeout": 10, "max_tool_calls": 50}):
            raw = execute_code(code, task_id="test-env",
                               enabled_tools=list(SANDBOX_ALLOWED_TOOLS))

        result = json.loads(raw)
        self.assertEqual(result["status"], "success", result.get("error", ""))
        return json.loads(result["output"].strip())

    def test_api_keys_excluded(self):
        child_env = self._get_child_env({
            "OPENAI_API_KEY": "sk-secret123",
            "ANTHROPIC_API_KEY": "sk-ant-secret",
            "FIRECRAWL_API_KEY": "fc-secret",
        })
        self.assertNotIn("OPENAI_API_KEY", child_env)
        self.assertNotIn("ANTHROPIC_API_KEY", child_env)
        self.assertNotIn("FIRECRAWL_API_KEY", child_env)

    def test_tokens_excluded(self):
        child_env = self._get_child_env({
            "GITHUB_TOKEN": "ghp_secret",
            "MODAL_TOKEN_ID": "tok-123",
            "MODAL_TOKEN_SECRET": "tok-sec",
        })
        self.assertNotIn("GITHUB_TOKEN", child_env)
        self.assertNotIn("MODAL_TOKEN_ID", child_env)
        self.assertNotIn("MODAL_TOKEN_SECRET", child_env)

    def test_password_vars_excluded(self):
        child_env = self._get_child_env({
            "DB_PASSWORD": "hunter2",
            "MY_PASSWD": "secret",
            "AUTH_CREDENTIAL": "cred",
        })
        self.assertNotIn("DB_PASSWORD", child_env)
        self.assertNotIn("MY_PASSWD", child_env)
        self.assertNotIn("AUTH_CREDENTIAL", child_env)

    def test_path_included(self):
        child_env = self._get_child_env()
        self.assertIn("PATH", child_env)

    def test_home_included(self):
        child_env = self._get_child_env()
        self.assertIn("HOME", child_env)

    def test_hermes_rpc_socket_injected(self):
        child_env = self._get_child_env()
        self.assertIn("HERMES_RPC_SOCKET", child_env)

    def test_pythondontwritebytecode_set(self):
        child_env = self._get_child_env()
        self.assertEqual(child_env.get("PYTHONDONTWRITEBYTECODE"), "1")

    def test_timezone_injected_when_set(self):
        # The helper scopes HERMES_TIMEZONE to this one call and restores
        # os.environ afterwards.
        child_env = self._get_child_env({"HERMES_TIMEZONE": "America/New_York"})
        self.assertEqual(child_env.get("TZ"), "America/New_York")

    def test_timezone_not_set_when_empty(self):
        # A bare patch.dict restores os.environ on exit, undoing the pop.
        with patch.dict(os.environ):
            os.environ.pop("HERMES_TIMEZONE", None)
            child_env = self._get_child_env()
        # TZ should not be set unless HERMES_TIMEZONE is non-empty
        # (it might be set from the system, so we just check it's not
        # set to empty string)
        if "TZ" in child_env:
            self.assertNotEqual(child_env["TZ"], "")


# ---------------------------------------------------------------------------
# execute_code edge cases
# ---------------------------------------------------------------------------

class TestExecuteCodeEdgeCases(unittest.TestCase):
    """Edge cases for execute_code: unavailable sandbox, blank code, tool lists."""

    def test_windows_returns_error(self):
        """On Windows (or when SANDBOX_AVAILABLE is False), returns error JSON."""
        with patch("tools.code_execution_tool.SANDBOX_AVAILABLE", False):
            result = json.loads(execute_code("print('hi')", task_id="test"))
        self.assertIn("error", result)
        self.assertIn("Windows", result["error"])

    def test_whitespace_only_code(self):
        """Code consisting only of whitespace is rejected with a 'No code' error."""
        result = json.loads(execute_code("  \n\t  ", task_id="test"))
        self.assertIn("error", result)
        self.assertIn("No code", result["error"])

    @unittest.skipIf(sys.platform == "win32", "UDS not available on Windows")
    def test_none_enabled_tools_uses_all(self):
        """When enabled_tools is None, all sandbox tools should be available."""
        code = (
            "from hermes_tools import terminal, web_search, read_file\n"
            "print('all imports ok')\n"
        )
        with patch("model_tools.handle_function_call",
                   return_value=json.dumps({"ok": True})):
            result = json.loads(execute_code(code, task_id="test-none",
                                             enabled_tools=None))
        self.assertEqual(result["status"], "success")
        self.assertIn("all imports ok", result["output"])

    @unittest.skipIf(sys.platform == "win32", "UDS not available on Windows")
    def test_empty_enabled_tools_uses_all(self):
        """When enabled_tools is [] (empty), all sandbox tools should be available."""
        code = (
            "from hermes_tools import terminal, web_search\n"
            "print('imports ok')\n"
        )
        with patch("model_tools.handle_function_call",
                   return_value=json.dumps({"ok": True})):
            result = json.loads(execute_code(code, task_id="test-empty",
                                             enabled_tools=[]))
        self.assertEqual(result["status"], "success")
        self.assertIn("imports ok", result["output"])

    @unittest.skipIf(sys.platform == "win32", "UDS not available on Windows")
    def test_nonoverlapping_tools_fallback(self):
        """When enabled_tools has no overlap with SANDBOX_ALLOWED_TOOLS,
        should fall back to all allowed tools."""
        code = (
            "from hermes_tools import terminal\n"
            "print('fallback ok')\n"
        )
        with patch("model_tools.handle_function_call",
                   return_value=json.dumps({"ok": True})):
            result = json.loads(execute_code(
                code, task_id="test-nonoverlap",
                enabled_tools=["vision_analyze", "browser_snapshot"],
            ))
        self.assertEqual(result["status"], "success")
        self.assertIn("fallback ok", result["output"])


# ---------------------------------------------------------------------------
# _load_config
# ---------------------------------------------------------------------------

class TestLoadConfig(unittest.TestCase):
    """Smoke tests for _load_config with the ``cli`` module absent or mocked."""

    def test_returns_empty_dict_when_cli_config_unavailable(self):
        from tools.code_execution_tool import _load_config

        # A None entry in sys.modules makes any `import cli` raise ImportError.
        # Presumably _load_config imports CLI_CONFIG from `cli` -- verify.
        with patch.dict("sys.modules", {"cli": None}):
            result = _load_config()
        # NOTE(review): the test name promises an empty dict but only the
        # type is asserted; tighten to assertEqual(result, {}) once the
        # fallback behavior of _load_config is confirmed.
        self.assertIsInstance(result, dict)

    def test_returns_code_execution_section(self):
        from tools.code_execution_tool import _load_config

        mock_cli = MagicMock()
        mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 120, "max_tool_calls": 10}}
        # Make `import cli` resolve to the stub while _load_config runs.
        with patch.dict("sys.modules", {"cli": mock_cli}):
            result = _load_config()
        # Result should be a dict (either the config or empty)
        # NOTE(review): consider asserting the exact section once the
        # lookup path inside _load_config is confirmed.
        self.assertIsInstance(result, dict)


# ---------------------------------------------------------------------------
# Interrupt event
# ---------------------------------------------------------------------------

@unittest.skipIf(sys.platform == "win32", "UDS not available on Windows")
class TestInterruptHandling(unittest.TestCase):
    """Checks that setting the shared interrupt event aborts a running script."""

    def test_interrupt_event_stops_execution(self):
        """When _interrupt_event is set, execute_code should stop the script."""
        import threading

        from tools.terminal_tool import _interrupt_event

        code = "import time; time.sleep(60); print('should not reach')"

        # Fire the interrupt one second into the run. A Timer (unlike a bare
        # sleeping thread) can be cancelled if execute_code returns early.
        timer = threading.Timer(1, _interrupt_event.set)
        timer.daemon = True
        timer.start()

        try:
            with patch("model_tools.handle_function_call",
                       return_value=json.dumps({"ok": True})), \
                 patch("tools.code_execution_tool._load_config",
                       return_value={"timeout": 30, "max_tool_calls": 50}):
                result = json.loads(execute_code(
                    code,
                    task_id="test-interrupt",
                    enabled_tools=list(SANDBOX_ALLOWED_TOOLS),
                ))
            self.assertEqual(result["status"], "interrupted")
            self.assertIn("interrupted", result["output"])
        finally:
            # Stop and reap the timer BEFORE clearing the event; clearing
            # first would let a late-firing timer set the flag again and
            # leak the interrupted state into subsequent tests.
            timer.cancel()
            timer.join(timeout=3)
            _interrupt_event.clear()


if __name__ == "__main__":
    unittest.main()