From efec4fcaabf97c66ef662387044348e08d946433 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Fri, 6 Mar 2026 01:52:46 -0800 Subject: [PATCH] feat(execute_code): add json_parse, shell_quote, retry helpers to sandbox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The execute_code sandbox generates a hermes_tools.py stub module for LLM scripts. Three common failure modes keep tripping up scripts: 1. json.loads() in its default strict mode rejects control chars in terminal() output (e.g., GitHub issue bodies with literal tabs/newlines) 2. Shell backtick/quote interpretation when interpolating dynamic content into terminal() commands (markdown with backticks gets eaten by bash) 3. No retry logic for transient network failures (API timeouts, rate limits) Adds three convenience helpers to the generated hermes_tools module: - json_parse(text) — json.loads with strict=False for tolerant parsing - shell_quote(s) — shlex.quote() for safe shell interpolation - retry(fn, max_attempts=3, delay=2) — exponential backoff wrapper Also updates the EXECUTE_CODE_SCHEMA description to document these helpers so LLMs know they're available from hermes_tools without importing anything extra. Includes 6 new tests (unit + integration) covering all three helpers. 
--- tests/tools/test_code_execution.py | 84 ++++++++++++++++++++++++++++++ tools/code_execution_tool.py | 43 ++++++++++++++- 2 files changed, 125 insertions(+), 2 deletions(-) diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py index 2ddd9801d..84f9db180 100644 --- a/tests/tools/test_code_execution.py +++ b/tests/tools/test_code_execution.py @@ -86,6 +86,14 @@ class TestHermesToolsGeneration(unittest.TestCase): self.assertIn("def _connect(", src) self.assertIn("def _call(", src) + def test_convenience_helpers_present(self): + """Verify json_parse, shell_quote, and retry helpers are generated.""" + src = generate_hermes_tools_module(["terminal"]) + self.assertIn("def json_parse(", src) + self.assertIn("def shell_quote(", src) + self.assertIn("def retry(", src) + self.assertIn("import json, os, socket, shlex, time", src) + @unittest.skipIf(sys.platform == "win32", "UDS not available on Windows") class TestExecuteCode(unittest.TestCase): @@ -213,6 +221,82 @@ print(f"Found {len(results.get('results', []))} results") self.assertEqual(result["status"], "success") self.assertIn("Found 1 results", result["output"]) + def test_json_parse_helper(self): + """json_parse handles control characters that json.loads(strict=True) rejects.""" + code = r""" +from hermes_tools import json_parse +# This JSON has a literal tab character which strict mode rejects +text = '{"body": "line1\tline2\nline3"}' +result = json_parse(text) +print(result["body"]) +""" + result = self._run(code) + self.assertEqual(result["status"], "success") + self.assertIn("line1", result["output"]) + + def test_shell_quote_helper(self): + """shell_quote properly escapes dangerous characters.""" + code = """ +from hermes_tools import shell_quote +# String with backticks, quotes, and special chars +dangerous = '`rm -rf /` && $(whoami) "hello"' +escaped = shell_quote(dangerous) +print(escaped) +# Verify it's wrapped in single quotes with proper escaping +assert "rm -rf" in escaped 
+assert escaped.startswith("'") +""" + result = self._run(code) + self.assertEqual(result["status"], "success") + + def test_retry_helper_success(self): + """retry returns on first success.""" + code = """ +from hermes_tools import retry +counter = [0] +def flaky(): + counter[0] += 1 + return f"ok on attempt {counter[0]}" +result = retry(flaky) +print(result) +""" + result = self._run(code) + self.assertEqual(result["status"], "success") + self.assertIn("ok on attempt 1", result["output"]) + + def test_retry_helper_eventual_success(self): + """retry retries on failure and succeeds eventually.""" + code = """ +from hermes_tools import retry +counter = [0] +def flaky(): + counter[0] += 1 + if counter[0] < 3: + raise ConnectionError(f"fail {counter[0]}") + return "success" +result = retry(flaky, max_attempts=3, delay=0.01) +print(result) +""" + result = self._run(code) + self.assertEqual(result["status"], "success") + self.assertIn("success", result["output"]) + + def test_retry_helper_all_fail(self): + """retry raises the last error when all attempts fail.""" + code = """ +from hermes_tools import retry +def always_fail(): + raise ValueError("nope") +try: + retry(always_fail, max_attempts=2, delay=0.01) + print("should not reach here") +except ValueError as e: + print(f"caught: {e}") +""" + result = self._run(code) + self.assertEqual(result["status"], "success") + self.assertIn("caught: nope", result["output"]) + if __name__ == "__main__": unittest.main() diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 8fb4b4431..442ec9402 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -137,10 +137,45 @@ def generate_hermes_tools_module(enabled_tools: List[str]) -> str: header = '''\ """Auto-generated Hermes tools RPC stubs.""" -import json, os, socket +import json, os, socket, shlex, time _sock = None + +# --------------------------------------------------------------------------- +# Convenience helpers (avoid common 
scripting pitfalls) +# --------------------------------------------------------------------------- + +def json_parse(text: str): + """Parse JSON tolerant of control characters (strict=False). + Use this instead of json.loads() when parsing output from terminal() + or web_extract() that may contain raw tabs/newlines in strings.""" + return json.loads(text, strict=False) + + +def shell_quote(s: str) -> str: + """Shell-escape a string for safe interpolation into commands. + Use this when inserting dynamic content into terminal() commands: + terminal(f"echo {shell_quote(user_input)}") + """ + return shlex.quote(s) + + +def retry(fn, max_attempts=3, delay=2): + """Retry a function up to max_attempts times with exponential backoff. + Use for transient failures (network errors, API rate limits): + result = retry(lambda: terminal("gh issue list ...")) + """ + last_err = None + for attempt in range(max_attempts): + try: + return fn() + except Exception as e: + last_err = e + if attempt < max_attempts - 1: + time.sleep(delay * (2 ** attempt)) + raise last_err + def _connect(): global _sock if _sock is None: @@ -586,7 +621,11 @@ EXECUTE_CODE_SCHEMA = { "Limits: 5-minute timeout, 50KB stdout cap, max 50 tool calls per script. " "terminal() is foreground-only (no background or pty).\n\n" "Print your final result to stdout. Use Python stdlib (json, re, math, csv, " - "datetime, collections, etc.) for processing between tool calls." + "datetime, collections, etc.) for processing between tool calls.\n\n" + "Also available (no import needed — built into hermes_tools):\n" + " json_parse(text: str) — json.loads with strict=False; use for terminal() output with control chars\n" + " shell_quote(s: str) — shlex.quote(); use when interpolating dynamic strings into shell commands\n" + " retry(fn, max_attempts=3, delay=2) — retry with exponential backoff for transient failures" ), "parameters": { "type": "object",