diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py
index 2ddd9801d..84f9db180 100644
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -86,6 +86,14 @@ class TestHermesToolsGeneration(unittest.TestCase):
         self.assertIn("def _connect(", src)
         self.assertIn("def _call(", src)
 
+    def test_convenience_helpers_present(self):
+        """Verify json_parse, shell_quote, and retry helpers are generated."""
+        src = generate_hermes_tools_module(["terminal"])
+        self.assertIn("def json_parse(", src)
+        self.assertIn("def shell_quote(", src)
+        self.assertIn("def retry(", src)
+        self.assertIn("import json, os, socket, shlex, time", src)
+
 
 @unittest.skipIf(sys.platform == "win32", "UDS not available on Windows")
 class TestExecuteCode(unittest.TestCase):
@@ -213,6 +221,96 @@ print(f"Found {len(results.get('results', []))} results")
         self.assertEqual(result["status"], "success")
         self.assertIn("Found 1 results", result["output"])
 
+    def test_json_parse_helper(self):
+        """json_parse handles control characters that json.loads(strict=True) rejects."""
+        code = r"""
+from hermes_tools import json_parse
+# This JSON has a literal tab character which strict mode rejects
+text = '{"body": "line1\tline2\nline3"}'
+result = json_parse(text)
+print(result["body"])
+"""
+        result = self._run(code)
+        self.assertEqual(result["status"], "success")
+        self.assertIn("line1", result["output"])
+
+    def test_shell_quote_helper(self):
+        """shell_quote properly escapes dangerous characters."""
+        code = """
+from hermes_tools import shell_quote
+# String with backticks, quotes, and special chars
+dangerous = '`rm -rf /` && $(whoami) "hello"'
+escaped = shell_quote(dangerous)
+print(escaped)
+# Verify it's wrapped in single quotes with proper escaping
+assert "rm -rf" in escaped
+assert escaped.startswith("'")
+"""
+        result = self._run(code)
+        self.assertEqual(result["status"], "success")
+
+    def test_retry_helper_success(self):
+        """retry returns on first success."""
+        code = """
+from hermes_tools import retry
+counter = [0]
+def flaky():
+    counter[0] += 1
+    return f"ok on attempt {counter[0]}"
+result = retry(flaky)
+print(result)
+"""
+        result = self._run(code)
+        self.assertEqual(result["status"], "success")
+        self.assertIn("ok on attempt 1", result["output"])
+
+    def test_retry_helper_eventual_success(self):
+        """retry retries on failure and succeeds eventually."""
+        code = """
+from hermes_tools import retry
+counter = [0]
+def flaky():
+    counter[0] += 1
+    if counter[0] < 3:
+        raise ConnectionError(f"fail {counter[0]}")
+    return "success"
+result = retry(flaky, max_attempts=3, delay=0.01)
+print(result)
+"""
+        result = self._run(code)
+        self.assertEqual(result["status"], "success")
+        self.assertIn("success", result["output"])
+
+    def test_retry_helper_all_fail(self):
+        """retry raises the last error when all attempts fail."""
+        code = """
+from hermes_tools import retry
+def always_fail():
+    raise ValueError("nope")
+try:
+    retry(always_fail, max_attempts=2, delay=0.01)
+    print("should not reach here")
+except ValueError as e:
+    print(f"caught: {e}")
+"""
+        result = self._run(code)
+        self.assertEqual(result["status"], "success")
+        self.assertIn("caught: nope", result["output"])
+
+    def test_retry_helper_rejects_zero_attempts(self):
+        """retry raises ValueError when max_attempts is less than 1."""
+        code = """
+from hermes_tools import retry
+try:
+    retry(lambda: "unused", max_attempts=0)
+    print("should not reach here")
+except ValueError as e:
+    print(f"caught: {e}")
+"""
+        result = self._run(code)
+        self.assertEqual(result["status"], "success")
+        self.assertIn("caught: max_attempts must be >= 1", result["output"])
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 8fb4b4431..442ec9402 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -137,10 +137,49 @@ def generate_hermes_tools_module(enabled_tools: List[str]) -> str:
 
     header = '''\
 """Auto-generated Hermes tools RPC stubs."""
-import json, os, socket
+import json, os, socket, shlex, time
 
 _sock = None
 
+
+# ---------------------------------------------------------------------------
+# Convenience helpers (avoid common scripting pitfalls)
+# ---------------------------------------------------------------------------
+
+def json_parse(text: str):
+    """Parse JSON tolerant of control characters (strict=False).
+    Use this instead of json.loads() when parsing output from terminal()
+    or web_extract() that may contain raw tabs/newlines in strings."""
+    return json.loads(text, strict=False)
+
+
+def shell_quote(s: str) -> str:
+    """Shell-escape a string for safe interpolation into commands.
+    Use this when inserting dynamic content into terminal() commands:
+        terminal(f"echo {shell_quote(user_input)}")
+    """
+    return shlex.quote(s)
+
+
+def retry(fn, max_attempts=3, delay=2):
+    """Retry a function up to max_attempts times with exponential backoff.
+    Use for transient failures (network errors, API rate limits):
+        result = retry(lambda: terminal("gh issue list ..."))
+    Sleeps delay * 2**attempt seconds between attempts and re-raises the
+    last error if every attempt fails. Raises ValueError if max_attempts < 1.
+    """
+    if max_attempts < 1:
+        raise ValueError("max_attempts must be >= 1")
+    last_err = None
+    for attempt in range(max_attempts):
+        try:
+            return fn()
+        except Exception as e:
+            last_err = e
+            if attempt < max_attempts - 1:
+                time.sleep(delay * (2 ** attempt))
+    raise last_err
+
 def _connect():
     global _sock
     if _sock is None:
@@ -586,7 +625,11 @@ EXECUTE_CODE_SCHEMA = {
         "Limits: 5-minute timeout, 50KB stdout cap, max 50 tool calls per script. "
         "terminal() is foreground-only (no background or pty).\n\n"
         "Print your final result to stdout. Use Python stdlib (json, re, math, csv, "
-        "datetime, collections, etc.) for processing between tool calls."
+        "datetime, collections, etc.) for processing between tool calls.\n\n"
+        "Also available via from hermes_tools import ... (no other imports needed):\n"
+        "  json_parse(text: str) — json.loads with strict=False; use for terminal() output with control chars\n"
+        "  shell_quote(s: str) — shlex.quote(); use when interpolating dynamic strings into shell commands\n"
+        "  retry(fn, max_attempts=3, delay=2) — retry with exponential backoff for transient failures"
     ),
     "parameters": {
         "type": "object",