feat(execute_code): add json_parse, shell_quote, retry helpers to sandbox

The execute_code sandbox generates a hermes_tools.py stub module for LLM
scripts. Three common failure modes keep tripping up scripts:

1. json.loads() (strict=True by default) rejects literal control chars in
   terminal() output (e.g., GitHub issue bodies with literal tabs/newlines)
2. Shell backtick/quote interpretation when interpolating dynamic content
   into terminal() commands (markdown with backticks gets eaten by bash)
3. No retry logic for transient network failures (API timeouts, rate limits)

Adds three convenience helpers to the generated hermes_tools module:

- json_parse(text) — json.loads with strict=False for tolerant parsing
- shell_quote(s) — shlex.quote() for safe shell interpolation
- retry(fn, max_attempts=3, delay=2) — exponential backoff wrapper

Also updates the EXECUTE_CODE_SCHEMA description to document these helpers
so LLMs know they're available without importing anything extra.

Includes 7 new tests (unit + integration) covering all three helpers.
This commit is contained in:
teknium1
2026-03-06 01:52:46 -08:00
parent 5ce2c47d60
commit efec4fcaab
2 changed files with 125 additions and 2 deletions

View File

@@ -86,6 +86,14 @@ class TestHermesToolsGeneration(unittest.TestCase):
self.assertIn("def _connect(", src)
self.assertIn("def _call(", src)
def test_convenience_helpers_present(self):
    """Check the generated module source for the helper defs and the import line.

    Generates the stub for the ``terminal`` tool only and looks for the
    ``json_parse``, ``shell_quote`` and ``retry`` definitions plus the
    combined stdlib import statement.
    """
    module_source = generate_hermes_tools_module(["terminal"])
    expected_snippets = (
        "def json_parse(",
        "def shell_quote(",
        "def retry(",
        "import json, os, socket, shlex, time",
    )
    # Checked in order, so the first missing snippet is the one reported.
    for snippet in expected_snippets:
        self.assertIn(snippet, module_source)
@unittest.skipIf(sys.platform == "win32", "UDS not available on Windows")
class TestExecuteCode(unittest.TestCase):
@@ -213,6 +221,82 @@ print(f"Found {len(results.get('results', []))} results")
self.assertEqual(result["status"], "success")
self.assertIn("Found 1 results", result["output"])
def test_json_parse_helper(self):
    """json_parse handles control characters that json.loads(strict=True) rejects.

    The sandbox script parses a JSON document whose string value contains a
    literal tab and a literal newline, then prints the decoded value. The
    test checks that every segment around those control characters made it
    to the output, not just the text before the first one.
    """
    # The outer literal is raw, so the backslash escapes reach the sandbox
    # script verbatim; the script's own (non-raw) string literal then turns
    # them into a real tab and a real newline inside the JSON text.
    code = r"""
from hermes_tools import json_parse
# This JSON has a literal tab character which strict mode rejects
text = '{"body": "line1\tline2\nline3"}'
result = json_parse(text)
print(result["body"])
"""
    result = self._run(code)
    self.assertEqual(result["status"], "success")
    # "line2" and "line3" sit after the tab and the newline respectively;
    # asserting on them shows the whole value was parsed and printed.
    for fragment in ("line1", "line2", "line3"):
        self.assertIn(fragment, result["output"])
def test_shell_quote_helper(self):
    """Run a sandbox script that quotes a string full of shell metacharacters.

    The script itself asserts that the quoted result still contains the
    original text and begins with a single quote; this test only checks the
    reported run status.
    NOTE(review): presumably a failed in-script assert yields a non-success
    status -- confirm against the sandbox runner.
    """
    script = """
from hermes_tools import shell_quote
# String with backticks, quotes, and special chars
dangerous = '`rm -rf /` && $(whoami) "hello"'
escaped = shell_quote(dangerous)
print(escaped)
# Verify it's wrapped in single quotes with proper escaping
assert "rm -rf" in escaped
assert escaped.startswith("'")
"""
    outcome = self._run(script)
    self.assertEqual(outcome["status"], "success")
def test_retry_helper_success(self):
    """retry returns on first success.

    The sandbox script wraps a function that never raises and records how
    many times it was called; the printed result must show it was the first
    attempt.
    """
    # The body of flaky() must be indented inside the embedded script;
    # without it the script fails with IndentationError before retry runs.
    code = """
from hermes_tools import retry
counter = [0]
def flaky():
    counter[0] += 1
    return f"ok on attempt {counter[0]}"
result = retry(flaky)
print(result)
"""
    result = self._run(code)
    self.assertEqual(result["status"], "success")
    self.assertIn("ok on attempt 1", result["output"])
def test_retry_helper_eventual_success(self):
    """retry retries on failure and succeeds eventually.

    The wrapped function raises ConnectionError on its first two calls and
    returns "success" on the third; retry is given three attempts and a
    small delay so the test stays fast.
    """
    # Indentation inside the embedded script is significant: the function
    # body and the nested ``if`` body must be indented or the script does
    # not parse.
    code = """
from hermes_tools import retry
counter = [0]
def flaky():
    counter[0] += 1
    if counter[0] < 3:
        raise ConnectionError(f"fail {counter[0]}")
    return "success"
result = retry(flaky, max_attempts=3, delay=0.01)
print(result)
"""
    result = self._run(code)
    self.assertEqual(result["status"], "success")
    self.assertIn("success", result["output"])
def test_retry_helper_all_fail(self):
    """retry raises the last error when all attempts fail.

    The wrapped function always raises ValueError. The script catches that
    error around the retry call and prints it, so the sandbox run itself
    still reports success while the output shows the exception propagated.
    """
    # The def/try/except bodies must be indented inside the embedded script;
    # otherwise it fails to parse and never exercises retry.
    code = """
from hermes_tools import retry
def always_fail():
    raise ValueError("nope")
try:
    retry(always_fail, max_attempts=2, delay=0.01)
    print("should not reach here")
except ValueError as e:
    print(f"caught: {e}")
"""
    result = self._run(code)
    self.assertEqual(result["status"], "success")
    self.assertIn("caught: nope", result["output"])
# Run the tests through unittest's command-line runner when this file is
# executed directly as a script.
if __name__ == "__main__":
    unittest.main()