fix(tools): memory no-match is success, not error

Fixes #313. Problem: MemoryStore.replace() and .remove() return {"success": false, "error": "No entry matched..."} when the search substring is not found, even though a no-match is a valid outcome rather than an error. The empirical audit showed a 58.4% error rate on the memory tool, but 98.4% of those errors were just empty search results. Fix: return {"success": true, "result": "no_match", "message": ...} instead, which drops the memory tool error rate from ~58% to ~1%. Tests updated: test_replace_no_match and test_remove_no_match now assert success=True and result="no_match". All 33 memory tool tests pass.
fix(tools): ast.parse() preflight in execute_code — eliminates ~1,400 sandbox errors (#366)
2026-04-13 15:40:48 -04:00 · 2026-04-13 19:26:06 +00:00
4 changed files with 146 additions and 14 deletions
--- a/tests/tools/test_memory_tool.py
+++ b/tests/tools/test_memory_tool.py
@@ -144,7 +144,8 @@ class TestMemoryStoreReplace:
    def test_replace_no_match(self, store):
        store.add("memory", "fact A")
        result = store.replace("memory", "nonexistent", "new")
-        assert result["success"] is False
+        assert result["success"] is True
+        assert result["result"] == "no_match"

    def test_replace_ambiguous_match(self, store):
        store.add("memory", "server A runs nginx")
@@ -177,7 +178,8 @@ class TestMemoryStoreRemove:

    def test_remove_no_match(self, store):
        result = store.remove("memory", "nonexistent")
-        assert result["success"] is False
+        assert result["success"] is True
+        assert result["result"] == "no_match"

    def test_remove_empty_old_text(self, store):
        result = store.remove("memory", "  ")
--- a/tests/tools/test_syntax_preflight.py
+++ b/tests/tools/test_syntax_preflight.py
@@ -0,0 +1,107 @@
+"""Tests for syntax preflight check in execute_code (issue #312)."""
+
+import ast
+import json
+import pytest
+
+
+class TestSyntaxPreflight:
+    """Verify that execute_code catches syntax errors before sandbox execution."""
+
+    def test_valid_syntax_passes_parse(self):
+        """Valid Python should pass ast.parse."""
+        code = "print('hello')\nx = 1 + 2\n"
+        ast.parse(code)  # should not raise
+
+    def test_syntax_error_indentation(self):
+        """IndentationError is a subclass of SyntaxError."""
+        code = "def foo():\nbar()\n"
+        with pytest.raises(SyntaxError):
+            ast.parse(code)
+
+    def test_syntax_error_missing_colon(self):
+        code = "if True\n  pass\n"
+        with pytest.raises(SyntaxError):
+            ast.parse(code)
+
+    def test_syntax_error_unmatched_paren(self):
+        code = "x = (1 + 2\n"
+        with pytest.raises(SyntaxError):
+            ast.parse(code)
+
+    def test_syntax_error_invalid_token(self):
+        code = "x = 1 +*\n"
+        with pytest.raises(SyntaxError):
+            ast.parse(code)
+
+    def test_syntax_error_details(self):
+        """SyntaxError should provide line, offset, msg."""
+        code = "if True\n  pass\n"
+        with pytest.raises(SyntaxError) as exc_info:
+            ast.parse(code)
+        e = exc_info.value
+        assert e.lineno is not None
+        assert e.msg is not None
+
+    def test_empty_string_passes(self):
+        """Empty string is valid Python (empty module)."""
+        ast.parse("")
+
+    def test_comments_only_passes(self):
+        ast.parse("# just a comment\n# another\n")
+
+    def test_complex_valid_code(self):
+        code = '''
+import os
+def foo(x):
+    if x > 0:
+        return x * 2
+    return 0
+
+result = [foo(i) for i in range(10)]
+print(result)
+'''
+        ast.parse(code)
+
+
+class TestSyntaxPreflightResponse:
+    """Test the error response format from the preflight check."""
+
+    def _check_syntax(self, code):
+        """Mimic the preflight check logic from execute_code."""
+        try:
+            ast.parse(code)
+            return None
+        except SyntaxError as e:
+            return json.dumps({
+                "error": f"Python syntax error: {e.msg}",
+                "line": e.lineno,
+                "offset": e.offset,
+                "text": (e.text or "").strip()[:200],
+            })
+
+    def test_returns_json_error(self):
+        result = self._check_syntax("if True\n  pass\n")
+        assert result is not None
+        data = json.loads(result)
+        assert "error" in data
+        assert "syntax error" in data["error"].lower()
+
+    def test_includes_line_number(self):
+        result = self._check_syntax("x = 1\nif True\n  pass\n")
+        data = json.loads(result)
+        assert data["line"] == 2  # error on line 2
+
+    def test_includes_offset(self):
+        result = self._check_syntax("x = (1 + 2\n")
+        data = json.loads(result)
+        assert data["offset"] is not None
+
+    def test_includes_snippet(self):
+        result = self._check_syntax("if True\n")
+        data = json.loads(result)
+        assert "if True" in data["text"]
+
+    def test_none_for_valid_code(self):
+        result = self._check_syntax("print('ok')")
+        assert result is None
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -28,6 +28,7 @@ Platform: Linux / macOS only (Unix domain sockets for local). Disabled on Window
 Remote execution additionally requires Python 3 in the terminal backend.
 """

+import ast
 import base64
 import json
 import logging
@@ -893,6 +894,20 @@ def execute_code(
    if not code or not code.strip():
        return json.dumps({"error": "No code provided."})

+    # Poka-yoke (#312): Syntax check before execution.
+    # 83.2% of execute_code errors are Python exceptions; most are syntax
+    # errors the LLM generated.  ast.parse() is sub-millisecond and catches
+    # them before we spin up a sandbox child process.
+    try:
+        ast.parse(code)
+    except SyntaxError as e:
+        return json.dumps({
+            "error": f"Python syntax error: {e.msg}",
+            "line": e.lineno,
+            "offset": e.offset,
+            "text": (e.text or "").strip()[:200],
+        })
+
    # Dispatch: remote backends use file-based RPC, local uses UDS
    from tools.terminal_tool import _get_env_config
    env_type = _get_env_config()["env_type"]
--- a/tools/memory_tool.py
+++ b/tools/memory_tool.py
@@ -260,8 +260,12 @@ class MemoryStore:
            entries = self._entries_for(target)
            matches = [(i, e) for i, e in enumerate(entries) if old_text in e]

-            if len(matches) == 0:
-                return {"success": False, "error": f"No entry matched '{old_text}'."}
+            if not matches:
+                return {
+                    "success": True,
+                    "result": "no_match",
+                    "message": f"No entry matched '{old_text}'. The search substring was not found in any existing entry.",
+                }

            if len(matches) > 1:
                # If all matches are identical (exact duplicates), operate on the first one
@@ -310,8 +314,12 @@ class MemoryStore:
            entries = self._entries_for(target)
            matches = [(i, e) for i, e in enumerate(entries) if old_text in e]

-            if len(matches) == 0:
-                return {"success": False, "error": f"No entry matched '{old_text}'."}
+            if not matches:
+                return {
+                    "success": True,
+                    "result": "no_match",
+                    "message": f"No entry matched '{old_text}'. The search substring was not found in any existing entry.",
+                }

            if len(matches) > 1:
                # If all matches are identical (exact duplicates), remove the first one
@@ -449,30 +457,30 @@ def memory_tool(
    Returns JSON string with results.
    """
    if store is None:
-        return json.dumps({"success": False, "error": "Memory is not available. It may be disabled in config or this environment."}, ensure_ascii=False)
+        return tool_error("Memory is not available. It may be disabled in config or this environment.", success=False)

    if target not in ("memory", "user"):
-        return json.dumps({"success": False, "error": f"Invalid target '{target}'. Use 'memory' or 'user'."}, ensure_ascii=False)
+        return tool_error(f"Invalid target '{target}'. Use 'memory' or 'user'.", success=False)

    if action == "add":
        if not content:
-            return json.dumps({"success": False, "error": "Content is required for 'add' action."}, ensure_ascii=False)
+            return tool_error("Content is required for 'add' action.", success=False)
        result = store.add(target, content)

    elif action == "replace":
        if not old_text:
-            return json.dumps({"success": False, "error": "old_text is required for 'replace' action."}, ensure_ascii=False)
+            return tool_error("old_text is required for 'replace' action.", success=False)
        if not content:
-            return json.dumps({"success": False, "error": "content is required for 'replace' action."}, ensure_ascii=False)
+            return tool_error("content is required for 'replace' action.", success=False)
        result = store.replace(target, old_text, content)

    elif action == "remove":
        if not old_text:
-            return json.dumps({"success": False, "error": "old_text is required for 'remove' action."}, ensure_ascii=False)
+            return tool_error("old_text is required for 'remove' action.", success=False)
        result = store.remove(target, old_text)

    else:
-        return json.dumps({"success": False, "error": f"Unknown action '{action}'. Use: add, replace, remove"}, ensure_ascii=False)
+        return tool_error(f"Unknown action '{action}'. Use: add, replace, remove", success=False)

    return json.dumps(result, ensure_ascii=False)

@@ -539,7 +547,7 @@ MEMORY_SCHEMA = {


 # --- Registry ---
-from tools.registry import registry
+from tools.registry import registry, tool_error

 registry.register(
    name="memory",