feat: add exit code context for common CLI tools in terminal results (#5144)
When commands like grep, diff, test, or find return non-zero exit codes that aren't actual errors (grep 1 = no matches, diff 1 = files differ), the model wastes turns investigating non-problems. This adds an exit_code_meaning field to the terminal JSON result that explains informational exit codes, so the agent can move on instead of debugging. Covers grep/rg/ag/ack (no matches), diff (files differ), find (partial access), test/[ (condition false), curl (timeouts, DNS, HTTP errors), and git (context-dependent). Correctly extracts the last command from pipelines and chains, strips full paths and env var assignments. The exit_code field itself is unchanged — this is purely additive context.
This commit is contained in:
152
tests/tools/test_terminal_exit_semantics.py
Normal file
152
tests/tools/test_terminal_exit_semantics.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""Tests for terminal command exit code semantic interpretation."""
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.terminal_tool import _interpret_exit_code
|
||||
|
||||
|
||||
class TestInterpretExitCode:
|
||||
"""Test _interpret_exit_code returns correct notes for known command semantics."""
|
||||
|
||||
# ---- exit code 0 always returns None ----
|
||||
|
||||
def test_success_returns_none(self):
|
||||
assert _interpret_exit_code("grep foo bar", 0) is None
|
||||
assert _interpret_exit_code("diff a b", 0) is None
|
||||
assert _interpret_exit_code("test -f /etc/passwd", 0) is None
|
||||
|
||||
# ---- grep / rg family: exit 1 = no matches ----
|
||||
|
||||
@pytest.mark.parametrize("cmd", [
|
||||
"grep 'pattern' file.txt",
|
||||
"egrep 'pattern' file.txt",
|
||||
"fgrep 'pattern' file.txt",
|
||||
"rg 'foo' .",
|
||||
"ag 'foo' .",
|
||||
"ack 'foo' .",
|
||||
])
|
||||
def test_grep_family_no_matches(self, cmd):
|
||||
result = _interpret_exit_code(cmd, 1)
|
||||
assert result is not None
|
||||
assert "no matches" in result.lower()
|
||||
|
||||
def test_grep_real_error_no_note(self):
|
||||
"""grep exit 2+ is a real error — should return None."""
|
||||
assert _interpret_exit_code("grep 'foo' bar", 2) is None
|
||||
assert _interpret_exit_code("rg 'foo' .", 2) is None
|
||||
|
||||
# ---- diff: exit 1 = files differ ----
|
||||
|
||||
def test_diff_files_differ(self):
|
||||
result = _interpret_exit_code("diff file1 file2", 1)
|
||||
assert result is not None
|
||||
assert "differ" in result.lower()
|
||||
|
||||
def test_colordiff_files_differ(self):
|
||||
result = _interpret_exit_code("colordiff file1 file2", 1)
|
||||
assert result is not None
|
||||
assert "differ" in result.lower()
|
||||
|
||||
def test_diff_real_error_no_note(self):
|
||||
assert _interpret_exit_code("diff a b", 2) is None
|
||||
|
||||
# ---- test / [: exit 1 = condition false ----
|
||||
|
||||
def test_test_condition_false(self):
|
||||
result = _interpret_exit_code("test -f /nonexistent", 1)
|
||||
assert result is not None
|
||||
assert "false" in result.lower()
|
||||
|
||||
def test_bracket_condition_false(self):
|
||||
result = _interpret_exit_code("[ -f /nonexistent ]", 1)
|
||||
assert result is not None
|
||||
assert "false" in result.lower()
|
||||
|
||||
# ---- find: exit 1 = partial success ----
|
||||
|
||||
def test_find_partial_success(self):
|
||||
result = _interpret_exit_code("find . -name '*.py'", 1)
|
||||
assert result is not None
|
||||
assert "inaccessible" in result.lower()
|
||||
|
||||
# ---- curl: various informational codes ----
|
||||
|
||||
def test_curl_timeout(self):
|
||||
result = _interpret_exit_code("curl https://example.com", 28)
|
||||
assert result is not None
|
||||
assert "timed out" in result.lower()
|
||||
|
||||
def test_curl_connection_refused(self):
|
||||
result = _interpret_exit_code("curl http://localhost:99999", 7)
|
||||
assert result is not None
|
||||
assert "connect" in result.lower()
|
||||
|
||||
# ---- git: exit 1 is context-dependent ----
|
||||
|
||||
def test_git_diff_exit_1(self):
|
||||
result = _interpret_exit_code("git diff HEAD~1", 1)
|
||||
assert result is not None
|
||||
assert "normal" in result.lower()
|
||||
|
||||
# ---- pipeline / chain handling ----
|
||||
|
||||
def test_pipeline_last_command(self):
|
||||
"""In a pipeline, the last command determines the exit code."""
|
||||
result = _interpret_exit_code("ls -la | grep 'pattern'", 1)
|
||||
assert result is not None
|
||||
assert "no matches" in result.lower()
|
||||
|
||||
def test_and_chain_last_command(self):
|
||||
result = _interpret_exit_code("cd /tmp && grep foo bar", 1)
|
||||
assert result is not None
|
||||
assert "no matches" in result.lower()
|
||||
|
||||
def test_semicolon_chain_last_command(self):
|
||||
result = _interpret_exit_code("cat file; diff a b", 1)
|
||||
assert result is not None
|
||||
assert "differ" in result.lower()
|
||||
|
||||
def test_or_chain_last_command(self):
|
||||
result = _interpret_exit_code("false || grep foo bar", 1)
|
||||
assert result is not None
|
||||
assert "no matches" in result.lower()
|
||||
|
||||
# ---- full paths ----
|
||||
|
||||
def test_full_path_command(self):
|
||||
result = _interpret_exit_code("/usr/bin/grep 'foo' bar", 1)
|
||||
assert result is not None
|
||||
assert "no matches" in result.lower()
|
||||
|
||||
# ---- env var prefix ----
|
||||
|
||||
def test_env_var_prefix_stripped(self):
|
||||
result = _interpret_exit_code("LANG=C grep 'foo' bar", 1)
|
||||
assert result is not None
|
||||
assert "no matches" in result.lower()
|
||||
|
||||
def test_multiple_env_vars(self):
|
||||
result = _interpret_exit_code("FOO=1 BAR=2 grep 'foo' bar", 1)
|
||||
assert result is not None
|
||||
assert "no matches" in result.lower()
|
||||
|
||||
# ---- unknown commands return None ----
|
||||
|
||||
@pytest.mark.parametrize("cmd", [
|
||||
"python3 script.py",
|
||||
"rm -rf /tmp/test",
|
||||
"npm test",
|
||||
"make build",
|
||||
"cargo build",
|
||||
])
|
||||
def test_unknown_commands_return_none(self, cmd):
|
||||
assert _interpret_exit_code(cmd, 1) is None
|
||||
|
||||
# ---- edge cases ----
|
||||
|
||||
def test_empty_command(self):
|
||||
assert _interpret_exit_code("", 1) is None
|
||||
|
||||
def test_only_env_vars(self):
|
||||
"""Command with only env var assignments, no actual command."""
|
||||
assert _interpret_exit_code("FOO=bar", 1) is None
|
||||
@@ -35,6 +35,7 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import time
|
||||
import threading
|
||||
import atexit
|
||||
@@ -899,6 +900,78 @@ def _atexit_cleanup():
|
||||
atexit.register(_atexit_cleanup)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Exit Code Context for Common CLI Tools
|
||||
# =============================================================================
|
||||
# Many Unix commands use non-zero exit codes for informational purposes, not
|
||||
# to indicate failure. The model sees a raw exit_code=1 from `grep` and
|
||||
# wastes a turn investigating something that just means "no matches".
|
||||
# This lookup adds a human-readable note so the agent can move on.
|
||||
|
||||
def _interpret_exit_code(command: str, exit_code: int) -> str | None:
|
||||
"""Return a human-readable note when a non-zero exit code is non-erroneous.
|
||||
|
||||
Returns None when the exit code is 0 or genuinely signals an error.
|
||||
The note is appended to the tool result so the model doesn't waste
|
||||
turns investigating expected exit codes.
|
||||
"""
|
||||
if exit_code == 0:
|
||||
return None
|
||||
|
||||
# Extract the last command in a pipeline/chain — that determines the
|
||||
# exit code. Handles `cmd1 && cmd2`, `cmd1 | cmd2`, `cmd1; cmd2`.
|
||||
# Deliberately simple: split on shell operators and take the last piece.
|
||||
segments = re.split(r'\s*(?:\|\||&&|[|;])\s*', command)
|
||||
last_segment = (segments[-1] if segments else command).strip()
|
||||
|
||||
# Get base command name (first word), stripping env var assignments
|
||||
# like VAR=val cmd ...
|
||||
words = last_segment.split()
|
||||
base_cmd = ""
|
||||
for w in words:
|
||||
if "=" in w and not w.startswith("-"):
|
||||
continue # skip VAR=val
|
||||
base_cmd = w.split("/")[-1] # handle /usr/bin/grep -> grep
|
||||
break
|
||||
|
||||
if not base_cmd:
|
||||
return None
|
||||
|
||||
# Command-specific semantics
|
||||
semantics: dict[str, dict[int, str]] = {
|
||||
# grep/rg/ag/ack: 1=no matches found (normal), 2+=real error
|
||||
"grep": {1: "No matches found (not an error)"},
|
||||
"egrep": {1: "No matches found (not an error)"},
|
||||
"fgrep": {1: "No matches found (not an error)"},
|
||||
"rg": {1: "No matches found (not an error)"},
|
||||
"ag": {1: "No matches found (not an error)"},
|
||||
"ack": {1: "No matches found (not an error)"},
|
||||
# diff: 1=files differ (expected), 2+=real error
|
||||
"diff": {1: "Files differ (expected, not an error)"},
|
||||
"colordiff": {1: "Files differ (expected, not an error)"},
|
||||
# find: 1=some dirs inaccessible but results may still be valid
|
||||
"find": {1: "Some directories were inaccessible (partial results may still be valid)"},
|
||||
# test/[: 1=condition is false (expected)
|
||||
"test": {1: "Condition evaluated to false (expected, not an error)"},
|
||||
"[": {1: "Condition evaluated to false (expected, not an error)"},
|
||||
# curl: common non-error codes
|
||||
"curl": {
|
||||
6: "Could not resolve host",
|
||||
7: "Failed to connect to host",
|
||||
22: "HTTP response code indicated error (e.g. 404, 500)",
|
||||
28: "Operation timed out",
|
||||
},
|
||||
# git: 1 is context-dependent but often normal (e.g. git diff with changes)
|
||||
"git": {1: "Non-zero exit (often normal — e.g. 'git diff' returns 1 when files differ)"},
|
||||
}
|
||||
|
||||
cmd_semantics = semantics.get(base_cmd)
|
||||
if cmd_semantics and exit_code in cmd_semantics:
|
||||
return cmd_semantics[exit_code]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def terminal_tool(
|
||||
command: str,
|
||||
background: bool = False,
|
||||
@@ -1242,13 +1315,20 @@ def terminal_tool(
|
||||
from agent.redact import redact_sensitive_text
|
||||
output = redact_sensitive_text(output.strip()) if output else ""
|
||||
|
||||
# Interpret non-zero exit codes that aren't real errors
|
||||
# (e.g. grep=1 means "no matches", diff=1 means "files differ")
|
||||
exit_note = _interpret_exit_code(command, returncode)
|
||||
|
||||
result_dict = {
|
||||
"output": output,
|
||||
"exit_code": returncode,
|
||||
"error": None
|
||||
"error": None,
|
||||
}
|
||||
if approval_note:
|
||||
result_dict["approval"] = approval_note
|
||||
if exit_note:
|
||||
result_dict["exit_code_meaning"] = exit_note
|
||||
|
||||
return json.dumps(result_dict, ensure_ascii=False)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user