feat: add exit code context for common CLI tools in terminal results (#5144)

When commands like grep, diff, test, or find return non-zero exit codes
that aren't actual errors (grep 1 = no matches, diff 1 = files differ),
the model wastes turns investigating non-problems. This adds an
exit_code_meaning field to the terminal JSON result that explains
informational exit codes, so the agent can move on instead of debugging.

Covers grep/rg/ag/ack (no matches), diff (files differ), find (partial
access), test/[ (condition false), curl (timeouts, DNS, HTTP errors),
and git (context-dependent). Correctly extracts the last command from
pipelines and chains, strips full paths and env var assignments.

The exit_code field itself is unchanged — this is purely additive context.
This commit is contained in:
Teknium
2026-04-04 16:57:24 -07:00
committed by GitHub
parent 5879b3ef82
commit aa475aef31
2 changed files with 233 additions and 1 deletions

View File

@@ -0,0 +1,152 @@
"""Tests for terminal command exit code semantic interpretation."""
import pytest
from tools.terminal_tool import _interpret_exit_code
class TestInterpretExitCode:
"""Test _interpret_exit_code returns correct notes for known command semantics."""
# ---- exit code 0 always returns None ----
def test_success_returns_none(self):
assert _interpret_exit_code("grep foo bar", 0) is None
assert _interpret_exit_code("diff a b", 0) is None
assert _interpret_exit_code("test -f /etc/passwd", 0) is None
# ---- grep / rg family: exit 1 = no matches ----
@pytest.mark.parametrize("cmd", [
"grep 'pattern' file.txt",
"egrep 'pattern' file.txt",
"fgrep 'pattern' file.txt",
"rg 'foo' .",
"ag 'foo' .",
"ack 'foo' .",
])
def test_grep_family_no_matches(self, cmd):
result = _interpret_exit_code(cmd, 1)
assert result is not None
assert "no matches" in result.lower()
def test_grep_real_error_no_note(self):
"""grep exit 2+ is a real error — should return None."""
assert _interpret_exit_code("grep 'foo' bar", 2) is None
assert _interpret_exit_code("rg 'foo' .", 2) is None
# ---- diff: exit 1 = files differ ----
def test_diff_files_differ(self):
result = _interpret_exit_code("diff file1 file2", 1)
assert result is not None
assert "differ" in result.lower()
def test_colordiff_files_differ(self):
result = _interpret_exit_code("colordiff file1 file2", 1)
assert result is not None
assert "differ" in result.lower()
def test_diff_real_error_no_note(self):
assert _interpret_exit_code("diff a b", 2) is None
# ---- test / [: exit 1 = condition false ----
def test_test_condition_false(self):
result = _interpret_exit_code("test -f /nonexistent", 1)
assert result is not None
assert "false" in result.lower()
def test_bracket_condition_false(self):
result = _interpret_exit_code("[ -f /nonexistent ]", 1)
assert result is not None
assert "false" in result.lower()
# ---- find: exit 1 = partial success ----
def test_find_partial_success(self):
result = _interpret_exit_code("find . -name '*.py'", 1)
assert result is not None
assert "inaccessible" in result.lower()
# ---- curl: various informational codes ----
def test_curl_timeout(self):
result = _interpret_exit_code("curl https://example.com", 28)
assert result is not None
assert "timed out" in result.lower()
def test_curl_connection_refused(self):
result = _interpret_exit_code("curl http://localhost:99999", 7)
assert result is not None
assert "connect" in result.lower()
# ---- git: exit 1 is context-dependent ----
def test_git_diff_exit_1(self):
result = _interpret_exit_code("git diff HEAD~1", 1)
assert result is not None
assert "normal" in result.lower()
# ---- pipeline / chain handling ----
def test_pipeline_last_command(self):
"""In a pipeline, the last command determines the exit code."""
result = _interpret_exit_code("ls -la | grep 'pattern'", 1)
assert result is not None
assert "no matches" in result.lower()
def test_and_chain_last_command(self):
result = _interpret_exit_code("cd /tmp && grep foo bar", 1)
assert result is not None
assert "no matches" in result.lower()
def test_semicolon_chain_last_command(self):
result = _interpret_exit_code("cat file; diff a b", 1)
assert result is not None
assert "differ" in result.lower()
def test_or_chain_last_command(self):
result = _interpret_exit_code("false || grep foo bar", 1)
assert result is not None
assert "no matches" in result.lower()
# ---- full paths ----
def test_full_path_command(self):
result = _interpret_exit_code("/usr/bin/grep 'foo' bar", 1)
assert result is not None
assert "no matches" in result.lower()
# ---- env var prefix ----
def test_env_var_prefix_stripped(self):
result = _interpret_exit_code("LANG=C grep 'foo' bar", 1)
assert result is not None
assert "no matches" in result.lower()
def test_multiple_env_vars(self):
result = _interpret_exit_code("FOO=1 BAR=2 grep 'foo' bar", 1)
assert result is not None
assert "no matches" in result.lower()
# ---- unknown commands return None ----
@pytest.mark.parametrize("cmd", [
"python3 script.py",
"rm -rf /tmp/test",
"npm test",
"make build",
"cargo build",
])
def test_unknown_commands_return_none(self, cmd):
assert _interpret_exit_code(cmd, 1) is None
# ---- edge cases ----
def test_empty_command(self):
assert _interpret_exit_code("", 1) is None
def test_only_env_vars(self):
"""Command with only env var assignments, no actual command."""
assert _interpret_exit_code("FOO=bar", 1) is None

View File

@@ -35,6 +35,7 @@ import json
import logging
import os
import platform
import re
import time
import threading
import atexit
@@ -899,6 +900,78 @@ def _atexit_cleanup():
atexit.register(_atexit_cleanup)
# =============================================================================
# Exit Code Context for Common CLI Tools
# =============================================================================
# Many Unix commands use non-zero exit codes for informational purposes, not
# to indicate failure. The model sees a raw exit_code=1 from `grep` and
# wastes a turn investigating something that just means "no matches".
# This lookup adds a human-readable note so the agent can move on.
def _interpret_exit_code(command: str, exit_code: int) -> str | None:
"""Return a human-readable note when a non-zero exit code is non-erroneous.
Returns None when the exit code is 0 or genuinely signals an error.
The note is appended to the tool result so the model doesn't waste
turns investigating expected exit codes.
"""
if exit_code == 0:
return None
# Extract the last command in a pipeline/chain — that determines the
# exit code. Handles `cmd1 && cmd2`, `cmd1 | cmd2`, `cmd1; cmd2`.
# Deliberately simple: split on shell operators and take the last piece.
segments = re.split(r'\s*(?:\|\||&&|[|;])\s*', command)
last_segment = (segments[-1] if segments else command).strip()
# Get base command name (first word), stripping env var assignments
# like VAR=val cmd ...
words = last_segment.split()
base_cmd = ""
for w in words:
if "=" in w and not w.startswith("-"):
continue # skip VAR=val
base_cmd = w.split("/")[-1] # handle /usr/bin/grep -> grep
break
if not base_cmd:
return None
# Command-specific semantics
semantics: dict[str, dict[int, str]] = {
# grep/rg/ag/ack: 1=no matches found (normal), 2+=real error
"grep": {1: "No matches found (not an error)"},
"egrep": {1: "No matches found (not an error)"},
"fgrep": {1: "No matches found (not an error)"},
"rg": {1: "No matches found (not an error)"},
"ag": {1: "No matches found (not an error)"},
"ack": {1: "No matches found (not an error)"},
# diff: 1=files differ (expected), 2+=real error
"diff": {1: "Files differ (expected, not an error)"},
"colordiff": {1: "Files differ (expected, not an error)"},
# find: 1=some dirs inaccessible but results may still be valid
"find": {1: "Some directories were inaccessible (partial results may still be valid)"},
# test/[: 1=condition is false (expected)
"test": {1: "Condition evaluated to false (expected, not an error)"},
"[": {1: "Condition evaluated to false (expected, not an error)"},
# curl: common non-error codes
"curl": {
6: "Could not resolve host",
7: "Failed to connect to host",
22: "HTTP response code indicated error (e.g. 404, 500)",
28: "Operation timed out",
},
# git: 1 is context-dependent but often normal (e.g. git diff with changes)
"git": {1: "Non-zero exit (often normal — e.g. 'git diff' returns 1 when files differ)"},
}
cmd_semantics = semantics.get(base_cmd)
if cmd_semantics and exit_code in cmd_semantics:
return cmd_semantics[exit_code]
return None
def terminal_tool(
command: str,
background: bool = False,
@@ -1242,13 +1315,20 @@ def terminal_tool(
from agent.redact import redact_sensitive_text
output = redact_sensitive_text(output.strip()) if output else ""
# Interpret non-zero exit codes that aren't real errors
# (e.g. grep=1 means "no matches", diff=1 means "files differ")
exit_note = _interpret_exit_code(command, returncode)
result_dict = {
"output": output,
"exit_code": returncode,
"error": None
"error": None,
}
if approval_note:
result_dict["approval"] = approval_note
if exit_note:
result_dict["exit_code_meaning"] = exit_note
return json.dumps(result_dict, ensure_ascii=False)
except Exception as e: