feat: add exit code context for common CLI tools in terminal results (#5144)

When commands like grep, diff, test, or find return non-zero exit codes
that aren't actual errors (grep 1 = no matches, diff 1 = files differ),
the model wastes turns investigating non-problems. This adds an
exit_code_meaning field to the terminal JSON result that explains
informational exit codes, so the agent can move on instead of debugging.

Covers grep/rg/ag/ack (no matches), diff (files differ), find (partial
access), test/[ (condition false), curl (timeouts, DNS, HTTP errors),
and git (context-dependent). Correctly extracts the last command from
pipelines and chains, strips full paths and env var assignments.

The exit_code field itself is unchanged — this is purely additive context.
This commit is contained in:
Teknium
2026-04-04 16:57:24 -07:00
committed by GitHub
parent 5879b3ef82
commit aa475aef31
2 changed files with 233 additions and 1 deletions

View File

@@ -35,6 +35,7 @@ import json
import logging
import os
import platform
import re
import time
import threading
import atexit
@@ -899,6 +900,78 @@ def _atexit_cleanup():
atexit.register(_atexit_cleanup)
# =============================================================================
# Exit Code Context for Common CLI Tools
# =============================================================================
# Many Unix commands use non-zero exit codes for informational purposes, not
# to indicate failure. The model sees a raw exit_code=1 from `grep` and
# wastes a turn investigating something that just means "no matches".
# This lookup adds a human-readable note so the agent can move on.
def _interpret_exit_code(command: str, exit_code: int) -> str | None:
"""Return a human-readable note when a non-zero exit code is non-erroneous.
Returns None when the exit code is 0 or genuinely signals an error.
The note is appended to the tool result so the model doesn't waste
turns investigating expected exit codes.
"""
if exit_code == 0:
return None
# Extract the last command in a pipeline/chain — that determines the
# exit code. Handles `cmd1 && cmd2`, `cmd1 | cmd2`, `cmd1; cmd2`.
# Deliberately simple: split on shell operators and take the last piece.
segments = re.split(r'\s*(?:\|\||&&|[|;])\s*', command)
last_segment = (segments[-1] if segments else command).strip()
# Get base command name (first word), stripping env var assignments
# like VAR=val cmd ...
words = last_segment.split()
base_cmd = ""
for w in words:
if "=" in w and not w.startswith("-"):
continue # skip VAR=val
base_cmd = w.split("/")[-1] # handle /usr/bin/grep -> grep
break
if not base_cmd:
return None
# Command-specific semantics
semantics: dict[str, dict[int, str]] = {
# grep/rg/ag/ack: 1=no matches found (normal), 2+=real error
"grep": {1: "No matches found (not an error)"},
"egrep": {1: "No matches found (not an error)"},
"fgrep": {1: "No matches found (not an error)"},
"rg": {1: "No matches found (not an error)"},
"ag": {1: "No matches found (not an error)"},
"ack": {1: "No matches found (not an error)"},
# diff: 1=files differ (expected), 2+=real error
"diff": {1: "Files differ (expected, not an error)"},
"colordiff": {1: "Files differ (expected, not an error)"},
# find: 1=some dirs inaccessible but results may still be valid
"find": {1: "Some directories were inaccessible (partial results may still be valid)"},
# test/[: 1=condition is false (expected)
"test": {1: "Condition evaluated to false (expected, not an error)"},
"[": {1: "Condition evaluated to false (expected, not an error)"},
# curl: common non-error codes
"curl": {
6: "Could not resolve host",
7: "Failed to connect to host",
22: "HTTP response code indicated error (e.g. 404, 500)",
28: "Operation timed out",
},
# git: 1 is context-dependent but often normal (e.g. git diff with changes)
"git": {1: "Non-zero exit (often normal — e.g. 'git diff' returns 1 when files differ)"},
}
cmd_semantics = semantics.get(base_cmd)
if cmd_semantics and exit_code in cmd_semantics:
return cmd_semantics[exit_code]
return None
def terminal_tool(
command: str,
background: bool = False,
@@ -1242,13 +1315,20 @@ def terminal_tool(
from agent.redact import redact_sensitive_text
output = redact_sensitive_text(output.strip()) if output else ""
# Interpret non-zero exit codes that aren't real errors
# (e.g. grep=1 means "no matches", diff=1 means "files differ")
exit_note = _interpret_exit_code(command, returncode)
result_dict = {
"output": output,
"exit_code": returncode,
"error": None
"error": None,
}
if approval_note:
result_dict["approval"] = approval_note
if exit_note:
result_dict["exit_code_meaning"] = exit_note
return json.dumps(result_dict, ensure_ascii=False)
except Exception as e: