feat: add exit code context for common CLI tools in terminal results (#5144)

When commands like grep, diff, test, or find return non-zero exit codes that aren't actual errors (grep 1 = no matches, diff 1 = files differ), the model wastes turns investigating non-problems. This adds an exit_code_meaning field to the terminal JSON result that explains informational exit codes, so the agent can move on instead of debugging. Covers grep/rg/ag/ack (no matches), diff (files differ), find (partial access), test/[ (condition false), curl (timeouts, DNS, HTTP errors), and git (context-dependent). Correctly extracts the last command from pipelines and chains, strips full paths and env var assignments. The exit_code field itself is unchanged — this is purely additive context.
2026-04-28 06:51:16 +08:00 · 2026-04-04 16:57:24 -07:00
parent 5879b3ef82
commit aa475aef31
2 changed files with 233 additions and 1 deletion
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -35,6 +35,7 @@ import json
 import logging
 import os
 import platform
+import re
 import time
 import threading
 import atexit
@@ -899,6 +900,78 @@ def _atexit_cleanup():
 atexit.register(_atexit_cleanup)


+# =============================================================================
+# Exit Code Context for Common CLI Tools
+# =============================================================================
+# Many Unix commands use non-zero exit codes for informational purposes, not
+# to indicate failure.  The model sees a raw exit_code=1 from `grep` and
+# wastes a turn investigating something that just means "no matches".
+# This lookup adds a human-readable note so the agent can move on.
+
+# Matches a leading shell variable assignment word such as ``LC_ALL=C``.
+_ENV_ASSIGNMENT_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*=")
+
+_NO_MATCHES_NOTE = "No matches found (not an error)"
+_FILES_DIFFER_NOTE = "Files differ (expected, not an error)"
+_CONDITION_FALSE_NOTE = "Condition evaluated to false (expected, not an error)"
+
+# base command name -> {exit code -> explanatory note}.  Built once at import
+# time rather than on every call.
+_EXIT_CODE_SEMANTICS: dict[str, dict[int, str]] = {
+    # grep/rg/ag/ack: 1=no matches found (normal), 2+=real error
+    "grep": {1: _NO_MATCHES_NOTE},
+    "egrep": {1: _NO_MATCHES_NOTE},
+    "fgrep": {1: _NO_MATCHES_NOTE},
+    "rg": {1: _NO_MATCHES_NOTE},
+    "ag": {1: _NO_MATCHES_NOTE},
+    "ack": {1: _NO_MATCHES_NOTE},
+    # diff: 1=files differ (expected), 2+=real error
+    "diff": {1: _FILES_DIFFER_NOTE},
+    "colordiff": {1: _FILES_DIFFER_NOTE},
+    # find: 1=some dirs inaccessible but results may still be valid
+    "find": {1: "Some directories were inaccessible (partial results may still be valid)"},
+    # test / [ / [[: 1=condition is false (expected)
+    "test": {1: _CONDITION_FALSE_NOTE},
+    "[": {1: _CONDITION_FALSE_NOTE},
+    "[[": {1: _CONDITION_FALSE_NOTE},
+    # curl: these ARE failures, but each code has one well-known cause, so
+    # naming it saves the model a round of investigation.
+    "curl": {
+        6: "Could not resolve host",
+        7: "Failed to connect to host",
+        22: "HTTP response code indicated error (e.g. 404, 500)",
+        28: "Operation timed out",
+    },
+    # git: 1 is context-dependent but often normal (e.g. git diff with changes)
+    "git": {1: "Non-zero exit (often normal — e.g. 'git diff' returns 1 when files differ)"},
+}
+
+
+def _split_shell_segments(command: str) -> list[str]:
+    """Split *command* on ``|``, ``||``, ``&&``, ``;`` and newlines outside quotes.
+
+    Quotes and backslash escapes are kept in the returned segments so they can
+    be tokenised afterwards.  A backslash-newline (line continuation) outside
+    quotes is replaced by a single space.  An unterminated quote simply runs
+    to the end of the string.
+    """
+    segments: list[str] = []
+    current: list[str] = []
+    quote = ""  # the active quote character, or "" when outside quotes
+    i, n = 0, len(command)
+    while i < n:
+        ch = command[i]
+        nxt = command[i + 1] if i + 1 < n else ""
+        if quote:
+            current.append(ch)
+            if ch == "\\" and quote == '"' and nxt:
+                # Inside double quotes a backslash protects the next char
+                # (so \" does not close the string).
+                current.append(nxt)
+                i += 1
+            elif ch == quote:
+                quote = ""
+        elif ch == "\\" and nxt:
+            current.append(" " if nxt == "\n" else ch + nxt)
+            i += 1
+        elif ch in "'\"":
+            quote = ch
+            current.append(ch)
+        elif ch + nxt in ("||", "&&"):
+            segments.append("".join(current))
+            current = []
+            i += 1
+        elif ch in "|;\n":
+            segments.append("".join(current))
+            current = []
+        else:
+            current.append(ch)
+        i += 1
+    segments.append("".join(current))
+    return segments
+
+
+def _interpret_exit_code(command: str, exit_code: int) -> str | None:
+    """Return a human-readable note for a well-known non-zero exit code.
+
+    The note is attached to the tool result so the model doesn't waste
+    turns investigating expected exit codes (``grep`` 1 = no matches,
+    ``diff`` 1 = files differ, ...) or re-deriving the cause of a
+    self-explanatory failure (``curl`` 6 = DNS failure, ...).
+
+    Args:
+        command: The full shell command line that was executed.
+        exit_code: The exit status the command returned.
+
+    Returns:
+        The explanatory note, or None when the exit code is 0, the command
+        cannot be identified, or the (command, exit code) pair has no entry
+        in the lookup table.
+
+    Only the last command of a pipeline/chain is considered, since that is
+    what determines the exit code (absent ``pipefail``).  Splitting is
+    quote-aware but deliberately simple: command substitution, subshells and
+    ``#`` comments are not parsed.
+    """
+    import shlex  # local import: keeps this block self-contained
+
+    if exit_code == 0:
+        return None
+
+    # Last non-empty segment, so a trailing ';' or newline does not hide
+    # the command that actually produced the exit code.
+    stripped = (seg.strip() for seg in reversed(_split_shell_segments(command)))
+    last_segment = next((seg for seg in stripped if seg), "")
+    if not last_segment:
+        return None
+
+    try:
+        words = shlex.split(last_segment)
+    except ValueError:
+        # Unbalanced quotes: fall back to plain whitespace splitting.
+        words = last_segment.split()
+
+    # Base command name = first word that is not a VAR=val assignment,
+    # with any directory prefix removed (/usr/bin/grep -> grep).
+    base_cmd = ""
+    for word in words:
+        if _ENV_ASSIGNMENT_RE.match(word):
+            continue
+        base_cmd = word.rsplit("/", 1)[-1]
+        break
+
+    if not base_cmd:
+        return None
+
+    return _EXIT_CODE_SEMANTICS.get(base_cmd, {}).get(exit_code)
+
+
 def terminal_tool(
    command: str,
    background: bool = False,
@@ -1242,13 +1315,20 @@ def terminal_tool(
            from agent.redact import redact_sensitive_text
            output = redact_sensitive_text(output.strip()) if output else ""

+            # Interpret non-zero exit codes that aren't real errors
+            # (e.g. grep=1 means "no matches", diff=1 means "files differ")
+            exit_note = _interpret_exit_code(command, returncode)
+
            result_dict = {
                "output": output,
                "exit_code": returncode,
-                "error": None
+                "error": None,
            }
            if approval_note:
                result_dict["approval"] = approval_note
+            if exit_note:
+                result_dict["exit_code_meaning"] = exit_note
+
            return json.dumps(result_dict, ensure_ascii=False)

    except Exception as e: