fix: strip ANSI at the source — clean terminal output before it reaches the model

Root cause: terminal_tool, execute_code, and process_registry returned raw
subprocess output with ANSI escape sequences intact. The model saw these
in tool results and copied them into file writes.

Previous fix (PR #2532) stripped ANSI at the write point in file_tools.py,
but this was a band-aid — running a regex over file content risks corrupting
legitimate content, and it does not stop ANSI sequences from wasting tokens
in the model context.

Source-level fix:
- New tools/ansi_strip.py with comprehensive ECMA-48 regex covering CSI
  (incl. private-mode, colon-separated, intermediate bytes), OSC (both
  terminators), DCS/SOS/PM/APC strings, Fp/Fe/Fs/nF escapes, 8-bit C1
- terminal_tool.py: strip output before returning to model
- code_execution_tool.py: strip stdout/stderr before returning
- process_registry.py: strip output in poll/read_log/wait
- file_tools.py: remove _strip_ansi band-aid (no longer needed)

Verified: `ls --color=always` output is returned to the model as clean text,
and a file written from that output contains zero ESC bytes.
This commit is contained in:
Teknium
2026-03-23 06:50:39 -07:00
parent 6302e56e7c
commit 934fbe3c06
7 changed files with 236 additions and 21 deletions

View File

@@ -426,12 +426,14 @@ class ProcessRegistry:
def poll(self, session_id: str) -> dict:
"""Check status and get new output for a background process."""
from tools.ansi_strip import strip_ansi
session = self.get(session_id)
if session is None:
return {"status": "not_found", "error": f"No process with ID {session_id}"}
with session._lock:
output_preview = session.output_buffer[-1000:] if session.output_buffer else ""
output_preview = strip_ansi(session.output_buffer[-1000:]) if session.output_buffer else ""
result = {
"session_id": session.id,
@@ -450,12 +452,14 @@ class ProcessRegistry:
def read_log(self, session_id: str, offset: int = 0, limit: int = 200) -> dict:
"""Read the full output log with optional pagination by lines."""
from tools.ansi_strip import strip_ansi
session = self.get(session_id)
if session is None:
return {"status": "not_found", "error": f"No process with ID {session_id}"}
with session._lock:
full_output = session.output_buffer
full_output = strip_ansi(session.output_buffer)
lines = full_output.splitlines()
total_lines = len(lines)
@@ -486,6 +490,7 @@ class ProcessRegistry:
dict with status ("exited", "timeout", "interrupted", "not_found")
and output snapshot.
"""
from tools.ansi_strip import strip_ansi
from tools.terminal_tool import _interrupt_event
default_timeout = int(os.getenv("TERMINAL_TIMEOUT", "180"))
@@ -513,7 +518,7 @@ class ProcessRegistry:
result = {
"status": "exited",
"exit_code": session.exit_code,
"output": session.output_buffer[-2000:],
"output": strip_ansi(session.output_buffer[-2000:]),
}
if timeout_note:
result["timeout_note"] = timeout_note
@@ -522,7 +527,7 @@ class ProcessRegistry:
if _interrupt_event.is_set():
result = {
"status": "interrupted",
"output": session.output_buffer[-1000:],
"output": strip_ansi(session.output_buffer[-1000:]),
"note": "User sent a new message -- wait interrupted",
}
if timeout_note:
@@ -533,7 +538,7 @@ class ProcessRegistry:
result = {
"status": "timeout",
"output": session.output_buffer[-1000:],
"output": strip_ansi(session.output_buffer[-1000:]),
}
if timeout_note:
result["timeout_note"] = timeout_note