fix(gateway): silence background agent terminal output (#3297)

* fix(gateway): silence flush agent terminal output

quiet_mode=True only suppresses AIAgent init messages.
Tool call output still leaks to the terminal through
_safe_print → _print_fn during session reset/expiry.

Since #2670 injected live memory state into the flush prompt,
the flush agent now reliably calls memory tools — making the
output leak noticeable for the first time.

Set _print_fn to a no-op so the background flush is fully silent.

* test(gateway): add test for flush agent terminal silence + fix dotenv mock

- Add TestFlushAgentSilenced: verifies _print_fn is set to a no-op on
  the flush agent so tool output never leaks to the terminal
- Fix pre-existing test failures: replace patch('run_agent.AIAgent')
  with sys.modules mock to avoid importing run_agent (requires openai)
- Add autouse _mock_dotenv fixture so all tests in this file run
  without the dotenv package installed

* fix(display): route KawaiiSpinner output through print_fn to fully silence flush agent

The previous fix set tmp_agent._print_fn = no-op on the flush agent but
spinner output and quiet-mode cute messages bypassed _print_fn entirely:
- KawaiiSpinner captured sys.stdout at __init__ and wrote directly to it
- quiet-mode tool results used builtin print() instead of _safe_print()

Add optional print_fn parameter to KawaiiSpinner.__init__; _write routes
through it when set. Pass self._print_fn to all spinner construction sites
in run_agent.py and change the quiet-mode cute message print to _safe_print.
The existing gateway fix (tmp_agent._print_fn = lambda) now propagates
correctly through both paths.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(gateway): silence hygiene and compression background agents

Two more background AIAgent instances in the gateway were created with
quiet_mode=True but without _print_fn = no-op, causing tool output to
leak to the terminal:
- _hyg_agent (in-turn hygiene memory agent)
- tmp_agent (_compress_context path)

Apply the same _print_fn no-op pattern used for the flush agent.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* chore(display): remove unused _last_flush_time from KawaiiSpinner

Attribute was set but never read; upstream already removed it.
Leftover from conflict resolution during rebase onto upstream/main.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Dilee <uzmpsk.dilekakbas@gmail.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Teknium
2026-03-26 17:40:31 -07:00
committed by GitHub
parent 08fa326bb0
commit 0375b2a0d7
4 changed files with 116 additions and 40 deletions

View File

@@ -231,7 +231,7 @@ class KawaiiSpinner:
"analyzing", "computing", "synthesizing", "formulating", "brainstorming", "analyzing", "computing", "synthesizing", "formulating", "brainstorming",
] ]
def __init__(self, message: str = "", spinner_type: str = 'dots'): def __init__(self, message: str = "", spinner_type: str = 'dots', print_fn=None):
self.message = message self.message = message
self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots']) self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
self.running = False self.running = False
@@ -239,12 +239,26 @@ class KawaiiSpinner:
self.frame_idx = 0 self.frame_idx = 0
self.start_time = None self.start_time = None
self.last_line_len = 0 self.last_line_len = 0
# Optional callable to route all output through (e.g. a no-op for silent
# background agents). When set, bypasses self._out entirely so that
# agents with _print_fn overridden remain fully silent.
self._print_fn = print_fn
# Capture stdout NOW, before any redirect_stdout(devnull) from # Capture stdout NOW, before any redirect_stdout(devnull) from
# child agents can replace sys.stdout with a black hole. # child agents can replace sys.stdout with a black hole.
self._out = sys.stdout self._out = sys.stdout
def _write(self, text: str, end: str = '\n', flush: bool = False): def _write(self, text: str, end: str = '\n', flush: bool = False):
"""Write to the stdout captured at spinner creation time.""" """Write to the stdout captured at spinner creation time.
If a print_fn was supplied at construction, all output is routed through
it instead — allowing callers to silence the spinner with a no-op lambda.
"""
if self._print_fn is not None:
try:
self._print_fn(text)
except Exception:
pass
return
try: try:
self._out.write(text + end) self._out.write(text + end)
if flush: if flush:

View File

@@ -573,6 +573,10 @@ class GatewayRunner:
session_id=old_session_id, session_id=old_session_id,
honcho_session_key=honcho_session_key, honcho_session_key=honcho_session_key,
) )
# Fully silence the flush agent — quiet_mode only suppresses init
# messages; tool call output still leaks to the terminal through
# _safe_print → _print_fn. Set a no-op to prevent that.
tmp_agent._print_fn = lambda *a, **kw: None
# Build conversation history from transcript # Build conversation history from transcript
msgs = [ msgs = [
@@ -2175,6 +2179,7 @@ class GatewayRunner:
enabled_toolsets=["memory"], enabled_toolsets=["memory"],
session_id=session_entry.session_id, session_id=session_entry.session_id,
) )
_hyg_agent._print_fn = lambda *a, **kw: None
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
_compressed, _ = await loop.run_in_executor( _compressed, _ = await loop.run_in_executor(
@@ -3885,6 +3890,7 @@ class GatewayRunner:
enabled_toolsets=["memory"], enabled_toolsets=["memory"],
session_id=session_entry.session_id, session_id=session_entry.session_id,
) )
tmp_agent._print_fn = lambda *a, **kw: None
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
compressed, _ = await loop.run_in_executor( compressed, _ = await loop.run_in_executor(

View File

@@ -5087,7 +5087,7 @@ class AIAgent:
spinner = None spinner = None
if self.quiet_mode and not self.tool_progress_callback: if self.quiet_mode and not self.tool_progress_callback:
face = random.choice(KawaiiSpinner.KAWAII_WAITING) face = random.choice(KawaiiSpinner.KAWAII_WAITING)
spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots') spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=self._print_fn)
spinner.start() spinner.start()
try: try:
@@ -5128,7 +5128,7 @@ class AIAgent:
# Print cute message per tool # Print cute message per tool
if self.quiet_mode: if self.quiet_mode:
cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result) cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result)
print(f" {cute_msg}") self._safe_print(f" {cute_msg}")
elif not self.quiet_mode: elif not self.quiet_mode:
if self.verbose_logging: if self.verbose_logging:
print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s") print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s")
@@ -5313,7 +5313,7 @@ class AIAgent:
spinner = None spinner = None
if self.quiet_mode and not self.tool_progress_callback: if self.quiet_mode and not self.tool_progress_callback:
face = random.choice(KawaiiSpinner.KAWAII_WAITING) face = random.choice(KawaiiSpinner.KAWAII_WAITING)
spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots') spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=self._print_fn)
spinner.start() spinner.start()
self._delegate_spinner = spinner self._delegate_spinner = spinner
_delegate_result = None _delegate_result = None
@@ -5343,7 +5343,7 @@ class AIAgent:
preview = _build_tool_preview(function_name, function_args) or function_name preview = _build_tool_preview(function_name, function_args) or function_name
if len(preview) > 30: if len(preview) > 30:
preview = preview[:27] + "..." preview = preview[:27] + "..."
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots') spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
spinner.start() spinner.start()
_spinner_result = None _spinner_result = None
try: try:
@@ -6026,7 +6026,7 @@ class AIAgent:
# Raw KawaiiSpinner only when no streaming consumers # Raw KawaiiSpinner only when no streaming consumers
# (would conflict with streamed token output) # (would conflict with streamed token output)
spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star']) spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type) thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type, print_fn=self._print_fn)
thinking_spinner.start() thinking_spinner.start()
# Log request details if verbose # Log request details if verbose

View File

@@ -7,11 +7,21 @@ Verifies that:
3. The flush still works normally when memory files don't exist 3. The flush still works normally when memory files don't exist
""" """
import sys
import types
import pytest import pytest
from pathlib import Path from pathlib import Path
from unittest.mock import MagicMock, patch, call from unittest.mock import MagicMock, patch, call
@pytest.fixture(autouse=True)
def _mock_dotenv(monkeypatch):
"""gateway.run imports dotenv at module level; stub it so tests run without the package."""
fake = types.ModuleType("dotenv")
fake.load_dotenv = lambda *a, **kw: None
monkeypatch.setitem(sys.modules, "dotenv", fake)
def _make_runner(): def _make_runner():
from gateway.run import GatewayRunner from gateway.run import GatewayRunner
@@ -57,105 +67,151 @@ class TestCronSessionBypass:
runner.session_store.load_transcript.assert_called_once_with("session_abc123") runner.session_store.load_transcript.assert_called_once_with("session_abc123")
def _make_flush_context(monkeypatch, memory_dir=None):
"""Return (runner, tmp_agent, fake_run_agent) with run_agent mocked in sys.modules."""
tmp_agent = MagicMock()
fake_run_agent = types.ModuleType("run_agent")
fake_run_agent.AIAgent = MagicMock(return_value=tmp_agent)
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
runner = _make_runner()
runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
return runner, tmp_agent, memory_dir
class TestMemoryInjection: class TestMemoryInjection:
"""The flush prompt should include current memory state from disk.""" """The flush prompt should include current memory state from disk."""
def test_memory_content_injected_into_flush_prompt(self, tmp_path): def test_memory_content_injected_into_flush_prompt(self, tmp_path, monkeypatch):
"""When memory files exist, their content appears in the flush prompt.""" """When memory files exist, their content appears in the flush prompt."""
runner = _make_runner()
runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
tmp_agent = MagicMock()
memory_dir = tmp_path / "memories" memory_dir = tmp_path / "memories"
memory_dir.mkdir() memory_dir.mkdir()
(memory_dir / "MEMORY.md").write_text("Agent knows Python\n§\nUser prefers dark mode") (memory_dir / "MEMORY.md").write_text("Agent knows Python\n§\nUser prefers dark mode")
(memory_dir / "USER.md").write_text("Name: Alice\n§\nTimezone: PST") (memory_dir / "USER.md").write_text("Name: Alice\n§\nTimezone: PST")
runner, tmp_agent, _ = _make_flush_context(monkeypatch, memory_dir)
with ( with (
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}), patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"), patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=tmp_agent),
# Intercept `from tools.memory_tool import MEMORY_DIR` inside the function
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=memory_dir)}), patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=memory_dir)}),
): ):
runner._flush_memories_for_session("session_123") runner._flush_memories_for_session("session_123")
tmp_agent.run_conversation.assert_called_once() tmp_agent.run_conversation.assert_called_once()
call_kwargs = tmp_agent.run_conversation.call_args.kwargs flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
flush_prompt = call_kwargs.get("user_message", "")
# Verify both memory sections appear in the prompt
assert "Agent knows Python" in flush_prompt assert "Agent knows Python" in flush_prompt
assert "User prefers dark mode" in flush_prompt assert "User prefers dark mode" in flush_prompt
assert "Name: Alice" in flush_prompt assert "Name: Alice" in flush_prompt
assert "Timezone: PST" in flush_prompt assert "Timezone: PST" in flush_prompt
# Verify the stale-overwrite warning is present
assert "Do NOT overwrite or remove entries" in flush_prompt assert "Do NOT overwrite or remove entries" in flush_prompt
assert "current live state of memory" in flush_prompt assert "current live state of memory" in flush_prompt
def test_flush_works_without_memory_files(self, tmp_path): def test_flush_works_without_memory_files(self, tmp_path, monkeypatch):
"""When no memory files exist, flush still runs without the guard.""" """When no memory files exist, flush still runs without the guard."""
runner = _make_runner()
runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
tmp_agent = MagicMock()
empty_dir = tmp_path / "no_memories" empty_dir = tmp_path / "no_memories"
empty_dir.mkdir() empty_dir.mkdir()
runner, tmp_agent, _ = _make_flush_context(monkeypatch)
with ( with (
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}), patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"), patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=tmp_agent),
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=empty_dir)}), patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=empty_dir)}),
): ):
runner._flush_memories_for_session("session_456") runner._flush_memories_for_session("session_456")
# Should still run, just without the memory guard section
tmp_agent.run_conversation.assert_called_once() tmp_agent.run_conversation.assert_called_once()
flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "") flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
assert "Do NOT overwrite or remove entries" not in flush_prompt assert "Do NOT overwrite or remove entries" not in flush_prompt
assert "Review the conversation above" in flush_prompt assert "Review the conversation above" in flush_prompt
def test_empty_memory_files_no_injection(self, tmp_path): def test_empty_memory_files_no_injection(self, tmp_path, monkeypatch):
"""Empty memory files should not trigger the guard section.""" """Empty memory files should not trigger the guard section."""
runner = _make_runner()
runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
tmp_agent = MagicMock()
memory_dir = tmp_path / "memories" memory_dir = tmp_path / "memories"
memory_dir.mkdir() memory_dir.mkdir()
(memory_dir / "MEMORY.md").write_text("") (memory_dir / "MEMORY.md").write_text("")
(memory_dir / "USER.md").write_text(" \n ") # whitespace only (memory_dir / "USER.md").write_text(" \n ") # whitespace only
runner, tmp_agent, _ = _make_flush_context(monkeypatch)
with ( with (
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}), patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"), patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=tmp_agent),
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=memory_dir)}), patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=memory_dir)}),
): ):
runner._flush_memories_for_session("session_789") runner._flush_memories_for_session("session_789")
tmp_agent.run_conversation.assert_called_once() tmp_agent.run_conversation.assert_called_once()
flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "") flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
# No memory content → no guard section
assert "current live state of memory" not in flush_prompt assert "current live state of memory" not in flush_prompt
class TestFlushAgentSilenced:
"""The flush agent must not produce any terminal output."""
def test_print_fn_set_to_noop(self, tmp_path, monkeypatch):
"""_print_fn on the flush agent must be a no-op so tool output never leaks."""
runner = _make_runner()
runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
captured_agent = {}
def _fake_ai_agent(*args, **kwargs):
agent = MagicMock()
captured_agent["instance"] = agent
return agent
fake_run_agent = types.ModuleType("run_agent")
fake_run_agent.AIAgent = _fake_ai_agent
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
with (
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=tmp_path)}),
):
runner._flush_memories_for_session("session_silent")
agent = captured_agent["instance"]
assert agent._print_fn is not None, "_print_fn should be overridden to suppress output"
# Confirm it is callable and produces no output (no exception)
agent._print_fn("should be silenced")
def test_kawaii_spinner_respects_print_fn(self):
"""KawaiiSpinner must route all output through print_fn when supplied."""
from agent.display import KawaiiSpinner
written = []
spinner = KawaiiSpinner("test", print_fn=lambda *a, **kw: written.append(a))
spinner._write("hello")
assert written == [("hello",)], "spinner should route through print_fn"
# A no-op print_fn must produce no output to stdout
import io, sys
buf = io.StringIO()
old_stdout = sys.stdout
sys.stdout = buf
try:
silent_spinner = KawaiiSpinner("silent", print_fn=lambda *a, **kw: None)
silent_spinner._write("should not appear")
silent_spinner.stop("done")
finally:
sys.stdout = old_stdout
assert buf.getvalue() == "", "no-op print_fn spinner must not write to stdout"
class TestFlushPromptStructure: class TestFlushPromptStructure:
"""Verify the flush prompt retains its core instructions.""" """Verify the flush prompt retains its core instructions."""
def test_core_instructions_present(self): def test_core_instructions_present(self, monkeypatch):
"""The flush prompt should still contain the original guidance.""" """The flush prompt should still contain the original guidance."""
runner = _make_runner() runner, tmp_agent, _ = _make_flush_context(monkeypatch)
runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
tmp_agent = MagicMock()
with ( with (
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}), patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"), patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=tmp_agent),
# Make the import fail gracefully so we test without memory files
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=Path("/nonexistent"))}), patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=Path("/nonexistent"))}),
): ):
runner._flush_memories_for_session("session_struct") runner._flush_memories_for_session("session_struct")