Compare commits

...

1 Commit

Author SHA1 Message Date
teknium1
0c513d315b fix: respect disabled auto-compaction on context overflow
Port from anomalyco/opencode#30749.

When compression.enabled is false, NO automatic compaction trigger may
fire. The proactive token-threshold paths (preflight + post-response
should_compress gate) already honoured the setting, but the three
provider-overflow recovery paths in the agent loop — long-context-tier
429, 413 payload-too-large, and context-overflow — called
_compress_context() unconditionally, silently compressing and rotating
the session against the user's explicit choice.

Add a single guard at the top of the overflow-recovery dispatch: when
compression is disabled and the error is one of those three overflow
classes, surface a terminal error (compaction_disabled: True) telling the
user to /compress manually, /new, switch to a larger-context model, or
reduce attachments. Manual /compress (force=True) is unaffected — it never
enters this loop.

Tests: new TestOverflowWithCompactionDisabled (413 + 400 overflow don't
compress when disabled; control case still compresses when enabled).
Existing overflow-recovery tests updated to enable compaction explicitly
(they verify the recovery fires); fixture defaults flipped to True to
match production (compression.enabled defaults to True).
2026-06-04 17:11:21 -07:00
3 changed files with 162 additions and 1 deletion

View File

@@ -2720,6 +2720,61 @@ def run_conversation(
# compress history and retry, not abort immediately.
status_code = getattr(api_error, "status_code", None)
# ── Respect disabled auto-compaction on overflow ──────
# Ported from anomalyco/opencode#30749. When the user has
# turned auto-compaction off (``compression.enabled: false``),
# NO automatic compaction trigger may fire — including the
# provider/request-size overflow recovery paths below
# (long-context-tier 429, 413 payload-too-large, and
# context-overflow). Without this guard the proactive
# threshold path correctly honours the setting (see the
# preflight check and the post-response ``should_compress``
# gate) but a provider overflow error would still silently
# compress + rotate the session, bypassing the user's
# explicit choice. Surface a terminal error instead so the
# user can compact manually (``/compress``), start fresh
# (``/new``), switch to a larger-context model, or reduce
# attachments. Forced compaction via ``/compress``
# (``force=True``) is unaffected — it never reaches this loop.
_overflow_reasons = {
FailoverReason.long_context_tier,
FailoverReason.payload_too_large,
FailoverReason.context_overflow,
}
if (
classified.reason in _overflow_reasons
and not getattr(agent, "compression_enabled", True)
):
agent._flush_status_buffer()
agent._vprint(
f"{agent.log_prefix}❌ Context overflow, but auto-compaction is disabled "
f"(compression.enabled: false).",
force=True,
)
agent._vprint(
f"{agent.log_prefix} 💡 Run /compress to compact manually, /new to start fresh, "
f"switch to a larger-context model, or reduce attachments.",
force=True,
)
logger.error(
f"{agent.log_prefix}Context overflow ({classified.reason.value}) with "
f"auto-compaction disabled — not compressing."
)
agent._persist_session(messages, conversation_history)
return {
"messages": messages,
"completed": False,
"api_calls": api_call_count,
"error": (
"Context overflow and auto-compaction is disabled "
"(compression.enabled: false). Run /compress to compact manually, "
"/new to start fresh, or switch to a larger-context model."
),
"partial": True,
"failed": True,
"compaction_disabled": True,
}
# ── Anthropic Sonnet long-context tier gate ───────────
# Anthropic returns HTTP 429 "Extra usage is required for
# long context requests" when a Claude Max (or similar)

View File

@@ -94,7 +94,11 @@ def agent():
a._cached_system_prompt = "You are helpful."
a._use_prompt_caching = False
a.tool_delay = 0
a.compression_enabled = False
# Default matches production (`compression.enabled` defaults to True).
# Overflow-recovery tests below verify that 413 / context-overflow
# errors DO trigger compression; the disabled-path behavior is
# covered explicitly by TestOverflowWithCompactionDisabled.
a.compression_enabled = True
a.save_trajectories = False
return a
@@ -415,6 +419,13 @@ class TestPreflightCompression:
def test_compress_context_emits_lifecycle_status_before_work(self, agent):
"""Direct context compression should tell gateway users why the turn paused."""
# This test calls _compress_context directly and asserts the FIRST
# status event is the lifecycle "Compacting context" message. With
# compaction enabled the lazy feasibility probe would emit an
# aux-provider warning first (no aux key in the hermetic test env),
# displacing events[0]. The flag value is irrelevant to what this
# test asserts, so disable it to suppress the probe.
agent.compression_enabled = False
events = []
agent.status_callback = lambda ev, msg: events.append((ev, msg))
@@ -802,3 +813,95 @@ class TestToolResultPreflightCompression:
mock_compress.assert_called_once()
assert result["completed"] is True
# ---------------------------------------------------------------------------
# Disabled auto-compaction on overflow (port of anomalyco/opencode#30749)
# ---------------------------------------------------------------------------
class TestOverflowWithCompactionDisabled:
    """When ``compression.enabled`` is False, NO automatic compaction may
    fire — including the provider/request-size overflow recovery paths.

    Ported from anomalyco/opencode#30749: the proactive token-threshold
    path already honoured the setting, but provider overflow errors
    (413 payload-too-large, context-overflow, long-context-tier 429) still
    silently compressed + rotated the session. The fix surfaces a terminal
    error so the user can compact manually, start fresh, or switch models.

    Both disabled-path tests pin the same terminal-result contract
    (``failed``, ``completed``, ``compaction_disabled``, the error text,
    and session persistence) so a regression in either overflow class is
    caught the same way.
    """

    @staticmethod
    def _prefill():
        """Return a minimal prior exchange passed as ``conversation_history``."""
        return [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

    def test_413_does_not_compress_when_disabled(self, agent):
        """413 must NOT call _compress_context when compaction is disabled."""
        agent.compression_enabled = False
        err_413 = _make_413_error()
        # If the guard fails, a second (success) response would be consumed.
        agent.client.chat.completions.create.side_effect = [err_413, _mock_response()]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session") as mock_persist,
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("hello", conversation_history=self._prefill())

        mock_compress.assert_not_called()
        # The session is persisted before the terminal error is returned.
        mock_persist.assert_called()
        assert result.get("failed") is True
        assert result.get("completed") is False
        assert result.get("compaction_disabled") is True
        assert "auto-compaction is disabled" in result["error"]

    def test_context_overflow_does_not_compress_when_disabled(self, agent):
        """400 'prompt is too long' must NOT compress when compaction disabled."""
        agent.compression_enabled = False
        err_400 = Exception(
            "Error code: 400 - {'type': 'error', 'error': {'type': "
            "'invalid_request_error', 'message': 'prompt is too long: "
            "233153 tokens > 200000 maximum'}}"
        )
        err_400.status_code = 400
        # If the guard fails, a second (success) response would be consumed.
        agent.client.chat.completions.create.side_effect = [err_400, _mock_response()]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session") as mock_persist,
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("hello", conversation_history=self._prefill())

        mock_compress.assert_not_called()
        # Same terminal-result contract as the 413 case above.
        mock_persist.assert_called()
        assert result.get("failed") is True
        assert result.get("completed") is False
        assert result.get("compaction_disabled") is True
        assert "auto-compaction is disabled" in result["error"]

    def test_413_still_compresses_when_enabled(self, agent):
        """Control: with compaction enabled, 413 still triggers compression.

        Guards against the disabled-path guard accidentally swallowing the
        enabled path.
        """
        agent.compression_enabled = True
        err_413 = _make_413_error()
        ok_resp = _mock_response(content="Recovered", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}], "compressed",
            )
            result = agent.run_conversation("hello", conversation_history=self._prefill())

        mock_compress.assert_called_once()
        assert result["completed"] is True
        assert result.get("compaction_disabled") is not True

View File

@@ -3903,6 +3903,7 @@ class TestRunConversation:
def test_glm_prompt_exceeds_max_length_triggers_compression(self, agent):
"""GLM/Z.AI uses 'Prompt exceeds max length' for context overflow."""
self._setup_agent(agent)
agent.compression_enabled = True # this test verifies overflow→compression fires
err_400 = Exception(
"Error code: 400 - {'error': {'code': '1261', 'message': 'Prompt exceeds max length'}}"
)
@@ -3937,6 +3938,7 @@ class TestRunConversation:
to the generic 128K fallback tier.
"""
self._setup_agent(agent)
agent.compression_enabled = True # this test verifies overflow→compression fires
agent.provider = "minimax"
agent.model = "MiniMax-M2.7-highspeed"
agent.base_url = "https://api.minimax.io/anthropic"
@@ -3982,6 +3984,7 @@ class TestRunConversation:
rely on compression — see #33669 / PR #33826.
"""
self._setup_agent(agent)
agent.compression_enabled = True # this test verifies overflow→compression fires
agent.provider = "openrouter"
agent.model = "some/unknown-model"
agent.base_url = "https://openrouter.ai/api/v1"