Compare commits

...

1 Commits

Author SHA1 Message Date
Teknium
bf754b4611 fix: clear conversation_history after mid-loop compression to prevent empty sessions
After mid-loop compression (triggered by 413, context_overflow, or Anthropic
long-context tier errors), _compress_context() creates a new session in SQLite
and resets _last_flushed_db_idx=0. However, conversation_history was not cleared,
so _flush_messages_to_session_db() computed:

    flush_from = max(len(conversation_history=200), _last_flushed_db_idx=0) = 200
    messages[200:] → empty (compressed messages < 200)

This resulted in zero messages being written to the new session's SQLite store.
On resume, the user would see 'Session found but has no messages.'

The preflight compression path (line 7311) already had the fix:
    conversation_history = None

This commit adds the same clearing to the three mid-loop compression sites:
- Anthropic long-context tier overflow
- HTTP 413 payload too large
- Generic context_overflow error

Reported by Aaryan (Nous community).
2026-04-10 00:05:04 -07:00
2 changed files with 93 additions and 0 deletions

View File

@@ -8344,6 +8344,10 @@ class AIAgent:
approx_tokens=approx_tokens,
task_id=effective_task_id,
)
# Compression created a new session — clear history
# so _flush_messages_to_session_db writes compressed
# messages to the new session, not skipping them.
conversation_history = None
if len(messages) < original_len or old_ctx > _reduced_ctx:
self._emit_status(
f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
@@ -8401,6 +8405,10 @@ class AIAgent:
messages, system_message, approx_tokens=approx_tokens,
task_id=effective_task_id,
)
# Compression created a new session — clear history
# so _flush_messages_to_session_db writes compressed
# messages to the new session, not skipping them.
conversation_history = None
if len(messages) < original_len:
self._emit_status(f"🗜️ Compressed {original_len}{len(messages)} messages, retrying...")
@@ -8519,6 +8527,10 @@ class AIAgent:
messages, system_message, approx_tokens=approx_tokens,
task_id=effective_task_id,
)
# Compression created a new session — clear history
# so _flush_messages_to_session_db writes compressed
# messages to the new session, not skipping them.
conversation_history = None
if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
if len(messages) < original_len:

View File

@@ -172,6 +172,87 @@ class TestHTTP413Compression:
mock_compress.assert_called_once()
assert result["completed"] is True
def test_413_clears_conversation_history_on_persist(self, agent):
"""After 413-triggered compression, _persist_session must receive None history.
Bug: _compress_context() creates a new session and resets _last_flushed_db_idx=0,
but if conversation_history still holds the original (pre-compression) list,
_flush_messages_to_session_db computes flush_from = max(len(history), 0) which
exceeds len(compressed_messages), so messages[flush_from:] is empty and nothing
is written to the new session → "Session found but has no messages" on resume.
"""
err_413 = _make_413_error()
ok_resp = _mock_response(content="OK", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [err_413, ok_resp]
big_history = [
{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
for i in range(200)
]
persist_calls = []
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(
agent, "_persist_session",
side_effect=lambda msgs, hist: persist_calls.append(hist),
),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
mock_compress.return_value = (
[{"role": "user", "content": "summary"}],
"compressed prompt",
)
agent.run_conversation("hello", conversation_history=big_history)
assert len(persist_calls) >= 1, "Expected at least one _persist_session call"
for hist in persist_calls:
assert hist is None, (
f"conversation_history should be None after mid-loop compression, "
f"got list with {len(hist)} items"
)
def test_context_overflow_clears_conversation_history_on_persist(self, agent):
"""After context-overflow compression, _persist_session must receive None history."""
err_400 = Exception(
"Error code: 400 - This endpoint's maximum context length is 128000 tokens. "
"However, you requested about 270460 tokens."
)
err_400.status_code = 400
ok_resp = _mock_response(content="OK", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
big_history = [
{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
for i in range(200)
]
persist_calls = []
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(
agent, "_persist_session",
side_effect=lambda msgs, hist: persist_calls.append(hist),
),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
mock_compress.return_value = (
[{"role": "user", "content": "summary"}],
"compressed prompt",
)
agent.run_conversation("hello", conversation_history=big_history)
assert len(persist_calls) >= 1
for hist in persist_calls:
assert hist is None, (
f"conversation_history should be None after context-overflow compression, "
f"got list with {len(hist)} items"
)
def test_400_context_length_triggers_compression(self, agent):
"""A 400 with 'maximum context length' should trigger compression, not abort as generic 4xx.