feat: context window usage warnings at 80% and 95%

Adds one-time warnings when context usage crosses critical thresholds:

- 80%: suggests /compress or /new if responses degrade
- 95%: warns of imminent errors/truncation, suggests /new

Each threshold fires at most once per session to avoid spam. Warnings show actual token counts and percentage. Suppressed for subagents (delegate_depth > 0) where the user can't act on them. Always shown in CLI mode regardless of quiet_mode setting.

Inspired by OpenCode PR #152 (context window warning).

Bug fix found during live testing:

- Anthropic prompt caching reports input tokens across three fields (input_tokens, cache_read_input_tokens, cache_creation_input_tokens). The existing code only counted input_tokens, causing the context compressor to see ~0 tokens when caching was active. Fixed by summing all three fields. This also fixes context % display in the status bar for Anthropic users.

Changes:

- agent/context_compressor.py: add check_context_warning() with _warned_80/_warned_95 state tracking
- run_agent.py: call check_context_warning() after each API response, fix Anthropic cached token counting
- tests/test_context_warning.py: 8 tests covering thresholds, one-shot behavior, escalation, edge cases

Live tested with:

- Nous Portal (chat_completions mode) ✔
- Anthropic direct (anthropic_messages mode) ✔
- Interactive CLI session ✔
2026-05-06 02:37:05 +08:00 · 2026-03-16 06:12:45 -07:00
parent 2633272ea9
commit 885c5dc5e6
3 changed files with 112 additions and 0 deletions
--- a/tests/test_context_warning.py
+++ b/tests/test_context_warning.py
@@ -0,0 +1,98 @@
+"""Tests for context window usage warnings."""
+
+from agent.context_compressor import ContextCompressor
+
+
+class TestContextWarning:
+    """Check the one-shot usage warnings from ``check_context_warning()``.
+
+    Most tests record a ``prompt_tokens`` count via
+    ``update_from_response()`` and then inspect the value returned by
+    ``check_context_warning()`` for that fill level.
+    """
+
+    def _make_compressor(self, context_length=200_000):
+        """Return a compressor with its window pinned to *context_length*.
+
+        ``context_length`` and ``threshold_tokens`` are overwritten after
+        construction so the fill percentages used below are computed against
+        a known window size (presumably the constructor would otherwise
+        derive them from the model name -- confirm in ``ContextCompressor``).
+        """
+        threshold = 0.50  # single source for both uses below
+        c = ContextCompressor(model="test/model", threshold_percent=threshold)
+        c.context_length = context_length
+        c.threshold_tokens = int(context_length * threshold)
+        return c
+
+    def test_no_warning_below_80_percent(self):
+        """50% usage is below both thresholds, so no warning is returned."""
+        c = self._make_compressor()
+        c.update_from_response({"prompt_tokens": 100_000})  # 50%
+        assert c.check_context_warning() is None
+
+    def test_warning_at_80_percent(self):
+        """Exactly 80% usage returns a warning that mentions /compress."""
+        c = self._make_compressor()
+        c.update_from_response({"prompt_tokens": 160_000})  # 80%
+        warning = c.check_context_warning()
+        assert warning is not None
+        assert "80%" in warning
+        assert "/compress" in warning
+
+    def test_warning_at_95_percent(self):
+        """Exactly 95% usage returns a warning that mentions /new."""
+        c = self._make_compressor()
+        c.update_from_response({"prompt_tokens": 190_000})  # 95%
+        warning = c.check_context_warning()
+        assert warning is not None
+        assert "95%" in warning
+        assert "/new" in warning
+
+    def test_warning_fires_only_once_per_threshold(self):
+        """A second check between 80% and 95% returns nothing."""
+        c = self._make_compressor()
+        c.update_from_response({"prompt_tokens": 170_000})  # 85%
+        w1 = c.check_context_warning()
+        assert w1 is not None  # First time at 80%
+
+        c.update_from_response({"prompt_tokens": 175_000})  # Still above 80%
+        w2 = c.check_context_warning()
+        assert w2 is None  # Already warned
+
+    def test_95_fires_after_80_already_warned(self):
+        """Crossing 95% still warns after the 80% warning has fired."""
+        c = self._make_compressor()
+        c.update_from_response({"prompt_tokens": 165_000})  # 82.5%
+        w1 = c.check_context_warning()
+        assert w1 is not None
+        # NOTE(review): 82.5% could be rendered as "82%" or "83%" depending on
+        # rounding; the "Context window" alternative presumably covers that.
+        assert "82%" in w1 or "Context window" in w1
+
+        c.update_from_response({"prompt_tokens": 195_000})  # 97.5%
+        w2 = c.check_context_warning()
+        assert w2 is not None
+        assert "nearly exhausted" in w2  # Escalated warning
+
+    def test_no_warning_when_context_length_zero(self):
+        """A context length of zero never produces a warning."""
+        c = self._make_compressor(context_length=0)
+        c.update_from_response({"prompt_tokens": 100_000})
+        assert c.check_context_warning() is None
+
+    def test_no_warning_when_no_tokens(self):
+        """No warning is returned before any usage has been recorded."""
+        c = self._make_compressor()
+        assert c.check_context_warning() is None
+
+    def test_warning_includes_token_counts(self):
+        """The warning text contains the used and total token counts."""
+        c = self._make_compressor(context_length=100_000)
+        c.update_from_response({"prompt_tokens": 85_000})  # 85%
+        warning = c.check_context_warning()
+        # Guard first so a missing warning fails as a plain assertion
+        # instead of a TypeError from ``in`` on None.
+        assert warning is not None
+        assert "85,000" in warning
+        assert "100,000" in warning