Files
hermes-agent/tests/test_context_warning.py
teknium1 885c5dc5e6 feat: context window usage warnings at 80% and 95%
Adds one-time warnings when context usage crosses critical thresholds:
- 80%: suggests /compress or /new if responses degrade
- 95%: warns of imminent errors/truncation, suggests /new

Each threshold fires at most once per session to avoid spam.
Warnings show actual token counts and percentage. Suppressed for
subagents (delegate_depth > 0) where the user can't act on them.
Always shown in CLI mode regardless of quiet_mode setting.

Inspired by OpenCode PR #152 (context window warning).

Bug fix found during live testing:
- Anthropic prompt caching reports input tokens across three fields
  (input_tokens, cache_read_input_tokens, cache_creation_input_tokens).
  The existing code only counted input_tokens, causing the context
  compressor to see ~0 tokens when caching was active. Fixed by summing
  all three fields. This also fixes context % display in the status bar
  for Anthropic users.

Changes:
- agent/context_compressor.py: add check_context_warning() with
  _warned_80/_warned_95 state tracking
- run_agent.py: call check_context_warning() after each API response,
  fix Anthropic cached token counting
- tests/test_context_warning.py: 8 tests covering thresholds,
  one-shot behavior, escalation, edge cases

Live tested with:
- Nous Portal (chat_completions mode) ✔
- Anthropic direct (anthropic_messages mode) ✔
- Interactive CLI session ✔
2026-03-16 06:30:04 -07:00

71 lines
2.6 KiB
Python

"""Tests for context window usage warnings."""
from agent.context_compressor import ContextCompressor
class TestContextWarning:
    """Tests for ``ContextCompressor.check_context_warning()``.

    Each test feeds a ``prompt_tokens`` count through
    ``update_from_response()`` and then checks whether
    ``check_context_warning()`` returns a warning string or ``None``.
    """

    def _make_compressor(self, context_length=200_000):
        """Build a compressor with a known context window size.

        ``context_length`` and ``threshold_tokens`` are assigned directly
        after construction so the percentages used in the tests are
        computed against a fixed window (presumably to avoid depending on
        whatever the constructor derives for "test/model" -- confirm
        against ``ContextCompressor.__init__``).

        Args:
            context_length: Window size, in tokens, to pin on the instance.

        Returns:
            The configured ``ContextCompressor``.
        """
        c = ContextCompressor(model="test/model", threshold_percent=0.50)
        c.context_length = context_length
        c.threshold_tokens = int(context_length * 0.50)
        return c

    def test_no_warning_below_80_percent(self):
        """Usage at 50% of the window produces no warning."""
        c = self._make_compressor()
        c.update_from_response({"prompt_tokens": 100_000})  # 50%
        assert c.check_context_warning() is None

    def test_warning_at_80_percent(self):
        """Usage at exactly 80% produces a warning mentioning /compress."""
        c = self._make_compressor()
        c.update_from_response({"prompt_tokens": 160_000})  # 80%
        warning = c.check_context_warning()
        assert warning is not None
        assert "80%" in warning
        assert "/compress" in warning

    def test_warning_at_95_percent(self):
        """Usage at exactly 95% produces a warning mentioning /new."""
        c = self._make_compressor()
        c.update_from_response({"prompt_tokens": 190_000})  # 95%
        warning = c.check_context_warning()
        assert warning is not None
        assert "95%" in warning
        assert "/new" in warning

    def test_warning_fires_only_once_per_threshold(self):
        """A second check while still in the 80% band returns ``None``."""
        c = self._make_compressor()
        c.update_from_response({"prompt_tokens": 170_000})  # 85%
        w1 = c.check_context_warning()
        assert w1 is not None  # First time at 80%
        c.update_from_response({"prompt_tokens": 175_000})  # Still above 80%
        w2 = c.check_context_warning()
        assert w2 is None  # Already warned

    def test_95_fires_after_80_already_warned(self):
        """Crossing 95% still warns after the 80% warning was emitted."""
        c = self._make_compressor()
        c.update_from_response({"prompt_tokens": 165_000})  # 82.5%
        w1 = c.check_context_warning()
        assert w1 is not None
        # Lenient on purpose: 82.5% may be rendered as "82%" or "83%"
        # depending on how the message rounds -- TODO confirm formatter.
        assert "82%" in w1 or "Context window" in w1
        c.update_from_response({"prompt_tokens": 195_000})  # 97.5%
        w2 = c.check_context_warning()
        assert w2 is not None
        assert "nearly exhausted" in w2  # Escalated warning

    def test_no_warning_when_context_length_zero(self):
        """A zero-length context window never produces a warning."""
        c = self._make_compressor(context_length=0)
        c.update_from_response({"prompt_tokens": 100_000})
        assert c.check_context_warning() is None

    def test_no_warning_when_no_tokens(self):
        """No warning is produced before any response has been recorded."""
        c = self._make_compressor()
        assert c.check_context_warning() is None

    def test_warning_includes_token_counts(self):
        """The warning text shows used and total tokens with separators."""
        c = self._make_compressor(context_length=100_000)
        c.update_from_response({"prompt_tokens": 85_000})  # 85%
        warning = c.check_context_warning()
        # Guard first so a missing warning fails as an assertion rather
        # than a TypeError from ``in`` applied to None.
        assert warning is not None
        assert "85,000" in warning
        assert "100,000" in warning