diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index aa05a8daa0..0c6cb215dd 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -65,12 +65,42 @@ class ContextCompressor:
         self.summary_model = summary_model_override or ""
 
+        # Context usage warning thresholds (fire once each per session)
+        self._warned_80 = False
+        self._warned_95 = False
+
     def update_from_response(self, usage: Dict[str, Any]):
         """Update tracked token usage from API response."""
         self.last_prompt_tokens = usage.get("prompt_tokens", 0)
         self.last_completion_tokens = usage.get("completion_tokens", 0)
         self.last_total_tokens = usage.get("total_tokens", 0)
 
+    def check_context_warning(self) -> str | None:
+        """Return a warning string if context usage crossed a threshold, else None.
+
+        Each threshold fires at most once per session to avoid spam.
+        """
+        if not self.context_length or not self.last_prompt_tokens:
+            return None
+        pct = self.last_prompt_tokens / self.context_length
+        if pct >= 0.95 and not self._warned_95:
+            self._warned_95 = True
+            self._warned_80 = True  # suppress the weaker warning if usage later dips below 95%
+            used = f"{self.last_prompt_tokens:,}"
+            total = f"{self.context_length:,}"
+            return (
+                f"⚠ Context nearly exhausted ({used}/{total} tokens, {pct:.0%}). "
+                f"Risk of errors or truncation. Use /new to start fresh."
+            )
+        if pct >= 0.80 and not self._warned_80:
+            self._warned_80 = True
+            used = f"{self.last_prompt_tokens:,}"
+            total = f"{self.context_length:,}"
+            return (
+                f"⚠ Context window {pct:.0%} full ({used}/{total} tokens). "
+                f"Consider /compress or /new if responses degrade."
+            )
+
     def should_compress(self, prompt_tokens: int = None) -> bool:
         """Check if context exceeds the compression threshold."""
         tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
diff --git a/run_agent.py b/run_agent.py
index 8a4147a8b0..792b90b6bd 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5102,6 +5102,10 @@ class AIAgent:
         if hasattr(response, 'usage') and response.usage:
             if self.api_mode in ("codex_responses", "anthropic_messages"):
                 prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0
+                # Include cached input tokens for accurate context tracking
+                # (Anthropic reports non-cached, cache-read, and cache-creation separately)
+                prompt_tokens += getattr(response.usage, 'cache_read_input_tokens', 0) or 0
+                prompt_tokens += getattr(response.usage, 'cache_creation_input_tokens', 0) or 0
                 completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0
                 total_tokens = (
                     getattr(response.usage, 'total_tokens', None)
@@ -5118,6 +5118,15 @@ class AIAgent:
                 }
                 self.context_compressor.update_from_response(usage_dict)
 
+                # Emit one-time warnings when context crosses 80% or 95%.
+                # Always show these (even in quiet_mode) — they're critical
+                # user-facing alerts, not debug noise. Only suppress for
+                # subagents (delegate_depth > 0) where the user can't act.
+                if getattr(self, '_delegate_depth', 0) == 0:
+                    _ctx_warning = self.context_compressor.check_context_warning()
+                    if _ctx_warning:
+                        print(f"\n{_ctx_warning}\n")
+
                 # Cache discovered context length after successful call
                 if self.context_compressor._context_probed:
                     ctx = self.context_compressor.context_length
diff --git a/tests/test_context_warning.py b/tests/test_context_warning.py
new file mode 100644
index 0000000000..2186d30d3a
--- /dev/null
+++ b/tests/test_context_warning.py
@@ -0,0 +1,70 @@
+"""Tests for context window usage warnings."""
+
+from agent.context_compressor import ContextCompressor
+
+
+class TestContextWarning:
+    def _make_compressor(self, context_length=200_000):
+        c = ContextCompressor(model="test/model", threshold_percent=0.50)
+        c.context_length = context_length
+        c.threshold_tokens = int(context_length * 0.50)
+        return c
+
+    def test_no_warning_below_80_percent(self):
+        c = self._make_compressor()
+        c.update_from_response({"prompt_tokens": 100_000})  # 50%
+        assert c.check_context_warning() is None
+
+    def test_warning_at_80_percent(self):
+        c = self._make_compressor()
+        c.update_from_response({"prompt_tokens": 160_000})  # 80%
+        warning = c.check_context_warning()
+        assert warning is not None
+        assert "80%" in warning
+        assert "/compress" in warning
+
+    def test_warning_at_95_percent(self):
+        c = self._make_compressor()
+        c.update_from_response({"prompt_tokens": 190_000})  # 95%
+        warning = c.check_context_warning()
+        assert warning is not None
+        assert "95%" in warning
+        assert "/new" in warning
+
+    def test_warning_fires_only_once_per_threshold(self):
+        c = self._make_compressor()
+        c.update_from_response({"prompt_tokens": 170_000})  # 85%
+        w1 = c.check_context_warning()
+        assert w1 is not None  # First time at 80%
+
+        c.update_from_response({"prompt_tokens": 175_000})  # Still above 80%
+        w2 = c.check_context_warning()
+        assert w2 is None  # Already warned
+
+    def test_95_fires_after_80_already_warned(self):
+        c = self._make_compressor()
+        c.update_from_response({"prompt_tokens": 165_000})  # 82.5%
+        w1 = c.check_context_warning()
+        assert w1 is not None
+        assert "82%" in w1 or "Context window" in w1
+
+        c.update_from_response({"prompt_tokens": 195_000})  # 97.5%
+        w2 = c.check_context_warning()
+        assert w2 is not None
+        assert "nearly exhausted" in w2  # Escalated warning
+
+    def test_no_warning_when_context_length_zero(self):
+        c = self._make_compressor(context_length=0)
+        c.update_from_response({"prompt_tokens": 100_000})
+        assert c.check_context_warning() is None
+
+    def test_no_warning_when_no_tokens(self):
+        c = self._make_compressor()
+        assert c.check_context_warning() is None
+
+    def test_warning_includes_token_counts(self):
+        c = self._make_compressor(context_length=100_000)
+        c.update_from_response({"prompt_tokens": 85_000})
+        warning = c.check_context_warning()
+        assert "85,000" in warning
+        assert "100,000" in warning