fix(tui): chrome cost from Nous portal headers only (F3)

The status-bar cost segment must show cost ONLY when running against the Nous portal — per-model cache/input/output pricing is unreliable across the model long tail, so a guessed figure is worse than none.

- New nous_header_cost_usd(agent): the chrome cost source, derived ONLY from the x-nous-credits-* header delta (deliberately ignores the OpenRouter usage.cost accumulator). _get_usage now uses it for cost_usd, so a non-Nous session reports no cost and the TUI hides the segment.
- The /usage accounting page is unchanged in spirit: it now reads real_session_cost_usd(agent) directly (both provider-reported sources) instead of the chrome-narrowed _get_usage cost_usd, so OpenRouter cost still shows there.

Tests: new TestNousHeaderCost (header-only, OR-accumulator ignored, clamp, no-method); updated gateway _get_usage tests for the chrome narrowing; /usage page test still asserts the full provider-reported figure. 316 gateway + 25 cost tests green.
gateway: compact /usage with current-session per-model costs
2026-06-22 18:10:53 +08:00 · 2026-06-16 19:27:07 +05:30 · 2026-06-16 19:26:33 +05:30 · 2026-06-16 19:25:41 +05:30
17 changed files with 916 additions and 93 deletions
--- a/agent/account_usage.py
+++ b/agent/account_usage.py
@@ -243,6 +243,17 @@ def nous_credits_lines(*, markdown: bool = False, timeout: float = 10.0) -> list
    renders from that fixture instead of the real portal (so the block + gauge are
    testable without a live account). Throwaway scaffolding.
    """
+    snapshot = _fetch_nous_credits_snapshot(timeout=timeout)
+    return render_account_usage_lines(snapshot, markdown=markdown)
+
+
+def _fetch_nous_credits_snapshot(timeout: float = 10.0) -> Optional[AccountUsageSnapshot]:
+    """Auth-gate + portal fetch + snapshot build for the Nous credits block.
+
+    Shared by ``nous_credits_lines`` (full block) and
+    ``nous_credits_compact_line`` (one-liner). Honors the
+    HERMES_DEV_CREDITS_FIXTURE dev override. Fail-open → None.
+    """
    # Dev fixture short-circuit — render /usage from the injected state, no portal.
    try:
        from agent.credits_tracker import dev_fixture_credits_state
@@ -251,17 +262,16 @@ def nous_credits_lines(*, markdown: bool = False, timeout: float = 10.0) -> list
    except Exception:
        fixture = None
    if fixture is not None:
-        snapshot = _snapshot_from_credits_state(fixture)
-        return render_account_usage_lines(snapshot, markdown=markdown)
+        return _snapshot_from_credits_state(fixture)

    try:
        from hermes_cli.auth import get_provider_auth_state

        tok = (get_provider_auth_state("nous") or {}).get("access_token")
        if not (isinstance(tok, str) and tok.strip()):
-            return []
+            return None
    except Exception:
-        return []
+        return None
    try:
        import concurrent.futures

@@ -271,13 +281,36 @@ def nous_credits_lines(*, markdown: bool = False, timeout: float = 10.0) -> list
            account = pool.submit(
                get_nous_portal_account_info, force_fresh=True
            ).result(timeout=timeout)
-        snapshot = build_nous_credits_snapshot(account)
-        return render_account_usage_lines(snapshot, markdown=markdown)
+        return build_nous_credits_snapshot(account)
    except Exception:
        # Fail-open (caller shows nothing), but leave a breadcrumb so a dead
        # /usage credits block is diagnosable in agent.log without a dev flag.
        logger.debug("credits ▸ /usage portal fetch/render failed (fail-open)", exc_info=True)
-        return []
+        return None
+
+
+def nous_credits_compact_line(*, timeout: float = 10.0) -> Optional[str]:
+    """One-line Nous credits summary for the compact /usage view, or None.
+
+    Condenses the snapshot's own detail strings (stable, locally-built
+    formats) into ``Nous credits (Plan): Total usable: $X · Renews: …``.
+    Same gating/fail-open semantics as ``nous_credits_lines``.
+    """
+    snap = _fetch_nous_credits_snapshot(timeout=timeout)
+    if snap is None or not snap.available:
+        return None
+    picked = [
+        d for d in snap.details
+        if d.startswith(("Total usable:", "Renews:", "Status:"))
+    ]
+    if not picked:
+        picked = [d for d in snap.details if not d.startswith("Manage / top up:")][:2]
+    if not picked:
+        return None
+    title = snap.title
+    if snap.plan:
+        title += f" ({snap.plan})"
+    return f"{title}: " + " · ".join(picked)


 def _snapshot_from_credits_state(state) -> Optional[AccountUsageSnapshot]:
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -1598,6 +1598,12 @@ def init_agent(
    agent.session_cache_write_tokens = 0
    agent.session_reasoning_tokens = 0
    agent.session_estimated_cost_usd = 0.0
+    # Provider-REPORTED cost only (e.g. OpenRouter usage.cost). None means
+    # "nothing reported" — distinct from a real $0.00.
+    agent.session_actual_cost_usd = None
+    # Per-model session usage rows for /usage: {model: {calls, input, output,
+    # cache_read, cache_write, cost_usd|None}}.
+    agent.session_model_usage = {}
    agent.session_cost_status = "unknown"
    agent.session_cost_source = "none"
    
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -57,7 +57,11 @@ from agent.process_bootstrap import _install_safe_stdio
 from agent.prompt_caching import apply_anthropic_cache_control
 from agent.retry_utils import jittered_backoff
 from agent.trajectory import has_incomplete_scratchpad
-from agent.usage_pricing import estimate_usage_cost, normalize_usage
+from agent.usage_pricing import (
+    estimate_usage_cost,
+    extract_provider_cost_usd,
+    normalize_usage,
+)
 from hermes_constants import PARTIAL_STREAM_STUB_ID
 from hermes_logging import set_session_context
 from tools.skill_provenance import set_current_write_origin
@@ -1805,6 +1809,37 @@ def run_conversation(
                    agent.session_cost_status = cost_result.status
                    agent.session_cost_source = cost_result.source

+                    # ── Real provider-REPORTED cost (never estimated) ──
+                    # OpenRouter usage accounting returns ``usage.cost`` on the
+                    # response when the request carries usage:{include:true}
+                    # (added on OpenRouter routes). When the provider reports
+                    # nothing, this stays None — absent, NOT zero — so cost
+                    # displays hide instead of showing a fabricated $0.00.
+                    reported_cost_usd = extract_provider_cost_usd(response.usage)
+                    if reported_cost_usd is not None:
+                        _prev_actual = getattr(agent, "session_actual_cost_usd", None)
+                        agent.session_actual_cost_usd = (_prev_actual or 0.0) + reported_cost_usd
+                        agent.session_cost_status = "actual"
+                        agent.session_cost_source = "provider_cost_api"
+
+                    # Per-model session breakdown for /usage — counts are always
+                    # real; cost_usd only accumulates provider-reported values
+                    # and stays None when the provider reports nothing.
+                    _model_usage = getattr(agent, "session_model_usage", None)
+                    if _model_usage is None:
+                        _model_usage = agent.session_model_usage = {}
+                    _mrow = _model_usage.setdefault(agent.model, {
+                        "calls": 0, "input": 0, "output": 0,
+                        "cache_read": 0, "cache_write": 0, "cost_usd": None,
+                    })
+                    _mrow["calls"] += 1
+                    _mrow["input"] += canonical_usage.input_tokens
+                    _mrow["output"] += canonical_usage.output_tokens
+                    _mrow["cache_read"] += canonical_usage.cache_read_tokens
+                    _mrow["cache_write"] += canonical_usage.cache_write_tokens
+                    if reported_cost_usd is not None:
+                        _mrow["cost_usd"] = (_mrow["cost_usd"] or 0.0) + reported_cost_usd
+
                    # Persist token counts to session DB for /insights.
                    # Do this for every platform with a session_id so non-CLI
                    # sessions (gateway, cron, delegated runs) cannot lose
@@ -1831,8 +1866,14 @@ def run_conversation(
                                reasoning_tokens=canonical_usage.reasoning_tokens,
                                estimated_cost_usd=float(cost_result.amount_usd)
                                if cost_result.amount_usd is not None else None,
-                                cost_status=cost_result.status,
-                                cost_source=cost_result.source,
+                                # Provider-reported per-call cost delta. NULL
+                                # (not 0) when the provider reported nothing —
+                                # the SQL CASE keeps actual_cost_usd untouched.
+                                actual_cost_usd=reported_cost_usd,
+                                cost_status="actual"
+                                if reported_cost_usd is not None else cost_result.status,
+                                cost_source="provider_cost_api"
+                                if reported_cost_usd is not None else cost_result.source,
                                billing_provider=agent.provider,
                                billing_base_url=agent.base_url,
                                billing_mode="subscription_included"
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -388,6 +388,13 @@ class ChatCompletionsTransport(ProviderTransport):
        if provider_prefs and is_openrouter:
            extra_body["provider"] = provider_prefs

+        # OpenRouter usage accounting — response `usage.cost` carries the REAL
+        # charged cost (credits are 1:1 USD). Parity with the profile path in
+        # plugins/model-providers/openrouter/__init__.py; this branch only runs
+        # when the OpenRouter profile isn't loaded.
+        if is_openrouter:
+            extra_body["usage"] = {"include": True}
+
        # Pareto Code router plugin — model-gated. Same shape as the
        # profile path in plugins/model-providers/openrouter/__init__.py;
        # this branch only runs when the OpenRouter profile isn't loaded.
--- a/agent/usage_pricing.py
+++ b/agent/usage_pricing.py
@@ -852,6 +852,100 @@ def estimate_usage_cost(
    )


+def _finite_nonneg_number(value: Any) -> Optional[float]:
+    """Return ``value`` as a float when it is a real, finite, non-negative
+    number (int/float, not bool); otherwise None."""
+    if isinstance(value, bool) or not isinstance(value, (int, float)):
+        return None
+    try:
+        f = float(value)
+    except (TypeError, ValueError):
+        return None
+    if f != f or f in (float("inf"), float("-inf")) or f < 0:
+        return None
+    return f
+
+
+def extract_provider_cost_usd(response_usage: Any) -> Optional[float]:
+    """Provider-REPORTED cost (USD) from a response ``usage`` object, or None.
+
+    Reads the ``usage.cost`` field that OpenRouter's usage accounting returns
+    (``usage: {"include": true}`` request param; OpenRouter credits are 1:1
+    USD). OpenRouter-compatible aggregators use the same field. This NEVER
+    estimates: when the provider reports nothing, the result is None — callers
+    must treat None as "no cost data", not zero. A reported ``0`` is a real
+    zero (e.g. free-tier models) and is returned as ``0.0``.
+    """
+    if response_usage is None:
+        return None
+    cost = getattr(response_usage, "cost", None)
+    if cost is None and isinstance(response_usage, dict):
+        cost = response_usage.get("cost")
+    return _finite_nonneg_number(cost)
+
+
+def real_session_cost_usd(agent: Any) -> Optional[float]:
+    """Session-cumulative provider-REPORTED cost in USD, or None.
+
+    Combines the two real sources Hermes has — no estimation, ever:
+      - ``agent.session_actual_cost_usd``: per-response ``usage.cost``
+        accumulator (OpenRouter usage accounting).
+      - Nous ``x-nous-credits-*`` header delta via
+        ``agent.get_credits_spent_micros()`` (account-level spend since the
+        session first saw a header; clamped at 0 so a mid-session top-up
+        doesn't render a negative cost).
+
+    Returns None when neither source has reported anything — callers must
+    hide their cost display in that case rather than showing $0.00.
+    """
+    total: Optional[float] = None
+
+    actual = _finite_nonneg_number(getattr(agent, "session_actual_cost_usd", None))
+    if actual is not None:
+        total = actual
+
+    try:
+        spent_micros = agent.get_credits_spent_micros()
+    except Exception:
+        spent_micros = None
+    if spent_micros is not None:
+        try:
+            spent_usd = max(0, int(spent_micros)) / 1_000_000
+        except (TypeError, ValueError):
+            spent_usd = None
+        if spent_usd is not None:
+            total = (total or 0.0) + spent_usd
+
+    return total
+
+
+def nous_header_cost_usd(agent: Any) -> Optional[float]:
+    """Session-cumulative cost in USD derived ONLY from the Nous portal
+    ``x-nous-credits-*`` header delta, or None.
+
+    This is the STATUS-BAR cost source (glitch 2026-06-13, F3): the TUI chrome
+    must show cost ONLY when the session runs against the Nous portal, because
+    the header delta is the one figure we can trust without re-deriving per-model
+    cache/input/output pricing (which is unreliable across the model long tail).
+    Unlike :func:`real_session_cost_usd`, this DELIBERATELY ignores the
+    OpenRouter ``usage.cost`` accumulator — a non-Nous route reports no header,
+    so the chrome hides its cost segment entirely.
+
+    The ``/usage`` accounting page keeps using ``real_session_cost_usd`` (both
+    provider-reported sources); only the chrome bar narrows to header-only.
+    """
+    try:
+        spent_micros = agent.get_credits_spent_micros()
+    except Exception:
+        return None
+    if spent_micros is None:
+        return None
+    try:
+        return max(0, int(spent_micros)) / 1_000_000
+    except (TypeError, ValueError):
+        return None
+
+
 def has_known_pricing(
    model_name: str,
    provider: Optional[str] = None,
--- a/cli.py
+++ b/cli.py
@@ -8301,14 +8301,13 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
        compressions = compressor.compression_count

        msg_count = len(self.conversation_history)
-        cost_result = estimate_usage_cost(
+        # Cost — provider-REPORTED only (OpenRouter usage.cost accumulator
+        # and/or Nous credits-header delta). No estimation: an unreported
+        # cost shows as "not reported", never a fabricated dollar figure.
+        from agent.usage_pricing import real_session_cost_usd, resolve_billing_route
+        real_cost_usd = real_session_cost_usd(agent)
+        _billing_route = resolve_billing_route(
            agent.model,
-            CanonicalUsage(
-                input_tokens=input_tokens,
-                output_tokens=output_tokens,
-                cache_read_tokens=cache_read_tokens,
-                cache_write_tokens=cache_write_tokens,
-            ),
            provider=getattr(agent, "provider", None),
            base_url=getattr(agent, "base_url", None),
        )
@@ -8328,21 +8327,16 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
        print(f"  Total tokens:              {total:>10,}")
        print(f"  API calls:                 {calls:>10,}")
        print(f"  Session duration:          {elapsed:>10}")
-        print(f"  Cost status:              {cost_result.status:>10}")
-        print(f"  Cost source:              {cost_result.source:>10}")
-        if cost_result.amount_usd is not None:
-            prefix = "~" if cost_result.status == "estimated" else ""
-            print(f"  Total cost:              {prefix}${float(cost_result.amount_usd):>10.4f}")
-        elif cost_result.status == "included":
-            print(f"  Total cost:              {'included':>10}")
+        if real_cost_usd is not None:
+            print(f"  Cost (provider-reported): ${real_cost_usd:>9.4f}")
+        elif _billing_route.billing_mode == "subscription_included":
+            print(f"  Cost:                    {'included':>11}")
        else:
-            print(f"  Total cost:              {'n/a':>10}")
+            print(f"  Cost:        {'not reported by provider':>23}")
        print(f"  {'─' * 40}")
        print(f"  Current context:  {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)")
        print(f"  Messages:         {msg_count}")
        print(f"  Compressions:     {compressions}")
-        if cost_result.status == "unknown":
-            print(f"  Note:             Pricing unknown for {agent.model}")

        # Account limits -- fetched off-thread with a hard timeout so slow
        # provider APIs don't hang the prompt.
--- a/gateway/slash_commands.py
+++ b/gateway/slash_commands.py
@@ -3215,25 +3215,24 @@ class GatewaySlashCommandsMixin:
            lines.append(t("gateway.usage.label_total", count=f"{agent.session_total_tokens:,}"))
            lines.append(t("gateway.usage.label_api_calls", count=agent.session_api_calls))

-            # Cost estimation
+            # Cost — provider-REPORTED only (OpenRouter usage.cost accumulator
+            # and/or Nous credits-header delta). No estimation: when nothing
+            # was reported the line is omitted entirely, never shown as $0.00.
+            # Subscription-included routes (a billing fact, not a price guess)
+            # still show "included".
            try:
-                from agent.usage_pricing import CanonicalUsage, estimate_usage_cost
-                cost_result = estimate_usage_cost(
-                    agent.model,
-                    CanonicalUsage(
-                        input_tokens=input_tokens,
-                        output_tokens=output_tokens,
-                        cache_read_tokens=cache_read,
-                        cache_write_tokens=cache_write,
-                    ),
-                    provider=getattr(agent, "provider", None),
-                    base_url=getattr(agent, "base_url", None),
-                )
-                if cost_result.amount_usd is not None:
-                    prefix = "~" if cost_result.status == "estimated" else ""
-                    lines.append(t("gateway.usage.label_cost", prefix=prefix, amount=f"{float(cost_result.amount_usd):.4f}"))
-                elif cost_result.status == "included":
-                    lines.append(t("gateway.usage.label_cost_included"))
+                from agent.usage_pricing import real_session_cost_usd, resolve_billing_route
+                real_cost = real_session_cost_usd(agent)
+                if real_cost is not None:
+                    lines.append(t("gateway.usage.label_cost", prefix="", amount=f"{real_cost:.4f}"))
+                else:
+                    route = resolve_billing_route(
+                        agent.model,
+                        provider=getattr(agent, "provider", None),
+                        base_url=getattr(agent, "base_url", None),
+                    )
+                    if route.billing_mode == "subscription_included":
+                        lines.append(t("gateway.usage.label_cost_included"))
            except Exception:
                pass

--- a/hermes_state.py
+++ b/hermes_state.py
@@ -1695,6 +1695,36 @@ class SessionDB:

        return self._execute_write(_do) or 0

+    def usage_totals(self, days: int = 30) -> Dict[str, Any]:
+        """Aggregate usage for sessions started in the last ``days``.
+
+        ``reported_cost_usd`` sums only provider-REPORTED ``actual_cost_usd``
+        (never estimates) and is None when no session in the window has a
+        reported cost — callers must hide cost rather than print $0.00.
+        """
+        cutoff = time.time() - days * 86400
+        with self._lock:
+            row = self._conn.execute(
+                """SELECT COUNT(*) AS sessions,
+                          COALESCE(SUM(input_tokens), 0)
+                            + COALESCE(SUM(cache_read_tokens), 0)
+                            + COALESCE(SUM(cache_write_tokens), 0) AS input_tokens,
+                          COALESCE(SUM(output_tokens), 0) AS output_tokens,
+                          COALESCE(SUM(api_call_count), 0) AS api_calls,
+                          SUM(actual_cost_usd) AS reported_cost_usd
+                   FROM sessions WHERE started_at >= ?""",
+                (cutoff,),
+            ).fetchone()
+        result = dict(row) if row else {}
+        return {
+            "days": days,
+            "sessions": int(result.get("sessions") or 0),
+            "input_tokens": int(result.get("input_tokens") or 0),
+            "output_tokens": int(result.get("output_tokens") or 0),
+            "api_calls": int(result.get("api_calls") or 0),
+            "reported_cost_usd": result.get("reported_cost_usd"),
+        }
+
    def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
        """Get a session by ID."""
        with self._lock:
--- a/plugins/model-providers/openrouter/__init__.py
+++ b/plugins/model-providers/openrouter/__init__.py
@@ -82,6 +82,13 @@ class OpenRouterProfile(ProviderProfile):
        if prefs:
            body["provider"] = prefs

+        # Usage accounting — makes OpenRouter return the REAL cost it charged
+        # in the response `usage.cost` field (credits are 1:1 USD), instead of
+        # Hermes having to estimate from a pricing table. Captured by
+        # agent.usage_pricing.extract_provider_cost_usd in the conversation
+        # loop. https://openrouter.ai/docs/use-cases/usage-accounting
+        body["usage"] = {"include": True}
+
        # Pareto Code router — model-gated. The plugins block is only
        # meaningful for openrouter/pareto-code; sending it on any other
        # model has no documented effect and would be confusing in logs.
--- a/run_agent.py
+++ b/run_agent.py
@@ -636,6 +636,9 @@ class AIAgent:
        self.session_reasoning_tokens = 0
        self.session_api_calls = 0
        self.session_estimated_cost_usd = 0.0
+        # Provider-REPORTED cost only — None means "nothing reported".
+        self.session_actual_cost_usd = None
+        self.session_model_usage = {}
        self.session_cost_status = "unknown"
        self.session_cost_source = "none"
        
--- a/tests/agent/test_provider_cost_capture.py
+++ b/tests/agent/test_provider_cost_capture.py
@@ -0,0 +1,246 @@
+"""Real provider-reported cost capture — never estimated, absent ≠ zero.
+
+Covers the three fixture shapes from the cost-tracking fix:
+  - OpenRouter usage accounting: response ``usage.cost`` present → accumulates.
+  - Nous: ``x-nous-credits-*`` headers present → header delta accumulates.
+  - Provider reports nothing → cost stays None/absent (NOT zero-as-real).
+"""
+
+from types import SimpleNamespace
+
+import pytest
+
+from agent.usage_pricing import extract_provider_cost_usd, nous_header_cost_usd, real_session_cost_usd
+
+
+# ── extract_provider_cost_usd — the per-response REAL cost reader ────────────
+
+
+class TestExtractProviderCost:
+    def test_openrouter_usage_cost_attr(self):
+        usage = SimpleNamespace(prompt_tokens=10, completion_tokens=5, cost=0.001234)
+        assert extract_provider_cost_usd(usage) == pytest.approx(0.001234)
+
+    def test_dict_shaped_usage(self):
+        assert extract_provider_cost_usd({"cost": 0.5}) == pytest.approx(0.5)
+
+    def test_reported_zero_is_real_zero(self):
+        # Free-tier models really cost $0 — distinct from "not reported".
+        usage = SimpleNamespace(cost=0)
+        assert extract_provider_cost_usd(usage) == 0.0
+
+    def test_absent_cost_is_none_not_zero(self):
+        usage = SimpleNamespace(prompt_tokens=10, completion_tokens=5)
+        assert extract_provider_cost_usd(usage) is None
+        assert extract_provider_cost_usd({"prompt_tokens": 10}) is None
+
+    def test_none_usage_is_none(self):
+        assert extract_provider_cost_usd(None) is None
+
+    def test_garbage_cost_values_are_none(self):
+        for bad in ("0.01", True, float("nan"), float("inf"), -0.5, [], {}):
+            assert extract_provider_cost_usd(SimpleNamespace(cost=bad)) is None, bad
+
+
+# ── real_session_cost_usd — the session accumulator surface ─────────────────
+
+
+class _FakeAgent:
+    def __init__(self, actual=None, credits_micros=None):
+        self.session_actual_cost_usd = actual
+        self._credits_micros = credits_micros
+
+    def get_credits_spent_micros(self):
+        return self._credits_micros
+
+
+class TestRealSessionCost:
+    def test_nothing_reported_is_none(self):
+        assert real_session_cost_usd(_FakeAgent()) is None
+
+    def test_openrouter_accumulator_only(self):
+        assert real_session_cost_usd(_FakeAgent(actual=0.42)) == pytest.approx(0.42)
+
+    def test_nous_credits_delta_only(self):
+        # 123_400 micros = $0.1234
+        assert real_session_cost_usd(
+            _FakeAgent(credits_micros=123_400)
+        ) == pytest.approx(0.1234)
+
+    def test_both_sources_sum(self):
+        assert real_session_cost_usd(
+            _FakeAgent(actual=0.10, credits_micros=200_000)
+        ) == pytest.approx(0.30)
+
+    def test_negative_credits_delta_clamped(self):
+        # A mid-session top-up makes the delta negative — never show negative cost.
+        assert real_session_cost_usd(_FakeAgent(credits_micros=-50_000)) == 0.0
+
+    def test_agent_without_credits_method(self):
+        agent = SimpleNamespace(session_actual_cost_usd=None)
+        assert real_session_cost_usd(agent) is None
+
+    def test_non_numeric_actual_ignored(self):
+        agent = _FakeAgent()
+        agent.session_actual_cost_usd = "0.42"  # corrupted attr → ignore
+        assert real_session_cost_usd(agent) is None
+
+
+# ── nous_header_cost_usd — the CHROME status-bar cost (F3: header-only) ──────
+
+
+class TestNousHeaderCost:
+    def test_header_delta_only(self):
+        # 123_400 micros = $0.1234 — the Nous header source feeds the chrome.
+        assert nous_header_cost_usd(_FakeAgent(credits_micros=123_400)) == pytest.approx(0.1234)
+
+    def test_openrouter_accumulator_ignored(self):
+        # The OpenRouter usage.cost accumulator must NOT feed the chrome bar:
+        # a non-Nous session (no header → None) reports no cost even when the
+        # OpenRouter accumulator has a value.
+        assert nous_header_cost_usd(_FakeAgent(actual=0.42)) is None
+
+    def test_no_header_is_none(self):
+        assert nous_header_cost_usd(_FakeAgent()) is None
+
+    def test_negative_delta_clamped(self):
+        # A mid-session top-up makes the delta negative — never show negative.
+        assert nous_header_cost_usd(_FakeAgent(credits_micros=-50_000)) == 0.0
+
+    def test_agent_without_credits_method_is_none(self):
+        agent = SimpleNamespace(session_actual_cost_usd=0.42)
+        assert nous_header_cost_usd(agent) is None
+
+
+# ── Nous header fixture → real accumulator (full _capture_credits path) ─────
+
+
+def _nous_headers(remaining_micros: int) -> dict:
+    return {
+        "x-nous-credits-version": "1",
+        "x-nous-credits-remaining-micros": str(remaining_micros),
+        "x-nous-credits-remaining-usd": f"{remaining_micros / 1_000_000:.2f}",
+        "x-nous-credits-subscription-micros": str(remaining_micros),
+        "x-nous-credits-subscription-usd": f"{remaining_micros / 1_000_000:.2f}",
+        "x-nous-credits-rollover-micros": "0",
+        "x-nous-credits-purchased-micros": "0",
+        "x-nous-credits-purchased-usd": "0.00",
+        "x-nous-credits-denominator-kind": "none",
+        "x-nous-credits-paid-access": "true",
+        "x-nous-credits-as-of-ms": "1717000000000",
+    }
+
+
+def _bare_nous_agent():
+    """Minimal AIAgent shell exercising the real _capture_credits path."""
+    from run_agent import AIAgent
+
+    agent = object.__new__(AIAgent)
+    agent.provider = "nous"
+    agent._credits_state = None
+    agent._credits_session_start_micros = None
+    agent.notice_callback = None
+    agent.notice_clear_callback = None
+    agent.session_actual_cost_usd = None
+    return agent
+
+
+class TestNousHeaderAccumulation:
+    def test_headers_accumulate_into_real_session_cost(self, monkeypatch):
+        monkeypatch.delenv("HERMES_DEV_CREDITS_FIXTURE", raising=False)
+        agent = _bare_nous_agent()
+
+        # First response latches the session-start balance ($10.00).
+        agent._capture_credits(SimpleNamespace(headers=_nous_headers(10_000_000)))
+        assert real_session_cost_usd(agent) == 0.0  # real zero: headers seen, $0 spent
+
+        # Second response: balance dropped by $0.25 → real reported spend.
+        agent._capture_credits(SimpleNamespace(headers=_nous_headers(9_750_000)))
+        assert real_session_cost_usd(agent) == pytest.approx(0.25)
+
+    def test_no_headers_means_no_cost(self, monkeypatch):
+        monkeypatch.delenv("HERMES_DEV_CREDITS_FIXTURE", raising=False)
+        agent = _bare_nous_agent()
+        agent._capture_credits(SimpleNamespace(headers={"content-type": "application/json"}))
+        assert real_session_cost_usd(agent) is None
+
+
+# ── OpenRouter request param — usage accounting must be requested ────────────
+
+
+class TestOpenRouterUsageParam:
+    def test_profile_extra_body_requests_usage_accounting(self):
+        import importlib.util
+        from pathlib import Path
+
+        from providers import get_provider_profile
+
+        profile = get_provider_profile("openrouter")
+        if profile is None:
+            # Force plugin discovery in minimal test envs.
+            plugin = Path(__file__).resolve().parents[2] / "plugins" / "model-providers" / "openrouter" / "__init__.py"
+            spec = importlib.util.spec_from_file_location("_or_plugin", plugin)
+            mod = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(mod)
+            profile = mod.openrouter
+
+        body = profile.build_extra_body(session_id="s-1")
+        assert body["usage"] == {"include": True}
+
+    def test_legacy_transport_path_requests_usage_accounting(self):
+        from agent.transports.chat_completions import ChatCompletionsTransport
+
+        transport = ChatCompletionsTransport()
+        kwargs = transport.build_kwargs(
+            model="anthropic/claude-sonnet-4.6",
+            messages=[{"role": "user", "content": "hi"}],
+            tools=None,
+            is_openrouter=True,
+        )
+        assert kwargs["extra_body"]["usage"] == {"include": True}
+
+    def test_non_openrouter_does_not_send_usage_param(self):
+        from agent.transports.chat_completions import ChatCompletionsTransport
+
+        transport = ChatCompletionsTransport()
+        kwargs = transport.build_kwargs(
+            model="deepseek-chat",
+            messages=[{"role": "user", "content": "hi"}],
+            tools=None,
+            is_openrouter=False,
+        )
+        assert "usage" not in (kwargs.get("extra_body") or {})
+
+
+# ── nous_credits_compact_line — one-liner for the compact /usage page ───────
+
+
+class TestNousCreditsCompactLine:
+    def test_condenses_snapshot_details(self, monkeypatch):
+        import agent.account_usage as au
+
+        snap = au.AccountUsageSnapshot(
+            provider="nous",
+            source="portal-account",
+            fetched_at=au._utc_now(),
+            title="Nous credits",
+            plan="Ultra",
+            details=(
+                "Subscription credits: $-0.79",
+                "Top-up credits: $988.99",
+                "Total usable: $988.99",
+                "Renews: 2026-06-11T08:14:55.000Z",
+                "Manage / top up: https://portal.nousresearch.com/billing",
+            ),
+        )
+        monkeypatch.setattr(au, "_fetch_nous_credits_snapshot", lambda timeout=10.0: snap)
+        line = au.nous_credits_compact_line()
+        assert line == (
+            "Nous credits (Ultra): Total usable: $988.99 · Renews: 2026-06-11T08:14:55.000Z"
+        )
+
+    def test_none_when_no_snapshot(self, monkeypatch):
+        import agent.account_usage as au
+
+        monkeypatch.setattr(au, "_fetch_nous_credits_snapshot", lambda timeout=10.0: None)
+        assert au.nous_credits_compact_line() is None
--- a/tests/cli/test_cli_status_bar.py
+++ b/tests/cli/test_cli_status_bar.py
@@ -457,7 +457,7 @@ class TestCLIStatusBar:


 class TestCLIUsageReport:
-    def test_show_usage_includes_estimated_cost(self, capsys):
+    def test_show_usage_reports_real_provider_cost(self, capsys):
        cli_obj = _attach_agent(
            _make_cli(),
            prompt_tokens=10_230,
@@ -469,20 +469,22 @@ class TestCLIUsageReport:
            compressions=1,
        )
        cli_obj.verbose = False
+        # Provider-reported cost (e.g. OpenRouter usage accounting accumulator).
+        cli_obj.agent.session_actual_cost_usd = 0.0640

        cli_obj._show_usage()
        output = capsys.readouterr().out

        assert "Model:" in output
-        assert "Cost status:" in output
-        assert "Cost source:" in output
-        assert "Total cost:" in output
+        assert "Cost (provider-reported):" in output
        assert "$" in output
        assert "0.064" in output
        assert "Session duration:" in output
        assert "Compressions:" in output

-    def test_show_usage_marks_unknown_pricing(self, capsys):
+    def test_show_usage_unreported_cost_is_not_a_dollar_figure(self, capsys):
+        """No estimation: when the provider reports nothing, /usage must NOT
+        fabricate a dollar amount — not even $0.00."""
        cli_obj = _attach_agent(
            _make_cli(model="local/my-custom-model"),
            prompt_tokens=1_000,
@@ -497,13 +499,15 @@ class TestCLIUsageReport:
        cli_obj._show_usage()
        output = capsys.readouterr().out

-        assert "Total cost:" in output
-        assert "n/a" in output
-        assert "Pricing unknown for local/my-custom-model" in output
+        assert "not reported by provider" in output
+        assert "Cost (provider-reported):" not in output
+        assert "$0.00" not in output

-    def test_zero_priced_provider_models_stay_unknown(self, capsys):
+    def test_show_usage_never_estimates_even_with_known_pricing(self, capsys):
+        """A model with a pricing-table entry must still show NO cost when the
+        provider reported nothing (hard requirement: real cost only)."""
        cli_obj = _attach_agent(
-            _make_cli(model="glm-5"),
+            _make_cli(model="anthropic/claude-sonnet-4-6"),
            prompt_tokens=1_000,
            completion_tokens=500,
            total_tokens=1_500,
@@ -516,9 +520,8 @@ class TestCLIUsageReport:
        cli_obj._show_usage()
        output = capsys.readouterr().out

-        assert "Total cost:" in output
-        assert "n/a" in output
-        assert "Pricing unknown for glm-5" in output
+        assert "not reported by provider" in output
+        assert "Cost (provider-reported):" not in output


 class TestStatusBarWidthSource:
--- a/tests/gateway/test_usage_command.py
+++ b/tests/gateway/test_usage_command.py
@@ -21,11 +21,16 @@ def _make_mock_agent(**overrides):
        "session_output_tokens": 10_000,
        "session_cache_read_tokens": 5_000,
        "session_cache_write_tokens": 2_000,
+        # Real provider-reported cost: None = nothing reported (the default).
+        "session_actual_cost_usd": None,
    }
    defaults.update(overrides)
    for k, v in defaults.items():
        setattr(agent, k, v)

+    # No Nous credits headers seen unless a test overrides this.
+    agent.get_credits_spent_micros = MagicMock(return_value=None)
+
    # Rate limit state
    rl = MagicMock()
    rl.has_data = True
@@ -72,13 +77,11 @@ class TestUsageCachedAgent:

    @pytest.mark.asyncio
    async def test_cached_agent_shows_detailed_usage(self):
-        agent = _make_mock_agent()
+        agent = _make_mock_agent(session_actual_cost_usd=0.1234)
        runner = _make_runner(SK, cached_agent=agent)
        event = MagicMock()

-        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
-             patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
-            mock_cost.return_value = MagicMock(amount_usd=0.1234, status="estimated")
+        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
            result = await runner._handle_usage_command(event)

        assert "claude-sonnet-4.6" in result
@@ -99,9 +102,7 @@ class TestUsageCachedAgent:
        runner = _make_runner(SK, agent=running, cached_agent=cached)
        event = MagicMock()

-        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
-             patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
-            mock_cost.return_value = MagicMock(amount_usd=None, status="unknown")
+        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
            result = await runner._handle_usage_command(event)

        assert "80,000" in result   # running agent's total
@@ -117,9 +118,7 @@ class TestUsageCachedAgent:
        runner._running_agents[SK] = _AGENT_PENDING_SENTINEL
        event = MagicMock()

-        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
-             patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
-            mock_cost.return_value = MagicMock(amount_usd=None, status="unknown")
+        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
            result = await runner._handle_usage_command(event)

        assert "claude-sonnet-4.6" in result
@@ -153,9 +152,7 @@ class TestUsageCachedAgent:
        runner = _make_runner(SK, cached_agent=agent)
        event = MagicMock()

-        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
-             patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
-            mock_cost.return_value = MagicMock(amount_usd=None, status="unknown")
+        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
            result = await runner._handle_usage_command(event)

        assert "Cache read" not in result
@@ -168,9 +165,7 @@ class TestUsageCachedAgent:
        runner = _make_runner(SK, cached_agent=agent)
        event = MagicMock()

-        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
-             patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
-            mock_cost.return_value = MagicMock(amount_usd=None, status="included")
+        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
            result = await runner._handle_usage_command(event)

        assert "Cost: included" in result
@@ -199,9 +194,7 @@ class TestUsageAccountSection:
                "Session: 85% remaining (15% used)",
            ],
        )
-        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
-             patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
-            mock_cost.return_value = MagicMock(amount_usd=None, status="included")
+        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
            result = await runner._handle_usage_command(event)

        assert "📊 **Session Token Usage**" in result
@@ -256,3 +249,42 @@ class TestUsageAccountSection:
        assert account_call["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
        assert "📊 **Session Info**" in result
        assert "📈 **Account limits**" in result
+
+
+class TestUsageRealCostOnly:
+    """Cost lines are provider-REPORTED only — never estimated, never $0.00."""
+
+    @pytest.mark.asyncio
+    async def test_unreported_cost_renders_no_cost_line(self):
+        """An agent with no reported cost yields no cost line at all."""
+        mock_agent = _make_mock_agent()  # openrouter, nothing reported
+        gateway = _make_runner(SK, cached_agent=mock_agent)
+
+        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
+            rendered = await gateway._handle_usage_command(MagicMock())
+
+        assert "Cost:" not in rendered
+        assert "$0.00" not in rendered
+
+    @pytest.mark.asyncio
+    async def test_nous_credits_delta_renders_as_cost(self):
+        """123_400 credit micros reported by the agent render as $0.1234."""
+        mock_agent = _make_mock_agent(provider="nous", model="Hermes-4.1-405B")
+        mock_agent.get_credits_spent_micros = MagicMock(return_value=123_400)
+        gateway = _make_runner(SK, cached_agent=mock_agent)
+
+        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
+            rendered = await gateway._handle_usage_command(MagicMock())
+
+        assert "$0.1234" in rendered
+
+    @pytest.mark.asyncio
+    async def test_openrouter_reported_cost_renders(self):
+        """A provider-reported session_actual_cost_usd of 0.9876 renders as $0.9876."""
+        mock_agent = _make_mock_agent(session_actual_cost_usd=0.9876)
+        gateway = _make_runner(SK, cached_agent=mock_agent)
+
+        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
+            rendered = await gateway._handle_usage_command(MagicMock())
+
+        assert "$0.9876" in rendered
--- a/tests/providers/test_provider_profiles.py
+++ b/tests/providers/test_provider_profiles.py
@@ -110,7 +110,9 @@ class TestOpenRouterProfile:
    def test_extra_body_no_prefs(self):
        p = get_provider_profile("openrouter")
        body = p.build_extra_body()
-        assert body == {}
+        # Usage accounting is always requested (real provider-reported cost);
+        # nothing else should appear without prefs/session.
+        assert body == {"usage": {"include": True}}

    def test_pareto_min_coding_score_emitted_for_pareto_model(self):
        """min_coding_score → plugins block when model is openrouter/pareto-code."""
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -158,6 +158,37 @@ class TestSessionLifecycle:
        assert session["api_call_count"] == 5
        assert session["input_tokens"] == 300

+    def test_update_token_counts_actual_cost_null_keeps_value(self, db):
+        """A None cost delta between two real deltas leaves the running sum intact."""
+        db.create_session(session_id="s1", source="cli")
+        # 0.25, then None, then 0.10 must leave a stored cost of 0.35.
+        for delta in (0.25, None, 0.10):
+            db.update_token_counts("s1", input_tokens=100, actual_cost_usd=delta)
+
+        stored_cost = db.get_session("s1")["actual_cost_usd"]
+        assert stored_cost == pytest.approx(0.35)
+
+    def test_usage_totals_reported_cost_none_when_nothing_reported(self, db):
+        """With no cost ever recorded, reported_cost_usd is None rather than 0."""
+        db.create_session(session_id="s1", source="cli")
+        db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1)
+
+        summary = db.usage_totals(days=30)
+        # Session and token counters are still expected in the totals.
+        counters = {k: summary[k] for k in ("sessions", "input_tokens", "output_tokens")}
+        assert counters == {"sessions": 1, "input_tokens": 100, "output_tokens": 50}
+        assert summary["reported_cost_usd"] is None
+
+    def test_usage_totals_sums_reported_costs(self, db):
+        """Reported costs from separate sessions add up in usage_totals."""
+        db.create_session(session_id="s1", source="cli")
+        db.create_session(session_id="s2", source="tui")
+        db.update_token_counts("s1", input_tokens=100, actual_cost_usd=0.20)
+        db.update_token_counts("s2", input_tokens=300, actual_cost_usd=0.05)
+
+        summary = db.usage_totals(days=30)
+        assert (summary["sessions"], summary["reported_cost_usd"]) == (2, pytest.approx(0.25))
+
    def test_update_token_counts_backfills_model_when_null(self, db):
        db.create_session(session_id="s1", source="telegram")
        db.update_token_counts("s1", input_tokens=10, output_tokens=5, model="openai/gpt-5.4")
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -7,6 +7,7 @@ import time
 import types
 from datetime import datetime
 from pathlib import Path
+import pytest
 from unittest.mock import patch

 from hermes_constants import reset_hermes_home_override, set_hermes_home_override
@@ -7497,3 +7498,183 @@ def test_reap_idle_sessions_closes_only_evictable(monkeypatch):
        assert closed == [("stale", "idle_timeout")]
    finally:
        server._sessions.clear()
+
+
+# ── /usage: compact in-process page with real-only costs ─────────────────────
+
+
+def _usage_agent(**overrides):
+    """Build a SimpleNamespace stand-in agent for /usage tests; kwargs override defaults."""
+    defaults = {
+        "model": "anthropic/claude-sonnet-4.6",
+        "provider": "openrouter",
+        "base_url": "https://openrouter.ai/api/v1",
+        "session_input_tokens": 35_000,
+        "session_output_tokens": 10_000,
+        "session_cache_read_tokens": 5_000,
+        "session_cache_write_tokens": 2_000,
+        "session_reasoning_tokens": 0,
+        "session_prompt_tokens": 40_000,
+        "session_completion_tokens": 10_000,
+        "session_total_tokens": 50_000,
+        "session_api_calls": 5,
+        "session_actual_cost_usd": None,
+        "session_model_usage": {},
+        "context_compressor": None,
+    }
+    attrs = {**defaults, **overrides}
+    return types.SimpleNamespace(**attrs)
+
+
+def _mute_usage_externals(monkeypatch):
+    """Stub out the session DB and the Nous credits one-liner for /usage tests."""
+    import agent.account_usage as au
+    monkeypatch.setattr(server, "_get_db", lambda: None)
+    monkeypatch.setattr(au, "nous_credits_compact_line", lambda **kw: None)
+
+
+def test_get_usage_cost_absent_when_provider_reports_nothing(monkeypatch):
+    """With no provider-reported cost, _get_usage omits cost_usd entirely."""
+    report = server._get_usage(_usage_agent())
+    # The input counter still reflects the agent's 35_000 input tokens.
+    assert report["input"] == 35_000
+    assert "cost_usd" not in report
+
+
+def test_get_usage_cost_absent_for_openrouter_accumulator_chrome(monkeypatch):
+    """F3: the status-bar payload carries cost ONLY from the Nous header delta.
+
+    A session that has only the OpenRouter usage.cost accumulator set (no Nous
+    credits header method on the agent) must NOT get cost_usd from _get_usage.
+    The /usage accounting page reads the accumulator through a separate path.
+    """
+    openrouter_only = _usage_agent(session_actual_cost_usd=0.4321)
+    assert "cost_usd" not in server._get_usage(openrouter_only)
+
+
+def test_get_usage_cost_from_nous_credits_delta(monkeypatch):
+    """A 250_000-micro credits delta surfaces as an 'actual' cost of $0.25."""
+    nous_agent = _usage_agent(provider="nous", get_credits_spent_micros=lambda: 250_000)
+    report = server._get_usage(nous_agent)
+    assert report["cost_status"] == "actual"
+    assert report["cost_usd"] == pytest.approx(0.25)
+
+
+def test_compact_usage_per_model_rows_and_real_cost(monkeypatch):
+    """Each model gets its own row; only reported costs show a dollar figure."""
+    _mute_usage_externals(monkeypatch)
+    per_model = {
+        "anthropic/claude-sonnet-4.6": {
+            "calls": 4, "input": 30_000, "output": 9_000,
+            "cache_read": 5_000, "cache_write": 2_000, "cost_usd": 0.42,
+        },
+        "deepseek/deepseek-chat": {
+            "calls": 1, "input": 5_000, "output": 1_000,
+            "cache_read": 0, "cache_write": 0, "cost_usd": None,
+        },
+    }
+    agent = _usage_agent(session_actual_cost_usd=0.42, session_model_usage=per_model)
+    page = server._compact_usage_text(_session(agent=agent))
+    rows = page.splitlines()
+
+    assert "Session — anthropic/claude-sonnet-4.6 (openrouter)" in page
+    sonnet_row = next(r for r in rows if "claude-sonnet-4.6" in r and "reqs" in r)
+    assert "reqs 4" in sonnet_row and "$0.4200" in sonnet_row
+    deepseek_row = next(r for r in rows if "deepseek-chat" in r)
+    # Cost not reported for this model → no dollar figure on its row.
+    assert "reqs 1" in deepseek_row and "$" not in deepseek_row
+    assert "session cost: $0.4200 (provider-reported)" in page
+    assert "/usage full" in page
+
+
+def test_compact_usage_absent_cost_never_renders_zero(monkeypatch):
+    """An unreported session cost is spelled out, never shown as $0.00."""
+    _mute_usage_externals(monkeypatch)
+    page = server._compact_usage_text(_session(agent=_usage_agent()))
+    assert "$0.00" not in page
+    assert "session cost: not reported by provider" in page
+
+
+def test_compact_usage_no_agent(monkeypatch):
+    _mute_usage_externals(monkeypatch)
+    agentless = {**_session(agent=None), "agent": None}  # agent key pinned to None
+    assert "no API calls yet" in server._compact_usage_text(agentless)
+
+
+def test_compact_usage_recent_summary_and_credits_line(monkeypatch):
+    """The page includes a 30-day DB summary and the Nous credits one-liner."""
+    import agent.account_usage as account_usage
+
+    totals = {
+        "days": 30, "sessions": 12, "input_tokens": 1_200_000,
+        "output_tokens": 90_000, "api_calls": 64,
+        "reported_cost_usd": 4.5678,
+    }
+    fake_db = types.SimpleNamespace(usage_totals=lambda days=30: dict(totals))
+    credits_line = "Nous credits (Ultra): Total usable: $988.99 · Renews: 2026-06-11"
+
+    monkeypatch.setattr(server, "_get_db", lambda: fake_db)
+    monkeypatch.setattr(account_usage, "nous_credits_compact_line", lambda **kw: credits_line)
+    page = server._compact_usage_text(_session(agent=_usage_agent()))
+
+    # 4.5678 is expected with two decimals in the summary.
+    assert "Last 30d: 12 sessions" in page
+    assert "reported cost $4.57" in page
+    assert "Nous credits (Ultra)" in page
+
+
+def test_compact_usage_recent_summary_hides_unreported_cost(monkeypatch):
+    """A None reported_cost_usd drops the cost part of the 30-day summary."""
+    _mute_usage_externals(monkeypatch)
+    totals = {
+        "days": 30, "sessions": 3, "input_tokens": 10_000,
+        "output_tokens": 2_000, "api_calls": 7,
+        "reported_cost_usd": None,
+    }
+    fake_db = types.SimpleNamespace(usage_totals=lambda days=30: dict(totals))
+
+    # Installed after the mute helper so this DB replaces its None stub.
+    monkeypatch.setattr(server, "_get_db", lambda: fake_db)
+    page = server._compact_usage_text(_session(agent=_usage_agent()))
+    assert "Last 30d: 3 sessions" in page
+    assert "reported cost" not in page
+
+
+def test_slash_exec_usage_is_answered_in_process(monkeypatch):
+    """/usage must not hit the slash worker (it has no live agent)."""
+    _mute_usage_externals(monkeypatch)
+    sid = "sid-usage"
+    request = {
+        "id": "1", "method": "slash.exec",
+        "params": {"session_id": sid, "command": "usage"},
+    }
+    server._sessions[sid] = _session(agent=_usage_agent())
+    try:
+        output = server.handle_request(request)["result"]["output"]
+        assert "Session — anthropic/claude-sonnet-4.6" in output
+        assert server._sessions[sid]["slash_worker"] is None  # worker untouched
+    finally:
+        server._sessions.pop(sid, None)
+
+
+def test_slash_exec_usage_full_falls_through_to_worker(monkeypatch):
+    """`/usage full` is forwarded to the session's slash worker verbatim."""
+    seen = []
+
+    class _RecordingWorker:
+        def run(self, cmd):
+            seen.append(cmd)
+            return "detailed legacy page"
+
+    sid = "sid-usage-full"
+    session = _session(agent=_usage_agent())
+    session["slash_worker"] = _RecordingWorker()
+    server._sessions[sid] = session
+    request = {"id": "1", "method": "slash.exec",
+               "params": {"session_id": sid, "command": "usage full"}}
+    try:
+        monkeypatch.setattr(server, "_mirror_slash_side_effects", lambda *a: "")
+        assert server.handle_request(request)["result"]["output"] == "detailed legacy page"
+        assert seen == ["usage full"]
+    finally:
+        server._sessions.pop(sid, None)
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -2281,23 +2281,20 @@ def _get_usage(agent) -> dict:
            usage["context_max"] = ctx_max
            usage["context_percent"] = max(0, min(100, round(ctx_used / ctx_max * 100)))
        usage["compressions"] = getattr(comp, "compression_count", 0) or 0
+    # Cost (chrome status bar): Nous portal header delta ONLY (F3, glitch
+    # 2026-06-13). The OpenRouter usage.cost accumulator is deliberately NOT
+    # used here — per-model cache/input/output pricing is unreliable across the
+    # model long tail, so the bar shows cost ONLY on a Nous-portal session and
+    # hides it everywhere else. `cost_usd` is ABSENT (not $0.00) when no header
+    # was seen, and the TUI hides its cost segment. (The /usage accounting page
+    # still uses real_session_cost_usd — both provider-reported sources.)
    try:
-        from agent.usage_pricing import CanonicalUsage, estimate_usage_cost
+        from agent.usage_pricing import nous_header_cost_usd

-        cost = estimate_usage_cost(
-            usage["model"],
-            CanonicalUsage(
-                input_tokens=usage["input"],
-                output_tokens=usage["output"],
-                cache_read_tokens=usage["cache_read"],
-                cache_write_tokens=usage["cache_write"],
-            ),
-            provider=getattr(agent, "provider", None),
-            base_url=getattr(agent, "base_url", None),
-        )
-        usage["cost_status"] = cost.status
-        if cost.amount_usd is not None:
-            usage["cost_usd"] = float(cost.amount_usd)
+        real_cost = nous_header_cost_usd(agent)
+        if real_cost is not None:
+            usage["cost_usd"] = real_cost
+            usage["cost_status"] = "actual"
    except Exception:
        pass
    # Dev-only live credits-spent readout (L0 usage-aware-credits). Gated on
@@ -2312,6 +2309,112 @@ def _get_usage(agent) -> dict:
    return usage


+def _compact_usage_text(session: dict) -> str:
+    """Compact /usage page: current session per-model rows + a tight recent
+    summary + a one-line Nous credits gauge.
+
+    Costs are provider-REPORTED only. When a provider reports nothing the
+    cost is simply omitted (never rendered as $0.00). The detailed legacy
+    page stays reachable via `/usage full` (slash-worker → CLI path).
+
+    ``session`` is the gateway session dict; only its "agent" entry is read.
+    Returns the page as newline-joined text, always ending with the
+    `/usage full` hint line.
+    """
+    from agent.usage_pricing import format_token_count_compact as _fmt
+
+    agent = session.get("agent")
+    lines: list[str] = []
+
+    calls = (getattr(agent, "session_api_calls", 0) or 0) if agent is not None else 0
+    if agent is not None and calls > 0:
+        u = _get_usage(agent)
+        provider = getattr(agent, "provider", None)
+        lines.append(f"Session — {u['model']}" + (f" ({provider})" if provider else ""))
+
+        # No per-model breakdown recorded → one synthetic row from session totals.
+        per_model = getattr(agent, "session_model_usage", None) or {}
+        rows = list(per_model.items()) or [(
+            u["model"],
+            {
+                "calls": u["calls"], "input": u["input"], "output": u["output"],
+                "cache_read": u["cache_read"], "cache_write": u["cache_write"],
+                "cost_usd": None,
+            },
+        )]
+        name_w = max(len(name or "?") for name, _ in rows)
+        for name, row in rows:
+            cells = [
+                f"{(name or '?'):<{name_w}}",
+                f"reqs {row.get('calls') or 0}",
+                f"in {_fmt(int(row.get('input', 0) or 0))}",
+                f"out {_fmt(int(row.get('output', 0) or 0))}",
+            ]
+            cache_read = int(row.get("cache_read", 0) or 0)
+            if cache_read:
+                cells.append(f"cache {_fmt(cache_read)}")
+            cost = row.get("cost_usd")
+            if cost is not None:
+                cells.append(f"${cost:.4f}")
+            lines.append("  " + " · ".join(cells))
+
+        ctx_pct = u.get("context_percent")
+        tail = [f"total {_fmt(int(u['total'] or 0))} tokens", f"{u['calls']} calls"]
+        if ctx_pct is not None:
+            tail.append(f"context {ctx_pct}%")
+        if u.get("compressions"):
+            tail.append(f"compressions {u['compressions']}")
+        lines.append("  " + " · ".join(tail))
+
+        # The /usage page reports the FULL provider-reported cost (OpenRouter
+        # usage.cost accumulator AND/OR the Nous header delta) — NOT the chrome's
+        # Nous-header-only figure (F3 narrowed `_get_usage["cost_usd"]` to the
+        # status bar). Read it straight from real_session_cost_usd here so the
+        # accounting page keeps both sources.
+        try:
+            from agent.usage_pricing import real_session_cost_usd
+            cost_usd = real_session_cost_usd(agent)
+        except Exception:
+            cost_usd = None
+        if cost_usd is not None:
+            lines.append(f"  session cost: ${cost_usd:.4f} (provider-reported)")
+        else:
+            lines.append("  session cost: not reported by provider")
+    else:
+        lines.append("Session — no API calls yet")
+
+    # Tight recent summary from the session DB (real costs only).
+    try:
+        db = _get_db()
+        totals = db.usage_totals(days=30) if db is not None else None
+    except Exception:
+        totals = None
+    if totals and totals.get("sessions"):
+        # Token totals may be missing or None; coerce them like the per-model
+        # cells above so one bad field cannot abort the whole page.
+        parts = [
+            f"{totals['sessions']} sessions",
+            f"in {_fmt(int(totals.get('input_tokens') or 0))}",
+            f"out {_fmt(int(totals.get('output_tokens') or 0))}",
+        ]
+        reported = totals.get("reported_cost_usd")
+        if reported is not None:
+            parts.append(f"reported cost ${float(reported):.2f}")
+        lines.append("Last 30d: " + " · ".join(parts))
+
+    # Nous credits one-liner (account-level; independent of the live agent).
+    try:
+        from agent.account_usage import nous_credits_compact_line
+        credits_line = nous_credits_compact_line()
+    except Exception:
+        credits_line = None
+    if credits_line:
+        lines.append(credits_line)
+
+    lines.append("(/usage full — detailed page)")
+    return "\n".join(lines)
+
+
 def _probe_credentials(agent) -> str:
    """Light credential check at session creation — returns warning or ''."""
    try:
@@ -9189,6 +9292,17 @@ def _(rid, params: dict) -> dict:
        except Exception as e:
            return _ok(rid, {"output": f"Plugin command error: {e}"})

+    # /usage — answered in-process from the LIVE agent's session counters.
+    # The slash worker is a separate subprocess that resumes the session
+    # WITHOUT an agent, so it can never see current-session tokens/costs
+    # (it only printed the Nous credits block). `/usage full` still falls
+    # through to the worker for the detailed CLI page.
+    if _cmd_base == "usage" and _cmd_arg.strip().lower() not in {"full", "--full"}:
+        try:
+            return _ok(rid, {"output": _compact_usage_text(session)})
+        except Exception:
+            pass  # fall through to the slash worker
+
    worker = session.get("slash_worker")
    if not worker:
        try: