From 18e6cd993861ac8e1c109a36d1e9f60d403987e0 Mon Sep 17 00:00:00 2001
From: yoniebans <jonny@nousresearch.com>
Date: Mon, 27 Apr 2026 19:14:30 +0200
Subject: [PATCH] fix: include cache tokens in dashboard analytics input totals
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The /api/analytics/usage endpoint summed only the raw input_tokens
column, which for Anthropic-direct sessions holds only the uncached
portion of the prompt.  cache_read_tokens and cache_write_tokens
(which complete the total prompt) were ignored.

This caused the dashboard to massively undercount token usage —
showing ~117M instead of ~345M over 30 days — since Anthropic
sessions with high cache hit rates stored almost all prompt tokens
in the cache columns.

Fix: fold COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)
into the input_tokens sum across all three SQL queries (daily, by-model,
totals).  This is correct for every provider because normalize_usage()
guarantees input_tokens + cache_read + cache_write = total prompt tokens
regardless of API shape (Anthropic / OpenAI / Codex).

Add a regression test that creates a session with Anthropic-style token
splits and asserts the endpoint returns the combined total.
---
 hermes_cli/web_server.py            |  8 +++---
 tests/hermes_cli/test_web_server.py | 43 +++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 13337a7342..dd60e489d3 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -2212,7 +2212,7 @@ async def get_usage_analytics(days: int = 30):
         cutoff = time.time() - (days * 86400)
         cur = db._conn.execute("""
             SELECT date(started_at, 'unixepoch') as day,
-                   SUM(input_tokens) as input_tokens,
+                   SUM(COALESCE(input_tokens, 0) + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens,
                    SUM(output_tokens) as output_tokens,
                    SUM(cache_read_tokens) as cache_read_tokens,
                    SUM(reasoning_tokens) as reasoning_tokens,
@@ -2227,18 +2227,18 @@ async def get_usage_analytics(days: int = 30):
 
         cur2 = db._conn.execute("""
             SELECT model,
-                   SUM(input_tokens) as input_tokens,
+                   SUM(COALESCE(input_tokens, 0) + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens,
                    SUM(output_tokens) as output_tokens,
                    COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
                    COUNT(*) as sessions,
                    SUM(COALESCE(api_call_count, 0)) as api_calls
             FROM sessions WHERE started_at > ? AND model IS NOT NULL
-            GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
+            GROUP BY model ORDER BY SUM(COALESCE(input_tokens, 0) + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) + SUM(output_tokens) DESC
         """, (cutoff,))
         by_model = [dict(r) for r in cur2.fetchall()]
 
         cur3 = db._conn.execute("""
-            SELECT SUM(input_tokens) as total_input,
+            SELECT SUM(COALESCE(input_tokens, 0) + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as total_input,
                    SUM(output_tokens) as total_output,
                    SUM(cache_read_tokens) as total_cache_read,
                    SUM(reasoning_tokens) as total_reasoning,
diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py
index e7b3b03305..410d19d55a 100644
--- a/tests/hermes_cli/test_web_server.py
+++ b/tests/hermes_cli/test_web_server.py
@@ -750,6 +750,49 @@ class TestNewEndpoints:
             "top_skills": [],
         }
 
+    def test_analytics_usage_includes_cache_tokens_in_input(self):
+        """Usage analytics must report input as uncached + cache-read + cache-write."""
+        from hermes_state import SessionDB
+
+        session_id = "cache-tok-test"
+        model_name = "claude-opus-4-6"
+        # Record one session whose prompt tokens sit mostly in the cache columns.
+        store = SessionDB()
+        try:
+            store.create_session(session_id=session_id, source="cli", model=model_name)
+            store.update_token_counts(
+                session_id,
+                input_tokens=10,
+                output_tokens=50,
+                cache_read_tokens=9000,
+                cache_write_tokens=1000,
+                billing_provider="anthropic",
+                model=model_name,
+            )
+        finally:
+            store.close()
+
+        response = self.client.get("/api/analytics/usage?days=7")
+        assert response.status_code == 200
+        payload = response.json()
+
+        # Anthropic-style split: 10 uncached + 9000 cache-read + 1000 cache-write.
+        expected_input = 10010
+        expected_output = 50
+
+        # Overall totals.
+        totals = payload["totals"]
+        assert totals["total_input"] == expected_input
+        assert totals["total_output"] == expected_output
+
+        # The daily and by-model breakdowns must each hold exactly one row.
+        for section in ("daily", "by_model"):
+            rows = payload[section]
+            assert len(rows) == 1
+            row = rows[0]
+            assert row["input_tokens"] == expected_input
+            assert row["output_tokens"] == expected_output
+
     def test_analytics_usage_includes_skill_breakdown(self):
         from hermes_state import SessionDB