From 18e6cd993861ac8e1c109a36d1e9f60d403987e0 Mon Sep 17 00:00:00 2001 From: yoniebans Date: Mon, 27 Apr 2026 19:14:30 +0200 Subject: [PATCH] fix: include cache tokens in dashboard analytics input totals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /api/analytics/usage endpoint summed only the raw input_tokens column, which for Anthropic-direct sessions holds only the uncached portion of the prompt. cache_read_tokens and cache_write_tokens (which complete the total prompt) were ignored. This caused the dashboard to massively undercount token usage — showing ~117M instead of ~345M over 30 days — since Anthropic sessions with high cache hit rates stored almost all prompt tokens in the cache columns. Fix: fold COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0) into the input_tokens sum across all three SQL queries (daily, by-model, totals). This is correct for every provider because normalize_usage() guarantees input_tokens + cache_read + cache_write = total prompt tokens regardless of API shape (Anthropic / OpenAI / Codex). Add a regression test that creates a session with Anthropic-style token splits and asserts the endpoint returns the combined total. --- hermes_cli/web_server.py | 8 +++--- tests/hermes_cli/test_web_server.py | 43 +++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 13337a7342..dd60e489d3 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -2212,7 +2212,7 @@ async def get_usage_analytics(days: int = 30): cutoff = time.time() - (days * 86400) cur = db._conn.execute(""" SELECT date(started_at, 'unixepoch') as day, - SUM(input_tokens) as input_tokens, + SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens, SUM(output_tokens) as output_tokens, SUM(cache_read_tokens) as cache_read_tokens, SUM(reasoning_tokens) as reasoning_tokens, @@ -2227,18 +2227,18 @@ async def get_usage_analytics(days: int = 30): cur2 = db._conn.execute(""" SELECT model, - SUM(input_tokens) as input_tokens, + SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens, SUM(output_tokens) as output_tokens, COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost, COUNT(*) as sessions, SUM(COALESCE(api_call_count, 0)) as api_calls FROM sessions WHERE started_at > ? AND model IS NOT NULL - GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC + GROUP BY model ORDER BY SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) + SUM(output_tokens) DESC """, (cutoff,)) by_model = [dict(r) for r in cur2.fetchall()] cur3 = db._conn.execute(""" - SELECT SUM(input_tokens) as total_input, + SELECT SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as total_input, SUM(output_tokens) as total_output, SUM(cache_read_tokens) as total_cache_read, SUM(reasoning_tokens) as total_reasoning, diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py index e7b3b03305..410d19d55a 100644 --- a/tests/hermes_cli/test_web_server.py +++ b/tests/hermes_cli/test_web_server.py @@ -750,6 +750,49 @@ class TestNewEndpoints: "top_skills": [], } + def test_analytics_usage_includes_cache_tokens_in_input(self): + """input_tokens in the response must include cache_read + cache_write.""" + from hermes_state import SessionDB + + db = SessionDB() + try: + db.create_session( + session_id="cache-tok-test", + source="cli", + model="claude-opus-4-6", + ) + db.update_token_counts( + "cache-tok-test", + input_tokens=10, + output_tokens=50, + cache_read_tokens=9000, + cache_write_tokens=1000, + billing_provider="anthropic", + model="claude-opus-4-6", + ) + finally: + db.close() + + resp = self.client.get("/api/analytics/usage?days=7") + assert resp.status_code == 200 + data = resp.json() + + # Totals: input must be 10 + 9000 + 1000 = 10010 + assert data["totals"]["total_input"] == 10010 + assert data["totals"]["total_output"] == 50 + + # Daily: find the entry and verify + assert len(data["daily"]) == 1 + day = data["daily"][0] + assert day["input_tokens"] == 10010 + assert day["output_tokens"] == 50 + + # By-model: verify the model row + assert len(data["by_model"]) == 1 + model_row = data["by_model"][0] + assert model_row["input_tokens"] == 10010 + assert model_row["output_tokens"] == 50 + def test_analytics_usage_includes_skill_breakdown(self): from hermes_state import SessionDB