mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix: include cache tokens in dashboard analytics input totals
The /api/analytics/usage endpoint summed only the raw input_tokens column, which for Anthropic-direct sessions holds only the uncached portion of the prompt. cache_read_tokens and cache_write_tokens (which complete the total prompt) were ignored. This caused the dashboard to massively undercount token usage — showing ~117M instead of ~345M over 30 days — since Anthropic sessions with high cache hit rates stored almost all prompt tokens in the cache columns. Fix: fold COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0) into the input_tokens sum across all three SQL queries (daily, by-model, totals). This is correct for every provider because normalize_usage() guarantees input_tokens + cache_read + cache_write = total prompt tokens regardless of API shape (Anthropic / OpenAI / Codex). Add a regression test that creates a session with Anthropic-style token splits and asserts the endpoint returns the combined total.
This commit is contained in:
@@ -2212,7 +2212,7 @@ async def get_usage_analytics(days: int = 30):
     cutoff = time.time() - (days * 86400)
     cur = db._conn.execute("""
         SELECT date(started_at, 'unixepoch') as day,
-               SUM(input_tokens) as input_tokens,
+               SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens,
                SUM(output_tokens) as output_tokens,
                SUM(cache_read_tokens) as cache_read_tokens,
                SUM(reasoning_tokens) as reasoning_tokens,
@@ -2227,18 +2227,18 @@ async def get_usage_analytics(days: int = 30):

     cur2 = db._conn.execute("""
         SELECT model,
-               SUM(input_tokens) as input_tokens,
+               SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens,
                SUM(output_tokens) as output_tokens,
                COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
                COUNT(*) as sessions,
                SUM(COALESCE(api_call_count, 0)) as api_calls
         FROM sessions WHERE started_at > ? AND model IS NOT NULL
-        GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
+        GROUP BY model ORDER BY SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) + SUM(output_tokens) DESC
     """, (cutoff,))
     by_model = [dict(r) for r in cur2.fetchall()]

     cur3 = db._conn.execute("""
-        SELECT SUM(input_tokens) as total_input,
+        SELECT SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as total_input,
                SUM(output_tokens) as total_output,
                SUM(cache_read_tokens) as total_cache_read,
                SUM(reasoning_tokens) as total_reasoning,
@@ -750,6 +750,49 @@ class TestNewEndpoints:
             "top_skills": [],
         }

+    def test_analytics_usage_includes_cache_tokens_in_input(self):
+        """input_tokens in the response must include cache_read + cache_write."""
+        from hermes_state import SessionDB
+
+        db = SessionDB()
+        try:
+            db.create_session(
+                session_id="cache-tok-test",
+                source="cli",
+                model="claude-opus-4-6",
+            )
+            db.update_token_counts(
+                "cache-tok-test",
+                input_tokens=10,
+                output_tokens=50,
+                cache_read_tokens=9000,
+                cache_write_tokens=1000,
+                billing_provider="anthropic",
+                model="claude-opus-4-6",
+            )
+        finally:
+            db.close()
+
+        resp = self.client.get("/api/analytics/usage?days=7")
+        assert resp.status_code == 200
+        data = resp.json()
+
+        # Totals: input must be 10 + 9000 + 1000 = 10010
+        assert data["totals"]["total_input"] == 10010
+        assert data["totals"]["total_output"] == 50
+
+        # Daily: find the entry and verify
+        assert len(data["daily"]) == 1
+        day = data["daily"][0]
+        assert day["input_tokens"] == 10010
+        assert day["output_tokens"] == 50
+
+        # By-model: verify the model row
+        assert len(data["by_model"]) == 1
+        model_row = data["by_model"][0]
+        assert model_row["input_tokens"] == 10010
+        assert model_row["output_tokens"] == 50
+
     def test_analytics_usage_includes_skill_breakdown(self):
         from hermes_state import SessionDB

Reference in New Issue
Block a user