mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-22 18:10:53 +08:00
Compare commits
3 Commits
bb/desktop
...
feat/provi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a4181bddc9 | ||
|
|
6c1a83bc4b | ||
|
|
2f2eeb9a85 |
@@ -243,6 +243,17 @@ def nous_credits_lines(*, markdown: bool = False, timeout: float = 10.0) -> list
|
||||
renders from that fixture instead of the real portal (so the block + gauge are
|
||||
testable without a live account). Throwaway scaffolding.
|
||||
"""
|
||||
snapshot = _fetch_nous_credits_snapshot(timeout=timeout)
|
||||
return render_account_usage_lines(snapshot, markdown=markdown)
|
||||
|
||||
|
||||
def _fetch_nous_credits_snapshot(timeout: float = 10.0) -> Optional[AccountUsageSnapshot]:
|
||||
"""Auth-gate + portal fetch + snapshot build for the Nous credits block.
|
||||
|
||||
Shared by ``nous_credits_lines`` (full block) and
|
||||
``nous_credits_compact_line`` (one-liner). Honors the
|
||||
HERMES_DEV_CREDITS_FIXTURE dev override. Fail-open → None.
|
||||
"""
|
||||
# Dev fixture short-circuit — render /usage from the injected state, no portal.
|
||||
try:
|
||||
from agent.credits_tracker import dev_fixture_credits_state
|
||||
@@ -251,17 +262,16 @@ def nous_credits_lines(*, markdown: bool = False, timeout: float = 10.0) -> list
|
||||
except Exception:
|
||||
fixture = None
|
||||
if fixture is not None:
|
||||
snapshot = _snapshot_from_credits_state(fixture)
|
||||
return render_account_usage_lines(snapshot, markdown=markdown)
|
||||
return _snapshot_from_credits_state(fixture)
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import get_provider_auth_state
|
||||
|
||||
tok = (get_provider_auth_state("nous") or {}).get("access_token")
|
||||
if not (isinstance(tok, str) and tok.strip()):
|
||||
return []
|
||||
return None
|
||||
except Exception:
|
||||
return []
|
||||
return None
|
||||
try:
|
||||
import concurrent.futures
|
||||
|
||||
@@ -271,13 +281,36 @@ def nous_credits_lines(*, markdown: bool = False, timeout: float = 10.0) -> list
|
||||
account = pool.submit(
|
||||
get_nous_portal_account_info, force_fresh=True
|
||||
).result(timeout=timeout)
|
||||
snapshot = build_nous_credits_snapshot(account)
|
||||
return render_account_usage_lines(snapshot, markdown=markdown)
|
||||
return build_nous_credits_snapshot(account)
|
||||
except Exception:
|
||||
# Fail-open (caller shows nothing), but leave a breadcrumb so a dead
|
||||
# /usage credits block is diagnosable in agent.log without a dev flag.
|
||||
logger.debug("credits ▸ /usage portal fetch/render failed (fail-open)", exc_info=True)
|
||||
return []
|
||||
return None
|
||||
|
||||
|
||||
def nous_credits_compact_line(*, timeout: float = 10.0) -> Optional[str]:
    """Build the one-line Nous credits summary for the compact /usage view.

    The line is assembled from the snapshot's own detail strings and has the
    form ``<title> (<plan>): <detail> · <detail>``. Gating and fail-open
    behaviour come from ``_fetch_nous_credits_snapshot``.

    Args:
        timeout: Forwarded unchanged to ``_fetch_nous_credits_snapshot``.

    Returns:
        The summary line, or None when no snapshot is returned, the snapshot
        is not available, or it carries no detail worth showing.
    """
    snapshot = _fetch_nous_credits_snapshot(timeout=timeout)
    if snapshot is None or not snapshot.available:
        return None

    # Preferred details: the headline figures, in the order the snapshot lists them.
    headline_prefixes = ("Total usable:", "Renews:", "Status:")
    chosen = [item for item in snapshot.details if item.startswith(headline_prefixes)]

    if not chosen:
        # Fallback: the first two details, leaving out the billing link.
        remaining = [
            item for item in snapshot.details
            if not item.startswith("Manage / top up:")
        ]
        chosen = remaining[:2]

    if not chosen:
        return None

    heading = snapshot.title
    if snapshot.plan:
        heading = heading + f" ({snapshot.plan})"

    joined = " · ".join(chosen)
    return f"{heading}: " + joined
|
||||
|
||||
|
||||
def _snapshot_from_credits_state(state) -> Optional[AccountUsageSnapshot]:
|
||||
|
||||
@@ -1598,6 +1598,12 @@ def init_agent(
|
||||
agent.session_cache_write_tokens = 0
|
||||
agent.session_reasoning_tokens = 0
|
||||
agent.session_estimated_cost_usd = 0.0
|
||||
# Provider-REPORTED cost only (e.g. OpenRouter usage.cost). None means
|
||||
# "nothing reported" — distinct from a real $0.00.
|
||||
agent.session_actual_cost_usd = None
|
||||
# Per-model session usage rows for /usage: {model: {calls, input, output,
|
||||
# cache_read, cache_write, cost_usd|None}}.
|
||||
agent.session_model_usage = {}
|
||||
agent.session_cost_status = "unknown"
|
||||
agent.session_cost_source = "none"
|
||||
|
||||
|
||||
@@ -57,7 +57,11 @@ from agent.process_bootstrap import _install_safe_stdio
|
||||
from agent.prompt_caching import apply_anthropic_cache_control
|
||||
from agent.retry_utils import jittered_backoff
|
||||
from agent.trajectory import has_incomplete_scratchpad
|
||||
from agent.usage_pricing import estimate_usage_cost, normalize_usage
|
||||
from agent.usage_pricing import (
|
||||
estimate_usage_cost,
|
||||
extract_provider_cost_usd,
|
||||
normalize_usage,
|
||||
)
|
||||
from hermes_constants import PARTIAL_STREAM_STUB_ID
|
||||
from hermes_logging import set_session_context
|
||||
from tools.skill_provenance import set_current_write_origin
|
||||
@@ -1805,6 +1809,37 @@ def run_conversation(
|
||||
agent.session_cost_status = cost_result.status
|
||||
agent.session_cost_source = cost_result.source
|
||||
|
||||
# ── Real provider-REPORTED cost (never estimated) ──
|
||||
# OpenRouter usage accounting returns ``usage.cost`` on the
|
||||
# response when the request carries usage:{include:true}
|
||||
# (added on OpenRouter routes). When the provider reports
|
||||
# nothing, this stays None — absent, NOT zero — so cost
|
||||
# displays hide instead of showing a fabricated $0.00.
|
||||
reported_cost_usd = extract_provider_cost_usd(response.usage)
|
||||
if reported_cost_usd is not None:
|
||||
_prev_actual = getattr(agent, "session_actual_cost_usd", None)
|
||||
agent.session_actual_cost_usd = (_prev_actual or 0.0) + reported_cost_usd
|
||||
agent.session_cost_status = "actual"
|
||||
agent.session_cost_source = "provider_cost_api"
|
||||
|
||||
# Per-model session breakdown for /usage — counts are always
|
||||
# real; cost_usd only accumulates provider-reported values
|
||||
# and stays None when the provider reports nothing.
|
||||
_model_usage = getattr(agent, "session_model_usage", None)
|
||||
if _model_usage is None:
|
||||
_model_usage = agent.session_model_usage = {}
|
||||
_mrow = _model_usage.setdefault(agent.model, {
|
||||
"calls": 0, "input": 0, "output": 0,
|
||||
"cache_read": 0, "cache_write": 0, "cost_usd": None,
|
||||
})
|
||||
_mrow["calls"] += 1
|
||||
_mrow["input"] += canonical_usage.input_tokens
|
||||
_mrow["output"] += canonical_usage.output_tokens
|
||||
_mrow["cache_read"] += canonical_usage.cache_read_tokens
|
||||
_mrow["cache_write"] += canonical_usage.cache_write_tokens
|
||||
if reported_cost_usd is not None:
|
||||
_mrow["cost_usd"] = (_mrow["cost_usd"] or 0.0) + reported_cost_usd
|
||||
|
||||
# Persist token counts to session DB for /insights.
|
||||
# Do this for every platform with a session_id so non-CLI
|
||||
# sessions (gateway, cron, delegated runs) cannot lose
|
||||
@@ -1831,8 +1866,14 @@ def run_conversation(
|
||||
reasoning_tokens=canonical_usage.reasoning_tokens,
|
||||
estimated_cost_usd=float(cost_result.amount_usd)
|
||||
if cost_result.amount_usd is not None else None,
|
||||
cost_status=cost_result.status,
|
||||
cost_source=cost_result.source,
|
||||
# Provider-reported per-call cost delta. NULL
|
||||
# (not 0) when the provider reported nothing —
|
||||
# the SQL CASE keeps actual_cost_usd untouched.
|
||||
actual_cost_usd=reported_cost_usd,
|
||||
cost_status="actual"
|
||||
if reported_cost_usd is not None else cost_result.status,
|
||||
cost_source="provider_cost_api"
|
||||
if reported_cost_usd is not None else cost_result.source,
|
||||
billing_provider=agent.provider,
|
||||
billing_base_url=agent.base_url,
|
||||
billing_mode="subscription_included"
|
||||
|
||||
@@ -388,6 +388,13 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if provider_prefs and is_openrouter:
|
||||
extra_body["provider"] = provider_prefs
|
||||
|
||||
# OpenRouter usage accounting — response `usage.cost` carries the REAL
|
||||
# charged cost (credits are 1:1 USD). Parity with the profile path in
|
||||
# plugins/model-providers/openrouter/__init__.py; this branch only runs
|
||||
# when the OpenRouter profile isn't loaded.
|
||||
if is_openrouter:
|
||||
extra_body["usage"] = {"include": True}
|
||||
|
||||
# Pareto Code router plugin — model-gated. Same shape as the
|
||||
# profile path in plugins/model-providers/openrouter/__init__.py;
|
||||
# this branch only runs when the OpenRouter profile isn't loaded.
|
||||
|
||||
@@ -852,6 +852,100 @@ def estimate_usage_cost(
|
||||
)
|
||||
|
||||
|
||||
def _finite_nonneg_number(value: Any) -> Optional[float]:
    """Return ``value`` as a float when it is a real, finite, non-negative
    number (int/float, not bool); otherwise None.

    Args:
        value: Candidate number of any type.

    Returns:
        ``float(value)`` for an accepted number, else None. Strings, bools,
        containers, NaN, +/-infinity, negative numbers and ints too large to
        be represented as a float are all rejected.
    """
    # bool is a subclass of int, so it must be excluded explicitly.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return None
    try:
        as_float = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an int whose magnitude exceeds the float range
        # (e.g. 10**400) — not a usable cost, so treat it as "no value".
        return None
    # NaN fails every comparison, so this single chained test rejects NaN,
    # both infinities and negative values.
    if not (0.0 <= as_float < float("inf")):
        return None
    return as_float
|
||||
|
||||
|
||||
def extract_provider_cost_usd(response_usage: Any) -> Optional[float]:
    """Read the provider-REPORTED cost (USD) off a response ``usage`` object.

    The value comes from the ``usage.cost`` field that OpenRouter's usage
    accounting returns (``usage: {"include": true}`` request param; OpenRouter
    credits are 1:1 USD). OpenRouter-compatible aggregators use the same
    field. Nothing is ever estimated here: a provider that reports nothing
    yields None, which callers must read as "no cost data", not as zero. A
    reported ``0`` is a real zero (e.g. free-tier models) and comes back as
    ``0.0``.

    Args:
        response_usage: The response's usage payload, either an object with
            a ``cost`` attribute or a dict with a ``"cost"`` key; may be None.

    Returns:
        The reported cost as validated by ``_finite_nonneg_number``, or None.
    """
    if response_usage is None:
        return None

    reported = getattr(response_usage, "cost", None)

    # Dict-shaped payloads expose the field as a key rather than an attribute.
    needs_key_lookup = reported is None and isinstance(response_usage, dict)
    if needs_key_lookup:
        reported = response_usage.get("cost")

    return _finite_nonneg_number(reported)
|
||||
|
||||
|
||||
def real_session_cost_usd(agent: Any) -> Optional[float]:
    """Session-cumulative provider-REPORTED cost in USD, or None.

    Combines the two real sources Hermes has — no estimation, ever:
      - ``agent.session_actual_cost_usd``: per-response ``usage.cost``
        accumulator (OpenRouter usage accounting).
      - Nous ``x-nous-credits-*`` header delta via
        ``agent.get_credits_spent_micros()`` (account-level spend since the
        session first saw a header; clamped at 0 so a mid-session top-up
        doesn't render a negative cost).

    Args:
        agent: Object that may expose ``session_actual_cost_usd`` and
            ``get_credits_spent_micros()``; either may be missing.

    Returns:
        The sum of whichever sources reported a usable value, or None when
        neither did — callers must hide their cost display in that case
        rather than showing $0.00.
    """
    # Accumulator source: None unless it holds a finite, non-negative number.
    total = _finite_nonneg_number(getattr(agent, "session_actual_cost_usd", None))

    # Header source: a missing or failing getter simply contributes nothing.
    try:
        spent_micros = agent.get_credits_spent_micros()
    except Exception:
        spent_micros = None
    if spent_micros is None:
        return total

    try:
        spent_usd = max(0, int(spent_micros)) / 1_000_000
    except (TypeError, ValueError, OverflowError):
        # int() raises ValueError for NaN and OverflowError for +/-infinity;
        # an unusable header value must not break the fail-open contract.
        return total

    return (total or 0.0) + spent_usd
|
||||
|
||||
|
||||
def nous_header_cost_usd(agent: Any) -> Optional[float]:
    """Session-cumulative cost in USD derived ONLY from the Nous portal
    ``x-nous-credits-*`` header delta, or None.

    This is the STATUS-BAR cost source (glitch 2026-06-13, F3): the TUI chrome
    must show cost ONLY when the session runs against the Nous portal, because
    the header delta is the one figure we can trust without re-deriving
    per-model cache/input/output pricing (which is unreliable across the model
    long tail). Unlike :func:`real_session_cost_usd`, this DELIBERATELY ignores
    the OpenRouter ``usage.cost`` accumulator — a non-Nous route reports no
    header, so the chrome hides its cost segment entirely.

    The ``/usage`` accounting page keeps using ``real_session_cost_usd`` (both
    provider-reported sources); only the chrome bar narrows to header-only.

    Args:
        agent: Object expected to expose ``get_credits_spent_micros()``.

    Returns:
        Spend in USD (micros / 1_000_000, clamped at 0), or None when the
        getter is missing or raises, returns None, or returns a value that
        cannot be converted to an integer (non-numeric, NaN or infinite).
    """
    try:
        spent_micros = agent.get_credits_spent_micros()
    except Exception:
        return None
    if spent_micros is None:
        return None
    try:
        # Clamp at 0: a mid-session top-up can make the delta negative.
        return max(0, int(spent_micros)) / 1_000_000
    except (TypeError, ValueError, OverflowError):
        # int() raises ValueError for NaN and OverflowError for +/-infinity.
        return None
|
||||
|
||||
|
||||
def has_known_pricing(
|
||||
model_name: str,
|
||||
provider: Optional[str] = None,
|
||||
|
||||
28
cli.py
28
cli.py
@@ -8301,14 +8301,13 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
|
||||
compressions = compressor.compression_count
|
||||
|
||||
msg_count = len(self.conversation_history)
|
||||
cost_result = estimate_usage_cost(
|
||||
# Cost — provider-REPORTED only (OpenRouter usage.cost accumulator
|
||||
# and/or Nous credits-header delta). No estimation: an unreported
|
||||
# cost shows as "not reported", never a fabricated dollar figure.
|
||||
from agent.usage_pricing import real_session_cost_usd, resolve_billing_route
|
||||
real_cost_usd = real_session_cost_usd(agent)
|
||||
_billing_route = resolve_billing_route(
|
||||
agent.model,
|
||||
CanonicalUsage(
|
||||
input_tokens=input_tokens,
|
||||
output_tokens=output_tokens,
|
||||
cache_read_tokens=cache_read_tokens,
|
||||
cache_write_tokens=cache_write_tokens,
|
||||
),
|
||||
provider=getattr(agent, "provider", None),
|
||||
base_url=getattr(agent, "base_url", None),
|
||||
)
|
||||
@@ -8328,21 +8327,16 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
|
||||
print(f" Total tokens: {total:>10,}")
|
||||
print(f" API calls: {calls:>10,}")
|
||||
print(f" Session duration: {elapsed:>10}")
|
||||
print(f" Cost status: {cost_result.status:>10}")
|
||||
print(f" Cost source: {cost_result.source:>10}")
|
||||
if cost_result.amount_usd is not None:
|
||||
prefix = "~" if cost_result.status == "estimated" else ""
|
||||
print(f" Total cost: {prefix}${float(cost_result.amount_usd):>10.4f}")
|
||||
elif cost_result.status == "included":
|
||||
print(f" Total cost: {'included':>10}")
|
||||
if real_cost_usd is not None:
|
||||
print(f" Cost (provider-reported): ${real_cost_usd:>9.4f}")
|
||||
elif _billing_route.billing_mode == "subscription_included":
|
||||
print(f" Cost: {'included':>11}")
|
||||
else:
|
||||
print(f" Total cost: {'n/a':>10}")
|
||||
print(f" Cost: {'not reported by provider':>23}")
|
||||
print(f" {'─' * 40}")
|
||||
print(f" Current context: {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)")
|
||||
print(f" Messages: {msg_count}")
|
||||
print(f" Compressions: {compressions}")
|
||||
if cost_result.status == "unknown":
|
||||
print(f" Note: Pricing unknown for {agent.model}")
|
||||
|
||||
# Account limits -- fetched off-thread with a hard timeout so slow
|
||||
# provider APIs don't hang the prompt.
|
||||
|
||||
@@ -3215,25 +3215,24 @@ class GatewaySlashCommandsMixin:
|
||||
lines.append(t("gateway.usage.label_total", count=f"{agent.session_total_tokens:,}"))
|
||||
lines.append(t("gateway.usage.label_api_calls", count=agent.session_api_calls))
|
||||
|
||||
# Cost estimation
|
||||
# Cost — provider-REPORTED only (OpenRouter usage.cost accumulator
|
||||
# and/or Nous credits-header delta). No estimation: when nothing
|
||||
# was reported the line is omitted entirely, never shown as $0.00.
|
||||
# Subscription-included routes (a billing fact, not a price guess)
|
||||
# still show "included".
|
||||
try:
|
||||
from agent.usage_pricing import CanonicalUsage, estimate_usage_cost
|
||||
cost_result = estimate_usage_cost(
|
||||
agent.model,
|
||||
CanonicalUsage(
|
||||
input_tokens=input_tokens,
|
||||
output_tokens=output_tokens,
|
||||
cache_read_tokens=cache_read,
|
||||
cache_write_tokens=cache_write,
|
||||
),
|
||||
provider=getattr(agent, "provider", None),
|
||||
base_url=getattr(agent, "base_url", None),
|
||||
)
|
||||
if cost_result.amount_usd is not None:
|
||||
prefix = "~" if cost_result.status == "estimated" else ""
|
||||
lines.append(t("gateway.usage.label_cost", prefix=prefix, amount=f"{float(cost_result.amount_usd):.4f}"))
|
||||
elif cost_result.status == "included":
|
||||
lines.append(t("gateway.usage.label_cost_included"))
|
||||
from agent.usage_pricing import real_session_cost_usd, resolve_billing_route
|
||||
real_cost = real_session_cost_usd(agent)
|
||||
if real_cost is not None:
|
||||
lines.append(t("gateway.usage.label_cost", prefix="", amount=f"{real_cost:.4f}"))
|
||||
else:
|
||||
route = resolve_billing_route(
|
||||
agent.model,
|
||||
provider=getattr(agent, "provider", None),
|
||||
base_url=getattr(agent, "base_url", None),
|
||||
)
|
||||
if route.billing_mode == "subscription_included":
|
||||
lines.append(t("gateway.usage.label_cost_included"))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -1695,6 +1695,36 @@ class SessionDB:
|
||||
|
||||
return self._execute_write(_do) or 0
|
||||
|
||||
def usage_totals(self, days: int = 30) -> Dict[str, Any]:
    """Aggregate usage for sessions started in the last ``days``.

    ``reported_cost_usd`` sums only provider-REPORTED ``actual_cost_usd``
    (never estimates) and is None when no session in the window has a
    reported cost — callers must hide cost rather than print $0.00.

    Args:
        days: Size of the look-back window in days, measured back from
            ``time.time()``.

    Returns:
        A dict with the keys ``days``, ``sessions``, ``input_tokens``,
        ``output_tokens``, ``api_calls`` and ``reported_cost_usd``. Note that
        ``input_tokens`` is the combined total of the ``input_tokens``,
        ``cache_read_tokens`` and ``cache_write_tokens`` columns.
    """
    # Window start as a Unix timestamp (86400 seconds per day).
    cutoff = time.time() - days * 86400
    with self._lock:
        # Token and call sums are COALESCEd to 0; the cost sum is not, so it
        # stays NULL (None) when no row in the window has a reported cost.
        row = self._conn.execute(
            """SELECT COUNT(*) AS sessions,
            COALESCE(SUM(input_tokens), 0)
            + COALESCE(SUM(cache_read_tokens), 0)
            + COALESCE(SUM(cache_write_tokens), 0) AS input_tokens,
            COALESCE(SUM(output_tokens), 0) AS output_tokens,
            COALESCE(SUM(api_call_count), 0) AS api_calls,
            SUM(actual_cost_usd) AS reported_cost_usd
            FROM sessions WHERE started_at >= ?""",
            (cutoff,),
        ).fetchone()
    # NOTE(review): dict(row) presumably relies on a mapping-style row factory
    # being set on the connection — confirm where self._conn is created.
    result = dict(row) if row else {}
    return {
        "days": days,
        "sessions": int(result.get("sessions") or 0),
        "input_tokens": int(result.get("input_tokens") or 0),
        "output_tokens": int(result.get("output_tokens") or 0),
        "api_calls": int(result.get("api_calls") or 0),
        # Passed through untouched so None keeps meaning "nothing reported".
        "reported_cost_usd": result.get("reported_cost_usd"),
    }
|
||||
|
||||
def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get a session by ID."""
|
||||
with self._lock:
|
||||
|
||||
@@ -82,6 +82,13 @@ class OpenRouterProfile(ProviderProfile):
|
||||
if prefs:
|
||||
body["provider"] = prefs
|
||||
|
||||
# Usage accounting — makes OpenRouter return the REAL cost it charged
|
||||
# in the response `usage.cost` field (credits are 1:1 USD), instead of
|
||||
# Hermes having to estimate from a pricing table. Captured by
|
||||
# agent.usage_pricing.extract_provider_cost_usd in the conversation
|
||||
# loop. https://openrouter.ai/docs/use-cases/usage-accounting
|
||||
body["usage"] = {"include": True}
|
||||
|
||||
# Pareto Code router — model-gated. The plugins block is only
|
||||
# meaningful for openrouter/pareto-code; sending it on any other
|
||||
# model has no documented effect and would be confusing in logs.
|
||||
|
||||
@@ -636,6 +636,9 @@ class AIAgent:
|
||||
self.session_reasoning_tokens = 0
|
||||
self.session_api_calls = 0
|
||||
self.session_estimated_cost_usd = 0.0
|
||||
# Provider-REPORTED cost only — None means "nothing reported".
|
||||
self.session_actual_cost_usd = None
|
||||
self.session_model_usage = {}
|
||||
self.session_cost_status = "unknown"
|
||||
self.session_cost_source = "none"
|
||||
|
||||
|
||||
246
tests/agent/test_provider_cost_capture.py
Normal file
246
tests/agent/test_provider_cost_capture.py
Normal file
@@ -0,0 +1,246 @@
|
||||
"""Real provider-reported cost capture — never estimated, absent ≠ zero.
|
||||
|
||||
Covers the three fixture shapes from the cost-tracking fix:
|
||||
- OpenRouter usage accounting: response ``usage.cost`` present → accumulates.
|
||||
- Nous: ``x-nous-credits-*`` headers present → header delta accumulates.
|
||||
- Provider reports nothing → cost stays None/absent (NOT zero-as-real).
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.usage_pricing import extract_provider_cost_usd, nous_header_cost_usd, real_session_cost_usd
|
||||
|
||||
|
||||
# ── extract_provider_cost_usd — the per-response REAL cost reader ────────────
|
||||
|
||||
|
||||
class TestExtractProviderCost:
    """A reported ``usage.cost`` is returned; anything else yields None."""

    def test_openrouter_usage_cost_attr(self):
        # Attribute-style usage object carrying a cost field.
        response_usage = SimpleNamespace(prompt_tokens=10, completion_tokens=5, cost=0.001234)
        got = extract_provider_cost_usd(response_usage)
        assert got == pytest.approx(0.001234)

    def test_dict_shaped_usage(self):
        payload = {"cost": 0.5}
        assert extract_provider_cost_usd(payload) == pytest.approx(0.5)

    def test_reported_zero_is_real_zero(self):
        # A free-tier model genuinely costs $0, which differs from "not reported".
        assert extract_provider_cost_usd(SimpleNamespace(cost=0)) == 0.0

    def test_absent_cost_is_none_not_zero(self):
        without_cost = (
            SimpleNamespace(prompt_tokens=10, completion_tokens=5),
            {"prompt_tokens": 10},
        )
        for response_usage in without_cost:
            assert extract_provider_cost_usd(response_usage) is None

    def test_none_usage_is_none(self):
        assert extract_provider_cost_usd(None) is None

    def test_garbage_cost_values_are_none(self):
        rejected = ("0.01", True, float("nan"), float("inf"), -0.5, [], {})
        for bad in rejected:
            wrapped = SimpleNamespace(cost=bad)
            assert extract_provider_cost_usd(wrapped) is None, bad
|
||||
|
||||
|
||||
# ── real_session_cost_usd — the session accumulator surface ─────────────────
|
||||
|
||||
|
||||
class _FakeAgent:
    """Minimal agent stand-in exposing the two cost sources the helpers read."""

    def __init__(self, actual=None, credits_micros=None):
        # Header-delta value handed back by get_credits_spent_micros().
        self._credits_micros = credits_micros
        # Stand-in for the per-response usage.cost accumulator.
        self.session_actual_cost_usd = actual

    def get_credits_spent_micros(self):
        """Return the configured credits delta (None means no header seen)."""
        micros = self._credits_micros
        return micros
|
||||
|
||||
|
||||
class TestRealSessionCost:
    """Session total: sum of the reported sources, None when neither reported."""

    def test_nothing_reported_is_none(self):
        agent = _FakeAgent()
        assert real_session_cost_usd(agent) is None

    def test_openrouter_accumulator_only(self):
        agent = _FakeAgent(actual=0.42)
        assert real_session_cost_usd(agent) == pytest.approx(0.42)

    def test_nous_credits_delta_only(self):
        # 123_400 micros correspond to $0.1234.
        agent = _FakeAgent(credits_micros=123_400)
        assert real_session_cost_usd(agent) == pytest.approx(0.1234)

    def test_both_sources_sum(self):
        agent = _FakeAgent(actual=0.10, credits_micros=200_000)
        assert real_session_cost_usd(agent) == pytest.approx(0.30)

    def test_negative_credits_delta_clamped(self):
        # After a mid-session top-up the delta goes negative; cost must not.
        agent = _FakeAgent(credits_micros=-50_000)
        assert real_session_cost_usd(agent) == 0.0

    def test_agent_without_credits_method(self):
        bare = SimpleNamespace(session_actual_cost_usd=None)
        assert real_session_cost_usd(bare) is None

    def test_non_numeric_actual_ignored(self):
        # A corrupted (string) accumulator attribute must be ignored.
        agent = _FakeAgent()
        setattr(agent, "session_actual_cost_usd", "0.42")
        assert real_session_cost_usd(agent) is None
|
||||
|
||||
|
||||
# ── nous_header_cost_usd — the CHROME status-bar cost (F3: header-only) ──────
|
||||
|
||||
|
||||
class TestNousHeaderCost:
    """Status-bar cost: fed by the Nous header delta and nothing else."""

    def test_header_delta_only(self):
        # 123_400 micros correspond to $0.1234.
        agent = _FakeAgent(credits_micros=123_400)
        assert nous_header_cost_usd(agent) == pytest.approx(0.1234)

    def test_openrouter_accumulator_ignored(self):
        # With no header (None) the result is None even though the
        # OpenRouter usage.cost accumulator holds a value.
        agent = _FakeAgent(actual=0.42)
        assert nous_header_cost_usd(agent) is None

    def test_no_header_is_none(self):
        agent = _FakeAgent()
        assert nous_header_cost_usd(agent) is None

    def test_negative_delta_clamped(self):
        # After a mid-session top-up the delta goes negative; cost must not.
        agent = _FakeAgent(credits_micros=-50_000)
        assert nous_header_cost_usd(agent) == 0.0

    def test_agent_without_credits_method_is_none(self):
        bare = SimpleNamespace(session_actual_cost_usd=0.42)
        assert nous_header_cost_usd(bare) is None
|
||||
|
||||
|
||||
# ── Nous header fixture → real accumulator (full _capture_credits path) ─────
|
||||
|
||||
|
||||
def _nous_headers(remaining_micros: int) -> dict:
    """Build an ``x-nous-credits-*`` header fixture for the given balance.

    The remaining and subscription balances both mirror ``remaining_micros``;
    every other header is a fixed value.
    """
    micros_text = str(remaining_micros)
    usd_text = f"{remaining_micros / 1_000_000:.2f}"

    headers = {"x-nous-credits-version": "1"}
    headers["x-nous-credits-remaining-micros"] = micros_text
    headers["x-nous-credits-remaining-usd"] = usd_text
    headers["x-nous-credits-subscription-micros"] = micros_text
    headers["x-nous-credits-subscription-usd"] = usd_text
    headers["x-nous-credits-rollover-micros"] = "0"
    headers["x-nous-credits-purchased-micros"] = "0"
    headers["x-nous-credits-purchased-usd"] = "0.00"
    headers["x-nous-credits-denominator-kind"] = "none"
    headers["x-nous-credits-paid-access"] = "true"
    headers["x-nous-credits-as-of-ms"] = "1717000000000"
    return headers
|
||||
|
||||
|
||||
def _bare_nous_agent():
    """Return an ``AIAgent`` created without ``__init__``, carrying only the
    attributes set below, for driving the real ``_capture_credits`` path."""
    from run_agent import AIAgent

    # object.__new__ skips AIAgent.__init__ entirely.
    shell = object.__new__(AIAgent)
    shell.provider = "nous"
    shell._credits_state = None
    shell._credits_session_start_micros = None
    shell.notice_callback = None
    shell.notice_clear_callback = None
    shell.session_actual_cost_usd = None
    return shell
|
||||
|
||||
|
||||
class TestNousHeaderAccumulation:
    """Drives ``_capture_credits`` with header fixtures and reads the session cost."""

    def test_headers_accumulate_into_real_session_cost(self, monkeypatch):
        monkeypatch.delenv("HERMES_DEV_CREDITS_FIXTURE", raising=False)
        agent = _bare_nous_agent()

        # First response: latches the session-start balance at $10.00.
        opening = SimpleNamespace(headers=_nous_headers(10_000_000))
        agent._capture_credits(opening)
        # Headers seen and nothing spent yet: a real zero, not None.
        assert real_session_cost_usd(agent) == 0.0

        # Second response: the balance is $0.25 lower, which is the reported spend.
        followup = SimpleNamespace(headers=_nous_headers(9_750_000))
        agent._capture_credits(followup)
        assert real_session_cost_usd(agent) == pytest.approx(0.25)

    def test_no_headers_means_no_cost(self, monkeypatch):
        monkeypatch.delenv("HERMES_DEV_CREDITS_FIXTURE", raising=False)
        agent = _bare_nous_agent()

        plain = SimpleNamespace(headers={"content-type": "application/json"})
        agent._capture_credits(plain)
        assert real_session_cost_usd(agent) is None
|
||||
|
||||
|
||||
# ── OpenRouter request param — usage accounting must be requested ────────────
|
||||
|
||||
|
||||
class TestOpenRouterUsageParam:
    """The usage-accounting request param is sent on OpenRouter routes only."""

    def test_profile_extra_body_requests_usage_accounting(self):
        import importlib.util
        from pathlib import Path

        from providers import get_provider_profile

        profile = get_provider_profile("openrouter")
        if profile is None:
            # Minimal test envs may not have discovered the plugin; load it by path.
            repo_root = Path(__file__).resolve().parents[2]
            plugin = repo_root / "plugins" / "model-providers" / "openrouter" / "__init__.py"
            spec = importlib.util.spec_from_file_location("_or_plugin", plugin)
            mod = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(mod)
            profile = mod.openrouter

        extra = profile.build_extra_body(session_id="s-1")
        assert extra["usage"] == {"include": True}

    def test_legacy_transport_path_requests_usage_accounting(self):
        from agent.transports.chat_completions import ChatCompletionsTransport

        request = ChatCompletionsTransport().build_kwargs(
            model="anthropic/claude-sonnet-4.6",
            messages=[{"role": "user", "content": "hi"}],
            tools=None,
            is_openrouter=True,
        )
        assert request["extra_body"]["usage"] == {"include": True}

    def test_non_openrouter_does_not_send_usage_param(self):
        from agent.transports.chat_completions import ChatCompletionsTransport

        request = ChatCompletionsTransport().build_kwargs(
            model="deepseek-chat",
            messages=[{"role": "user", "content": "hi"}],
            tools=None,
            is_openrouter=False,
        )
        # extra_body may be missing or empty on non-OpenRouter routes.
        extra = request.get("extra_body") or {}
        assert "usage" not in extra
|
||||
|
||||
|
||||
# ── nous_credits_compact_line — one-liner for the compact /usage page ───────
|
||||
|
||||
|
||||
class TestNousCreditsCompactLine:
    """Compact one-liner built from a stubbed credits snapshot."""

    def test_condenses_snapshot_details(self, monkeypatch):
        import agent.account_usage as au

        detail_lines = (
            "Subscription credits: $-0.79",
            "Top-up credits: $988.99",
            "Total usable: $988.99",
            "Renews: 2026-06-11T08:14:55.000Z",
            "Manage / top up: https://portal.nousresearch.com/billing",
        )
        snap = au.AccountUsageSnapshot(
            provider="nous",
            source="portal-account",
            fetched_at=au._utc_now(),
            title="Nous credits",
            plan="Ultra",
            details=detail_lines,
        )
        # Stub the fetch so no auth state or portal call is involved.
        monkeypatch.setattr(au, "_fetch_nous_credits_snapshot", lambda timeout=10.0: snap)

        expected = "Nous credits (Ultra): Total usable: $988.99 · Renews: 2026-06-11T08:14:55.000Z"
        assert au.nous_credits_compact_line() == expected

    def test_none_when_no_snapshot(self, monkeypatch):
        import agent.account_usage as au

        monkeypatch.setattr(au, "_fetch_nous_credits_snapshot", lambda timeout=10.0: None)
        result = au.nous_credits_compact_line()
        assert result is None
|
||||
@@ -457,7 +457,7 @@ class TestCLIStatusBar:
|
||||
|
||||
|
||||
class TestCLIUsageReport:
|
||||
def test_show_usage_includes_estimated_cost(self, capsys):
|
||||
def test_show_usage_reports_real_provider_cost(self, capsys):
|
||||
cli_obj = _attach_agent(
|
||||
_make_cli(),
|
||||
prompt_tokens=10_230,
|
||||
@@ -469,20 +469,22 @@ class TestCLIUsageReport:
|
||||
compressions=1,
|
||||
)
|
||||
cli_obj.verbose = False
|
||||
# Provider-reported cost (e.g. OpenRouter usage accounting accumulator).
|
||||
cli_obj.agent.session_actual_cost_usd = 0.0640
|
||||
|
||||
cli_obj._show_usage()
|
||||
output = capsys.readouterr().out
|
||||
|
||||
assert "Model:" in output
|
||||
assert "Cost status:" in output
|
||||
assert "Cost source:" in output
|
||||
assert "Total cost:" in output
|
||||
assert "Cost (provider-reported):" in output
|
||||
assert "$" in output
|
||||
assert "0.064" in output
|
||||
assert "Session duration:" in output
|
||||
assert "Compressions:" in output
|
||||
|
||||
def test_show_usage_marks_unknown_pricing(self, capsys):
|
||||
def test_show_usage_unreported_cost_is_not_a_dollar_figure(self, capsys):
|
||||
"""No estimation: when the provider reports nothing, /usage must NOT
|
||||
fabricate a dollar amount — not even $0.00."""
|
||||
cli_obj = _attach_agent(
|
||||
_make_cli(model="local/my-custom-model"),
|
||||
prompt_tokens=1_000,
|
||||
@@ -497,13 +499,15 @@ class TestCLIUsageReport:
|
||||
cli_obj._show_usage()
|
||||
output = capsys.readouterr().out
|
||||
|
||||
assert "Total cost:" in output
|
||||
assert "n/a" in output
|
||||
assert "Pricing unknown for local/my-custom-model" in output
|
||||
assert "not reported by provider" in output
|
||||
assert "Cost (provider-reported):" not in output
|
||||
assert "$0.00" not in output
|
||||
|
||||
def test_zero_priced_provider_models_stay_unknown(self, capsys):
|
||||
def test_show_usage_never_estimates_even_with_known_pricing(self, capsys):
|
||||
"""A model with a pricing-table entry must still show NO cost when the
|
||||
provider reported nothing (hard requirement: real cost only)."""
|
||||
cli_obj = _attach_agent(
|
||||
_make_cli(model="glm-5"),
|
||||
_make_cli(model="anthropic/claude-sonnet-4-6"),
|
||||
prompt_tokens=1_000,
|
||||
completion_tokens=500,
|
||||
total_tokens=1_500,
|
||||
@@ -516,9 +520,8 @@ class TestCLIUsageReport:
|
||||
cli_obj._show_usage()
|
||||
output = capsys.readouterr().out
|
||||
|
||||
assert "Total cost:" in output
|
||||
assert "n/a" in output
|
||||
assert "Pricing unknown for glm-5" in output
|
||||
assert "not reported by provider" in output
|
||||
assert "Cost (provider-reported):" not in output
|
||||
|
||||
|
||||
class TestStatusBarWidthSource:
|
||||
|
||||
@@ -21,11 +21,16 @@ def _make_mock_agent(**overrides):
|
||||
"session_output_tokens": 10_000,
|
||||
"session_cache_read_tokens": 5_000,
|
||||
"session_cache_write_tokens": 2_000,
|
||||
# Real provider-reported cost: None = nothing reported (the default).
|
||||
"session_actual_cost_usd": None,
|
||||
}
|
||||
defaults.update(overrides)
|
||||
for k, v in defaults.items():
|
||||
setattr(agent, k, v)
|
||||
|
||||
# No Nous credits headers seen unless a test overrides this.
|
||||
agent.get_credits_spent_micros = MagicMock(return_value=None)
|
||||
|
||||
# Rate limit state
|
||||
rl = MagicMock()
|
||||
rl.has_data = True
|
||||
@@ -72,13 +77,11 @@ class TestUsageCachedAgent:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cached_agent_shows_detailed_usage(self):
|
||||
agent = _make_mock_agent()
|
||||
agent = _make_mock_agent(session_actual_cost_usd=0.1234)
|
||||
runner = _make_runner(SK, cached_agent=agent)
|
||||
event = MagicMock()
|
||||
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
|
||||
patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
|
||||
mock_cost.return_value = MagicMock(amount_usd=0.1234, status="estimated")
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
|
||||
result = await runner._handle_usage_command(event)
|
||||
|
||||
assert "claude-sonnet-4.6" in result
|
||||
@@ -99,9 +102,7 @@ class TestUsageCachedAgent:
|
||||
runner = _make_runner(SK, agent=running, cached_agent=cached)
|
||||
event = MagicMock()
|
||||
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
|
||||
patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
|
||||
mock_cost.return_value = MagicMock(amount_usd=None, status="unknown")
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
|
||||
result = await runner._handle_usage_command(event)
|
||||
|
||||
assert "80,000" in result # running agent's total
|
||||
@@ -117,9 +118,7 @@ class TestUsageCachedAgent:
|
||||
runner._running_agents[SK] = _AGENT_PENDING_SENTINEL
|
||||
event = MagicMock()
|
||||
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
|
||||
patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
|
||||
mock_cost.return_value = MagicMock(amount_usd=None, status="unknown")
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
|
||||
result = await runner._handle_usage_command(event)
|
||||
|
||||
assert "claude-sonnet-4.6" in result
|
||||
@@ -153,9 +152,7 @@ class TestUsageCachedAgent:
|
||||
runner = _make_runner(SK, cached_agent=agent)
|
||||
event = MagicMock()
|
||||
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
|
||||
patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
|
||||
mock_cost.return_value = MagicMock(amount_usd=None, status="unknown")
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
|
||||
result = await runner._handle_usage_command(event)
|
||||
|
||||
assert "Cache read" not in result
|
||||
@@ -168,9 +165,7 @@ class TestUsageCachedAgent:
|
||||
runner = _make_runner(SK, cached_agent=agent)
|
||||
event = MagicMock()
|
||||
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
|
||||
patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
|
||||
mock_cost.return_value = MagicMock(amount_usd=None, status="included")
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
|
||||
result = await runner._handle_usage_command(event)
|
||||
|
||||
assert "Cost: included" in result
|
||||
@@ -199,9 +194,7 @@ class TestUsageAccountSection:
|
||||
"Session: 85% remaining (15% used)",
|
||||
],
|
||||
)
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
|
||||
patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
|
||||
mock_cost.return_value = MagicMock(amount_usd=None, status="included")
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
|
||||
result = await runner._handle_usage_command(event)
|
||||
|
||||
assert "📊 **Session Token Usage**" in result
|
||||
@@ -256,3 +249,42 @@ class TestUsageAccountSection:
|
||||
assert account_call["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
|
||||
assert "📊 **Session Info**" in result
|
||||
assert "📈 **Account limits**" in result
|
||||
|
||||
|
||||
class TestUsageRealCostOnly:
    """The gateway /usage reply shows a cost only when a provider reported one.

    Nothing is estimated, and an unreported cost is never rendered as $0.00.
    """

    @staticmethod
    async def _usage_reply(agent):
        """Run the /usage handler against *agent* with rate-limit text stubbed."""
        runner = _make_runner(SK, cached_agent=agent)
        with patch(
            "agent.rate_limit_tracker.format_rate_limit_compact",
            return_value="RPM: 50/60",
        ):
            return await runner._handle_usage_command(MagicMock())

    @pytest.mark.asyncio
    async def test_unreported_cost_renders_no_cost_line(self):
        # Default mock agent: openrouter, nothing reported.
        reply = await self._usage_reply(_make_mock_agent())

        assert "Cost:" not in reply
        assert "$0.00" not in reply

    @pytest.mark.asyncio
    async def test_nous_credits_delta_renders_as_cost(self):
        nous_agent = _make_mock_agent(provider="nous", model="Hermes-4.1-405B")
        # Must be assigned after construction so it replaces the factory's stub.
        nous_agent.get_credits_spent_micros = MagicMock(return_value=123_400)

        reply = await self._usage_reply(nous_agent)

        assert "$0.1234" in reply

    @pytest.mark.asyncio
    async def test_openrouter_reported_cost_renders(self):
        reply = await self._usage_reply(
            _make_mock_agent(session_actual_cost_usd=0.9876)
        )

        assert "$0.9876" in reply
|
||||
|
||||
@@ -110,7 +110,9 @@ class TestOpenRouterProfile:
|
||||
def test_extra_body_no_prefs(self):
|
||||
p = get_provider_profile("openrouter")
|
||||
body = p.build_extra_body()
|
||||
assert body == {}
|
||||
# Usage accounting is always requested (real provider-reported cost);
|
||||
# nothing else should appear without prefs/session.
|
||||
assert body == {"usage": {"include": True}}
|
||||
|
||||
def test_pareto_min_coding_score_emitted_for_pareto_model(self):
|
||||
"""min_coding_score → plugins block when model is openrouter/pareto-code."""
|
||||
|
||||
@@ -158,6 +158,37 @@ class TestSessionLifecycle:
|
||||
assert session["api_call_count"] == 5
|
||||
assert session["input_tokens"] == 300
|
||||
|
||||
def test_update_token_counts_actual_cost_null_keeps_value(self, db):
    """A None cost delta between two real deltas leaves the stored sum intact."""
    db.create_session(session_id="s1", source="cli")

    # Real delta, then a NULL delta, then another real delta.
    for cost_delta in (0.25, None, 0.10):
        db.update_token_counts("s1", input_tokens=100, actual_cost_usd=cost_delta)

    stored = db.get_session("s1")
    assert stored["actual_cost_usd"] == pytest.approx(0.35)
|
||||
|
||||
def test_usage_totals_reported_cost_none_when_nothing_reported(self, db):
    """With no cost ever recorded, usage_totals reports None rather than $0."""
    db.create_session(session_id="s1", source="cli")
    db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1)

    summary = db.usage_totals(days=30)

    for key, expected in (("sessions", 1), ("input_tokens", 100), ("output_tokens", 50)):
        assert summary[key] == expected
    assert summary["reported_cost_usd"] is None
|
||||
|
||||
def test_usage_totals_sums_reported_costs(self, db):
    """Reported costs from separate sessions are added together."""
    # Create both sessions first, then record usage, in the same order as before.
    for session_id, source in (("s1", "cli"), ("s2", "tui")):
        db.create_session(session_id=session_id, source=source)
    for session_id, tokens, cost in (("s1", 100, 0.20), ("s2", 300, 0.05)):
        db.update_token_counts(session_id, input_tokens=tokens, actual_cost_usd=cost)

    summary = db.usage_totals(days=30)

    assert summary["sessions"] == 2
    assert summary["reported_cost_usd"] == pytest.approx(0.25)
|
||||
|
||||
def test_update_token_counts_backfills_model_when_null(self, db):
|
||||
db.create_session(session_id="s1", source="telegram")
|
||||
db.update_token_counts("s1", input_tokens=10, output_tokens=5, model="openai/gpt-5.4")
|
||||
|
||||
@@ -7,6 +7,7 @@ import time
|
||||
import types
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
from unittest.mock import patch
|
||||
|
||||
from hermes_constants import reset_hermes_home_override, set_hermes_home_override
|
||||
@@ -7497,3 +7498,183 @@ def test_reap_idle_sessions_closes_only_evictable(monkeypatch):
|
||||
assert closed == [("stale", "idle_timeout")]
|
||||
finally:
|
||||
server._sessions.clear()
|
||||
|
||||
|
||||
# ── /usage: compact in-process page with real-only costs ─────────────────────
|
||||
|
||||
|
||||
def _usage_agent(**overrides):
|
||||
"""SimpleNamespace agent with realistic session counters for /usage."""
|
||||
base = dict(
|
||||
model="anthropic/claude-sonnet-4.6",
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
session_input_tokens=35_000,
|
||||
session_output_tokens=10_000,
|
||||
session_cache_read_tokens=5_000,
|
||||
session_cache_write_tokens=2_000,
|
||||
session_reasoning_tokens=0,
|
||||
session_prompt_tokens=40_000,
|
||||
session_completion_tokens=10_000,
|
||||
session_total_tokens=50_000,
|
||||
session_api_calls=5,
|
||||
session_actual_cost_usd=None,
|
||||
session_model_usage={},
|
||||
context_compressor=None,
|
||||
)
|
||||
base.update(overrides)
|
||||
return types.SimpleNamespace(**base)
|
||||
|
||||
|
||||
def _mute_usage_externals(monkeypatch):
    """Patch the session DB lookup and the Nous credits one-liner to yield None."""
    import agent.account_usage as account_usage

    def _no_db():
        return None

    def _no_credits_line(**kw):
        return None

    monkeypatch.setattr(server, "_get_db", _no_db)
    monkeypatch.setattr(account_usage, "nous_credits_compact_line", _no_credits_line)
|
||||
|
||||
|
||||
def test_get_usage_cost_absent_when_provider_reports_nothing(monkeypatch):
    """With no provider-reported cost, _get_usage carries no cost_usd key."""
    report = server._get_usage(_usage_agent())

    assert "cost_usd" not in report
    assert report["input"] == 35_000
|
||||
|
||||
|
||||
def test_get_usage_cost_absent_for_openrouter_accumulator_chrome(monkeypatch):
    """F3: _get_usage exposes cost only from the Nous header delta.

    An agent that has only an OpenRouter usage.cost accumulator value (and no
    x-nous-credits header data) must come back without a cost_usd key, so the
    status bar hides cost off-Nous. The /usage accounting page counts the
    accumulator through a separate path.
    """
    openrouter_only = _usage_agent(session_actual_cost_usd=0.4321)

    report = server._get_usage(openrouter_only)

    assert "cost_usd" not in report
|
||||
|
||||
|
||||
def test_get_usage_cost_from_nous_credits_delta(monkeypatch):
    """A Nous credits delta of 250,000 micros surfaces as an actual $0.25 cost."""
    nous_agent = _usage_agent(
        provider="nous",
        get_credits_spent_micros=lambda: 250_000,  # $0.25 real header delta
    )

    report = server._get_usage(nous_agent)

    assert report["cost_usd"] == pytest.approx(0.25)
    assert report["cost_status"] == "actual"
|
||||
|
||||
|
||||
def test_compact_usage_per_model_rows_and_real_cost(monkeypatch):
    """The compact page has one row per model and shows cost only where reported."""
    _mute_usage_externals(monkeypatch)

    sonnet_usage = {
        "calls": 4,
        "input": 30_000,
        "output": 9_000,
        "cache_read": 5_000,
        "cache_write": 2_000,
        "cost_usd": 0.42,
    }
    deepseek_usage = {
        "calls": 1,
        "input": 5_000,
        "output": 1_000,
        "cache_read": 0,
        "cache_write": 0,
        "cost_usd": None,
    }
    agent = _usage_agent(
        session_actual_cost_usd=0.42,
        session_model_usage={
            "anthropic/claude-sonnet-4.6": sonnet_usage,
            "deepseek/deepseek-chat": deepseek_usage,
        },
    )

    page = server._compact_usage_text(_session(agent=agent))
    page_lines = page.splitlines()

    assert "Session — anthropic/claude-sonnet-4.6 (openrouter)" in page

    # The header line also names the model, so require "reqs" to pick the row.
    sonnet_row = next(
        line for line in page_lines if "claude-sonnet-4.6" in line and "reqs" in line
    )
    assert "reqs 4" in sonnet_row and "$0.4200" in sonnet_row

    deepseek_row = next(line for line in page_lines if "deepseek-chat" in line)
    # Cost not reported for this model → no dollar figure on its row.
    assert "reqs 1" in deepseek_row and "$" not in deepseek_row

    assert "session cost: $0.4200 (provider-reported)" in page
    assert "/usage full" in page
|
||||
|
||||
|
||||
def test_compact_usage_absent_cost_never_renders_zero(monkeypatch):
    """An unreported session cost is stated as such, never shown as $0.00."""
    _mute_usage_externals(monkeypatch)

    # Default agent: nothing reported.
    page = server._compact_usage_text(_session(agent=_usage_agent()))

    assert "session cost: not reported by provider" in page
    assert "$0.00" not in page
|
||||
|
||||
|
||||
def test_compact_usage_no_agent(monkeypatch):
    """A session whose agent entry is None renders the 'no API calls yet' line."""
    _mute_usage_externals(monkeypatch)

    # Force the "agent" entry to None regardless of what _session() put there.
    agentless_session = _session(agent=None) | {"agent": None}
    page = server._compact_usage_text(agentless_session)

    assert "no API calls yet" in page
|
||||
|
||||
|
||||
def test_compact_usage_recent_summary_and_credits_line(monkeypatch):
    """The page includes the 30-day summary with its cost and the credits line."""
    import agent.account_usage as account_usage

    recent_totals = {
        "days": 30,
        "sessions": 12,
        "input_tokens": 1_200_000,
        "output_tokens": 90_000,
        "api_calls": 64,
        "reported_cost_usd": 4.5678,
    }
    fake_db = types.SimpleNamespace(usage_totals=lambda days=30: recent_totals)
    credits_text = "Nous credits (Ultra): Total usable: $988.99 · Renews: 2026-06-11"

    monkeypatch.setattr(server, "_get_db", lambda: fake_db)
    monkeypatch.setattr(
        account_usage, "nous_credits_compact_line", lambda **kw: credits_text
    )

    page = server._compact_usage_text(_session(agent=_usage_agent()))

    assert "Last 30d: 12 sessions" in page
    assert "reported cost $4.57" in page
    assert "Nous credits (Ultra)" in page
|
||||
|
||||
|
||||
def test_compact_usage_recent_summary_hides_unreported_cost(monkeypatch):
    """The 30-day summary has no cost segment when reported_cost_usd is None."""
    _mute_usage_externals(monkeypatch)

    recent_totals = {
        "days": 30,
        "sessions": 3,
        "input_tokens": 10_000,
        "output_tokens": 2_000,
        "api_calls": 7,
        "reported_cost_usd": None,
    }
    fake_db = types.SimpleNamespace(usage_totals=lambda days=30: recent_totals)

    # Re-patch _get_db after the mute helper so the fake DB is the one used.
    monkeypatch.setattr(server, "_get_db", lambda: fake_db)

    page = server._compact_usage_text(_session(agent=_usage_agent()))

    assert "Last 30d: 3 sessions" in page
    assert "reported cost" not in page
|
||||
|
||||
|
||||
def test_slash_exec_usage_is_answered_in_process(monkeypatch):
    """A plain /usage is served from the live agent without touching the worker."""
    _mute_usage_externals(monkeypatch)

    sid = "sid-usage"
    request = {
        "id": "1",
        "method": "slash.exec",
        "params": {"session_id": sid, "command": "usage"},
    }
    server._sessions[sid] = _session(agent=_usage_agent())
    try:
        response = server.handle_request(request)
        output = response["result"]["output"]
        assert "Session — anthropic/claude-sonnet-4.6" in output
        # Worker untouched.
        assert server._sessions[sid]["slash_worker"] is None
    finally:
        # Always drop the session so later tests do not see it.
        server._sessions.pop(sid, None)
|
||||
|
||||
|
||||
def test_slash_exec_usage_full_falls_through_to_worker(monkeypatch):
    """`/usage full` is passed to the slash worker and its output is returned."""
    seen_commands = []

    class _RecordingWorker:
        """Worker double that records each command and returns a fixed page."""

        def run(self, cmd):
            seen_commands.append(cmd)
            return "detailed legacy page"

    sid = "sid-usage-full"
    request = {
        "id": "1",
        "method": "slash.exec",
        "params": {"session_id": sid, "command": "usage full"},
    }
    session = _session(agent=_usage_agent())
    session["slash_worker"] = _RecordingWorker()
    server._sessions[sid] = session
    try:
        monkeypatch.setattr(server, "_mirror_slash_side_effects", lambda *a: "")
        response = server.handle_request(request)
        assert response["result"]["output"] == "detailed legacy page"
        assert seen_commands == ["usage full"]
    finally:
        # Always drop the session so later tests do not see it.
        server._sessions.pop(sid, None)
|
||||
|
||||
@@ -2281,23 +2281,20 @@ def _get_usage(agent) -> dict:
|
||||
usage["context_max"] = ctx_max
|
||||
usage["context_percent"] = max(0, min(100, round(ctx_used / ctx_max * 100)))
|
||||
usage["compressions"] = getattr(comp, "compression_count", 0) or 0
|
||||
# Cost (chrome status bar): Nous portal header delta ONLY (F3, glitch
|
||||
# 2026-06-13). The OpenRouter usage.cost accumulator is deliberately NOT
|
||||
# used here — per-model cache/input/output pricing is unreliable across the
|
||||
# model long tail, so the bar shows cost ONLY on a Nous-portal session and
|
||||
# hides it everywhere else. `cost_usd` is ABSENT (not $0.00) when no header
|
||||
# was seen, and the TUI hides its cost segment. (The /usage accounting page
|
||||
# still uses real_session_cost_usd — both provider-reported sources.)
|
||||
try:
|
||||
from agent.usage_pricing import CanonicalUsage, estimate_usage_cost
|
||||
from agent.usage_pricing import nous_header_cost_usd
|
||||
|
||||
cost = estimate_usage_cost(
|
||||
usage["model"],
|
||||
CanonicalUsage(
|
||||
input_tokens=usage["input"],
|
||||
output_tokens=usage["output"],
|
||||
cache_read_tokens=usage["cache_read"],
|
||||
cache_write_tokens=usage["cache_write"],
|
||||
),
|
||||
provider=getattr(agent, "provider", None),
|
||||
base_url=getattr(agent, "base_url", None),
|
||||
)
|
||||
usage["cost_status"] = cost.status
|
||||
if cost.amount_usd is not None:
|
||||
usage["cost_usd"] = float(cost.amount_usd)
|
||||
real_cost = nous_header_cost_usd(agent)
|
||||
if real_cost is not None:
|
||||
usage["cost_usd"] = real_cost
|
||||
usage["cost_status"] = "actual"
|
||||
except Exception:
|
||||
pass
|
||||
# Dev-only live credits-spent readout (L0 usage-aware-credits). Gated on
|
||||
@@ -2312,6 +2309,112 @@ def _get_usage(agent) -> dict:
|
||||
return usage
|
||||
|
||||
|
||||
def _compact_usage_text(session: dict) -> str:
    """Render the compact `/usage` page for one session as plain text.

    The page is built from up to three sections, in this order:

    1. Current-session usage from the live agent: a header, one row per model
       taken from ``agent.session_model_usage`` (or a single row synthesized
       from ``_get_usage`` when that mapping is empty), a totals line, and a
       session-cost line. If the session has no agent, or the agent has made
       no API calls, a single "no API calls yet" line is emitted instead.
    2. A "Last 30d" summary from ``_get_db().usage_totals(days=30)``, shown
       only when a DB is available and it reports at least one session.
    3. A one-line Nous credits gauge from ``nous_credits_compact_line()``,
       shown only when that call returns a non-empty line.

    Costs are provider-REPORTED only. When a provider reports nothing the
    cost is simply omitted (never rendered as $0.00). The detailed legacy
    page stays reachable via `/usage full` (slash-worker → CLI path).

    Args:
        session: Session record; only its ``"agent"`` entry is read here.

    Returns:
        The page as newline-joined text, always ending with the
        ``/usage full`` hint line.
    """
    from agent.usage_pricing import format_token_count_compact as _fmt

    def _count(value) -> int:
        # Counters may be None or missing; render those as 0, not as a crash.
        return int(value or 0)

    agent = session.get("agent")
    lines: list[str] = []

    calls = (getattr(agent, "session_api_calls", 0) or 0) if agent is not None else 0
    if agent is not None and calls > 0:
        u = _get_usage(agent)
        header = f"Session — {u['model']}"
        provider = getattr(agent, "provider", None)
        if provider:
            header += f" ({provider})"
        lines.append(header)

        per_model = getattr(agent, "session_model_usage", None) or {}
        # Without a per-model breakdown, show one row built from the
        # session-wide counters, with no cost attached to it.
        rows = list(per_model.items()) or [(
            u["model"],
            {
                "calls": u["calls"], "input": u["input"], "output": u["output"],
                "cache_read": u["cache_read"], "cache_write": u["cache_write"],
                "cost_usd": None,
            },
        )]
        # Pad model names to a common width so the columns line up.
        name_w = max(len(name or "?") for name, _ in rows)
        for name, row in rows:
            cells = [
                f"{(name or '?'):<{name_w}}",
                f"reqs {_count(row.get('calls', 0))}",
                f"in {_fmt(_count(row.get('input', 0)))}",
                f"out {_fmt(_count(row.get('output', 0)))}",
            ]
            cache_read = _count(row.get("cache_read", 0))
            if cache_read:
                cells.append(f"cache {_fmt(cache_read)}")
            cost = row.get("cost_usd")
            if cost is not None:
                # float() matches how the 30-day cost below is formatted.
                cells.append(f"${float(cost):.4f}")
            lines.append(" " + " · ".join(cells))

        ctx_pct = u.get("context_percent")
        tail = [f"total {_fmt(_count(u['total']))} tokens", f"{u['calls']} calls"]
        if ctx_pct is not None:
            tail.append(f"context {ctx_pct}%")
        if u.get("compressions"):
            tail.append(f"compressions {u['compressions']}")
        lines.append(" " + " · ".join(tail))

        # The /usage page reports the FULL provider-reported cost (OpenRouter
        # usage.cost accumulator AND/OR the Nous header delta) — NOT the chrome's
        # Nous-header-only figure (F3 narrowed `_get_usage["cost_usd"]` to the
        # status bar). Read it straight from real_session_cost_usd here so the
        # accounting page keeps both sources.
        try:
            from agent.usage_pricing import real_session_cost_usd

            cost_usd = real_session_cost_usd(agent)
        except Exception:
            # Best-effort: a failed cost lookup reads as "not reported".
            cost_usd = None
        if cost_usd is not None:
            lines.append(f" session cost: ${cost_usd:.4f} (provider-reported)")
        else:
            lines.append(" session cost: not reported by provider")
    else:
        lines.append("Session — no API calls yet")

    # Tight recent summary from the session DB (real costs only).
    try:
        db = _get_db()
        totals = db.usage_totals(days=30) if db is not None else None
    except Exception:
        # Best-effort: the page is still useful without the 30-day line.
        totals = None
    if totals and totals.get("sessions"):
        # NOTE(review): token totals are coerced like the per-model rows above.
        # Presumably an aggregate over rows with no recorded tokens can come
        # back as None — confirm against usage_totals. Unguarded, that would
        # raise here and abort the whole page.
        parts = [
            f"{totals['sessions']} sessions",
            f"in {_fmt(_count(totals.get('input_tokens')))}",
            f"out {_fmt(_count(totals.get('output_tokens')))}",
        ]
        reported = totals.get("reported_cost_usd")
        if reported is not None:
            parts.append(f"reported cost ${float(reported):.2f}")
        lines.append("Last 30d: " + " · ".join(parts))

    # Nous credits one-liner (account-level; independent of the live agent).
    try:
        from agent.account_usage import nous_credits_compact_line

        credits_line = nous_credits_compact_line()
    except Exception:
        # Best-effort: omit the gauge rather than fail the page.
        credits_line = None
    if credits_line:
        lines.append(credits_line)

    lines.append("(/usage full — detailed page)")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _probe_credentials(agent) -> str:
|
||||
"""Light credential check at session creation — returns warning or ''."""
|
||||
try:
|
||||
@@ -9189,6 +9292,17 @@ def _(rid, params: dict) -> dict:
|
||||
except Exception as e:
|
||||
return _ok(rid, {"output": f"Plugin command error: {e}"})
|
||||
|
||||
# /usage — answered in-process from the LIVE agent's session counters.
|
||||
# The slash worker is a separate subprocess that resumes the session
|
||||
# WITHOUT an agent, so it can never see current-session tokens/costs
|
||||
# (it only printed the Nous credits block). `/usage full` still falls
|
||||
# through to the worker for the detailed CLI page.
|
||||
if _cmd_base == "usage" and _cmd_arg.strip().lower() not in {"full", "--full"}:
|
||||
try:
|
||||
return _ok(rid, {"output": _compact_usage_text(session)})
|
||||
except Exception:
|
||||
pass # fall through to the slash worker
|
||||
|
||||
worker = session.get("slash_worker")
|
||||
if not worker:
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user