Compare commits

...

1 Commits

Author SHA1 Message Date
Teknium
dad42014ae Port from Kilo-Org/kilocode#9448: roll up subagent costs into parent session total
Child subagents built by delegate_task() each track their own
session_estimated_cost_usd, but the parent agent's total never folded
those numbers in.  On runs where the parent mostly delegates and the
children do the expensive work, the footer/UI was reporting a fraction
of the actual spend — sometimes $0.00 when the parent itself made no
billed calls.

Fix:
- Capture each child's session_estimated_cost_usd into _child_cost_usd
  on the result entry (before child.close() drops the counter).
- While walking results in the existing subagent_stop hook loop, sum the
  children's costs; once the loop finishes, add the total to
  parent.session_estimated_cost_usd.
- Promote session_cost_source from 'none' -> 'subagent' when the parent
  had no direct spend but children did, so the UI doesn't label the
  total as having unknown provenance.  Likewise promote
  session_cost_status from 'unknown' -> 'estimated'.  Real sources
  (openrouter, anthropic, etc.) and real statuses (e.g. 'exact') are
  preserved.

Nested orchestrator -> worker trees roll up naturally: each layer's own
delegate_task() folds its direct children in, and when the orchestrator
itself returns, its parent folds the orchestrator's now-inflated total
on top.

Internal fields (_child_cost_usd, _child_role) are stripped from the
results dict before it's serialised back to the model — same contract
as _child_role already followed.

Tests: TestSubagentCostRollup (5 cases) covers single-child, batch,
zero-cost-children, preserved-source, and legacy-fixture paths.

Source: https://github.com/Kilo-Org/kilocode/pull/9448
2026-04-27 17:05:44 -07:00
2 changed files with 204 additions and 0 deletions

View File

@@ -568,6 +568,163 @@ class TestDelegateObservability(unittest.TestCase):
self.assertEqual(result["results"][0]["exit_reason"], "max_iterations")
class TestSubagentCostRollup(unittest.TestCase):
    """Subagent spend must roll up into the parent (port of Kilo-Org/kilocode#9448).

    After delegate_task() returns, the parent's session_estimated_cost_usd
    has to cover what the children spent, not only the parent's own API calls.
    """

    @staticmethod
    def _leaf_result(
        task_index,
        summary,
        cost_usd,
        *,
        status="completed",
        api_calls=1,
        duration_seconds=0.5,
        error=None,
    ):
        """Build a fake _run_single_child() result entry for a leaf child.

        The entry carries the internal ``_child_role`` / ``_child_cost_usd``
        fields; ``error`` is only included when one is given.
        """
        entry = {"task_index": task_index, "status": status, "summary": summary}
        if error is not None:
            entry["error"] = error
        entry["api_calls"] = api_calls
        entry["duration_seconds"] = duration_seconds
        entry["_child_role"] = "leaf"
        entry["_child_cost_usd"] = cost_usd
        return entry

    def _assert_internal_fields_stripped(self, *entries):
        """Assert none of ``entries`` still exposes the internal rollup fields."""
        for entry in entries:
            self.assertNotIn("_child_cost_usd", entry)
            self.assertNotIn("_child_role", entry)

    def _make_parent_with_cost_counters(self, depth=0, starting_cost=0.0):
        """Return a mock parent whose cost counters hold real values.

        Plain floats/strings are assigned so the rollup has something it can
        add to, instead of whatever auto-attribute the mock would hand back.
        """
        parent = _make_mock_parent(depth=depth)
        counters = {
            "session_estimated_cost_usd": starting_cost,
            "session_cost_status": "unknown",
            "session_cost_source": "none",
        }
        for attr_name, attr_value in counters.items():
            setattr(parent, attr_name, attr_value)
        return parent

    def test_single_child_cost_folded_into_parent(self):
        """One real (mocked AIAgent) child: its cost is added to the parent's."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.10)

        child = MagicMock(
            model="claude-sonnet-4-6",
            session_prompt_tokens=1000,
            session_completion_tokens=200,
            session_estimated_cost_usd=0.42,
        )
        child.run_conversation.return_value = {
            "final_response": "done",
            "completed": True,
            "interrupted": False,
            "api_calls": 2,
            "messages": [],
        }

        with patch("run_agent.AIAgent", return_value=child):
            raw = delegate_task(goal="do stuff", parent_agent=parent)
            result = json.loads(raw)

        # Parent total = parent's starting cost + the child's cost.
        self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.52, places=6)
        # Internal bookkeeping keys must not reach the serialised output.
        self._assert_internal_fields_stripped(result["results"][0])

    def test_batch_children_costs_sum_into_parent(self):
        """Three children (one failed): every cost counts towards the parent."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.00)

        child_results = [
            self._leaf_result(0, "A", 0.15, api_calls=2, duration_seconds=1.0),
            self._leaf_result(1, "B", 0.27, api_calls=2, duration_seconds=1.0),
            self._leaf_result(
                2,
                "",
                0.03,
                status="failed",
                api_calls=0,
                duration_seconds=0.1,
                error="boom",
            ),
        ]

        with patch("tools.delegate_tool._run_single_child") as mock_run:
            mock_run.side_effect = child_results
            raw = delegate_task(
                tasks=[{"goal": "A"}, {"goal": "B"}, {"goal": "C"}],
                parent_agent=parent,
            )
            result = json.loads(raw)

        # 0.15 + 0.27 + 0.03: the failed child's spend is still included.
        self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.45, places=6)
        # The parent started at "none"/"unknown", so both labels get promoted.
        self.assertEqual(parent.session_cost_source, "subagent")
        self.assertEqual(parent.session_cost_status, "estimated")
        # No entry may leak the internal fields.
        self._assert_internal_fields_stripped(*result["results"])

    def test_zero_cost_children_leave_parent_source_untouched(self):
        """Children that all report 0 cost (e.g. free local model) must not
        cause a 'subagent' source to be invented; the parent keeps 'none'."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.00)

        with patch("tools.delegate_tool._run_single_child") as mock_run:
            mock_run.return_value = self._leaf_result(0, "done", 0.0)
            delegate_task(goal="free local run", parent_agent=parent)

        self.assertEqual(parent.session_estimated_cost_usd, 0.0)
        self.assertEqual(parent.session_cost_source, "none")

    def test_parent_with_real_source_not_overwritten(self):
        """A parent that already has a real cost source/status keeps those
        labels when subagent cost is added on top."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.20)
        parent.session_cost_status = "exact"
        parent.session_cost_source = "openrouter"

        with patch("tools.delegate_tool._run_single_child") as mock_run:
            mock_run.return_value = self._leaf_result(0, "done", 0.30)
            delegate_task(goal="billed run", parent_agent=parent)

        self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.50, places=6)
        # Existing labels survive the rollup.
        self.assertEqual(parent.session_cost_source, "openrouter")
        self.assertEqual(parent.session_cost_status, "exact")

    def test_rollup_tolerates_missing_cost_fields(self):
        """Entries without _child_cost_usd (older fixtures, fabricated error
        entries) must contribute nothing and must not raise."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.10)

        # Deliberately lacks both _child_role and _child_cost_usd.
        legacy_entry = {
            "task_index": 0,
            "status": "completed",
            "summary": "done",
            "api_calls": 1,
            "duration_seconds": 0.5,
        }

        with patch("tools.delegate_tool._run_single_child") as mock_run:
            mock_run.return_value = legacy_entry
            raw = delegate_task(goal="legacy", parent_agent=parent)
            result = json.loads(raw)

        # Nothing was added to the parent's total.
        self.assertEqual(parent.session_estimated_cost_usd, 0.10)
        self.assertEqual(len(result["results"]), 1)
class TestBlockedTools(unittest.TestCase):
def test_blocked_tools_constant(self):
for tool in ["delegate_task", "clarify", "memory", "send_message", "execute_code"]:

View File

@@ -1616,6 +1616,19 @@ def _run_single_child(
# parent thread can fire subagent_stop with the correct role.
# Stripped before the dict is serialised back to the model.
"_child_role": getattr(child, "_delegate_role", None),
# Captured before child.close() so the parent aggregator can fold
# the child's total spend into the parent's session cost. Port of
# Kilo-Org/kilocode#9448 — previously the footer only reflected the
# parent's direct API calls and under-counted subagent-heavy runs.
# Stripped before the dict is serialised back to the model.
"_child_cost_usd": (
float(getattr(child, "session_estimated_cost_usd", 0.0) or 0.0)
if isinstance(
getattr(child, "session_estimated_cost_usd", 0.0),
(int, float),
)
else 0.0
),
}
if status == "failed":
entry["error"] = result.get("error", "Subagent did not produce a response.")
@@ -2112,8 +2125,20 @@ def delegate_task(
from hermes_cli.plugins import invoke_hook as _invoke_hook
except Exception:
_invoke_hook = None
# Aggregate child spend here so the parent's footer/UI reflect the true
# cost of a subagent-heavy turn. Port of Kilo-Org/kilocode#9448. Each
# child's cost was captured in _run_single_child before its AIAgent was
# closed; we fold them into the parent in one pass alongside the
# subagent_stop hook loop so we don't walk `results` twice.
_children_cost_total = 0.0
for entry in results:
child_role = entry.pop("_child_role", None)
child_cost = entry.pop("_child_cost_usd", 0.0)
try:
if child_cost:
_children_cost_total += float(child_cost)
except (TypeError, ValueError):
pass
if _invoke_hook is None:
continue
try:
@@ -2128,6 +2153,28 @@ def delegate_task(
except Exception:
logger.debug("subagent_stop hook invocation failed", exc_info=True)
# Fold the aggregated child cost into the parent's session total. This is
# additive — each delegate_task call contributes its own children — so
# nested orchestrator→worker trees roll up naturally: each layer's own
# delegate_task() folds its direct children in, and when the orchestrator
# itself finishes, its parent folds the orchestrator's now-inflated total
# on top. Degrades silently if the parent lacks the counter (older test
# fixtures, etc.).
if _children_cost_total > 0.0:
try:
current = float(getattr(parent_agent, "session_estimated_cost_usd", 0.0) or 0.0)
parent_agent.session_estimated_cost_usd = current + _children_cost_total
# Upgrade the cost_source so the UI doesn't label a partially-real
# total as "none" when the parent itself hadn't billed any calls
# yet (rare but possible when the parent's only action this turn
# was delegate_task).
if getattr(parent_agent, "session_cost_source", "none") in (None, "", "none"):
parent_agent.session_cost_source = "subagent"
if getattr(parent_agent, "session_cost_status", "unknown") in (None, "", "unknown"):
parent_agent.session_cost_status = "estimated"
except Exception:
logger.debug("Subagent cost rollup failed", exc_info=True)
total_duration = round(time.monotonic() - overall_start, 2)
return json.dumps(