From dad42014aeb11bf7e655868f00d1c295875dddc1 Mon Sep 17 00:00:00 2001 From: Teknium Date: Mon, 27 Apr 2026 17:05:44 -0700 Subject: [PATCH] Port from Kilo-Org/kilocode#9448: roll up subagent costs into parent session total MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Child subagents built by delegate_task() each track their own session_estimated_cost_usd, but the parent agent's total never folded those numbers in. On runs where the parent mostly delegates and the children do the expensive work, the footer/UI was reporting a fraction of the actual spend — sometimes $0.00 when the parent itself made no billed calls. Fix: - Capture each child's session_estimated_cost_usd into _child_cost_usd on the result entry (before child.close() drops the counter). - Sum the children's costs while walking results in the existing subagent_stop hook loop, then add the total to parent.session_estimated_cost_usd once the loop finishes. - Promote session_cost_source from 'none' -> 'subagent' (and session_cost_status from 'unknown' -> 'estimated') when the parent's own source/status is still unset but the children reported spend, so the UI doesn't label the total as having unknown provenance. Real sources (openrouter, anthropic, etc.) and statuses (e.g. 'exact') are preserved. Nested orchestrator -> worker trees roll up naturally: each layer's own delegate_task() folds its direct children in, and when the orchestrator itself returns, its parent folds the orchestrator's now-inflated total on top. Internal fields (_child_cost_usd, _child_role) are stripped from the results dict before it's serialised back to the model — same contract as _child_role already followed. Tests: TestSubagentCostRollup (5 cases) covers single-child, batch, zero-cost-children, preserved-source, and legacy-fixture paths. 
Source: https://github.com/Kilo-Org/kilocode/pull/9448 --- tests/tools/test_delegate.py | 157 +++++++++++++++++++++++++++++++++++ tools/delegate_tool.py | 47 +++++++++++ 2 files changed, 204 insertions(+) diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index c27908da8f..6b4cc99150 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -568,6 +568,163 @@ class TestDelegateObservability(unittest.TestCase): self.assertEqual(result["results"][0]["exit_reason"], "max_iterations") +class TestSubagentCostRollup(unittest.TestCase): + """Port of Kilo-Org/kilocode#9448 — parent's session_estimated_cost_usd + must include subagent spend, not just the parent's own API calls.""" + + def _make_parent_with_cost_counters(self, depth=0, starting_cost=0.0): + parent = _make_mock_parent(depth=depth) + # The fields AIAgent exposes and the footer reads from. Set real + # floats/strings so the rollup can add to them rather than tripping + # on MagicMock auto-attrs. + parent.session_estimated_cost_usd = starting_cost + parent.session_cost_status = "unknown" + parent.session_cost_source = "none" + return parent + + def test_single_child_cost_folded_into_parent(self): + parent = self._make_parent_with_cost_counters(starting_cost=0.10) + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.model = "claude-sonnet-4-6" + mock_child.session_prompt_tokens = 1000 + mock_child.session_completion_tokens = 200 + mock_child.session_estimated_cost_usd = 0.42 + mock_child.run_conversation.return_value = { + "final_response": "done", + "completed": True, + "interrupted": False, + "api_calls": 2, + "messages": [], + } + MockAgent.return_value = mock_child + + result = json.loads(delegate_task(goal="do stuff", parent_agent=parent)) + + # Parent footer must reflect parent_cost + child_cost. 
+ self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.52, places=6) + # Rollup must strip the internal field before serialising to the model. + self.assertNotIn("_child_cost_usd", result["results"][0]) + self.assertNotIn("_child_role", result["results"][0]) + + def test_batch_children_costs_sum_into_parent(self): + parent = self._make_parent_with_cost_counters(starting_cost=0.00) + + with patch("tools.delegate_tool._run_single_child") as mock_run: + mock_run.side_effect = [ + { + "task_index": 0, + "status": "completed", + "summary": "A", + "api_calls": 2, + "duration_seconds": 1.0, + "_child_role": "leaf", + "_child_cost_usd": 0.15, + }, + { + "task_index": 1, + "status": "completed", + "summary": "B", + "api_calls": 2, + "duration_seconds": 1.0, + "_child_role": "leaf", + "_child_cost_usd": 0.27, + }, + { + "task_index": 2, + "status": "failed", + "summary": "", + "error": "boom", + "api_calls": 0, + "duration_seconds": 0.1, + "_child_role": "leaf", + "_child_cost_usd": 0.03, + }, + ] + result = json.loads( + delegate_task( + tasks=[{"goal": "A"}, {"goal": "B"}, {"goal": "C"}], + parent_agent=parent, + ) + ) + + # 0.15 + 0.27 + 0.03 even though one child failed — the API calls it + # made before failing still cost money. + self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.45, places=6) + # cost_source promoted from "none" since the parent had no direct spend. + self.assertEqual(parent.session_cost_source, "subagent") + self.assertEqual(parent.session_cost_status, "estimated") + # All internal fields stripped from results. + for entry in result["results"]: + self.assertNotIn("_child_cost_usd", entry) + self.assertNotIn("_child_role", entry) + + def test_zero_cost_children_leave_parent_source_untouched(self): + """If every child reports 0 cost (e.g. 
free local model), we should + not invent a fake 'subagent' source — the parent's 'none' stays.""" + parent = self._make_parent_with_cost_counters(starting_cost=0.00) + + with patch("tools.delegate_tool._run_single_child") as mock_run: + mock_run.return_value = { + "task_index": 0, + "status": "completed", + "summary": "done", + "api_calls": 1, + "duration_seconds": 0.5, + "_child_role": "leaf", + "_child_cost_usd": 0.0, + } + delegate_task(goal="free local run", parent_agent=parent) + + self.assertEqual(parent.session_estimated_cost_usd, 0.0) + self.assertEqual(parent.session_cost_source, "none") + + def test_parent_with_real_source_not_overwritten(self): + """If the parent already has its own cost billed (cost_source != 'none'), + adding subagent cost must not clobber the existing source label.""" + parent = self._make_parent_with_cost_counters(starting_cost=0.20) + parent.session_cost_status = "exact" + parent.session_cost_source = "openrouter" + + with patch("tools.delegate_tool._run_single_child") as mock_run: + mock_run.return_value = { + "task_index": 0, + "status": "completed", + "summary": "done", + "api_calls": 1, + "duration_seconds": 0.5, + "_child_role": "leaf", + "_child_cost_usd": 0.30, + } + delegate_task(goal="billed run", parent_agent=parent) + + self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.50, places=6) + # Real source label preserved. + self.assertEqual(parent.session_cost_source, "openrouter") + self.assertEqual(parent.session_cost_status, "exact") + + def test_rollup_tolerates_missing_cost_fields(self): + """Older fixtures / fabricated error entries may not carry + _child_cost_usd. 
Rollup must degrade to zero-add silently.""" + parent = self._make_parent_with_cost_counters(starting_cost=0.10) + + with patch("tools.delegate_tool._run_single_child") as mock_run: + mock_run.return_value = { + "task_index": 0, + "status": "completed", + "summary": "done", + "api_calls": 1, + "duration_seconds": 0.5, + # no _child_role, no _child_cost_usd + } + result = json.loads(delegate_task(goal="legacy", parent_agent=parent)) + + # Parent cost unchanged. + self.assertEqual(parent.session_estimated_cost_usd, 0.10) + self.assertEqual(len(result["results"]), 1) + + class TestBlockedTools(unittest.TestCase): def test_blocked_tools_constant(self): for tool in ["delegate_task", "clarify", "memory", "send_message", "execute_code"]: diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 397b7c958b..bceb9833c7 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -1616,6 +1616,19 @@ def _run_single_child( # parent thread can fire subagent_stop with the correct role. # Stripped before the dict is serialised back to the model. "_child_role": getattr(child, "_delegate_role", None), + # Captured before child.close() so the parent aggregator can fold + # the child's total spend into the parent's session cost. Port of + # Kilo-Org/kilocode#9448 — previously the footer only reflected the + # parent's direct API calls and under-counted subagent-heavy runs. + # Stripped before the dict is serialised back to the model. 
+ "_child_cost_usd": ( + float(getattr(child, "session_estimated_cost_usd", 0.0) or 0.0) + if isinstance( + getattr(child, "session_estimated_cost_usd", 0.0), + (int, float), + ) + else 0.0 + ), } if status == "failed": entry["error"] = result.get("error", "Subagent did not produce a response.") @@ -2112,8 +2125,20 @@ def delegate_task( from hermes_cli.plugins import invoke_hook as _invoke_hook except Exception: _invoke_hook = None + # Aggregate child spend here so the parent's footer/UI reflect the true + # cost of a subagent-heavy turn. Port of Kilo-Org/kilocode#9448. Each + # child's cost was captured in _run_single_child before its AIAgent was + # closed; we fold them into the parent in one pass alongside the + # subagent_stop hook loop so we don't walk `results` twice. + _children_cost_total = 0.0 for entry in results: child_role = entry.pop("_child_role", None) + child_cost = entry.pop("_child_cost_usd", 0.0) + try: + if child_cost: + _children_cost_total += float(child_cost) + except (TypeError, ValueError): + pass if _invoke_hook is None: continue try: @@ -2128,6 +2153,28 @@ def delegate_task( except Exception: logger.debug("subagent_stop hook invocation failed", exc_info=True) + # Fold the aggregated child cost into the parent's session total. This is + # additive — each delegate_task call contributes its own children — so + # nested orchestrator→worker trees roll up naturally: each layer's own + # delegate_task() folds its direct children in, and when the orchestrator + # itself finishes, its parent folds the orchestrator's now-inflated total + # on top. Degrades silently if the parent lacks the counter (older test + # fixtures, etc.). 
+ if _children_cost_total > 0.0: + try: + current = float(getattr(parent_agent, "session_estimated_cost_usd", 0.0) or 0.0) + parent_agent.session_estimated_cost_usd = current + _children_cost_total + # Upgrade the cost_source so the UI doesn't label a partially-real + # total as "none" when the parent itself hadn't billed any calls + # yet (rare but possible when the parent's only action this turn + # was delegate_task). + if getattr(parent_agent, "session_cost_source", "none") in (None, "", "none"): + parent_agent.session_cost_source = "subagent" + if getattr(parent_agent, "session_cost_status", "unknown") in (None, "", "unknown"): + parent_agent.session_cost_status = "estimated" + except Exception: + logger.debug("Subagent cost rollup failed", exc_info=True) + total_duration = round(time.monotonic() - overall_start, 2) return json.dumps(