Port from Kilo-Org/kilocode#9448: roll up subagent costs into parent session total

Child subagents built by delegate_task() each track their own session_estimated_cost_usd, but the parent agent's total never folded those numbers in. On runs where the parent mostly delegates and the children do the expensive work, the footer/UI was reporting a fraction of the actual spend — sometimes $0.00 when the parent itself made no billed calls.

Fix:
- Capture each child's session_estimated_cost_usd into _child_cost_usd on the result entry (before child.close() drops the counter).
- After the existing subagent_stop hook loop, add the summed child costs to parent.session_estimated_cost_usd.
- Promote session_cost_source from 'none' -> 'subagent' (and session_cost_status from 'unknown' -> 'estimated') when the parent had no direct spend but children did, so the UI doesn't label the total as having unknown provenance. Real sources (openrouter, anthropic, etc.) are preserved.

Nested orchestrator -> worker trees roll up naturally: each layer's own delegate_task() folds its direct children in, and when the orchestrator itself returns, its parent folds the orchestrator's now-inflated total on top.

Internal fields (_child_cost_usd, _child_role) are stripped from the results dict before it's serialised back to the model — the same contract _child_role already followed.

Tests: TestSubagentCostRollup (5 cases) covers the single-child, batch, zero-cost-children, preserved-source, and legacy-fixture paths.

Source: https://github.com/Kilo-Org/kilocode/pull/9448
2026-05-06 02:37:05 +08:00 · 2026-04-27 17:05:44 -07:00
2 changed files with 204 additions and 0 deletions
--- a/tests/tools/test_delegate.py
+++ b/tests/tools/test_delegate.py
@@ -568,6 +568,163 @@ class TestDelegateObservability(unittest.TestCase):
            self.assertEqual(result["results"][0]["exit_reason"], "max_iterations")


+class TestSubagentCostRollup(unittest.TestCase):
+    """Port of Kilo-Org/kilocode#9448 — parent's session_estimated_cost_usd
+    must include subagent spend, not just the parent's own API calls.
+
+    Each test calls delegate_task() with a mock parent whose cost counters
+    are real values, then asserts on the parent's
+    session_estimated_cost_usd / session_cost_source / session_cost_status
+    and, where relevant, on the JSON payload returned by delegate_task().
+    """
+
+    def _make_parent_with_cost_counters(self, depth=0, starting_cost=0.0):
+        """Build a mock parent with concrete cost counters.
+
+        The parent comes from _make_mock_parent(depth=depth); its
+        session_estimated_cost_usd is set to ``starting_cost`` and its
+        status/source labels start at "unknown"/"none".
+        """
+        parent = _make_mock_parent(depth=depth)
+        # The fields AIAgent exposes and the footer reads from.  Set real
+        # floats/strings so the rollup can add to them rather than tripping
+        # on MagicMock auto-attrs.
+        parent.session_estimated_cost_usd = starting_cost
+        parent.session_cost_status = "unknown"
+        parent.session_cost_source = "none"
+        return parent
+
+    def test_single_child_cost_folded_into_parent(self):
+        """One child's cost (0.42) is added on top of the parent's own 0.10,
+        and the internal bookkeeping keys never reach the returned JSON."""
+        parent = self._make_parent_with_cost_counters(starting_cost=0.10)
+
+        # Patch the AIAgent class so constructing it yields mock_child; this
+        # exercises the real capture path instead of a canned result entry.
+        with patch("run_agent.AIAgent") as MockAgent:
+            mock_child = MagicMock()
+            mock_child.model = "claude-sonnet-4-6"
+            mock_child.session_prompt_tokens = 1000
+            mock_child.session_completion_tokens = 200
+            # Real float: this is the value the rollup is expected to capture.
+            mock_child.session_estimated_cost_usd = 0.42
+            mock_child.run_conversation.return_value = {
+                "final_response": "done",
+                "completed": True,
+                "interrupted": False,
+                "api_calls": 2,
+                "messages": [],
+            }
+            MockAgent.return_value = mock_child
+
+            result = json.loads(delegate_task(goal="do stuff", parent_agent=parent))
+
+        # Parent footer must reflect parent_cost + child_cost (0.10 + 0.42).
+        self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.52, places=6)
+        # Rollup must strip the internal fields before serialising to the model.
+        self.assertNotIn("_child_cost_usd", result["results"][0])
+        self.assertNotIn("_child_role", result["results"][0])
+
+    def test_batch_children_costs_sum_into_parent(self):
+        """Costs of all children in a batch — including a failed one — are
+        summed into the parent, and the 'none'/'unknown' labels are promoted."""
+        parent = self._make_parent_with_cost_counters(starting_cost=0.00)
+
+        # Replace _run_single_child so each call returns the next canned
+        # result entry below (side_effect list = one return value per call).
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.side_effect = [
+                {
+                    "task_index": 0,
+                    "status": "completed",
+                    "summary": "A",
+                    "api_calls": 2,
+                    "duration_seconds": 1.0,
+                    "_child_role": "leaf",
+                    "_child_cost_usd": 0.15,
+                },
+                {
+                    "task_index": 1,
+                    "status": "completed",
+                    "summary": "B",
+                    "api_calls": 2,
+                    "duration_seconds": 1.0,
+                    "_child_role": "leaf",
+                    "_child_cost_usd": 0.27,
+                },
+                {
+                    "task_index": 2,
+                    "status": "failed",
+                    "summary": "",
+                    "error": "boom",
+                    "api_calls": 0,
+                    "duration_seconds": 0.1,
+                    "_child_role": "leaf",
+                    "_child_cost_usd": 0.03,
+                },
+            ]
+            result = json.loads(
+                delegate_task(
+                    tasks=[{"goal": "A"}, {"goal": "B"}, {"goal": "C"}],
+                    parent_agent=parent,
+                )
+            )
+
+        # 0.15 + 0.27 + 0.03 even though one child failed — the API calls it
+        # made before failing still cost money.
+        self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.45, places=6)
+        # cost_source promoted from "none" since the parent had no direct spend;
+        # cost_status is likewise promoted from "unknown" to "estimated".
+        self.assertEqual(parent.session_cost_source, "subagent")
+        self.assertEqual(parent.session_cost_status, "estimated")
+        # All internal fields stripped from results.
+        for entry in result["results"]:
+            self.assertNotIn("_child_cost_usd", entry)
+            self.assertNotIn("_child_role", entry)
+
+    def test_zero_cost_children_leave_parent_source_untouched(self):
+        """If every child reports 0 cost (e.g. free local model), we should
+        not invent a fake 'subagent' source — the parent's 'none' stays."""
+        parent = self._make_parent_with_cost_counters(starting_cost=0.00)
+
+        # Canned result entry whose captured child cost is exactly zero.
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.return_value = {
+                "task_index": 0,
+                "status": "completed",
+                "summary": "done",
+                "api_calls": 1,
+                "duration_seconds": 0.5,
+                "_child_role": "leaf",
+                "_child_cost_usd": 0.0,
+            }
+            delegate_task(goal="free local run", parent_agent=parent)
+
+        # Nothing was spent, so neither the total nor the source label moves.
+        self.assertEqual(parent.session_estimated_cost_usd, 0.0)
+        self.assertEqual(parent.session_cost_source, "none")
+
+    def test_parent_with_real_source_not_overwritten(self):
+        """If the parent already has its own cost billed (cost_source != 'none'),
+        adding subagent cost must not clobber the existing source label."""
+        parent = self._make_parent_with_cost_counters(starting_cost=0.20)
+        # Override the helper's "unknown"/"none" defaults with real labels.
+        parent.session_cost_status = "exact"
+        parent.session_cost_source = "openrouter"
+
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.return_value = {
+                "task_index": 0,
+                "status": "completed",
+                "summary": "done",
+                "api_calls": 1,
+                "duration_seconds": 0.5,
+                "_child_role": "leaf",
+                "_child_cost_usd": 0.30,
+            }
+            delegate_task(goal="billed run", parent_agent=parent)
+
+        # The child's 0.30 is still added to the parent's 0.20.
+        self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.50, places=6)
+        # Real source label preserved.
+        self.assertEqual(parent.session_cost_source, "openrouter")
+        self.assertEqual(parent.session_cost_status, "exact")
+
+    def test_rollup_tolerates_missing_cost_fields(self):
+        """Older fixtures / fabricated error entries may not carry
+        _child_cost_usd.  Rollup must degrade to zero-add silently."""
+        parent = self._make_parent_with_cost_counters(starting_cost=0.10)
+
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.return_value = {
+                "task_index": 0,
+                "status": "completed",
+                "summary": "done",
+                "api_calls": 1,
+                "duration_seconds": 0.5,
+                # no _child_role, no _child_cost_usd
+            }
+            result = json.loads(delegate_task(goal="legacy", parent_agent=parent))
+
+        # Parent cost unchanged.
+        self.assertEqual(parent.session_estimated_cost_usd, 0.10)
+        # The entry is still returned to the caller despite the missing keys.
+        self.assertEqual(len(result["results"]), 1)
+
 class TestBlockedTools(unittest.TestCase):
    def test_blocked_tools_constant(self):
        for tool in ["delegate_task", "clarify", "memory", "send_message", "execute_code"]:
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -1616,6 +1616,19 @@ def _run_single_child(
            # parent thread can fire subagent_stop with the correct role.
            # Stripped before the dict is serialised back to the model.
            "_child_role": getattr(child, "_delegate_role", None),
+            # Captured before child.close() so the parent aggregator can fold
+            # the child's total spend into the parent's session cost.  Port of
+            # Kilo-Org/kilocode#9448 — previously the footer only reflected the
+            # parent's direct API calls and under-counted subagent-heavy runs.
+            # Stripped before the dict is serialised back to the model.
+            "_child_cost_usd": (
+                float(getattr(child, "session_estimated_cost_usd", 0.0) or 0.0)
+                if isinstance(
+                    getattr(child, "session_estimated_cost_usd", 0.0),
+                    (int, float),
+                )
+                else 0.0
+            ),
        }
        if status == "failed":
            entry["error"] = result.get("error", "Subagent did not produce a response.")
@@ -2112,8 +2125,20 @@ def delegate_task(
        from hermes_cli.plugins import invoke_hook as _invoke_hook
    except Exception:
        _invoke_hook = None
+    # Aggregate child spend here so the parent's footer/UI reflect the true
+    # cost of a subagent-heavy turn.  Port of Kilo-Org/kilocode#9448.  Each
+    # child's cost was captured in _run_single_child before its AIAgent was
+    # closed; we sum them inside the existing subagent_stop hook loop (so we
+    # don't walk `results` twice) and fold the total into the parent afterwards.
+    _children_cost_total = 0.0
    for entry in results:
        child_role = entry.pop("_child_role", None)
+        child_cost = entry.pop("_child_cost_usd", 0.0)
+        try:
+            if child_cost:
+                _children_cost_total += float(child_cost)
+        except (TypeError, ValueError):
+            pass
        if _invoke_hook is None:
            continue
        try:
@@ -2128,6 +2153,28 @@ def delegate_task(
        except Exception:
            logger.debug("subagent_stop hook invocation failed", exc_info=True)

+    # Fold the aggregated child cost into the parent's session total.  This is
+    # additive — each delegate_task call contributes its own children — so
+    # nested orchestrator→worker trees roll up naturally: each layer's own
+    # delegate_task() folds its direct children in, and when the orchestrator
+    # itself finishes, its parent folds the orchestrator's now-inflated total
+    # on top.  Degrades silently if the parent lacks the counter (older test
+    # fixtures, etc.).
+    if _children_cost_total > 0.0:
+        try:
+            current = float(getattr(parent_agent, "session_estimated_cost_usd", 0.0) or 0.0)
+            parent_agent.session_estimated_cost_usd = current + _children_cost_total
+            # Upgrade the cost_source so the UI doesn't label a partially-real
+            # total as "none" when the parent itself hadn't billed any calls
+            # yet (rare but possible when the parent's only action this turn
+            # was delegate_task).
+            if getattr(parent_agent, "session_cost_source", "none") in (None, "", "none"):
+                parent_agent.session_cost_source = "subagent"
+            if getattr(parent_agent, "session_cost_status", "unknown") in (None, "", "unknown"):
+                parent_agent.session_cost_status = "estimated"
+        except Exception:
+            logger.debug("Subagent cost rollup failed", exc_info=True)
+
    total_duration = round(time.monotonic() - overall_start, 2)

    return json.dumps(