mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-06 10:47:12 +08:00
Compare commits
1 Commits
bb/widget-
...
kilocode-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dad42014ae |
@@ -568,6 +568,163 @@ class TestDelegateObservability(unittest.TestCase):
|
||||
self.assertEqual(result["results"][0]["exit_reason"], "max_iterations")
|
||||
|
||||
|
||||
class TestSubagentCostRollup(unittest.TestCase):
    """Subagent spend must be rolled up into the delegating parent's total.

    Port of Kilo-Org/kilocode#9448: the parent's
    ``session_estimated_cost_usd`` has to cover what its children spent,
    not only the parent's own API calls.
    """

    # Bookkeeping keys carried on each child result entry; the tests below
    # require that none of them survive into the JSON returned by
    # delegate_task().
    _INTERNAL_KEYS = ("_child_cost_usd", "_child_role")

    @staticmethod
    def _completed_entry(**internal_fields):
        """Build the result entry for one successfully finished child.

        Keyword arguments are appended after the public fields in the order
        given, so callers choose which internal bookkeeping keys (if any)
        the entry carries.
        """
        entry = {
            "task_index": 0,
            "status": "completed",
            "summary": "done",
            "api_calls": 1,
            "duration_seconds": 0.5,
        }
        entry.update(internal_fields)
        return entry

    def _assert_internal_fields_stripped(self, entry):
        """Fail if *entry* still exposes any internal bookkeeping key."""
        for hidden_key in self._INTERNAL_KEYS:
            self.assertNotIn(hidden_key, entry)

    def _make_parent_with_cost_counters(self, depth=0, starting_cost=0.0):
        """Return a mock parent whose cost counters hold concrete values.

        The three session cost attributes are set to a real float and real
        strings so that code adding to or comparing them works with plain
        values instead of auto-created MagicMock attributes.
        """
        agent = _make_mock_parent(depth=depth)
        agent.session_estimated_cost_usd = starting_cost
        agent.session_cost_status = "unknown"
        agent.session_cost_source = "none"
        return agent

    def test_single_child_cost_folded_into_parent(self):
        """A lone child's spend is added on top of the parent's own total."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.10)
        conversation_outcome = {
            "final_response": "done",
            "completed": True,
            "interrupted": False,
            "api_calls": 2,
            "messages": [],
        }

        with patch("run_agent.AIAgent") as agent_cls:
            child = MagicMock()
            child.configure_mock(
                model="claude-sonnet-4-6",
                session_prompt_tokens=1000,
                session_completion_tokens=200,
                session_estimated_cost_usd=0.42,
            )
            child.run_conversation.return_value = conversation_outcome
            agent_cls.return_value = child

            payload = delegate_task(goal="do stuff", parent_agent=parent)

        result = json.loads(payload)

        # The parent total must now be parent_cost + child_cost.
        self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.52, places=6)
        # Bookkeeping fields must not leak into what the model sees.
        self._assert_internal_fields_stripped(result["results"][0])

    def test_batch_children_costs_sum_into_parent(self):
        """Every child in a batch contributes, including one that failed."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.00)
        child_results = [
            {
                "task_index": 0,
                "status": "completed",
                "summary": "A",
                "api_calls": 2,
                "duration_seconds": 1.0,
                "_child_role": "leaf",
                "_child_cost_usd": 0.15,
            },
            {
                "task_index": 1,
                "status": "completed",
                "summary": "B",
                "api_calls": 2,
                "duration_seconds": 1.0,
                "_child_role": "leaf",
                "_child_cost_usd": 0.27,
            },
            {
                "task_index": 2,
                "status": "failed",
                "summary": "",
                "error": "boom",
                "api_calls": 0,
                "duration_seconds": 0.1,
                "_child_role": "leaf",
                "_child_cost_usd": 0.03,
            },
        ]

        with patch("tools.delegate_tool._run_single_child") as run_child:
            run_child.side_effect = child_results
            payload = delegate_task(
                tasks=[{"goal": "A"}, {"goal": "B"}, {"goal": "C"}],
                parent_agent=parent,
            )

        result = json.loads(payload)

        # 0.15 + 0.27 + 0.03: the failed child is counted too, because the
        # calls it made before failing were still billed.
        self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.45, places=6)
        # With no direct parent spend, the labels are promoted away from
        # their "none" / "unknown" defaults.
        self.assertEqual(parent.session_cost_source, "subagent")
        self.assertEqual(parent.session_cost_status, "estimated")
        # No entry may keep its bookkeeping fields.
        for entry in result["results"]:
            self._assert_internal_fields_stripped(entry)

    def test_zero_cost_children_leave_parent_source_untouched(self):
        """Children that cost nothing (e.g. a free local model) must not
        cause a made-up 'subagent' source; the parent keeps 'none'."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.00)

        with patch("tools.delegate_tool._run_single_child") as run_child:
            run_child.return_value = self._completed_entry(
                _child_role="leaf",
                _child_cost_usd=0.0,
            )
            delegate_task(goal="free local run", parent_agent=parent)

        self.assertEqual(parent.session_estimated_cost_usd, 0.0)
        self.assertEqual(parent.session_cost_source, "none")

    def test_parent_with_real_source_not_overwritten(self):
        """A parent that already carries a real cost source keeps its
        labels when subagent cost is added on top."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.20)
        parent.session_cost_status = "exact"
        parent.session_cost_source = "openrouter"

        with patch("tools.delegate_tool._run_single_child") as run_child:
            run_child.return_value = self._completed_entry(
                _child_role="leaf",
                _child_cost_usd=0.30,
            )
            delegate_task(goal="billed run", parent_agent=parent)

        self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.50, places=6)
        # The pre-existing labels survive the rollup.
        self.assertEqual(parent.session_cost_source, "openrouter")
        self.assertEqual(parent.session_cost_status, "exact")

    def test_rollup_tolerates_missing_cost_fields(self):
        """Entries lacking _child_cost_usd (older fixtures, fabricated error
        entries) must simply add nothing to the parent's total."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.10)

        with patch("tools.delegate_tool._run_single_child") as run_child:
            # Deliberately no _child_role and no _child_cost_usd.
            run_child.return_value = self._completed_entry()
            payload = delegate_task(goal="legacy", parent_agent=parent)

        result = json.loads(payload)

        # The parent's total is exactly what it started with.
        self.assertEqual(parent.session_estimated_cost_usd, 0.10)
        self.assertEqual(len(result["results"]), 1)
|
||||
|
||||
|
||||
class TestBlockedTools(unittest.TestCase):
|
||||
def test_blocked_tools_constant(self):
|
||||
for tool in ["delegate_task", "clarify", "memory", "send_message", "execute_code"]:
|
||||
|
||||
@@ -1616,6 +1616,19 @@ def _run_single_child(
|
||||
# parent thread can fire subagent_stop with the correct role.
|
||||
# Stripped before the dict is serialised back to the model.
|
||||
"_child_role": getattr(child, "_delegate_role", None),
|
||||
# Captured before child.close() so the parent aggregator can fold
|
||||
# the child's total spend into the parent's session cost. Port of
|
||||
# Kilo-Org/kilocode#9448 — previously the footer only reflected the
|
||||
# parent's direct API calls and under-counted subagent-heavy runs.
|
||||
# Stripped before the dict is serialised back to the model.
|
||||
"_child_cost_usd": (
|
||||
float(getattr(child, "session_estimated_cost_usd", 0.0) or 0.0)
|
||||
if isinstance(
|
||||
getattr(child, "session_estimated_cost_usd", 0.0),
|
||||
(int, float),
|
||||
)
|
||||
else 0.0
|
||||
),
|
||||
}
|
||||
if status == "failed":
|
||||
entry["error"] = result.get("error", "Subagent did not produce a response.")
|
||||
@@ -2112,8 +2125,20 @@ def delegate_task(
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
except Exception:
|
||||
_invoke_hook = None
|
||||
# Aggregate child spend here so the parent's footer/UI reflect the true
|
||||
# cost of a subagent-heavy turn. Port of Kilo-Org/kilocode#9448. Each
|
||||
# child's cost was captured in _run_single_child before its AIAgent was
|
||||
# closed; we fold them into the parent in one pass alongside the
|
||||
# subagent_stop hook loop so we don't walk `results` twice.
|
||||
_children_cost_total = 0.0
|
||||
for entry in results:
|
||||
child_role = entry.pop("_child_role", None)
|
||||
child_cost = entry.pop("_child_cost_usd", 0.0)
|
||||
try:
|
||||
if child_cost:
|
||||
_children_cost_total += float(child_cost)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
if _invoke_hook is None:
|
||||
continue
|
||||
try:
|
||||
@@ -2128,6 +2153,28 @@ def delegate_task(
|
||||
except Exception:
|
||||
logger.debug("subagent_stop hook invocation failed", exc_info=True)
|
||||
|
||||
# Fold the aggregated child cost into the parent's session total. This is
|
||||
# additive — each delegate_task call contributes its own children — so
|
||||
# nested orchestrator→worker trees roll up naturally: each layer's own
|
||||
# delegate_task() folds its direct children in, and when the orchestrator
|
||||
# itself finishes, its parent folds the orchestrator's now-inflated total
|
||||
# on top. Degrades silently if the parent lacks the counter (older test
|
||||
# fixtures, etc.).
|
||||
if _children_cost_total > 0.0:
|
||||
try:
|
||||
current = float(getattr(parent_agent, "session_estimated_cost_usd", 0.0) or 0.0)
|
||||
parent_agent.session_estimated_cost_usd = current + _children_cost_total
|
||||
# Upgrade the cost_source so the UI doesn't label a partially-real
|
||||
# total as "none" when the parent itself hadn't billed any calls
|
||||
# yet (rare but possible when the parent's only action this turn
|
||||
# was delegate_task).
|
||||
if getattr(parent_agent, "session_cost_source", "none") in (None, "", "none"):
|
||||
parent_agent.session_cost_source = "subagent"
|
||||
if getattr(parent_agent, "session_cost_status", "unknown") in (None, "", "unknown"):
|
||||
parent_agent.session_cost_status = "estimated"
|
||||
except Exception:
|
||||
logger.debug("Subagent cost rollup failed", exc_info=True)
|
||||
|
||||
total_duration = round(time.monotonic() - overall_start, 2)
|
||||
|
||||
return json.dumps(
|
||||
|
||||
Reference in New Issue
Block a user