2026-02-20 03:15:53 -08:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""
|
|
|
|
|
Tests for the subagent delegation tool.
|
|
|
|
|
|
|
|
|
|
Uses mock AIAgent instances to test the delegation logic without
|
|
|
|
|
requiring API keys or real LLM calls.
|
|
|
|
|
|
|
|
|
|
Run with: python -m pytest tests/test_delegate.py -v
|
|
|
|
|
or: python tests/test_delegate.py
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import json
|
2026-03-14 20:48:29 -07:00
|
|
|
import os
|
2026-02-20 03:15:53 -08:00
|
|
|
import sys
|
2026-03-17 02:53:33 -07:00
|
|
|
import threading
|
2026-04-10 12:51:30 -07:00
|
|
|
import time
|
2026-02-20 03:15:53 -08:00
|
|
|
import unittest
|
|
|
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
|
|
|
|
|
|
from tools.delegate_tool import (
|
|
|
|
|
DELEGATE_BLOCKED_TOOLS,
|
|
|
|
|
DELEGATE_TASK_SCHEMA,
|
2026-04-21 14:11:53 -07:00
|
|
|
DelegateEvent,
|
2026-04-10 01:34:39 +00:00
|
|
|
_get_max_concurrent_children,
|
2026-04-21 14:11:53 -07:00
|
|
|
_LEGACY_EVENT_MAP,
|
2026-02-20 03:15:53 -08:00
|
|
|
MAX_DEPTH,
|
|
|
|
|
check_delegate_requirements,
|
|
|
|
|
delegate_task,
|
2026-03-19 09:25:38 -07:00
|
|
|
_build_child_agent,
|
2026-04-21 14:11:53 -07:00
|
|
|
_build_child_progress_callback,
|
2026-02-20 03:15:53 -08:00
|
|
|
_build_child_system_prompt,
|
|
|
|
|
_strip_blocked_tools,
|
2026-04-06 22:59:14 -07:00
|
|
|
_resolve_child_credential_pool,
|
feat: configurable subagent provider:model with full credential resolution
Adds delegation.model and delegation.provider config fields so subagents
can run on a completely different provider:model pair than the parent agent.
When delegation.provider is set, the system resolves the full credential
bundle (base_url, api_key, api_mode) via resolve_runtime_provider() —
the same path used by CLI/gateway startup. This means all configured
providers work out of the box: openrouter, nous, zai, kimi-coding,
minimax, minimax-cn.
Key design decisions:
- Provider resolution uses hermes_cli.runtime_provider (single source of
truth for credential resolution across CLI, gateway, cron, and now
delegation)
- When only delegation.model is set (no provider), the model name changes
but parent credentials are inherited (for switching models within the
same provider like OpenRouter)
- When delegation.provider is set, full credentials are resolved
independently — enabling cross-provider delegation (e.g. parent on
Nous Portal, subagents on OpenRouter)
- Clear error messages if provider resolution fails (missing API key,
unknown provider name)
- _load_config() now falls back to hermes_cli.config.load_config() for
gateway/cron contexts where CLI_CONFIG is unavailable
Based on PR #791 by 0xbyt4 (closes #609), reworked to use proper
provider credential resolution instead of passing provider as metadata.
Co-authored-by: 0xbyt4 <0xbyt4@users.noreply.github.com>
2026-03-11 06:12:21 -07:00
|
|
|
_resolve_delegation_credentials,
|
2026-02-20 03:15:53 -08:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _make_mock_parent(depth=0):
|
|
|
|
|
"""Create a mock parent agent with the fields delegate_task expects."""
|
|
|
|
|
parent = MagicMock()
|
|
|
|
|
parent.base_url = "https://openrouter.ai/api/v1"
|
2026-04-05 11:42:13 -07:00
|
|
|
parent.api_key="***"
|
2026-02-25 18:20:38 -08:00
|
|
|
parent.provider = "openrouter"
|
|
|
|
|
parent.api_mode = "chat_completions"
|
2026-02-20 03:15:53 -08:00
|
|
|
parent.model = "anthropic/claude-sonnet-4"
|
|
|
|
|
parent.platform = "cli"
|
|
|
|
|
parent.providers_allowed = None
|
|
|
|
|
parent.providers_ignored = None
|
|
|
|
|
parent.providers_order = None
|
|
|
|
|
parent.provider_sort = None
|
|
|
|
|
parent._session_db = None
|
|
|
|
|
parent._delegate_depth = depth
|
|
|
|
|
parent._active_children = []
|
2026-03-17 02:53:33 -07:00
|
|
|
parent._active_children_lock = threading.Lock()
|
2026-04-05 11:42:13 -07:00
|
|
|
parent._print_fn = None
|
|
|
|
|
parent.tool_progress_callback = None
|
|
|
|
|
parent.thinking_callback = None
|
2026-02-20 03:15:53 -08:00
|
|
|
return parent
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDelegateRequirements(unittest.TestCase):
    """Sanity checks on tool availability and the delegate_task schema."""

    def test_always_available(self):
        # Delegation has no external requirements, so this is always True.
        self.assertTrue(check_delegate_requirements())

    def test_schema_valid(self):
        self.assertEqual(DELEGATE_TASK_SCHEMA["name"], "delegate_task")
        properties = DELEGATE_TASK_SCHEMA["parameters"]["properties"]
        for field in ("goal", "tasks", "context", "toolsets"):
            self.assertIn(field, properties)
        # max_iterations is intentionally NOT exposed to the model — it's
        # config-authoritative via delegation.max_iterations so users get
        # predictable budgets.
        self.assertNotIn("max_iterations", properties)
        self.assertNotIn("maxItems", properties["tasks"])  # removed — limit is now runtime-configurable
|
2026-02-20 03:15:53 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestChildSystemPrompt(unittest.TestCase):
    """Tests for how _build_child_system_prompt composes goal and context."""

    def test_goal_only(self):
        rendered = _build_child_system_prompt("Fix the tests")
        self.assertIn("Fix the tests", rendered)
        self.assertIn("YOUR TASK", rendered)
        self.assertNotIn("CONTEXT", rendered)

    def test_goal_with_context(self):
        rendered = _build_child_system_prompt("Fix the tests", "Error: assertion failed in test_foo.py line 42")
        self.assertIn("Fix the tests", rendered)
        self.assertIn("CONTEXT", rendered)
        self.assertIn("assertion failed", rendered)

    def test_empty_context_ignored(self):
        # Whitespace-only context must not produce a CONTEXT section.
        rendered = _build_child_system_prompt("Do something", " ")
        self.assertNotIn("CONTEXT", rendered)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestStripBlockedTools(unittest.TestCase):
    """Tests for _strip_blocked_tools filtering of child toolsets."""

    def test_removes_blocked_toolsets(self):
        kept = _strip_blocked_tools(["terminal", "file", "delegation", "clarify", "memory", "code_execution"])
        self.assertEqual(sorted(kept), ["file", "terminal"])

    def test_preserves_allowed_toolsets(self):
        kept = _strip_blocked_tools(["terminal", "file", "web", "browser"])
        self.assertEqual(sorted(kept), ["browser", "file", "terminal", "web"])

    def test_empty_input(self):
        self.assertEqual(_strip_blocked_tools([]), [])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDelegateTask(unittest.TestCase):
    """End-to-end tests of delegate_task with mocked child execution."""

    # ---- input validation -------------------------------------------------

    def test_no_parent_agent(self):
        payload = json.loads(delegate_task(goal="test"))
        self.assertIn("error", payload)
        self.assertIn("parent agent", payload["error"])

    def test_depth_limit(self):
        deep_parent = _make_mock_parent(depth=2)
        payload = json.loads(delegate_task(goal="test", parent_agent=deep_parent))
        self.assertIn("error", payload)
        self.assertIn("depth limit", payload["error"].lower())

    def test_no_goal_or_tasks(self):
        payload = json.loads(delegate_task(parent_agent=_make_mock_parent()))
        self.assertIn("error", payload)

    def test_empty_goal(self):
        payload = json.loads(delegate_task(goal=" ", parent_agent=_make_mock_parent()))
        self.assertIn("error", payload)

    def test_task_missing_goal(self):
        payload = json.loads(
            delegate_task(tasks=[{"context": "no goal here"}], parent_agent=_make_mock_parent())
        )
        self.assertIn("error", payload)

    # ---- execution modes --------------------------------------------------

    @patch("tools.delegate_tool._run_single_child")
    def test_single_task_mode(self, run_child):
        run_child.return_value = {
            "task_index": 0,
            "status": "completed",
            "summary": "Done!",
            "api_calls": 3,
            "duration_seconds": 5.0,
        }
        payload = json.loads(
            delegate_task(goal="Fix tests", context="error log...", parent_agent=_make_mock_parent())
        )
        self.assertIn("results", payload)
        self.assertEqual(len(payload["results"]), 1)
        self.assertEqual(payload["results"][0]["status"], "completed")
        self.assertEqual(payload["results"][0]["summary"], "Done!")
        run_child.assert_called_once()

    @patch("tools.delegate_tool._run_single_child")
    def test_batch_mode(self, run_child):
        run_child.side_effect = [
            {"task_index": 0, "status": "completed", "summary": "Result A", "api_calls": 2, "duration_seconds": 3.0},
            {"task_index": 1, "status": "completed", "summary": "Result B", "api_calls": 4, "duration_seconds": 6.0},
        ]
        batch = [
            {"goal": "Research topic A"},
            {"goal": "Research topic B"},
        ]
        payload = json.loads(delegate_task(tasks=batch, parent_agent=_make_mock_parent()))
        self.assertIn("results", payload)
        self.assertEqual(len(payload["results"]), 2)
        self.assertEqual(payload["results"][0]["summary"], "Result A")
        self.assertEqual(payload["results"][1]["summary"], "Result B")
        self.assertIn("total_duration_seconds", payload)

    @patch("tools.delegate_tool._run_single_child")
    def test_batch_capped_at_3(self, run_child):
        run_child.return_value = {
            "task_index": 0,
            "status": "completed",
            "summary": "Done",
            "api_calls": 1,
            "duration_seconds": 1.0,
        }
        limit = _get_max_concurrent_children()
        too_many = [{"goal": f"Task {i}"} for i in range(limit + 2)]
        payload = json.loads(delegate_task(tasks=too_many, parent_agent=_make_mock_parent()))
        # Should return an error instead of silently truncating
        self.assertIn("error", payload)
        self.assertIn("Too many tasks", payload["error"])
        run_child.assert_not_called()

    @patch("tools.delegate_tool._run_single_child")
    def test_batch_ignores_toplevel_goal(self, run_child):
        """When tasks array is provided, top-level goal/context/toolsets are ignored."""
        run_child.return_value = {
            "task_index": 0,
            "status": "completed",
            "summary": "Done",
            "api_calls": 1,
            "duration_seconds": 1.0,
        }
        json.loads(delegate_task(
            goal="This should be ignored",
            tasks=[{"goal": "Actual task"}],
            parent_agent=_make_mock_parent(),
        ))
        # The mock was called with the tasks array item, not the top-level goal
        call = run_child.call_args
        observed_goal = call.kwargs.get("goal") or call[1].get(
            "goal", call[0][1] if len(call[0]) > 1 else None
        )
        self.assertEqual(observed_goal, "Actual task")

    @patch("tools.delegate_tool._run_single_child")
    def test_failed_child_included_in_results(self, run_child):
        run_child.return_value = {
            "task_index": 0,
            "status": "error",
            "summary": None,
            "error": "Something broke",
            "api_calls": 0,
            "duration_seconds": 0.5,
        }
        payload = json.loads(delegate_task(goal="Break things", parent_agent=_make_mock_parent()))
        self.assertEqual(payload["results"][0]["status"], "error")
        self.assertIn("Something broke", payload["results"][0]["error"])

    # ---- child wiring -----------------------------------------------------

    def test_depth_increments(self):
        """Verify child gets parent's depth + 1."""
        agent = _make_mock_parent(depth=0)

        with patch("run_agent.AIAgent") as MockAgent:
            child = MagicMock()
            child.run_conversation.return_value = {
                "final_response": "done", "completed": True, "api_calls": 1
            }
            MockAgent.return_value = child

            delegate_task(goal="Test depth", parent_agent=agent)
            self.assertEqual(child._delegate_depth, 1)

    def test_active_children_tracking(self):
        """Verify children are registered/unregistered for interrupt propagation."""
        agent = _make_mock_parent(depth=0)

        with patch("run_agent.AIAgent") as MockAgent:
            child = MagicMock()
            child.run_conversation.return_value = {
                "final_response": "done", "completed": True, "api_calls": 1
            }
            MockAgent.return_value = child

            delegate_task(goal="Test tracking", parent_agent=agent)
            # Registry must be drained once the child has finished.
            self.assertEqual(len(agent._active_children), 0)

    def test_child_inherits_runtime_credentials(self):
        agent = _make_mock_parent(depth=0)
        agent.base_url = "https://chatgpt.com/backend-api/codex"
        agent.api_key = "***"
        agent.provider = "openai-codex"
        agent.api_mode = "codex_responses"

        with patch("run_agent.AIAgent") as MockAgent:
            child = MagicMock()
            child.run_conversation.return_value = {
                "final_response": "ok",
                "completed": True,
                "api_calls": 1,
            }
            MockAgent.return_value = child

            delegate_task(goal="Test runtime inheritance", parent_agent=agent)

            # The child constructor must receive the parent's live credentials.
            _, kwargs = MockAgent.call_args
            self.assertEqual(kwargs["base_url"], agent.base_url)
            self.assertEqual(kwargs["api_key"], agent.api_key)
            self.assertEqual(kwargs["provider"], agent.provider)
            self.assertEqual(kwargs["api_mode"], agent.api_mode)

    def test_child_inherits_parent_print_fn(self):
        agent = _make_mock_parent(depth=0)
        sink = MagicMock()
        agent._print_fn = sink

        with patch("run_agent.AIAgent") as MockAgent:
            child = MagicMock()
            MockAgent.return_value = child

            _build_child_agent(
                task_index=0,
                goal="Keep stdout clean",
                context=None,
                toolsets=None,
                model=None,
                max_iterations=10,
                parent_agent=agent,
                task_count=1,
            )

            self.assertIs(child._print_fn, sink)

    def test_child_uses_thinking_callback_when_progress_callback_available(self):
        agent = _make_mock_parent(depth=0)
        agent.tool_progress_callback = MagicMock()

        with patch("run_agent.AIAgent") as MockAgent:
            child = MagicMock()
            MockAgent.return_value = child

            _build_child_agent(
                task_index=0,
                goal="Avoid raw child spinners",
                context=None,
                toolsets=None,
                model=None,
                max_iterations=10,
                parent_agent=agent,
                task_count=1,
            )

            # The child must get its own thinking callback rather than
            # forwarding raw spinner output straight to the parent.
            self.assertTrue(callable(child.thinking_callback))
            child.thinking_callback("deliberating...")
            agent.tool_progress_callback.assert_not_called()
|
|
|
|
|
|
2026-02-20 03:15:53 -08:00
|
|
|
|
2026-03-17 10:31:38 -07:00
|
|
|
class TestToolNamePreservation(unittest.TestCase):
    """Verify _last_resolved_tool_names is restored after subagent runs."""

    def test_global_tool_names_restored_after_delegation(self):
        """The process-global _last_resolved_tool_names must be restored
        after a subagent completes so the parent's execute_code sandbox
        generates correct imports."""
        import model_tools

        agent = _make_mock_parent(depth=0)
        snapshot = ["terminal", "read_file", "web_search", "execute_code", "delegate_task"]
        model_tools._last_resolved_tool_names = list(snapshot)

        with patch("run_agent.AIAgent") as MockAgent:
            child = MagicMock()
            child.run_conversation.return_value = {
                "final_response": "done", "completed": True, "api_calls": 1,
            }
            MockAgent.return_value = child

            delegate_task(goal="Test tool preservation", parent_agent=agent)

            self.assertEqual(model_tools._last_resolved_tool_names, snapshot)

    def test_global_tool_names_restored_after_child_failure(self):
        """Even when the child agent raises, the global must be restored."""
        import model_tools

        agent = _make_mock_parent(depth=0)
        snapshot = ["terminal", "read_file", "web_search"]
        model_tools._last_resolved_tool_names = list(snapshot)

        with patch("run_agent.AIAgent") as MockAgent:
            child = MagicMock()
            child.run_conversation.side_effect = RuntimeError("boom")
            MockAgent.return_value = child

            payload = json.loads(delegate_task(goal="Crash test", parent_agent=agent))
            self.assertEqual(payload["results"][0]["status"], "error")

            self.assertEqual(model_tools._last_resolved_tool_names, snapshot)

    def test_build_child_agent_does_not_raise_name_error(self):
        """Regression: _build_child_agent must not reference _saved_tool_names.

        The bug introduced by the e7844e9c merge conflict: line 235 inside
        _build_child_agent read `list(_saved_tool_names)` where that variable
        is only defined later in _run_single_child. Calling _build_child_agent
        standalone (without _run_single_child's scope) must never raise NameError.
        """
        agent = _make_mock_parent(depth=0)

        with patch("run_agent.AIAgent"):
            try:
                _build_child_agent(
                    task_index=0,
                    goal="regression check",
                    context=None,
                    toolsets=None,
                    model=None,
                    max_iterations=10,
                    parent_agent=agent,
                    task_count=1,
                )
            except NameError as exc:
                self.fail(
                    f"_build_child_agent raised NameError — "
                    f"_saved_tool_names leaked back into wrong scope: {exc}"
                )

    def test_saved_tool_names_set_on_child_before_run(self):
        """_run_single_child must set _delegate_saved_tool_names on the child
        from model_tools._last_resolved_tool_names before run_conversation."""
        import model_tools

        agent = _make_mock_parent(depth=0)
        expected_tools = ["read_file", "web_search", "execute_code"]
        model_tools._last_resolved_tool_names = list(expected_tools)

        captured = {}

        with patch("run_agent.AIAgent") as MockAgent:
            child = MagicMock()

            def capture_and_return(user_message, task_id=None):
                # Snapshot what was stamped on the child at call time.
                captured["saved"] = list(child._delegate_saved_tool_names)
                return {"final_response": "ok", "completed": True, "api_calls": 1}

            child.run_conversation.side_effect = capture_and_return
            MockAgent.return_value = child

            delegate_task(goal="capture test", parent_agent=agent)

            self.assertEqual(captured["saved"], expected_tools)
|
|
|
|
|
|
2026-03-17 10:31:38 -07:00
|
|
|
|
feat(delegate): add observability metadata to subagent results (#1175)
* fix: Home Assistant event filtering now closed by default
Previously, when no watch_domains or watch_entities were configured,
ALL state_changed events passed through to the agent, causing users
to be flooded with notifications for every HA entity change.
Now events are dropped by default unless the user explicitly configures:
- watch_domains: list of domains to monitor (e.g. climate, light)
- watch_entities: list of specific entity IDs to monitor
- watch_all: true (new option — opt-in to receive all events)
A warning is logged at connect time if no filters are configured,
guiding users to set up their HA platform config.
All 49 gateway HA tests + 52 HA tool tests pass.
* docs: update Home Assistant integration documentation
- homeassistant.md: Fix event filtering docs to reflect closed-by-default
behavior. Add watch_all option. Replace Python dict config example with
YAML. Fix defaults table (was incorrectly showing 'all'). Add required
configuration warning admonition.
- environment-variables.md: Add HASS_TOKEN and HASS_URL to Messaging section.
- messaging/index.md: Add Home Assistant to description, architecture
diagram, platform toolsets table, and Next Steps links.
* fix(terminal): strip provider env vars from background and PTY subprocesses
Extends the env var blocklist from #1157 to also cover the two remaining
leaky paths in process_registry.py:
- spawn_local() PTY path (line 156)
- spawn_local() background Popen path (line 197)
Both were still using raw os.environ, leaking provider vars to background
processes and interactive PTY sessions. Now uses the same dynamic
_HERMES_PROVIDER_ENV_BLOCKLIST from local.py.
Explicit env_vars passed to spawn_local() still override the blocklist,
matching the existing behavior for callers that intentionally need these.
Gap identified by PR #1004 (@PeterFile).
* feat(delegate): add observability metadata to subagent results
Enrich delegate_task results with metadata from the child AIAgent:
- model: which model the child used
- exit_reason: completed | interrupted | max_iterations
- tokens.input / tokens.output: token counts
- tool_trace: per-tool-call trace with byte sizes and ok/error status
Tool trace uses tool_call_id matching to correctly pair parallel tool
calls with their results, with a fallback for messages without IDs.
Cherry-picked from PR #872 by @omerkaz, with fixes:
- Fixed parallel tool call trace pairing (was always updating last entry)
- Removed redundant 'iterations' field (identical to existing 'api_calls')
- Added test for parallel tool call trace correctness
Co-authored-by: omerkaz <omerkaz@users.noreply.github.com>
---------
Co-authored-by: omerkaz <omerkaz@users.noreply.github.com>
2026-03-13 08:07:12 -07:00
|
|
|
class TestDelegateObservability(unittest.TestCase):
|
|
|
|
|
"""Tests for enriched metadata returned by _run_single_child."""
|
|
|
|
|
|
|
|
|
|
def test_observability_fields_present(self):
|
|
|
|
|
"""Completed child should return tool_trace, tokens, model, exit_reason."""
|
|
|
|
|
parent = _make_mock_parent(depth=0)
|
|
|
|
|
|
|
|
|
|
with patch("run_agent.AIAgent") as MockAgent:
|
|
|
|
|
mock_child = MagicMock()
|
|
|
|
|
mock_child.model = "claude-sonnet-4-6"
|
|
|
|
|
mock_child.session_prompt_tokens = 5000
|
|
|
|
|
mock_child.session_completion_tokens = 1200
|
|
|
|
|
mock_child.run_conversation.return_value = {
|
|
|
|
|
"final_response": "done",
|
|
|
|
|
"completed": True,
|
|
|
|
|
"interrupted": False,
|
|
|
|
|
"api_calls": 3,
|
|
|
|
|
"messages": [
|
|
|
|
|
{"role": "user", "content": "do something"},
|
|
|
|
|
{"role": "assistant", "tool_calls": [
|
|
|
|
|
{"id": "tc_1", "function": {"name": "web_search", "arguments": '{"query": "test"}'}}
|
|
|
|
|
]},
|
|
|
|
|
{"role": "tool", "tool_call_id": "tc_1", "content": '{"results": [1,2,3]}'},
|
|
|
|
|
{"role": "assistant", "content": "done"},
|
|
|
|
|
],
|
|
|
|
|
}
|
|
|
|
|
MockAgent.return_value = mock_child
|
|
|
|
|
|
|
|
|
|
result = json.loads(delegate_task(goal="Test observability", parent_agent=parent))
|
|
|
|
|
entry = result["results"][0]
|
|
|
|
|
|
|
|
|
|
# Core observability fields
|
|
|
|
|
self.assertEqual(entry["model"], "claude-sonnet-4-6")
|
|
|
|
|
self.assertEqual(entry["exit_reason"], "completed")
|
|
|
|
|
self.assertEqual(entry["tokens"]["input"], 5000)
|
|
|
|
|
self.assertEqual(entry["tokens"]["output"], 1200)
|
|
|
|
|
|
|
|
|
|
# Tool trace
|
|
|
|
|
self.assertEqual(len(entry["tool_trace"]), 1)
|
|
|
|
|
self.assertEqual(entry["tool_trace"][0]["tool"], "web_search")
|
|
|
|
|
self.assertIn("args_bytes", entry["tool_trace"][0])
|
|
|
|
|
self.assertIn("result_bytes", entry["tool_trace"][0])
|
|
|
|
|
self.assertEqual(entry["tool_trace"][0]["status"], "ok")
|
|
|
|
|
|
|
|
|
|
def test_tool_trace_detects_error(self):
|
|
|
|
|
"""Tool results containing 'error' should be marked as error status."""
|
|
|
|
|
parent = _make_mock_parent(depth=0)
|
|
|
|
|
|
|
|
|
|
with patch("run_agent.AIAgent") as MockAgent:
|
|
|
|
|
mock_child = MagicMock()
|
|
|
|
|
mock_child.model = "claude-sonnet-4-6"
|
|
|
|
|
mock_child.session_prompt_tokens = 0
|
|
|
|
|
mock_child.session_completion_tokens = 0
|
|
|
|
|
mock_child.run_conversation.return_value = {
|
|
|
|
|
"final_response": "failed",
|
|
|
|
|
"completed": True,
|
|
|
|
|
"interrupted": False,
|
|
|
|
|
"api_calls": 1,
|
|
|
|
|
"messages": [
|
|
|
|
|
{"role": "assistant", "tool_calls": [
|
|
|
|
|
{"id": "tc_1", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}}
|
|
|
|
|
]},
|
|
|
|
|
{"role": "tool", "tool_call_id": "tc_1", "content": "Error: command not found"},
|
|
|
|
|
],
|
|
|
|
|
}
|
|
|
|
|
MockAgent.return_value = mock_child
|
|
|
|
|
|
|
|
|
|
result = json.loads(delegate_task(goal="Test error trace", parent_agent=parent))
|
|
|
|
|
trace = result["results"][0]["tool_trace"]
|
|
|
|
|
self.assertEqual(trace[0]["status"], "error")
|
|
|
|
|
|
|
|
|
|
def test_parallel_tool_calls_paired_correctly(self):
|
|
|
|
|
"""Parallel tool calls should each get their own result via tool_call_id matching."""
|
|
|
|
|
parent = _make_mock_parent(depth=0)
|
|
|
|
|
|
|
|
|
|
with patch("run_agent.AIAgent") as MockAgent:
|
|
|
|
|
mock_child = MagicMock()
|
|
|
|
|
mock_child.model = "claude-sonnet-4-6"
|
|
|
|
|
mock_child.session_prompt_tokens = 3000
|
|
|
|
|
mock_child.session_completion_tokens = 800
|
|
|
|
|
mock_child.run_conversation.return_value = {
|
|
|
|
|
"final_response": "done",
|
|
|
|
|
"completed": True,
|
|
|
|
|
"interrupted": False,
|
|
|
|
|
"api_calls": 1,
|
|
|
|
|
"messages": [
|
|
|
|
|
{"role": "assistant", "tool_calls": [
|
|
|
|
|
{"id": "tc_a", "function": {"name": "web_search", "arguments": '{"q": "a"}'}},
|
|
|
|
|
{"id": "tc_b", "function": {"name": "web_search", "arguments": '{"q": "b"}'}},
|
|
|
|
|
{"id": "tc_c", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}},
|
|
|
|
|
]},
|
|
|
|
|
{"role": "tool", "tool_call_id": "tc_a", "content": '{"ok": true}'},
|
|
|
|
|
{"role": "tool", "tool_call_id": "tc_b", "content": "Error: rate limited"},
|
|
|
|
|
{"role": "tool", "tool_call_id": "tc_c", "content": "file1.txt\nfile2.txt"},
|
|
|
|
|
{"role": "assistant", "content": "done"},
|
|
|
|
|
],
|
|
|
|
|
}
|
|
|
|
|
MockAgent.return_value = mock_child
|
|
|
|
|
|
|
|
|
|
result = json.loads(delegate_task(goal="Test parallel", parent_agent=parent))
|
|
|
|
|
trace = result["results"][0]["tool_trace"]
|
|
|
|
|
|
|
|
|
|
# All three tool calls should have results
|
|
|
|
|
self.assertEqual(len(trace), 3)
|
|
|
|
|
|
|
|
|
|
# First: web_search → ok
|
|
|
|
|
self.assertEqual(trace[0]["tool"], "web_search")
|
|
|
|
|
self.assertEqual(trace[0]["status"], "ok")
|
|
|
|
|
self.assertIn("result_bytes", trace[0])
|
|
|
|
|
|
|
|
|
|
# Second: web_search → error
|
|
|
|
|
self.assertEqual(trace[1]["tool"], "web_search")
|
|
|
|
|
self.assertEqual(trace[1]["status"], "error")
|
|
|
|
|
self.assertIn("result_bytes", trace[1])
|
|
|
|
|
|
|
|
|
|
# Third: terminal → ok
|
|
|
|
|
self.assertEqual(trace[2]["tool"], "terminal")
|
|
|
|
|
self.assertEqual(trace[2]["status"], "ok")
|
|
|
|
|
self.assertIn("result_bytes", trace[2])
|
|
|
|
|
|
|
|
|
|
def test_exit_reason_interrupted(self):
|
|
|
|
|
"""Interrupted child should report exit_reason='interrupted'."""
|
|
|
|
|
parent = _make_mock_parent(depth=0)
|
|
|
|
|
|
|
|
|
|
with patch("run_agent.AIAgent") as MockAgent:
|
|
|
|
|
mock_child = MagicMock()
|
|
|
|
|
mock_child.model = "claude-sonnet-4-6"
|
|
|
|
|
mock_child.session_prompt_tokens = 0
|
|
|
|
|
mock_child.session_completion_tokens = 0
|
|
|
|
|
mock_child.run_conversation.return_value = {
|
|
|
|
|
"final_response": "",
|
|
|
|
|
"completed": False,
|
|
|
|
|
"interrupted": True,
|
|
|
|
|
"api_calls": 2,
|
|
|
|
|
"messages": [],
|
|
|
|
|
}
|
|
|
|
|
MockAgent.return_value = mock_child
|
|
|
|
|
|
|
|
|
|
result = json.loads(delegate_task(goal="Test interrupt", parent_agent=parent))
|
|
|
|
|
self.assertEqual(result["results"][0]["exit_reason"], "interrupted")
|
|
|
|
|
|
|
|
|
|
def test_exit_reason_max_iterations(self):
    """Child that didn't complete and wasn't interrupted hit max_iterations."""
    parent = _make_mock_parent(depth=0)

    # Neither completed nor interrupted, with a large api_calls count —
    # the only remaining explanation is the iteration cap.
    exhausted_run = {
        "final_response": "",
        "completed": False,
        "interrupted": False,
        "api_calls": 50,
        "messages": [],
    }

    with patch("run_agent.AIAgent") as agent_cls:
        child = MagicMock()
        child.model = "claude-sonnet-4-6"
        child.session_prompt_tokens = 0
        child.session_completion_tokens = 0
        child.run_conversation.return_value = exhausted_run
        agent_cls.return_value = child

        payload = json.loads(delegate_task(goal="Test max iter", parent_agent=parent))
        self.assertEqual(payload["results"][0]["exit_reason"], "max_iterations")
|
|
|
|
|
|
|
|
|
|
|
fix(web): scope dashboard config Reset button to the current tab (#16813)
* Port from Kilo-Org/kilocode#9448: roll up subagent costs into parent session total
Child subagents built by delegate_task() each track their own
session_estimated_cost_usd, but the parent agent's total never folded
those numbers in. On runs where the parent mostly delegates and the
children do the expensive work, the footer/UI was reporting a fraction
of the actual spend — sometimes $0.00 when the parent itself made no
billed calls.
Fix:
- Capture each child's session_estimated_cost_usd into _child_cost_usd
on the result entry (before child.close() drops the counter).
- After the existing subagent_stop hook loop, sum the children's costs
and add the total to parent.session_estimated_cost_usd.
- Promote session_cost_source from 'none' -> 'subagent' when the parent
had no direct spend but children did, so the UI doesn't label the
total as having unknown provenance. Real sources (openrouter,
anthropic, etc.) are preserved.
Nested orchestrator -> worker trees roll up naturally: each layer's own
delegate_task() folds its direct children in, and when the orchestrator
itself returns, its parent folds the orchestrator's now-inflated total
on top.
Internal fields (_child_cost_usd, _child_role) are stripped from the
results dict before it's serialised back to the model — same contract
as _child_role already followed.
Tests: TestSubagentCostRollup (5 cases) covers single-child, batch,
zero-cost-children, preserved-source, and legacy-fixture paths.
Source: https://github.com/Kilo-Org/kilocode/pull/9448
* fix(web): scope dashboard config Reset button to the current tab
Reported by @ykmfb001 via X: clicking 'Restore Defaults' (恢复默认值) on
the Auxiliary page wiped the entire config.yaml to defaults, not just
the auxiliary section. The button sits next to the category tabs and
users reasonably assumed 'reset this tab', not 'reset everything'.
Changes:
- handleReset now scopes to the fields in the current view:
active category's fields (form mode) or search-matched fields
(search mode). Only those keys are copied from defaults; the rest
of the config is left alone.
- Added a window.confirm() with the scope name before applying.
- Button is hidden in YAML mode (scoping doesn't apply there).
- Tooltip/aria-label now name the scope, e.g. 'Reset Auxiliary to
defaults'.
- i18n: new resetScopeTooltip / confirmResetScope / resetScopeToast
strings in en + zh; resetDefaults key preserved for compat.
2026-04-27 21:09:14 -07:00
|
|
|
class TestSubagentCostRollup(unittest.TestCase):
    """Port of Kilo-Org/kilocode#9448 — parent's session_estimated_cost_usd
    must include subagent spend, not just the parent's own API calls."""

    def _make_parent_with_cost_counters(self, depth=0, starting_cost=0.0):
        # The fields AIAgent exposes and the footer reads from. Set real
        # floats/strings so the rollup can add to them rather than tripping
        # on MagicMock auto-attrs.
        parent = _make_mock_parent(depth=depth)
        parent.session_estimated_cost_usd = starting_cost
        parent.session_cost_status = "unknown"
        parent.session_cost_source = "none"
        return parent

    def test_single_child_cost_folded_into_parent(self):
        parent = self._make_parent_with_cost_counters(starting_cost=0.10)

        with patch("run_agent.AIAgent") as agent_cls:
            child = MagicMock()
            child.model = "claude-sonnet-4-6"
            child.session_prompt_tokens = 1000
            child.session_completion_tokens = 200
            child.session_estimated_cost_usd = 0.42
            child.run_conversation.return_value = {
                "final_response": "done",
                "completed": True,
                "interrupted": False,
                "api_calls": 2,
                "messages": [],
            }
            agent_cls.return_value = child

            payload = json.loads(delegate_task(goal="do stuff", parent_agent=parent))

            # Parent footer must reflect parent_cost + child_cost.
            self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.52, places=6)
            # Rollup must strip the internal field before serialising to the model.
            self.assertNotIn("_child_cost_usd", payload["results"][0])
            self.assertNotIn("_child_role", payload["results"][0])

    def test_batch_children_costs_sum_into_parent(self):
        parent = self._make_parent_with_cost_counters(starting_cost=0.00)

        # Two successful children plus one that failed after spending money.
        child_results = [
            {
                "task_index": 0, "status": "completed", "summary": "A",
                "api_calls": 2, "duration_seconds": 1.0,
                "_child_role": "leaf", "_child_cost_usd": 0.15,
            },
            {
                "task_index": 1, "status": "completed", "summary": "B",
                "api_calls": 2, "duration_seconds": 1.0,
                "_child_role": "leaf", "_child_cost_usd": 0.27,
            },
            {
                "task_index": 2, "status": "failed", "summary": "",
                "error": "boom", "api_calls": 0, "duration_seconds": 0.1,
                "_child_role": "leaf", "_child_cost_usd": 0.03,
            },
        ]

        with patch("tools.delegate_tool._run_single_child") as run_child:
            run_child.side_effect = child_results
            payload = json.loads(
                delegate_task(
                    tasks=[{"goal": "A"}, {"goal": "B"}, {"goal": "C"}],
                    parent_agent=parent,
                )
            )

            # 0.15 + 0.27 + 0.03 even though one child failed — the API calls it
            # made before failing still cost money.
            self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.45, places=6)
            # cost_source promoted from "none" since the parent had no direct spend.
            self.assertEqual(parent.session_cost_source, "subagent")
            self.assertEqual(parent.session_cost_status, "estimated")
            # All internal fields stripped from results.
            for entry in payload["results"]:
                self.assertNotIn("_child_cost_usd", entry)
                self.assertNotIn("_child_role", entry)

    def test_zero_cost_children_leave_parent_source_untouched(self):
        """If every child reports 0 cost (e.g. free local model), we should
        not invent a fake 'subagent' source — the parent's 'none' stays."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.00)

        with patch("tools.delegate_tool._run_single_child") as run_child:
            run_child.return_value = {
                "task_index": 0,
                "status": "completed",
                "summary": "done",
                "api_calls": 1,
                "duration_seconds": 0.5,
                "_child_role": "leaf",
                "_child_cost_usd": 0.0,
            }
            delegate_task(goal="free local run", parent_agent=parent)

            self.assertEqual(parent.session_estimated_cost_usd, 0.0)
            self.assertEqual(parent.session_cost_source, "none")

    def test_parent_with_real_source_not_overwritten(self):
        """If the parent already has its own cost billed (cost_source != 'none'),
        adding subagent cost must not clobber the existing source label."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.20)
        parent.session_cost_status = "exact"
        parent.session_cost_source = "openrouter"

        with patch("tools.delegate_tool._run_single_child") as run_child:
            run_child.return_value = {
                "task_index": 0,
                "status": "completed",
                "summary": "done",
                "api_calls": 1,
                "duration_seconds": 0.5,
                "_child_role": "leaf",
                "_child_cost_usd": 0.30,
            }
            delegate_task(goal="billed run", parent_agent=parent)

            self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.50, places=6)
            # Real source label preserved.
            self.assertEqual(parent.session_cost_source, "openrouter")
            self.assertEqual(parent.session_cost_status, "exact")

    def test_rollup_tolerates_missing_cost_fields(self):
        """Older fixtures / fabricated error entries may not carry
        _child_cost_usd. Rollup must degrade to zero-add silently."""
        parent = self._make_parent_with_cost_counters(starting_cost=0.10)

        with patch("tools.delegate_tool._run_single_child") as run_child:
            # no _child_role, no _child_cost_usd
            run_child.return_value = {
                "task_index": 0,
                "status": "completed",
                "summary": "done",
                "api_calls": 1,
                "duration_seconds": 0.5,
            }
            payload = json.loads(delegate_task(goal="legacy", parent_agent=parent))

            # Parent cost unchanged.
            self.assertEqual(parent.session_estimated_cost_usd, 0.10)
            self.assertEqual(len(payload["results"]), 1)
|
|
|
|
|
|
|
|
|
|
|
2026-02-20 03:15:53 -08:00
|
|
|
class TestBlockedTools(unittest.TestCase):
    """Sanity checks on the delegation tool's module-level constants."""

    def test_blocked_tools_constant(self):
        # Tools that must never be exposed to a child agent.
        expected_blocked = (
            "delegate_task",
            "clarify",
            "memory",
            "send_message",
            "execute_code",
        )
        for name in expected_blocked:
            self.assertIn(name, DELEGATE_BLOCKED_TOOLS)

    def test_constants(self):
        from tools.delegate_tool import (
            _get_max_spawn_depth, _get_orchestrator_enabled,
            _MIN_SPAWN_DEPTH, _MAX_SPAWN_DEPTH_CAP,
        )
        self.assertEqual(_get_max_concurrent_children(), 3)
        self.assertEqual(MAX_DEPTH, 1)
        self.assertEqual(_get_max_spawn_depth(), 1)  # default: flat
        self.assertTrue(_get_orchestrator_enabled())  # default
        self.assertEqual(_MIN_SPAWN_DEPTH, 1)
        self.assertEqual(_MAX_SPAWN_DEPTH_CAP, 3)
|
2026-02-20 03:15:53 -08:00
|
|
|
|
|
|
|
|
|
feat: configurable subagent provider:model with full credential resolution
Adds delegation.model and delegation.provider config fields so subagents
can run on a completely different provider:model pair than the parent agent.
When delegation.provider is set, the system resolves the full credential
bundle (base_url, api_key, api_mode) via resolve_runtime_provider() —
the same path used by CLI/gateway startup. This means all configured
providers work out of the box: openrouter, nous, zai, kimi-coding,
minimax, minimax-cn.
Key design decisions:
- Provider resolution uses hermes_cli.runtime_provider (single source of
truth for credential resolution across CLI, gateway, cron, and now
delegation)
- When only delegation.model is set (no provider), the model name changes
but parent credentials are inherited (for switching models within the
same provider like OpenRouter)
- When delegation.provider is set, full credentials are resolved
independently — enabling cross-provider delegation (e.g. parent on
Nous Portal, subagents on OpenRouter)
- Clear error messages if provider resolution fails (missing API key,
unknown provider name)
- _load_config() now falls back to hermes_cli.config.load_config() for
gateway/cron contexts where CLI_CONFIG is unavailable
Based on PR #791 by 0xbyt4 (closes #609), reworked to use proper
provider credential resolution instead of passing provider as metadata.
Co-authored-by: 0xbyt4 <0xbyt4@users.noreply.github.com>
2026-03-11 06:12:21 -07:00
|
|
|
class TestDelegationCredentialResolution(unittest.TestCase):
    """Tests for provider:model credential resolution in delegation config."""

    def test_no_provider_returns_none_credentials(self):
        """When delegation.provider is empty, all credentials are None (inherit parent)."""
        parent = _make_mock_parent(depth=0)
        resolved = _resolve_delegation_credentials({"model": "", "provider": ""}, parent)
        # Everything None -> the child inherits the parent's credentials wholesale.
        for field in ("provider", "base_url", "api_key", "api_mode", "model"):
            self.assertIsNone(resolved[field])

    def test_model_only_no_provider(self):
        """When only model is set (no provider), model is returned but credentials are None."""
        parent = _make_mock_parent(depth=0)
        resolved = _resolve_delegation_credentials(
            {"model": "google/gemini-3-flash-preview", "provider": ""}, parent
        )
        self.assertEqual(resolved["model"], "google/gemini-3-flash-preview")
        for field in ("provider", "base_url", "api_key"):
            self.assertIsNone(resolved[field])

    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
    def test_provider_resolves_full_credentials(self, mock_resolve):
        """When delegation.provider is set, full credentials are resolved."""
        mock_resolve.return_value = {
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "sk-or-test-key",
            "api_mode": "chat_completions",
        }
        parent = _make_mock_parent(depth=0)
        resolved = _resolve_delegation_credentials(
            {"model": "google/gemini-3-flash-preview", "provider": "openrouter"}, parent
        )
        self.assertEqual(resolved["model"], "google/gemini-3-flash-preview")
        self.assertEqual(resolved["provider"], "openrouter")
        self.assertEqual(resolved["base_url"], "https://openrouter.ai/api/v1")
        self.assertEqual(resolved["api_key"], "sk-or-test-key")
        self.assertEqual(resolved["api_mode"], "chat_completions")
        mock_resolve.assert_called_once_with(requested="openrouter")

    def test_direct_endpoint_uses_configured_base_url_and_api_key(self):
        parent = _make_mock_parent(depth=0)
        delegation_cfg = {
            "model": "qwen2.5-coder",
            "provider": "openrouter",
            "base_url": "http://localhost:1234/v1",
            "api_key": "local-key",
        }
        resolved = _resolve_delegation_credentials(delegation_cfg, parent)
        # An explicit base_url wins: provider collapses to "custom" and the
        # configured endpoint/key are used verbatim.
        self.assertEqual(resolved["model"], "qwen2.5-coder")
        self.assertEqual(resolved["provider"], "custom")
        self.assertEqual(resolved["base_url"], "http://localhost:1234/v1")
        self.assertEqual(resolved["api_key"], "local-key")
        self.assertEqual(resolved["api_mode"], "chat_completions")

    def test_direct_endpoint_falls_back_to_openai_api_key_env(self):
        parent = _make_mock_parent(depth=0)
        delegation_cfg = {
            "model": "qwen2.5-coder",
            "base_url": "http://localhost:1234/v1",
        }
        with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False):
            resolved = _resolve_delegation_credentials(delegation_cfg, parent)
        self.assertEqual(resolved["api_key"], "env-openai-key")
        self.assertEqual(resolved["provider"], "custom")

    def test_direct_endpoint_does_not_fall_back_to_openrouter_api_key_env(self):
        parent = _make_mock_parent(depth=0)
        delegation_cfg = {
            "model": "qwen2.5-coder",
            "base_url": "http://localhost:1234/v1",
        }
        env = {
            "OPENROUTER_API_KEY": "env-openrouter-key",
            "OPENAI_API_KEY": "",
        }
        with patch.dict(os.environ, env, clear=False):
            # Only OPENAI_API_KEY is an acceptable env fallback for a direct
            # endpoint; an OpenRouter key must not be silently reused.
            with self.assertRaises(ValueError) as ctx:
                _resolve_delegation_credentials(delegation_cfg, parent)
        self.assertIn("OPENAI_API_KEY", str(ctx.exception))

    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
    def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
        """Nous provider resolves Nous Portal base_url and api_key."""
        mock_resolve.return_value = {
            "provider": "nous",
            "base_url": "https://inference-api.nousresearch.com/v1",
            "api_key": "nous-agent-key-xyz",
            "api_mode": "chat_completions",
        }
        parent = _make_mock_parent(depth=0)
        resolved = _resolve_delegation_credentials(
            {"model": "hermes-3-llama-3.1-8b", "provider": "nous"}, parent
        )
        self.assertEqual(resolved["provider"], "nous")
        self.assertEqual(resolved["base_url"], "https://inference-api.nousresearch.com/v1")
        self.assertEqual(resolved["api_key"], "nous-agent-key-xyz")
        mock_resolve.assert_called_once_with(requested="nous")

    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
    def test_provider_resolution_failure_raises_valueerror(self, mock_resolve):
        """When provider resolution fails, ValueError is raised with helpful message."""
        mock_resolve.side_effect = RuntimeError("OPENROUTER_API_KEY not set")
        parent = _make_mock_parent(depth=0)
        with self.assertRaises(ValueError) as ctx:
            _resolve_delegation_credentials(
                {"model": "some-model", "provider": "openrouter"}, parent
            )
        message = str(ctx.exception)
        self.assertIn("openrouter", message.lower())
        self.assertIn("Cannot resolve", message)

    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
    def test_provider_resolves_but_no_api_key_raises(self, mock_resolve):
        """When provider resolves but has no API key, ValueError is raised."""
        mock_resolve.return_value = {
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "",
            "api_mode": "chat_completions",
        }
        parent = _make_mock_parent(depth=0)
        with self.assertRaises(ValueError) as ctx:
            _resolve_delegation_credentials(
                {"model": "some-model", "provider": "openrouter"}, parent
            )
        self.assertIn("no API key", str(ctx.exception))

    def test_missing_config_keys_inherit_parent(self):
        """When config dict has no model/provider keys at all, inherits parent."""
        parent = _make_mock_parent(depth=0)
        resolved = _resolve_delegation_credentials({"max_iterations": 45}, parent)
        self.assertIsNone(resolved["model"])
        self.assertIsNone(resolved["provider"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDelegationProviderIntegration(unittest.TestCase):
|
|
|
|
|
"""Integration tests: delegation config → _run_single_child → AIAgent construction."""
|
|
|
|
|
|
|
|
|
|
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_config_provider_credentials_reach_child_agent(self, mock_creds, mock_cfg):
    """When delegation.provider is configured, child agent gets resolved credentials."""
    mock_cfg.return_value = {
        "max_iterations": 45,
        "model": "google/gemini-3-flash-preview",
        "provider": "openrouter",
    }
    resolved = {
        "model": "google/gemini-3-flash-preview",
        "provider": "openrouter",
        "base_url": "https://openrouter.ai/api/v1",
        "api_key": "sk-or-delegation-key",
        "api_mode": "chat_completions",
    }
    mock_creds.return_value = resolved
    parent = _make_mock_parent(depth=0)

    with patch("run_agent.AIAgent") as agent_cls:
        child = MagicMock()
        child.run_conversation.return_value = {
            "final_response": "done", "completed": True, "api_calls": 1
        }
        agent_cls.return_value = child

        delegate_task(goal="Test provider routing", parent_agent=parent)

        _, kwargs = agent_cls.call_args
        # Every resolved credential must be forwarded to the child agent.
        for key in ("model", "provider", "base_url", "api_key", "api_mode"):
            self.assertEqual(kwargs[key], resolved[key])
|
|
|
|
|
|
|
|
|
|
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_cross_provider_delegation(self, mock_creds, mock_cfg):
    """Parent on Nous, subagent on OpenRouter — full credential switch."""
    mock_cfg.return_value = {
        "max_iterations": 45,
        "model": "google/gemini-3-flash-preview",
        "provider": "openrouter",
    }
    mock_creds.return_value = {
        "model": "google/gemini-3-flash-preview",
        "provider": "openrouter",
        "base_url": "https://openrouter.ai/api/v1",
        "api_key": "sk-or-key",
        "api_mode": "chat_completions",
    }

    # Parent runs against Nous Portal.
    parent = _make_mock_parent(depth=0)
    parent.provider = "nous"
    parent.base_url = "https://inference-api.nousresearch.com/v1"
    parent.api_key = "nous-key-abc"

    with patch("run_agent.AIAgent") as agent_cls:
        child = MagicMock()
        child.run_conversation.return_value = {
            "final_response": "done", "completed": True, "api_calls": 1
        }
        agent_cls.return_value = child

        delegate_task(goal="Cross-provider test", parent_agent=parent)

        _, kwargs = agent_cls.call_args
        # Child should use OpenRouter, NOT Nous.
        self.assertEqual(kwargs["provider"], "openrouter")
        self.assertEqual(kwargs["base_url"], "https://openrouter.ai/api/v1")
        self.assertEqual(kwargs["api_key"], "sk-or-key")
        self.assertNotEqual(kwargs["base_url"], parent.base_url)
        self.assertNotEqual(kwargs["api_key"], parent.api_key)
|
|
|
|
|
|
2026-03-14 20:48:29 -07:00
|
|
|
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
    """A direct (custom) endpoint's base_url/api_key must reach the child agent."""
    mock_cfg.return_value = {
        "max_iterations": 45,
        "model": "qwen2.5-coder",
        "base_url": "http://localhost:1234/v1",
        "api_key": "local-key",
    }
    resolved = {
        "model": "qwen2.5-coder",
        "provider": "custom",
        "base_url": "http://localhost:1234/v1",
        "api_key": "local-key",
        "api_mode": "chat_completions",
    }
    mock_creds.return_value = resolved
    parent = _make_mock_parent(depth=0)

    with patch("run_agent.AIAgent") as agent_cls:
        child = MagicMock()
        child.run_conversation.return_value = {
            "final_response": "done", "completed": True, "api_calls": 1
        }
        agent_cls.return_value = child

        delegate_task(goal="Direct endpoint test", parent_agent=parent)

        _, kwargs = agent_cls.call_args
        # The full custom-endpoint credential bundle is forwarded verbatim.
        for key in ("model", "provider", "base_url", "api_key", "api_mode"):
            self.assertEqual(kwargs[key], resolved[key])
|
|
|
|
|
|
feat: configurable subagent provider:model with full credential resolution
Adds delegation.model and delegation.provider config fields so subagents
can run on a completely different provider:model pair than the parent agent.
When delegation.provider is set, the system resolves the full credential
bundle (base_url, api_key, api_mode) via resolve_runtime_provider() —
the same path used by CLI/gateway startup. This means all configured
providers work out of the box: openrouter, nous, zai, kimi-coding,
minimax, minimax-cn.
Key design decisions:
- Provider resolution uses hermes_cli.runtime_provider (single source of
truth for credential resolution across CLI, gateway, cron, and now
delegation)
- When only delegation.model is set (no provider), the model name changes
but parent credentials are inherited (for switching models within the
same provider like OpenRouter)
- When delegation.provider is set, full credentials are resolved
independently — enabling cross-provider delegation (e.g. parent on
Nous Portal, subagents on OpenRouter)
- Clear error messages if provider resolution fails (missing API key,
unknown provider name)
- _load_config() now falls back to hermes_cli.config.load_config() for
gateway/cron contexts where CLI_CONFIG is unavailable
Based on PR #791 by 0xbyt4 (closes #609), reworked to use proper
provider credential resolution instead of passing provider as metadata.
Co-authored-by: 0xbyt4 <0xbyt4@users.noreply.github.com>
2026-03-11 06:12:21 -07:00
|
|
|
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):
    """When delegation config is empty, child inherits parent credentials."""
    # No delegation model/provider configured in the user's config.
    mock_cfg.return_value = {"max_iterations": 45, "model": "", "provider": ""}
    # Credential resolution yields nothing to override (all None).
    mock_creds.return_value = dict.fromkeys(
        ("model", "provider", "base_url", "api_key", "api_mode")
    )
    parent = _make_mock_parent(depth=0)

    with patch("run_agent.AIAgent") as MockAgent:
        child_stub = MagicMock()
        child_stub.run_conversation.return_value = {
            "final_response": "done",
            "completed": True,
            "api_calls": 1,
        }
        MockAgent.return_value = child_stub

        delegate_task(goal="Test inherit", parent_agent=parent)

    _, kwargs = MockAgent.call_args
    # With nothing resolved, the child must be constructed with the
    # parent's own model/provider/base_url.
    self.assertEqual(kwargs["model"], parent.model)
    self.assertEqual(kwargs["provider"], parent.provider)
    self.assertEqual(kwargs["base_url"], parent.base_url)
|
|
|
|
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_credential_error_returns_json_error(self, mock_creds, mock_cfg):
    """When credential resolution fails, delegate_task returns a JSON error."""
    mock_cfg.return_value = {"model": "bad-model", "provider": "nonexistent"}
    # Simulate resolve_runtime_provider failing on an unknown provider name.
    failure = ValueError(
        "Cannot resolve delegation provider 'nonexistent': Unknown provider"
    )
    mock_creds.side_effect = failure
    parent = _make_mock_parent(depth=0)

    raw = delegate_task(goal="Should fail", parent_agent=parent)
    payload = json.loads(raw)

    # The tool must surface the resolution failure as a structured error,
    # not raise into the parent agent.
    self.assertIn("error", payload)
    for fragment in ("Cannot resolve", "nonexistent"):
        self.assertIn(fragment, payload["error"])
|
|
|
|
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_batch_mode_all_children_get_credentials(self, mock_creds, mock_cfg):
    """In batch mode, all children receive the resolved credentials."""
    # Delegation config requests an explicit provider:model pair.
    mock_cfg.return_value = {
        "max_iterations": 45,
        "model": "meta-llama/llama-4-scout",
        "provider": "openrouter",
    }
    # Fully-resolved credential bundle for that provider.
    mock_creds.return_value = {
        "model": "meta-llama/llama-4-scout",
        "provider": "openrouter",
        "base_url": "https://openrouter.ai/api/v1",
        "api_key": "sk-or-batch",
        "api_mode": "chat_completions",
    }
    parent = _make_mock_parent(depth=0)

    # Patch _build_child_agent since credentials are now passed there
    # (agents are built in the main thread before being handed to workers)
    with patch("tools.delegate_tool._build_child_agent") as mock_build, \
         patch("tools.delegate_tool._run_single_child") as mock_run:
        mock_child = MagicMock()
        mock_build.return_value = mock_child
        mock_run.return_value = {
            "task_index": 0, "status": "completed",
            "summary": "Done", "api_calls": 1, "duration_seconds": 1.0
        }

        # Two tasks -> two child agents built.
        tasks = [{"goal": "Task A"}, {"goal": "Task B"}]
        delegate_task(tasks=tasks, parent_agent=parent)

    self.assertEqual(mock_build.call_count, 2)
    # Every child build call must carry the same resolved credential
    # overrides — no child may silently fall back to parent credentials.
    for call in mock_build.call_args_list:
        self.assertEqual(call.kwargs.get("model"), "meta-llama/llama-4-scout")
        self.assertEqual(call.kwargs.get("override_provider"), "openrouter")
        self.assertEqual(call.kwargs.get("override_base_url"), "https://openrouter.ai/api/v1")
        self.assertEqual(call.kwargs.get("override_api_key"), "sk-or-batch")
        self.assertEqual(call.kwargs.get("override_api_mode"), "chat_completions")
2026-04-20 22:19:36 +03:00
|
|
|
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_delegation_acp_runtime_reaches_child_agent(self, mock_creds, mock_cfg):
    """Resolved ACP runtime command/args must be forwarded to child agents."""
    mock_cfg.return_value = {
        "max_iterations": 45,
        "model": "copilot-model",
        "provider": "copilot-acp",
    }
    # ACP providers resolve extra runtime fields (command/args) beyond the
    # usual HTTP credential bundle.
    mock_creds.return_value = {
        "model": "copilot-model",
        "provider": "copilot-acp",
        "base_url": "acp://copilot",
        "api_key": "copilot-acp",
        "api_mode": "chat_completions",
        "command": "custom-copilot",
        "args": ["--stdio-custom"],
    }
    parent = _make_mock_parent(depth=0)

    with patch("tools.delegate_tool._build_child_agent") as mock_build, \
         patch("tools.delegate_tool._run_single_child") as mock_run:
        mock_child = MagicMock()
        mock_build.return_value = mock_child
        mock_run.return_value = {
            "task_index": 0, "status": "completed",
            "summary": "Done", "api_calls": 1, "duration_seconds": 1.0
        }

        delegate_task(goal="ACP delegation test", parent_agent=parent)

    # The builder must receive the credential overrides AND the
    # ACP-specific command/args overrides.
    _, kwargs = mock_build.call_args
    self.assertEqual(kwargs.get("override_provider"), "copilot-acp")
    self.assertEqual(kwargs.get("override_base_url"), "acp://copilot")
    self.assertEqual(kwargs.get("override_api_key"), "copilot-acp")
    self.assertEqual(kwargs.get("override_api_mode"), "chat_completions")
    self.assertEqual(kwargs.get("override_acp_command"), "custom-copilot")
    self.assertEqual(kwargs.get("override_acp_args"), ["--stdio-custom"])
|
feat: configurable subagent provider:model with full credential resolution
Adds delegation.model and delegation.provider config fields so subagents
can run on a completely different provider:model pair than the parent agent.
When delegation.provider is set, the system resolves the full credential
bundle (base_url, api_key, api_mode) via resolve_runtime_provider() —
the same path used by CLI/gateway startup. This means all configured
providers work out of the box: openrouter, nous, zai, kimi-coding,
minimax, minimax-cn.
Key design decisions:
- Provider resolution uses hermes_cli.runtime_provider (single source of
truth for credential resolution across CLI, gateway, cron, and now
delegation)
- When only delegation.model is set (no provider), the model name changes
but parent credentials are inherited (for switching models within the
same provider like OpenRouter)
- When delegation.provider is set, full credentials are resolved
independently — enabling cross-provider delegation (e.g. parent on
Nous Portal, subagents on OpenRouter)
- Clear error messages if provider resolution fails (missing API key,
unknown provider name)
- _load_config() now falls back to hermes_cli.config.load_config() for
gateway/cron contexts where CLI_CONFIG is unavailable
Based on PR #791 by 0xbyt4 (closes #609), reworked to use proper
provider credential resolution instead of passing provider as metadata.
Co-authored-by: 0xbyt4 <0xbyt4@users.noreply.github.com>
2026-03-11 06:12:21 -07:00
|
|
|
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_model_only_no_provider_inherits_parent_credentials(self, mock_creds, mock_cfg):
    """Setting only model (no provider) changes model but keeps parent credentials."""
    override_model = "google/gemini-3-flash-preview"
    mock_cfg.return_value = {
        "max_iterations": 45,
        "model": override_model,
        "provider": "",
    }
    # Resolution returns only the model; every credential field stays None.
    resolved = dict.fromkeys(("provider", "base_url", "api_key", "api_mode"))
    resolved["model"] = override_model
    mock_creds.return_value = resolved
    parent = _make_mock_parent(depth=0)

    with patch("run_agent.AIAgent") as MockAgent:
        child_stub = MagicMock()
        child_stub.run_conversation.return_value = {
            "final_response": "done",
            "completed": True,
            "api_calls": 1,
        }
        MockAgent.return_value = child_stub

        delegate_task(goal="Model only test", parent_agent=parent)

    _, kwargs = MockAgent.call_args
    # The model name is overridden...
    self.assertEqual(kwargs["model"], override_model)
    # ...while the connection credentials still come from the parent.
    self.assertEqual(kwargs["provider"], parent.provider)
    self.assertEqual(kwargs["base_url"], parent.base_url)
|
2026-04-06 22:59:14 -07:00
|
|
|
class TestChildCredentialPoolResolution(unittest.TestCase):
    """Which credential pool a child agent gets from _resolve_child_credential_pool,
    plus how _build_child_agent wires pools and toolsets into the child.
    """

    def test_same_provider_shares_parent_pool(self):
        # Child targeting the parent's provider reuses the parent's pool
        # object (same identity, not a copy). Assumes the mock parent's
        # provider is "openrouter" — set inside _make_mock_parent.
        parent = _make_mock_parent()
        mock_pool = MagicMock()
        parent._credential_pool = mock_pool

        result = _resolve_child_credential_pool("openrouter", parent)
        self.assertIs(result, mock_pool)

    def test_no_provider_inherits_parent_pool(self):
        # No provider override at all -> inherit the parent's pool.
        parent = _make_mock_parent()
        mock_pool = MagicMock()
        parent._credential_pool = mock_pool

        result = _resolve_child_credential_pool(None, parent)
        self.assertIs(result, mock_pool)

    def test_different_provider_loads_own_pool(self):
        # Cross-provider delegation loads a fresh pool for the child's
        # provider instead of sharing the parent's.
        parent = _make_mock_parent()
        parent._credential_pool = MagicMock()
        mock_pool = MagicMock()
        mock_pool.has_credentials.return_value = True

        with patch("agent.credential_pool.load_pool", return_value=mock_pool):
            result = _resolve_child_credential_pool("anthropic", parent)

        self.assertIs(result, mock_pool)

    def test_different_provider_empty_pool_returns_none(self):
        # A loaded pool with no credentials is useless — return None so the
        # child falls back to whatever explicit credentials it was given.
        parent = _make_mock_parent()
        parent._credential_pool = MagicMock()
        mock_pool = MagicMock()
        mock_pool.has_credentials.return_value = False

        with patch("agent.credential_pool.load_pool", return_value=mock_pool):
            result = _resolve_child_credential_pool("anthropic", parent)

        self.assertIsNone(result)

    def test_different_provider_load_failure_returns_none(self):
        # Pool loading errors (e.g. disk failures) must degrade gracefully
        # to None rather than propagate.
        parent = _make_mock_parent()
        parent._credential_pool = MagicMock()

        with patch("agent.credential_pool.load_pool", side_effect=Exception("disk error")):
            result = _resolve_child_credential_pool("anthropic", parent)

        self.assertIsNone(result)

    def test_build_child_agent_assigns_parent_pool_when_shared(self):
        # _build_child_agent must attach the shared pool onto the child
        # instance so _run_single_child can lease from it.
        parent = _make_mock_parent()
        mock_pool = MagicMock()
        parent._credential_pool = mock_pool

        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = MagicMock()
            MockAgent.return_value = mock_child

            _build_child_agent(
                task_index=0,
                goal="Test pool assignment",
                context=None,
                toolsets=["terminal"],
                model=None,
                max_iterations=10,
                parent_agent=parent,
                task_count=1,
            )

        self.assertEqual(mock_child._credential_pool, mock_pool)

    @patch("tools.delegate_tool._load_config", return_value={})
    def test_build_child_agent_preserves_mcp_toolsets_by_default(self, mock_cfg):
        """Default behavior: narrowing toolsets keeps the parent's mcp-* toolsets."""
        parent = _make_mock_parent()
        parent.enabled_toolsets = ["web", "browser", "mcp-MiniMax"]

        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = MagicMock()
            MockAgent.return_value = mock_child

            _build_child_agent(
                task_index=0,
                goal="Test narrowed toolsets",
                context=None,
                # Caller narrows to web+browser; mcp-MiniMax is not listed.
                toolsets=["web", "browser"],
                model=None,
                max_iterations=10,
                parent_agent=parent,
                task_count=1,
            )

        # mcp-* toolsets are appended back even though not requested.
        self.assertEqual(
            MockAgent.call_args[1]["enabled_toolsets"],
            ["web", "browser", "mcp-MiniMax"],
        )

    @patch(
        "tools.delegate_tool._load_config",
        return_value={"inherit_mcp_toolsets": False},
    )
    def test_build_child_agent_strict_intersection_when_opted_out(self, mock_cfg):
        """With inherit_mcp_toolsets: false, the child gets exactly what was requested."""
        parent = _make_mock_parent()
        parent.enabled_toolsets = ["web", "browser", "mcp-MiniMax"]

        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = MagicMock()
            MockAgent.return_value = mock_child

            _build_child_agent(
                task_index=0,
                goal="Test narrowed toolsets",
                context=None,
                toolsets=["web", "browser"],
                model=None,
                max_iterations=10,
                parent_agent=parent,
                task_count=1,
            )

        # Opt-out: mcp-MiniMax is NOT re-added.
        self.assertEqual(
            MockAgent.call_args[1]["enabled_toolsets"],
            ["web", "browser"],
        )
|
|
|
|
|
class TestChildCredentialLeasing(unittest.TestCase):
    """_run_single_child must lease a credential for the child and always return it."""

    def test_run_single_child_acquires_and_releases_lease(self):
        """Happy path: a lease is acquired, swapped in, and released afterwards."""
        from tools.delegate_tool import _run_single_child

        entry = MagicMock()
        entry.id = "cred-b"

        subagent = MagicMock()
        pool = MagicMock()
        pool.acquire_lease.return_value = "cred-b"
        pool.current.return_value = entry
        subagent._credential_pool = pool
        subagent.run_conversation.return_value = {
            "final_response": "done",
            "completed": True,
            "interrupted": False,
            "api_calls": 1,
            "messages": [],
        }

        outcome = _run_single_child(
            task_index=0,
            goal="Investigate rate limits",
            child=subagent,
            parent_agent=_make_mock_parent(),
        )

        self.assertEqual(outcome["status"], "completed")
        pool.acquire_lease.assert_called_once_with()
        subagent._swap_credential.assert_called_once_with(entry)
        pool.release_lease.assert_called_once_with("cred-b")

    def test_run_single_child_releases_lease_after_failure(self):
        """Even when the child raises, the lease must be returned to the pool."""
        from tools.delegate_tool import _run_single_child

        subagent = MagicMock()
        pool = MagicMock()
        pool.acquire_lease.return_value = "cred-a"
        pool.current.return_value = MagicMock(id="cred-a")
        subagent._credential_pool = pool
        subagent.run_conversation.side_effect = RuntimeError("boom")

        outcome = _run_single_child(
            task_index=1,
            goal="Trigger failure",
            child=subagent,
            parent_agent=_make_mock_parent(),
        )

        self.assertEqual(outcome["status"], "error")
        pool.release_lease.assert_called_once_with("cred-a")
|
|
|
|
2026-04-10 12:51:30 -07:00
|
|
|
class TestDelegateHeartbeat(unittest.TestCase):
    """Heartbeat propagates child activity to parent during delegation.

    Without the heartbeat, the gateway inactivity timeout fires because the
    parent's _last_activity_ts freezes when delegate_task starts.
    """

    def test_heartbeat_touches_parent_activity_during_child_run(self):
        """Parent's _touch_activity is called while child.run_conversation blocks."""
        from tools.delegate_tool import _run_single_child

        parent = _make_mock_parent()
        touch_calls = []
        parent._touch_activity = lambda desc: touch_calls.append(desc)

        child = MagicMock()
        child.get_activity_summary.return_value = {
            "current_tool": "terminal",
            "api_call_count": 3,
            "max_iterations": 50,
            "last_activity_desc": "executing tool: terminal",
        }

        # Make run_conversation block long enough for heartbeats to fire
        def slow_run(**kwargs):
            time.sleep(0.25)
            return {"final_response": "done", "completed": True, "api_calls": 3}

        child.run_conversation.side_effect = slow_run

        # Patch the heartbeat interval to fire quickly
        with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
            _run_single_child(
                task_index=0,
                goal="Test heartbeat",
                child=child,
                parent_agent=parent,
            )

        # Heartbeat should have fired at least once during the 0.25s sleep
        self.assertGreater(len(touch_calls), 0,
                           "Heartbeat did not propagate activity to parent")
        # Verify the description includes child's current tool detail
        self.assertTrue(
            any("terminal" in desc for desc in touch_calls),
            f"Heartbeat descriptions should include child tool info: {touch_calls}")

    def test_heartbeat_stops_after_child_completes(self):
        """Heartbeat thread is cleaned up when the child finishes."""
        from tools.delegate_tool import _run_single_child

        parent = _make_mock_parent()
        touch_calls = []
        parent._touch_activity = lambda desc: touch_calls.append(desc)

        child = MagicMock()
        child.get_activity_summary.return_value = {
            "current_tool": None,
            "api_call_count": 1,
            "max_iterations": 50,
            "last_activity_desc": "done",
        }
        child.run_conversation.return_value = {
            "final_response": "done", "completed": True, "api_calls": 1,
        }

        with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
            _run_single_child(
                task_index=0,
                goal="Test cleanup",
                child=child,
                parent_agent=parent,
            )

        # Record count after completion, wait, and verify no more calls
        count_after = len(touch_calls)
        time.sleep(0.15)
        self.assertEqual(len(touch_calls), count_after,
                         "Heartbeat continued firing after child completed")

    def test_heartbeat_stops_after_child_error(self):
        """Heartbeat thread is cleaned up even when the child raises."""
        from tools.delegate_tool import _run_single_child

        parent = _make_mock_parent()
        touch_calls = []
        parent._touch_activity = lambda desc: touch_calls.append(desc)

        child = MagicMock()
        child.get_activity_summary.return_value = {
            "current_tool": "web_search",
            "api_call_count": 2,
            "max_iterations": 50,
            "last_activity_desc": "executing tool: web_search",
        }

        # Fail only after long enough for at least one heartbeat cycle.
        def slow_fail(**kwargs):
            time.sleep(0.15)
            raise RuntimeError("network timeout")

        child.run_conversation.side_effect = slow_fail

        with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
            result = _run_single_child(
                task_index=0,
                goal="Test error cleanup",
                child=child,
                parent_agent=parent,
            )

        self.assertEqual(result["status"], "error")

        # Verify heartbeat stopped
        count_after = len(touch_calls)
        time.sleep(0.15)
        self.assertEqual(len(touch_calls), count_after,
                         "Heartbeat continued firing after child error")

    def test_heartbeat_includes_child_activity_desc_when_no_tool(self):
        """When child has no current_tool, heartbeat uses last_activity_desc."""
        from tools.delegate_tool import _run_single_child

        parent = _make_mock_parent()
        touch_calls = []
        parent._touch_activity = lambda desc: touch_calls.append(desc)

        child = MagicMock()
        child.get_activity_summary.return_value = {
            "current_tool": None,
            "api_call_count": 5,
            "max_iterations": 90,
            "last_activity_desc": "API call #5 completed",
        }

        def slow_run(**kwargs):
            time.sleep(0.15)
            return {"final_response": "done", "completed": True, "api_calls": 5}

        child.run_conversation.side_effect = slow_run

        with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
            _run_single_child(
                task_index=0,
                goal="Test desc fallback",
                child=child,
                parent_agent=parent,
            )

        self.assertGreater(len(touch_calls), 0)
        self.assertTrue(
            any("API call #5 completed" in desc for desc in touch_calls),
            f"Heartbeat should include last_activity_desc: {touch_calls}")

    def test_heartbeat_does_not_trip_idle_stale_while_inside_tool(self):
        """A long-running tool (no iteration advance, but current_tool set)
        must not be flagged stale at the idle threshold.

        Bug #13041: when a child is legitimately busy inside a slow tool
        (terminal command, browser fetch), api_call_count does not advance.
        The previous stale check treated this as idle and stopped the
        heartbeat after 5 cycles (~150s), letting the gateway kill the
        session. The fix uses a much higher in-tool threshold and only
        applies the tight idle threshold when current_tool is None.
        """
        from tools.delegate_tool import _run_single_child

        parent = _make_mock_parent()
        touch_calls = []
        parent._touch_activity = lambda desc: touch_calls.append(desc)

        child = MagicMock()
        # Child is stuck inside a single terminal call for the whole run.
        # api_call_count never advances, current_tool is always set.
        child.get_activity_summary.return_value = {
            "current_tool": "terminal",
            "api_call_count": 1,
            "max_iterations": 50,
            "last_activity_desc": "executing tool: terminal",
        }

        def slow_run(**kwargs):
            # Long enough to exceed the OLD idle threshold (5 cycles) at
            # the patched interval, but shorter than the new in-tool
            # threshold.
            time.sleep(0.4)
            return {"final_response": "done", "completed": True, "api_calls": 1}

        child.run_conversation.side_effect = slow_run

        # Patch both the interval AND the idle ceiling so the test proves
        # the in-tool branch takes effect: with a 0.05s interval and the
        # default _HEARTBEAT_STALE_CYCLES_IDLE=5, the old behavior would
        # trip after 0.25s and stop firing. We should see heartbeats
        # continuing through the full 0.4s run.
        with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
            _run_single_child(
                task_index=0,
                goal="Test long-running tool",
                child=child,
                parent_agent=parent,
            )

        # With the old idle threshold (5 cycles = 0.25s), touch_calls
        # would cap at ~5. With the in-tool threshold (20 cycles = 1.0s),
        # we should see substantially more heartbeats over 0.4s.
        self.assertGreater(
            len(touch_calls), 6,
            f"Heartbeat stopped too early while child was inside a tool; "
            f"got {len(touch_calls)} touches over 0.4s at 0.05s interval",
        )

    def test_heartbeat_still_trips_idle_stale_when_no_tool(self):
        """A wedged child with no current_tool still trips the idle threshold.

        Regression guard: the fix for #13041 must not disable stale
        detection entirely. A child that's hung between turns (no tool
        running, no iteration progress) must still stop touching the
        parent so the gateway timeout can fire.
        """
        from tools.delegate_tool import _run_single_child

        parent = _make_mock_parent()
        touch_calls = []
        parent._touch_activity = lambda desc: touch_calls.append(desc)

        child = MagicMock()
        # Wedged child: no tool running, iteration frozen.
        child.get_activity_summary.return_value = {
            "current_tool": None,
            "api_call_count": 3,
            "max_iterations": 50,
            "last_activity_desc": "waiting for API response",
        }

        def slow_run(**kwargs):
            time.sleep(0.6)
            return {"final_response": "done", "completed": True, "api_calls": 3}

        child.run_conversation.side_effect = slow_run

        # At interval 0.05s, idle threshold (5 cycles) trips at ~0.25s.
        # We should see the heartbeat stop firing well before 0.6s.
        with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
            _run_single_child(
                task_index=0,
                goal="Test wedged child",
                child=child,
                parent_agent=parent,
            )

        # With idle threshold=5 + interval=0.05s, touches should cap
        # around 5. Bound loosely to avoid timing flakes.
        self.assertLess(
            len(touch_calls), 9,
            f"Idle stale detection did not fire: got {len(touch_calls)} "
            f"touches over 0.6s — expected heartbeat to stop after "
            f"~5 stale cycles",
        )
2026-04-10 12:51:30 -07:00
|
|
|
|
2026-04-09 20:55:59 +00:00
|
|
|
class TestDelegationReasoningEffort(unittest.TestCase):
    """Tests for delegation.reasoning_effort config override."""

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_inherits_parent_reasoning_when_no_override(self, MockAgent, mock_cfg):
        """With no delegation.reasoning_effort, child inherits parent's config."""
        # Empty string means "no override configured".
        mock_cfg.return_value = {"max_iterations": 50, "reasoning_effort": ""}
        MockAgent.return_value = MagicMock()
        parent = _make_mock_parent()
        parent.reasoning_config = {"enabled": True, "effort": "xhigh"}

        _build_child_agent(
            task_index=0, goal="test", context=None, toolsets=None,
            model=None, max_iterations=50, parent_agent=parent,
            task_count=1,
        )
        call_kwargs = MockAgent.call_args[1]
        # Child gets the parent's reasoning config verbatim.
        self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "xhigh"})

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_override_reasoning_effort_from_config(self, MockAgent, mock_cfg):
        """delegation.reasoning_effort overrides the parent's level."""
        mock_cfg.return_value = {"max_iterations": 50, "reasoning_effort": "low"}
        MockAgent.return_value = MagicMock()
        parent = _make_mock_parent()
        parent.reasoning_config = {"enabled": True, "effort": "xhigh"}

        _build_child_agent(
            task_index=0, goal="test", context=None, toolsets=None,
            model=None, max_iterations=50, parent_agent=parent,
            task_count=1,
        )
        call_kwargs = MockAgent.call_args[1]
        # Config value "low" wins over the parent's "xhigh".
        self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "low"})

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_override_reasoning_effort_none_disables(self, MockAgent, mock_cfg):
        """delegation.reasoning_effort: 'none' disables thinking for subagents."""
        mock_cfg.return_value = {"max_iterations": 50, "reasoning_effort": "none"}
        MockAgent.return_value = MagicMock()
        parent = _make_mock_parent()
        parent.reasoning_config = {"enabled": True, "effort": "high"}

        _build_child_agent(
            task_index=0, goal="test", context=None, toolsets=None,
            model=None, max_iterations=50, parent_agent=parent,
            task_count=1,
        )
        call_kwargs = MockAgent.call_args[1]
        # "none" maps to a fully-disabled reasoning config (no effort key).
        self.assertEqual(call_kwargs["reasoning_config"], {"enabled": False})

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_invalid_reasoning_effort_falls_back_to_parent(self, MockAgent, mock_cfg):
        """Invalid delegation.reasoning_effort falls back to parent's config."""
        mock_cfg.return_value = {"max_iterations": 50, "reasoning_effort": "banana"}
        MockAgent.return_value = MagicMock()
        parent = _make_mock_parent()
        parent.reasoning_config = {"enabled": True, "effort": "medium"}

        _build_child_agent(
            task_index=0, goal="test", context=None, toolsets=None,
            model=None, max_iterations=50, parent_agent=parent,
            task_count=1,
        )
        call_kwargs = MockAgent.call_args[1]
        # Unknown level is ignored rather than raising; parent config kept.
        self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "medium"})
|
|
|
|
|
2026-04-21 14:11:53 -07:00
|
|
|
# =========================================================================
|
|
|
|
|
# Dispatch helper, progress events, concurrency
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
class TestDispatchDelegateTask(unittest.TestCase):
    """Tests for the _dispatch_delegate_task helper and full param forwarding."""

    @patch("tools.delegate_tool._load_config", return_value={})
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_acp_args_forwarded(self, mock_creds, mock_cfg):
        """Both acp_command and acp_args reach delegate_task via the helper."""
        mock_creds.return_value = {
            "provider": None, "base_url": None,
            "api_key": None, "api_mode": None, "model": None,
        }
        parent = _make_mock_parent(depth=0)
        with patch("tools.delegate_tool._build_child_agent") as mock_build:
            # Use the module's shared mock-child factory instead of
            # duplicating the same field-by-field setup inline — keeps
            # this test consistent with the role-behavior tests below.
            mock_build.return_value = _make_role_mock_child()

            delegate_task(
                goal="test",
                acp_command="claude",
                acp_args=["--acp", "--stdio"],
                parent_agent=parent,
            )
            _, kwargs = mock_build.call_args
            self.assertEqual(kwargs["override_acp_command"], "claude")
            self.assertEqual(kwargs["override_acp_args"], ["--acp", "--stdio"])
|
|
|
|
|
|
|
|
|
|
class TestDelegateEventEnum(unittest.TestCase):
    """Tests for DelegateEvent enum and back-compat aliases."""

    def test_enum_values_are_strings(self):
        """Every DelegateEvent value is a namespaced 'delegate.*' string."""
        for event in DelegateEvent:
            self.assertIsInstance(event.value, str)
            self.assertTrue(event.value.startswith("delegate."))

    def test_legacy_map_covers_all_old_names(self):
        """The legacy alias map covers exactly the five pre-enum names."""
        expected_legacy = {"_thinking", "reasoning.available",
                           "tool.started", "tool.completed", "subagent_progress"}
        self.assertEqual(set(_LEGACY_EVENT_MAP.keys()), expected_legacy)

    def test_legacy_map_values_are_delegate_events(self):
        """Each legacy alias maps to a DelegateEvent member, not a bare string."""
        for old_name, event in _LEGACY_EVENT_MAP.items():
            self.assertIsInstance(event, DelegateEvent)

    def test_progress_callback_normalises_tool_started(self):
        """_build_child_progress_callback handles tool.started via enum."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()
        parent.tool_progress_callback = MagicMock()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        self.assertIsNotNone(cb)

        # Legacy string name should still render a line above the spinner.
        cb("tool.started", tool_name="terminal", preview="ls")
        parent._delegate_spinner.print_above.assert_called()

    def test_progress_callback_normalises_thinking(self):
        """Both _thinking and reasoning.available route to TASK_THINKING."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)

        cb("_thinking", tool_name=None, preview="pondering...")
        assert any("💭" in str(c) for c in parent._delegate_spinner.print_above.call_args_list)

        # Reset and verify the second legacy alias hits the same branch.
        parent._delegate_spinner.print_above.reset_mock()
        cb("reasoning.available", tool_name=None, preview="hmm")
        assert any("💭" in str(c) for c in parent._delegate_spinner.print_above.call_args_list)

    def test_progress_callback_tool_completed_is_noop(self):
        """tool.completed is normalised but produces no display output."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb("tool.completed", tool_name="terminal")
        parent._delegate_spinner.print_above.assert_not_called()

    def test_progress_callback_ignores_unknown_events(self):
        """Unknown event types are silently ignored."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        # Should not raise
        cb("some.unknown.event", tool_name="x")
        parent._delegate_spinner.print_above.assert_not_called()

    def test_progress_callback_accepts_enum_value_directly(self):
        """cb(DelegateEvent.TASK_THINKING, ...) must route to the thinking
        branch. Pre-fix the callback only handled legacy strings via
        _LEGACY_EVENT_MAP.get and silently dropped enum-typed callers."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb(DelegateEvent.TASK_THINKING, preview="pondering")
        # If the enum was accepted, the thinking emoji got printed.
        assert any(
            "💭" in str(c)
            for c in parent._delegate_spinner.print_above.call_args_list
        )

    def test_progress_callback_accepts_new_style_string(self):
        """cb('delegate.task_thinking', ...) — the string form of the
        enum value — must route to the thinking branch too, so new-style
        emitters don't have to import DelegateEvent."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb("delegate.task_thinking", preview="hmm")
        assert any(
            "💭" in str(c)
            for c in parent._delegate_spinner.print_above.call_args_list
        )

    def test_progress_callback_task_progress_not_misrendered(self):
        """'subagent_progress' (legacy name for TASK_PROGRESS) carries a
        pre-batched summary in the tool_name slot. Before the fix, this
        fell through to the TASK_TOOL_STARTED rendering path, treating
        the summary string as a tool name. After the fix: distinct
        render (no tool-start emoji lookup) and pass-through relay
        upward (no re-batching).

        Regression path only reachable once nested orchestration is
        enabled: nested orchestrators relay subagent_progress from
        grandchildren upward through this callback.
        """
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()
        parent.tool_progress_callback = MagicMock()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb("subagent_progress", tool_name="🔀 [1] terminal, file")

        # Spinner gets a distinct 🔀-prefixed line, NOT a tool emoji
        # followed by the summary string as if it were a tool name.
        calls = parent._delegate_spinner.print_above.call_args_list
        self.assertTrue(any("🔀 🔀 [1] terminal, file" in str(c) for c in calls))
        # Parent callback receives the relay (pass-through, no re-batching).
        parent.tool_progress_callback.assert_called_once()
        # No '⚡' tool-start emoji should appear — that's the pre-fix bug.
        self.assertFalse(any("⚡" in str(c) for c in calls))
|
|
|
|
|
|
|
|
|
class TestConcurrencyDefaults(unittest.TestCase):
    """Tests for the concurrency default and no hard ceiling."""

    @patch("tools.delegate_tool._load_config", return_value={})
    def test_default_is_three(self, mock_cfg):
        """With no config and no env var, the default is 3 children."""
        # Clear env var if set
        with patch.dict(os.environ, {}, clear=True):
            self.assertEqual(_get_max_concurrent_children(), 3)

    @patch("tools.delegate_tool._load_config",
           return_value={"max_concurrent_children": 10})
    def test_no_upper_ceiling(self, mock_cfg):
        """Users can raise concurrency as high as they want — no hard cap."""
        self.assertEqual(_get_max_concurrent_children(), 10)

    @patch("tools.delegate_tool._load_config",
           return_value={"max_concurrent_children": 100})
    def test_very_high_values_honored(self, mock_cfg):
        """Even extreme config values pass through unclamped."""
        self.assertEqual(_get_max_concurrent_children(), 100)

    @patch("tools.delegate_tool._load_config",
           return_value={"max_concurrent_children": 0})
    def test_zero_clamped_to_one(self, mock_cfg):
        """Floor of 1 is enforced; zero or negative values raise to 1."""
        self.assertEqual(_get_max_concurrent_children(), 1)

    @patch("tools.delegate_tool._load_config", return_value={})
    def test_env_var_honored_uncapped(self, mock_cfg):
        """DELEGATION_MAX_CONCURRENT_CHILDREN env var is honored, uncapped."""
        with patch.dict(os.environ, {"DELEGATION_MAX_CONCURRENT_CHILDREN": "12"}):
            self.assertEqual(_get_max_concurrent_children(), 12)

    @patch("tools.delegate_tool._load_config",
           return_value={"max_concurrent_children": 6})
    def test_configured_value_returned(self, mock_cfg):
        """An ordinary configured value is returned as-is."""
        self.assertEqual(_get_max_concurrent_children(), 6)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
# max_spawn_depth clamping
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
class TestMaxSpawnDepth(unittest.TestCase):
    """Tests for _get_max_spawn_depth clamping and fallback behavior."""

    @patch("tools.delegate_tool._load_config", return_value={})
    def test_max_spawn_depth_defaults_to_1(self, mock_cfg):
        """Unset config → flat delegation (depth 1) is the default."""
        from tools.delegate_tool import _get_max_spawn_depth
        self.assertEqual(_get_max_spawn_depth(), 1)

    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 0})
    def test_max_spawn_depth_clamped_below_one(self, mock_cfg):
        """Values below 1 are clamped to 1 with a warning logged."""
        import logging
        from tools.delegate_tool import _get_max_spawn_depth
        with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm:
            result = _get_max_spawn_depth()
        self.assertEqual(result, 1)
        self.assertTrue(any("clamping to 1" in m for m in cm.output))

    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 99})
    def test_max_spawn_depth_clamped_above_three(self, mock_cfg):
        """Values above 3 are clamped to 3 with a warning logged."""
        import logging
        from tools.delegate_tool import _get_max_spawn_depth
        with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm:
            result = _get_max_spawn_depth()
        self.assertEqual(result, 3)
        self.assertTrue(any("clamping to 3" in m for m in cm.output))

    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": "not-a-number"})
    def test_max_spawn_depth_invalid_falls_back_to_default(self, mock_cfg):
        """A non-integer config value silently falls back to the default of 1."""
        from tools.delegate_tool import _get_max_spawn_depth
        self.assertEqual(_get_max_spawn_depth(), 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
# role param plumbing
|
|
|
|
|
# =========================================================================
|
|
|
|
|
#
|
|
|
|
|
# These tests cover the schema + signature + stash plumbing of the role
|
|
|
|
|
# param. The full role-honoring behavior (toolset re-add, role-aware
|
|
|
|
|
# prompt) lives in TestOrchestratorRoleBehavior below; these tests only
|
|
|
|
|
# assert on _delegate_role stashing and on the schema shape.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestOrchestratorRoleSchema(unittest.TestCase):
    """Tests that the role param reaches the child via dispatch."""

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def _run_with_mock_child(self, role_arg, mock_cfg, mock_creds):
        # Shared driver (not itself a test): runs delegate_task with the
        # given role argument and returns the mock child so callers can
        # inspect the stashed _delegate_role. _SENTINEL means "omit the
        # role kwarg entirely" (distinct from passing role=None).
        mock_creds.return_value = {
            "provider": None, "base_url": None,
            "api_key": None, "api_mode": None, "model": None,
        }
        parent = _make_mock_parent(depth=0)
        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = MagicMock()
            mock_child.run_conversation.return_value = {
                "final_response": "done", "completed": True,
                "api_calls": 1, "messages": [],
            }
            mock_child._delegate_saved_tool_names = []
            mock_child._credential_pool = None
            mock_child.session_prompt_tokens = 0
            mock_child.session_completion_tokens = 0
            mock_child.model = "test"
            MockAgent.return_value = mock_child
            kwargs = {"goal": "test", "parent_agent": parent}
            if role_arg is not _SENTINEL:
                kwargs["role"] = role_arg
            delegate_task(**kwargs)
            return mock_child

    def test_default_role_is_leaf(self):
        """Omitting the role kwarg entirely defaults to 'leaf'."""
        child = self._run_with_mock_child(_SENTINEL)
        self.assertEqual(child._delegate_role, "leaf")

    def test_explicit_orchestrator_role_stashed(self):
        """role='orchestrator' reaches _build_child_agent and is stashed.
        Full behavior (toolset re-add) lands in commit 3; commit 2 only
        verifies the plumbing."""
        child = self._run_with_mock_child("orchestrator")
        self.assertEqual(child._delegate_role, "orchestrator")

    def test_unknown_role_coerces_to_leaf(self):
        """role='nonsense' → _normalize_role warns and returns 'leaf'."""
        import logging
        with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm:
            child = self._run_with_mock_child("nonsense")
        self.assertEqual(child._delegate_role, "leaf")
        self.assertTrue(any("coercing" in m.lower() for m in cm.output))

    def test_schema_has_role_top_level_and_per_task(self):
        """The tool schema exposes 'role' both top-level and per task."""
        from tools.delegate_tool import DELEGATE_TASK_SCHEMA
        props = DELEGATE_TASK_SCHEMA["parameters"]["properties"]
        self.assertIn("role", props)
        self.assertEqual(props["role"]["enum"], ["leaf", "orchestrator"])
        task_props = props["tasks"]["items"]["properties"]
        self.assertIn("role", task_props)
        self.assertEqual(task_props["role"]["enum"], ["leaf", "orchestrator"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sentinel used to distinguish "role kwarg omitted" from "role=None".
# Compared by identity (`is`) in TestOrchestratorRoleSchema._run_with_mock_child.
_SENTINEL = object()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
# role-honoring behavior
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _make_role_mock_child():
|
|
|
|
|
"""Helper: mock child with minimal fields for delegate_task to process."""
|
|
|
|
|
mock_child = MagicMock()
|
|
|
|
|
mock_child.run_conversation.return_value = {
|
|
|
|
|
"final_response": "done", "completed": True,
|
|
|
|
|
"api_calls": 1, "messages": [],
|
|
|
|
|
}
|
|
|
|
|
mock_child._delegate_saved_tool_names = []
|
|
|
|
|
mock_child._credential_pool = None
|
|
|
|
|
mock_child.session_prompt_tokens = 0
|
|
|
|
|
mock_child.session_completion_tokens = 0
|
|
|
|
|
mock_child.model = "test"
|
|
|
|
|
return mock_child
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestOrchestratorRoleBehavior(unittest.TestCase):
    """Tests that role='orchestrator' actually changes toolset + prompt."""

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_orchestrator_role_keeps_delegation_at_depth_1(
        self, mock_cfg, mock_creds
    ):
        """role='orchestrator' + depth-0 parent with max_spawn_depth=2 →
        child at depth 1 gets 'delegation' in enabled_toolsets (can
        further delegate). Requires max_spawn_depth>=2 since the new
        default is 1 (flat)."""
        mock_creds.return_value = {
            "provider": None, "base_url": None,
            "api_key": None, "api_mode": None, "model": None,
        }
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file"]
        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = _make_role_mock_child()
            MockAgent.return_value = mock_child
            delegate_task(goal="test", role="orchestrator", parent_agent=parent)
            kwargs = MockAgent.call_args[1]
            self.assertIn("delegation", kwargs["enabled_toolsets"])
            self.assertEqual(mock_child._delegate_role, "orchestrator")

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_orchestrator_blocked_at_max_spawn_depth(
        self, mock_cfg, mock_creds
    ):
        """Parent at depth 1 with max_spawn_depth=2 spawns child
        at depth 2 (the floor); role='orchestrator' degrades to leaf."""
        mock_creds.return_value = {
            "provider": None, "base_url": None,
            "api_key": None, "api_mode": None, "model": None,
        }
        parent = _make_mock_parent(depth=1)
        parent.enabled_toolsets = ["terminal", "delegation"]
        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = _make_role_mock_child()
            MockAgent.return_value = mock_child
            delegate_task(goal="test", role="orchestrator", parent_agent=parent)
            kwargs = MockAgent.call_args[1]
            self.assertNotIn("delegation", kwargs["enabled_toolsets"])
            self.assertEqual(mock_child._delegate_role, "leaf")

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config", return_value={})
    def test_orchestrator_blocked_at_default_flat_depth(
        self, mock_cfg, mock_creds
    ):
        """With default max_spawn_depth=1 (flat), role='orchestrator'
        on a depth-0 parent produces a depth-1 child that is already at
        the floor — the role degrades to 'leaf' and the delegation
        toolset is stripped. This is the new default posture."""
        mock_creds.return_value = {
            "provider": None, "base_url": None,
            "api_key": None, "api_mode": None, "model": None,
        }
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file", "delegation"]
        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = _make_role_mock_child()
            MockAgent.return_value = mock_child
            delegate_task(goal="test", role="orchestrator", parent_agent=parent)
            kwargs = MockAgent.call_args[1]
            self.assertNotIn("delegation", kwargs["enabled_toolsets"])
            self.assertEqual(mock_child._delegate_role, "leaf")

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_orchestrator_enabled_false_forces_leaf(self, mock_creds):
        """Kill switch delegation.orchestrator_enabled=false overrides
        role='orchestrator'."""
        mock_creds.return_value = {
            "provider": None, "base_url": None,
            "api_key": None, "api_mode": None, "model": None,
        }
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "delegation"]
        # _load_config patched as a context manager here (not a decorator)
        # so only this test sees the kill switch.
        with patch("tools.delegate_tool._load_config",
                   return_value={"orchestrator_enabled": False}):
            with patch("run_agent.AIAgent") as MockAgent:
                mock_child = _make_role_mock_child()
                MockAgent.return_value = mock_child
                delegate_task(goal="test", role="orchestrator",
                              parent_agent=parent)
                kwargs = MockAgent.call_args[1]
                self.assertNotIn("delegation", kwargs["enabled_toolsets"])
                self.assertEqual(mock_child._delegate_role, "leaf")

    # ── Role-aware system prompt ────────────────────────────────────────

    def test_leaf_prompt_does_not_mention_delegation(self):
        """Leaf prompts must not advertise delegation capability."""
        prompt = _build_child_system_prompt(
            "Fix tests", role="leaf",
            max_spawn_depth=2, child_depth=1,
        )
        self.assertNotIn("delegate_task", prompt)
        self.assertNotIn("Orchestrator Role", prompt)

    def test_orchestrator_prompt_mentions_delegation_capability(self):
        """Orchestrator prompts advertise delegate_task and the depth budget."""
        prompt = _build_child_system_prompt(
            "Survey approaches", role="orchestrator",
            max_spawn_depth=2, child_depth=1,
        )
        self.assertIn("delegate_task", prompt)
        self.assertIn("Orchestrator Role", prompt)
        # Depth/max-depth note present and literal:
        self.assertIn("depth 1", prompt)
        self.assertIn("max_spawn_depth=2", prompt)

    def test_orchestrator_prompt_at_depth_floor_says_children_are_leaves(self):
        """With max_spawn_depth=2 and child_depth=1, the orchestrator's
        own children would be at depth 2 (the floor) → must be leaves."""
        prompt = _build_child_system_prompt(
            "Survey", role="orchestrator",
            max_spawn_depth=2, child_depth=1,
        )
        self.assertIn("MUST be leaves", prompt)

    def test_orchestrator_prompt_below_floor_allows_more_nesting(self):
        """With max_spawn_depth=3 and child_depth=1, the orchestrator's
        own children can themselves be orchestrators (depth 2 < 3)."""
        prompt = _build_child_system_prompt(
            "Deep work", role="orchestrator",
            max_spawn_depth=3, child_depth=1,
        )
        self.assertIn("can themselves be orchestrators", prompt)

    # ── Batch mode and intersection ─────────────────────────────────────

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_batch_mode_per_task_role_override(self, mock_cfg, mock_creds):
        """Per-task role beats top-level; no top-level role → "leaf".

        tasks=[{role:'orchestrator'},{role:'leaf'},{}] → first gets
        delegation, second and third don't. Requires max_spawn_depth>=2
        (raised explicitly here) since the new default is 1 (flat).
        """
        mock_creds.return_value = {
            "provider": None, "base_url": None,
            "api_key": None, "api_mode": None, "model": None,
        }
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file", "delegation"]
        built_toolsets = []

        def _factory(*a, **kw):
            # Records each built child's toolsets, in build order.
            m = _make_role_mock_child()
            built_toolsets.append(kw.get("enabled_toolsets"))
            return m

        with patch("run_agent.AIAgent", side_effect=_factory):
            delegate_task(
                tasks=[
                    {"goal": "A", "role": "orchestrator"},
                    {"goal": "B", "role": "leaf"},
                    {"goal": "C"},  # no role → falls back to top_role (leaf)
                ],
                parent_agent=parent,
            )
        self.assertIn("delegation", built_toolsets[0])
        self.assertNotIn("delegation", built_toolsets[1])
        self.assertNotIn("delegation", built_toolsets[2])

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_intersection_preserves_delegation_bound(
        self, mock_cfg, mock_creds
    ):
        """Design decision: orchestrator capability is granted by role,
        NOT inherited from the parent's toolset. A parent without
        'delegation' in its enabled_toolsets can still spawn an
        orchestrator child — the re-add in _build_child_agent runs
        unconditionally for orchestrators (when max_spawn_depth allows).

        If you want to change to "parent must have delegation too",
        update _build_child_agent to check parent_toolsets before the
        re-add and update this test to match.
        """
        mock_creds.return_value = {
            "provider": None, "base_url": None,
            "api_key": None, "api_mode": None, "model": None,
        }
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file"]  # no delegation
        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = _make_role_mock_child()
            MockAgent.return_value = mock_child
            delegate_task(goal="test", role="orchestrator",
                          parent_agent=parent)
            self.assertIn("delegation", MockAgent.call_args[1]["enabled_toolsets"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestOrchestratorEndToEnd(unittest.TestCase):
|
|
|
|
|
"""End-to-end: parent -> orchestrator -> two-leaf nested orchestration.
|
|
|
|
|
|
|
|
|
|
Covers the acceptance gate: parent delegates to an orchestrator
|
|
|
|
|
child; the orchestrator delegates to two leaf grandchildren; the
|
|
|
|
|
role/toolset/depth chain all resolve correctly.
|
|
|
|
|
|
|
|
|
|
Mock strategy: a single AIAgent patch with a side_effect factory
|
|
|
|
|
that keys on the child's ephemeral_system_prompt — orchestrator
|
|
|
|
|
prompts contain the string "Orchestrator Role" (see
|
|
|
|
|
_build_child_system_prompt), leaves don't. The orchestrator
|
|
|
|
|
mock's run_conversation recursively calls delegate_task with
|
|
|
|
|
tasks=[{goal:...},{goal:...}] to spawn two leaves. This keeps
|
|
|
|
|
the test in one patch context and avoids depth-indexed nesting.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
@patch("tools.delegate_tool._resolve_delegation_credentials")
|
|
|
|
|
@patch("tools.delegate_tool._load_config",
|
|
|
|
|
return_value={"max_spawn_depth": 2})
|
|
|
|
|
def test_end_to_end_nested_orchestration(self, mock_cfg, mock_creds):
|
|
|
|
|
mock_creds.return_value = {
|
|
|
|
|
"provider": None, "base_url": None,
|
|
|
|
|
"api_key": None, "api_mode": None, "model": None,
|
|
|
|
|
}
|
|
|
|
|
parent = _make_mock_parent(depth=0)
|
|
|
|
|
parent.enabled_toolsets = ["terminal", "file", "delegation"]
|
|
|
|
|
|
|
|
|
|
# (enabled_toolsets, _delegate_role) for each agent built
|
|
|
|
|
built_agents: list = []
|
|
|
|
|
# Keep the orchestrator mock around so the re-entrant delegate_task
|
|
|
|
|
# can reach it via closure.
|
|
|
|
|
orch_mock = {}
|
|
|
|
|
|
|
|
|
|
def _factory(*a, **kw):
|
|
|
|
|
prompt = kw.get("ephemeral_system_prompt", "") or ""
|
|
|
|
|
is_orchestrator = "Orchestrator Role" in prompt
|
|
|
|
|
m = _make_role_mock_child()
|
|
|
|
|
built_agents.append({
|
|
|
|
|
"enabled_toolsets": list(kw.get("enabled_toolsets") or []),
|
|
|
|
|
"is_orchestrator_prompt": is_orchestrator,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
if is_orchestrator:
|
|
|
|
|
# Prepare the orchestrator mock as a parent-capable object
|
|
|
|
|
# so the nested delegate_task call succeeds.
|
|
|
|
|
m._delegate_depth = 1
|
|
|
|
|
m._delegate_role = "orchestrator"
|
|
|
|
|
m._active_children = []
|
|
|
|
|
m._active_children_lock = threading.Lock()
|
|
|
|
|
m._session_db = None
|
|
|
|
|
m.platform = "cli"
|
|
|
|
|
m.enabled_toolsets = ["terminal", "file", "delegation"]
|
|
|
|
|
m.api_key = "***"
|
|
|
|
|
m.base_url = ""
|
|
|
|
|
m.provider = None
|
|
|
|
|
m.api_mode = None
|
|
|
|
|
m.providers_allowed = None
|
|
|
|
|
m.providers_ignored = None
|
|
|
|
|
m.providers_order = None
|
|
|
|
|
m.provider_sort = None
|
|
|
|
|
m._print_fn = None
|
|
|
|
|
m.tool_progress_callback = None
|
|
|
|
|
m.thinking_callback = None
|
|
|
|
|
orch_mock["agent"] = m
|
|
|
|
|
|
feat(delegate): cross-agent file state coordination for concurrent subagents (#13718)
* feat(models): hide OpenRouter models that don't advertise tool support
Port from Kilo-Org/kilocode#9068.
hermes-agent is tool-calling-first — every provider path assumes the
model can invoke tools. Models whose OpenRouter supported_parameters
doesn't include 'tools' (e.g. image-only or completion-only models)
cannot be driven by the agent loop and fail at the first tool call.
Filter them out of fetch_openrouter_models() so they never appear in
the model picker (`hermes model`, setup wizard, /model slash command).
Permissive when the field is missing — OpenRouter-compatible gateways
(Nous Portal, private mirrors, older snapshots) don't always populate
supported_parameters. Treat missing as 'unknown → allow' rather than
silently emptying the picker on those gateways. Only hide models
whose supported_parameters is an explicit list that omits tools.
Tests cover: tools present → kept, tools absent → dropped, field
missing → kept, malformed non-list → kept, non-dict item → kept,
empty list → dropped.
* feat(delegate): cross-agent file state coordination for concurrent subagents
Prevents mangled edits when concurrent subagents touch the same file
(same process, same filesystem — the mangle scenario from #11215).
Three layers, all opt-out via HERMES_DISABLE_FILE_STATE_GUARD=1:
1. FileStateRegistry (tools/file_state.py) — process-wide singleton
tracking per-agent read stamps and the last writer globally.
check_stale() names the sibling subagent in the warning when a
non-owning agent wrote after this agent's last read.
2. Per-path threading.Lock wrapped around the read-modify-write
region in write_file_tool and patch_tool. Concurrent siblings on
the same path serialize; different paths stay fully parallel.
V4A multi-file patches lock in sorted path order (deadlock-free).
3. Delegate-completion reminder in tools/delegate_tool.py: after a
subagent returns, writes_since(parent, child_start, parent_reads)
appends '[NOTE: subagent modified files the parent previously
read — re-read before editing: ...]' to entry.summary when the
child touched anything the parent had already seen.
Complements (does not replace) the existing path-overlap check in
run_agent._should_parallelize_tool_batch — batch check prevents
same-file parallel dispatch within one agent's turn (cheap prevention,
zero API cost), registry catches cross-subagent and cross-turn
staleness at write time (detection).
Behavior is warning-only, not hard-failing — matches existing project
style. Errors surface naturally: sibling writes often invalidate the
old_string in patch operations, which already errors cleanly.
Tests: tests/tools/test_file_state_registry.py — 16 tests covering
registry state transitions, per-path locking, per-path-not-global
locking, writes_since filtering, kill switch, and end-to-end
integration through the real read_file/write_file/patch handlers.
2026-04-21 16:41:26 -07:00
|
|
|
def _orchestrator_run(user_message=None, task_id=None):
    """Fake orchestrator turn: re-entrantly delegates to two leaf subagents,
    then reports a completed single-call conversation result.
    """
    delegate_task(
        tasks=[{"goal": "leaf-A"}, {"goal": "leaf-B"}],
        parent_agent=m,
    )
    outcome = {
        "final_response": "orchestrated 2 workers",
        "completed": True,
        "api_calls": 1,
        "messages": [],
    }
    return outcome
|
|
|
|
|
m.run_conversation.side_effect = _orchestrator_run
|
|
|
|
|
|
|
|
|
|
return m
|
|
|
|
|
|
|
|
|
|
with patch("run_agent.AIAgent", side_effect=_factory) as MockAgent:
|
|
|
|
|
delegate_task(
|
|
|
|
|
goal="top-level orchestration",
|
|
|
|
|
role="orchestrator",
|
|
|
|
|
parent_agent=parent,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# 1 orchestrator + 2 leaf grandchildren = 3 agents
|
|
|
|
|
self.assertEqual(MockAgent.call_count, 3)
|
|
|
|
|
# First built = the orchestrator (parent's direct child)
|
|
|
|
|
self.assertIn("delegation", built_agents[0]["enabled_toolsets"])
|
|
|
|
|
self.assertTrue(built_agents[0]["is_orchestrator_prompt"])
|
|
|
|
|
# Next two = leaves (grandchildren)
|
|
|
|
|
self.assertNotIn("delegation", built_agents[1]["enabled_toolsets"])
|
|
|
|
|
self.assertFalse(built_agents[1]["is_orchestrator_prompt"])
|
|
|
|
|
self.assertNotIn("delegation", built_agents[2]["enabled_toolsets"])
|
|
|
|
|
self.assertFalse(built_agents[2]["is_orchestrator_prompt"])
|
|
|
|
|
|
|
|
|
|
|
fix(delegate): resolve subagent approval prompts without deadlocking parent TUI (#15491)
Subagents run inside a ThreadPoolExecutor. The CLI's interactive approval
callback lives in tools/terminal_tool.py's threading.local(), which worker
threads do not inherit. When a subagent hits a dangerous-command guard,
prompt_dangerous_approval() falls back to input() from the worker thread,
deadlocking against the parent's prompt_toolkit TUI that owns stdin.
Fix: install a non-interactive callback into every subagent worker thread
via ThreadPoolExecutor(initializer=set_approval_callback, initargs=(cb,)).
The callback is config-gated by delegation.subagent_auto_approve:
false (default) -> _subagent_auto_deny (safe; matches leaf tool blocklist)
true -> _subagent_auto_approve (opt-in YOLO for cron/batch)
Both emit a logger.warning audit line. Gateway sessions are unaffected
because they resolve approvals via tools/approval.py's per-session queue,
not through these TLS callbacks. Diagnosis credit: @MorAlekss (#14685).
- hermes_cli/config.py: DEFAULT_CONFIG.delegation.subagent_auto_approve: False
- cli-config.yaml.example: documented, commented (default)
- tools/delegate_tool.py: _subagent_auto_deny, _subagent_auto_approve,
_get_subagent_approval_callback, wired into the child timeout executor
- tests/tools/test_delegate.py: 7 tests covering defaults, truthy coercion,
and TLS scoping in the worker thread
2026-04-24 22:37:22 -07:00
|
|
|
class TestSubagentApprovalCallback(unittest.TestCase):
    """Verify the non-interactive approval callbacks wired into subagent
    worker threads.

    Without an installed callback, a dangerous-command guard in a worker
    thread would fall back to input() and deadlock against the parent's
    prompt_toolkit TUI, so every worker gets one via the executor
    initializer. Which callback is chosen is driven by the
    delegation.subagent_auto_approve config flag:
        false (default) → _subagent_auto_deny
        true → _subagent_auto_approve
    """

    def test_auto_deny_returns_deny(self):
        # The deny callback rejects unconditionally.
        from tools.delegate_tool import _subagent_auto_deny

        verdict = _subagent_auto_deny("rm -rf /tmp/x", "dangerous")
        self.assertEqual(verdict, "deny")

    def test_auto_approve_returns_once(self):
        # The approve callback grants a one-shot "once" approval.
        from tools.delegate_tool import _subagent_auto_approve

        verdict = _subagent_auto_approve("rm -rf /tmp/x", "dangerous")
        self.assertEqual(verdict, "once")

    def test_getter_defaults_to_deny(self):
        # No config key at all → safe default (deny).
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_deny,
        )

        with patch("tools.delegate_tool._load_config", return_value={}):
            self.assertIs(
                _get_subagent_approval_callback(), _subagent_auto_deny
            )

    def test_getter_explicit_false_is_deny(self):
        # An explicit False behaves like the default.
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_deny,
        )

        with patch(
            "tools.delegate_tool._load_config",
            return_value={"subagent_auto_approve": False},
        ):
            self.assertIs(
                _get_subagent_approval_callback(), _subagent_auto_deny
            )

    def test_getter_true_is_approve(self):
        # Opt-in True selects the auto-approve callback.
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_approve,
        )

        with patch(
            "tools.delegate_tool._load_config",
            return_value={"subagent_auto_approve": True},
        ):
            self.assertIs(
                _get_subagent_approval_callback(), _subagent_auto_approve
            )

    def test_getter_truthy_string_is_approve(self):
        """is_truthy_value accepts 'yes'/'1'/'true' as truthy."""
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_approve,
        )

        with patch(
            "tools.delegate_tool._load_config",
            return_value={"subagent_auto_approve": "yes"},
        ):
            self.assertIs(
                _get_subagent_approval_callback(), _subagent_auto_approve
            )

    def test_executor_initializer_installs_callback_in_worker(self):
        """The initializer must set the callback on the worker thread's TLS
        only — the parent thread's slot stays untouched, proving the fix is
        correctly scoped to worker threads.
        """
        from concurrent.futures import ThreadPoolExecutor
        from tools.terminal_tool import (
            set_approval_callback as install_cb,
            _get_approval_callback,
        )
        from tools.delegate_tool import _subagent_auto_deny

        # Start from a clean parent thread: no callback installed.
        install_cb(None)
        self.assertIsNone(_get_approval_callback())

        observed = []

        def probe():
            # Record whatever callback this worker thread sees.
            observed.append(_get_approval_callback())

        with ThreadPoolExecutor(
            max_workers=1,
            initializer=install_cb,
            initargs=(_subagent_auto_deny,),
        ) as pool:
            pool.submit(probe).result()

        self.assertEqual(observed, [_subagent_auto_deny])
        # TLS isolates threads: the parent's slot must still be empty.
        self.assertIsNone(_get_approval_callback())
|
|
|
|
|
|
|
|
|
|
|
2026-02-20 03:15:53 -08:00
|
|
|
if __name__ == "__main__":
    # Support direct invocation (python tests/test_delegate.py) in addition
    # to pytest discovery, per the module docstring.
    unittest.main()
|