mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
feat(terminal): collapse subagent task_ids to shared container (#16177)
Before: delegate_task children each allocated their own terminal
sandbox keyed by child task_id. Starting extra containers (or Modal
sandboxes / Daytona workspaces) is expensive, and the subagent's work
is invisible to the parent — files written by the child in its
container don't exist in the parent's when the subagent returns.
After: a single `_resolve_container_task_id` helper maps any
tool-call task_id to "default" UNLESS an env override is registered
for it. The parent agent and all delegate_task children therefore
share one long-lived sandbox — installed packages, cwd, /workspace
files, and /tmp scratch carry over freely between them.
RL and benchmark environments (TerminalBench2, HermesSweEnv, ...)
opt in to isolation via `register_task_env_overrides(task_id, {...})`;
those task_ids survive the collapse and get their own sandbox,
preserving the per-task Docker image behavior these benchmarks rely on.
file_state / active-subagents registry / TUI events still key off the
original child task_id, so the 'subagent wrote a file the parent read'
warning and UI per-subagent panels keep working.
Tradeoff: parallel delegate_task children (tasks=[...]) now share one
bash/container. Concurrent cd, env-var mutations, and writes to the
same path will collide. If that bites a specific workflow, the
subagent can opt back into isolation via register_task_env_overrides.
Applied at five lookup sites across three modules:
- tools/terminal_tool.py terminal_tool() and get_active_env()
- tools/file_tools.py _get_file_ops() and _get_live_tracking_cwd()
- tools/code_execution_tool.py _get_or_create_env()
Docs: website/docs/user-guide/configuration.md updated to reflect the
shared-container reality and document the RL/benchmark carve-out.
Tests: tests/tools/test_shared_container_task_id.py (9 cases).
This commit is contained in:
@@ -440,9 +440,10 @@ def _get_or_create_env(task_id: str):
|
||||
_active_environments, _env_lock, _create_environment,
|
||||
_get_env_config, _last_activity, _start_cleanup_thread,
|
||||
_creation_locks, _creation_locks_lock, _task_env_overrides,
|
||||
_resolve_container_task_id,
|
||||
)
|
||||
|
||||
effective_task_id = task_id or "default"
|
||||
effective_task_id = _resolve_container_task_id(task_id)
|
||||
|
||||
# Fast path: environment already exists
|
||||
with _env_lock:
|
||||
|
||||
@@ -88,8 +88,14 @@ def _resolve_path(filepath: str, task_id: str = "default") -> Path:
|
||||
|
||||
def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
|
||||
"""Return the task's live terminal cwd for bookkeeping when available."""
|
||||
try:
|
||||
from tools.terminal_tool import _resolve_container_task_id
|
||||
container_key = _resolve_container_task_id(task_id)
|
||||
except Exception:
|
||||
container_key = task_id
|
||||
|
||||
with _file_ops_lock:
|
||||
cached = _file_ops_cache.get(task_id)
|
||||
cached = _file_ops_cache.get(container_key) or _file_ops_cache.get(task_id)
|
||||
if cached is not None:
|
||||
live_cwd = getattr(getattr(cached, "env", None), "cwd", None) or getattr(
|
||||
cached, "cwd", None
|
||||
@@ -101,7 +107,7 @@ def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
|
||||
from tools.terminal_tool import _active_environments, _env_lock
|
||||
|
||||
with _env_lock:
|
||||
env = _active_environments.get(task_id)
|
||||
env = _active_environments.get(container_key) or _active_environments.get(task_id)
|
||||
live_cwd = getattr(env, "cwd", None) if env is not None else None
|
||||
if live_cwd:
|
||||
return live_cwd
|
||||
@@ -261,15 +267,23 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
|
||||
|
||||
Thread-safe: uses the same per-task creation locks as terminal_tool to
|
||||
prevent duplicate sandbox creation from concurrent tool calls.
|
||||
|
||||
Note: subagent task_ids are collapsed to "default" via
|
||||
``_resolve_container_task_id`` so delegate_task children share the
|
||||
parent's container and its cached file_ops. RL/benchmark task_ids with
|
||||
a registered env override keep their isolation.
|
||||
"""
|
||||
from tools.terminal_tool import (
|
||||
_active_environments, _env_lock, _create_environment,
|
||||
_get_env_config, _last_activity, _start_cleanup_thread,
|
||||
_creation_locks,
|
||||
_creation_locks_lock,
|
||||
_resolve_container_task_id,
|
||||
)
|
||||
import time
|
||||
|
||||
task_id = _resolve_container_task_id(task_id)
|
||||
|
||||
# Fast path: check cache -- but also verify the underlying environment
|
||||
# is still alive (it may have been killed by the cleanup thread).
|
||||
with _file_ops_lock:
|
||||
|
||||
@@ -803,6 +803,31 @@ def clear_task_env_overrides(task_id: str):
|
||||
"""
|
||||
_task_env_overrides.pop(task_id, None)
|
||||
|
||||
|
||||
def _resolve_container_task_id(task_id: Optional[str]) -> str:
    """Translate a tool-call ``task_id`` into its container/sandbox key.

    The returned string is the key used to index ``_active_environments``.

    * The top-level agent passes ``task_id=None`` and is mapped to
      ``"default"``.
    * ``delegate_task`` children pass their own subagent ID (so file-state
      tracking, the active-subagents registry, and TUI events remain
      distinct per child), but that ID is intentionally collapsed to
      ``"default"`` here: subagents share the parent's long-lived container
      (one bash, one /workspace, one set of installed packages).
    * RL / benchmark environments (TerminalBench2, HermesSweEnv, ...) call
      ``register_task_env_overrides(task_id, {...})`` to request a per-task
      Docker/Modal image. A task_id with a registered override is returned
      unchanged so that rollout keeps its own isolated sandbox.

    Args:
        task_id: The task identifier supplied with the tool call; may be
            ``None`` or empty.

    Returns:
        ``task_id`` itself when an env override is registered for it,
        otherwise ``"default"``.
    """
    # No id at all (None / empty string): always the shared container.
    if not task_id:
        return "default"

    # A registered override is the explicit opt-in to per-task isolation.
    if task_id in _task_env_overrides:
        return task_id

    # Everything else (e.g. subagent ids) collapses to the shared container.
    return "default"
|
||||
|
||||
|
||||
# Configuration from environment variables
|
||||
|
||||
def _parse_env_var(name: str, default: str, converter=int, type_label: str = "integer"):
|
||||
@@ -1139,8 +1164,9 @@ def _stop_cleanup_thread():
|
||||
|
||||
def get_active_env(task_id: str):
|
||||
"""Return the active BaseEnvironment for *task_id*, or None."""
|
||||
lookup = _resolve_container_task_id(task_id)
|
||||
with _env_lock:
|
||||
return _active_environments.get(task_id)
|
||||
return _active_environments.get(lookup) or _active_environments.get(task_id)
|
||||
|
||||
|
||||
def is_persistent_env(task_id: str) -> bool:
|
||||
@@ -1473,8 +1499,11 @@ def terminal_tool(
|
||||
config = _get_env_config()
|
||||
env_type = config["env_type"]
|
||||
|
||||
# Use task_id for environment isolation
|
||||
effective_task_id = task_id or "default"
|
||||
# Use task_id for environment isolation. By default all subagent
|
||||
# task_ids collapse back to "default" so the top-level agent and
|
||||
# every delegate_task child share one container; only task_ids with
|
||||
# a registered env override (RL benchmarks) get isolated sandboxes.
|
||||
effective_task_id = _resolve_container_task_id(task_id)
|
||||
|
||||
# Check per-task overrides (set by environments like TerminalBench2Env)
|
||||
# before falling back to global env var config
|
||||
|
||||
Reference in New Issue
Block a user