""" Regression tests for the shared-container task_id mapping. The top-level agent and all delegate_task subagents share a single terminal sandbox keyed by ``"default"``. ``_resolve_container_task_id`` is the sole gatekeeper for which tool-call task_ids go to the shared container vs. get their own isolated sandbox. RL / benchmark environments opt in to isolation by calling ``register_task_env_overrides(task_id, {...})`` before the agent loop; every other task_id collapses back to ``"default"``. If you change the collapse logic, update both the helper and these tests -- see `hermes-agent-dev` skill, "Why do subagents get their own containers?" section, and the Container lifecycle paragraph under Docker Backend in ``website/docs/user-guide/configuration.md``. """ import pytest from tools import terminal_tool @pytest.fixture(autouse=True) def _clean_overrides(): """Ensure no stray overrides from other tests leak in.""" before = dict(terminal_tool._task_env_overrides) terminal_tool._task_env_overrides.clear() yield terminal_tool._task_env_overrides.clear() terminal_tool._task_env_overrides.update(before) def test_none_task_id_maps_to_default(): assert terminal_tool._resolve_container_task_id(None) == "default" def test_empty_task_id_maps_to_default(): assert terminal_tool._resolve_container_task_id("") == "default" def test_literal_default_stays_default(): assert terminal_tool._resolve_container_task_id("default") == "default" def test_subagent_task_id_collapses_to_default(): # delegate_task constructs IDs like "subagent--"; these # should share the parent's container, not spin up their own. assert terminal_tool._resolve_container_task_id("subagent-0-deadbeef") == "default" assert terminal_tool._resolve_container_task_id("subagent-42-cafef00d") == "default" def test_arbitrary_session_id_collapses_to_default(): # Session UUIDs or anything else without an override still collapse. assert terminal_tool._resolve_container_task_id("sess-123e4567-e89b-12d3") == "default" def test_rl_task_with_override_keeps_its_own_id(): # RL / benchmark pattern: register a per-task image, then the task_id # must survive ``_resolve_container_task_id`` so the rollout lands in # its own sandbox. terminal_tool.register_task_env_overrides( "tb2-task-fix-git", {"docker_image": "tb2:fix-git", "cwd": "/app"} ) try: assert ( terminal_tool._resolve_container_task_id("tb2-task-fix-git") == "tb2-task-fix-git" ) finally: terminal_tool.clear_task_env_overrides("tb2-task-fix-git") def test_cleared_override_collapses_again(): terminal_tool.register_task_env_overrides("tb2-x", {"docker_image": "x:y"}) assert terminal_tool._resolve_container_task_id("tb2-x") == "tb2-x" terminal_tool.clear_task_env_overrides("tb2-x") assert terminal_tool._resolve_container_task_id("tb2-x") == "default" def test_get_active_env_reads_shared_container_from_subagent_id(): """``get_active_env`` must see the shared ``"default"`` sandbox when called with a subagent's task_id, so the agent loop's turn-budget enforcement reads the real env (not None) during delegation.""" sentinel = object() terminal_tool._active_environments["default"] = sentinel try: assert terminal_tool.get_active_env("subagent-7-cafe") is sentinel assert terminal_tool.get_active_env(None) is sentinel assert terminal_tool.get_active_env("default") is sentinel finally: terminal_tool._active_environments.pop("default", None) def test_get_active_env_honours_rl_override(): rl_env = object() default_env = object() terminal_tool._active_environments["default"] = default_env terminal_tool._active_environments["rl-42"] = rl_env terminal_tool.register_task_env_overrides("rl-42", {"docker_image": "x"}) try: # With an override registered, lookup returns the task's own env, # not the shared "default" one. assert terminal_tool.get_active_env("rl-42") is rl_env finally: terminal_tool.clear_task_env_overrides("rl-42") terminal_tool._active_environments.pop("default", None) terminal_tool._active_environments.pop("rl-42", None)