# tools/environments/modal_utils.py

"""Shared Hermes-side execution flow for Modal transports.

This module deliberately stops at the Hermes boundary:
- command preparation
- cwd/timeout normalization
- stdin/sudo shell wrapping
- common result shape
- interrupt/cancel polling

Direct Modal and managed Modal keep separate transport logic, persistence, and
trust-boundary decisions in their own modules.
"""

from __future__ import annotations

import logging
import shlex
import time
import uuid
from abc import abstractmethod
from dataclasses import dataclass
from typing import Any

from tools.environments.base import BaseEnvironment
from tools.interrupt import is_interrupted


@dataclass(frozen=True)
class PreparedModalExec:
    """Normalized command data passed to a transport-specific exec runner.

    Built by ``BaseModalExecutionEnvironment._prepare_modal_exec`` and handed
    to the transport's ``_start_modal_exec`` hook.  Instances are immutable
    (``frozen=True``).
    """

    # Final shell command text, after any heredoc / sudo-pipe wrapping.
    command: str
    # Working directory for the command (caller-supplied or environment default).
    cwd: str
    # Timeout in seconds (caller-supplied or environment default).
    timeout: int
    # Raw stdin forwarded to the transport; only populated in "payload" stdin
    # mode, otherwise ``None``.
    stdin_data: str | None = None


@dataclass(frozen=True)
class ModalExecStart:
    """Transport response after starting an exec.

    Returned by ``_start_modal_exec``.  ``execute`` returns
    ``immediate_result`` as-is when it is not ``None``; otherwise it requires
    ``handle`` to be set and polls it until completion.
    """

    # Opaque transport token later passed to ``_poll_modal_exec`` and
    # ``_cancel_modal_exec``.
    handle: Any | None = None
    # Final result dict when the exec finished (or failed) without polling.
    immediate_result: dict | None = None


def wrap_modal_stdin_heredoc(command: str, stdin_data: str) -> str:
    """Return *command* with *stdin_data* attached as a quoted shell heredoc.

    Used for transports that cannot pipe stdin directly.  The delimiter is
    single-quoted in the redirection, and a new random delimiter is drawn
    until one is found that does not occur anywhere inside *stdin_data*.
    """

    def _fresh_delimiter() -> str:
        # Eight hex characters from a random UUID.
        return f"HERMES_EOF_{uuid.uuid4().hex[:8]}"

    delimiter = _fresh_delimiter()
    while delimiter in stdin_data:
        delimiter = _fresh_delimiter()

    opening_line = f"{command} << '{delimiter}'"
    return "\n".join((opening_line, stdin_data, delimiter))


def wrap_modal_sudo_pipe(command: str, sudo_stdin: str) -> str:
    """Return *command* prefixed with a ``printf`` pipe that supplies sudo input.

    Used for transports without direct stdin piping.  Trailing whitespace is
    stripped from *sudo_stdin* and the remainder is shell-quoted; ``printf``
    re-adds a single trailing newline.
    """
    quoted_input = shlex.quote(sudo_stdin.rstrip())
    return "printf '%s\\n' {} | {}".format(quoted_input, command)


class BaseModalExecutionEnvironment(BaseEnvironment):
    """Execution flow for the *managed* Modal transport (gateway-owned sandbox).

    This deliberately overrides :meth:`BaseEnvironment.execute` because the
    tool-gateway handles command preparation, CWD tracking, and env-snapshot
    management on the server side.  The base class's ``_wrap_command`` /
    ``_wait_for_process`` / snapshot machinery does not apply here — the
    gateway owns that responsibility.  See ``ManagedModalEnvironment`` for the
    concrete subclass.

    Subclasses implement three transport hooks: ``_start_modal_exec``,
    ``_poll_modal_exec`` and ``_cancel_modal_exec``.  ``execute`` drives them
    with a start -> poll loop that also honours interrupts, an optional
    client-side deadline, and periodic activity heartbeats.
    """

    # How caller-supplied stdin reaches the transport: "payload" forwards it in
    # ``PreparedModalExec.stdin_data``; "heredoc" inlines it into the command
    # text.  With any other value stdin_data is not forwarded at all.
    _stdin_mode = "payload"
    # Seconds slept between successive polls of the transport.
    _poll_interval_seconds = 0.25
    # When not None, a client-side deadline of ``timeout + grace`` seconds is
    # enforced in the poll loop; None disables the client-side deadline.
    _client_timeout_grace_seconds: float | None = None
    # Minimum seconds between activity heartbeats while polling
    # (matches the _wait_for_process cadence).
    _modal_activity_interval_seconds = 10.0
    # Output text returned when the command is interrupted.
    _interrupt_output = "[Command interrupted]"
    # Prefix for error output produced by transport failures.
    _unexpected_error_prefix = "Modal execution error"

    def execute(
        self,
        command: str,
        cwd: str = "",
        *,
        timeout: int | None = None,
        stdin_data: str | None = None,
    ) -> dict:
        """Run *command* through the transport and return its result dict.

        Args:
            command: Shell command text to run.
            cwd: Working directory; an empty value falls back to ``self.cwd``.
            timeout: Timeout in seconds; a falsy value falls back to
                ``self.timeout``.
            stdin_data: Optional stdin, delivered according to ``_stdin_mode``.

        Returns:
            The transport's result dict, or a dict with ``"output"`` and
            ``"returncode"`` keys built here: returncode 1 for transport
            errors, 130 on interrupt, 124 on client-side timeout.

        Exceptions raised by ``_start_modal_exec`` / ``_poll_modal_exec`` are
        converted into error results.  Exceptions from ``_before_execute`` and
        ``_prepare_modal_exec`` are not caught and propagate to the caller.
        """
        self._before_execute()
        prepared = self._prepare_modal_exec(
            command,
            cwd=cwd,
            timeout=timeout,
            stdin_data=stdin_data,
        )

        try:
            start = self._start_modal_exec(prepared)
        except Exception as exc:
            return self._error_result(f"{self._unexpected_error_prefix}: {exc}")

        # The transport may finish (or fail) synchronously; nothing to poll then.
        if start.immediate_result is not None:
            return start.immediate_result

        if start.handle is None:
            return self._error_result(
                f"{self._unexpected_error_prefix}: transport did not return an exec handle"
            )

        started_at = time.monotonic()
        deadline = None
        if self._client_timeout_grace_seconds is not None:
            deadline = started_at + prepared.timeout + self._client_timeout_grace_seconds

        last_heartbeat = started_at

        while True:
            # Interrupts are checked first so a cancel request wins over a
            # result that would only be observed by the next poll.
            if is_interrupted():
                self._cancel_modal_exec_quietly(start.handle)
                return self._result(self._interrupt_output, 130)

            try:
                result = self._poll_modal_exec(start.handle)
            except Exception as exc:
                return self._error_result(f"{self._unexpected_error_prefix}: {exc}")

            if result is not None:
                return result

            now = time.monotonic()
            if deadline is not None and now >= deadline:
                self._cancel_modal_exec_quietly(start.handle)
                return self._timeout_result_for_modal(prepared.timeout)

            # Periodic activity touch so the gateway knows we're alive.
            if now - last_heartbeat >= self._modal_activity_interval_seconds:
                last_heartbeat = now
                self._emit_modal_heartbeat(int(now - started_at))

            time.sleep(self._poll_interval_seconds)

    def _cancel_modal_exec_quietly(self, handle: Any) -> None:
        """Cancel the exec on a best-effort basis; never raises ``Exception``.

        A failed cancel must not mask the interrupt/timeout result the caller
        is about to return, so the failure is only logged at debug level.
        """
        try:
            self._cancel_modal_exec(handle)
        except Exception:
            logging.getLogger(__name__).debug(
                "Modal exec cancel failed", exc_info=True
            )

    def _emit_modal_heartbeat(self, elapsed_seconds: int) -> None:
        """Report liveness through the activity callback, if one is available.

        Any failure while looking up or invoking the callback is contained so
        that it cannot abort the running command.
        """
        try:
            # Imported lazily and guarded: the helper is a private name of the
            # base module and is treated as optional here.
            from tools.environments.base import _get_activity_callback

            callback = _get_activity_callback()
        except Exception:
            callback = None

        if not callback:
            return

        try:
            callback(f"modal command running ({elapsed_seconds}s elapsed)")
        except Exception:
            logging.getLogger(__name__).debug(
                "Modal activity callback failed", exc_info=True
            )

    def _before_execute(self) -> None:
        """Hook for backends that need pre-exec sync or validation.

        No-op in this class; called at the very start of ``execute``.
        """

    def _prepare_modal_exec(
        self,
        command: str,
        *,
        cwd: str = "",
        timeout: int | None = None,
        stdin_data: str | None = None,
    ) -> PreparedModalExec:
        """Normalize the caller's arguments into a ``PreparedModalExec``.

        Args:
            command: Raw shell command text.
            cwd: Working directory; falsy values fall back to ``self.cwd``.
            timeout: Timeout in seconds; falsy values (``None`` or ``0``) fall
                back to ``self.timeout``.
            stdin_data: Optional stdin for the command.

        Returns:
            The prepared exec, with stdin either carried as a payload or
            inlined into the command as a heredoc, and with sudo input piped
            in when ``self._prepare_command`` reports any.
        """
        # ``self.cwd`` / ``self.timeout`` / ``self._prepare_command`` are not
        # defined in this class; presumably inherited from BaseEnvironment.
        effective_cwd = cwd or self.cwd
        effective_timeout = timeout or self.timeout

        exec_command = command
        exec_stdin = stdin_data if self._stdin_mode == "payload" else None
        if stdin_data is not None and self._stdin_mode == "heredoc":
            exec_command = wrap_modal_stdin_heredoc(exec_command, stdin_data)

        # NOTE(review): when heredoc stdin and sudo stdin are both present, the
        # heredoc redirection and the printf pipe both target the command's
        # stdin — confirm the intended precedence with the transport owners.
        exec_command, sudo_stdin = self._prepare_command(exec_command)
        if sudo_stdin is not None:
            exec_command = wrap_modal_sudo_pipe(exec_command, sudo_stdin)

        return PreparedModalExec(
            command=exec_command,
            cwd=effective_cwd,
            timeout=effective_timeout,
            stdin_data=exec_stdin,
        )

    def _result(self, output: str, returncode: int) -> dict:
        """Build the common result dict with ``output`` and ``returncode`` keys."""
        return {
            "output": output,
            "returncode": returncode,
        }

    def _error_result(self, output: str) -> dict:
        """Build a failure result (returncode 1) carrying *output*."""
        return self._result(output, 1)

    def _timeout_result_for_modal(self, timeout: int) -> dict:
        """Build the client-side timeout result (returncode 124)."""
        return self._result(f"Command timed out after {timeout}s", 124)

    @abstractmethod
    def _start_modal_exec(self, prepared: PreparedModalExec) -> ModalExecStart:
        """Begin a transport-specific exec.

        Return a ``ModalExecStart`` carrying either an ``immediate_result`` or
        a ``handle`` to poll.  Raised exceptions become error results.
        """

    @abstractmethod
    def _poll_modal_exec(self, handle: Any) -> dict | None:
        """Return a final result dict when complete, else ``None``.

        Called repeatedly from the ``execute`` poll loop; raised exceptions
        become error results.
        """

    @abstractmethod
    def _cancel_modal_exec(self, handle: Any) -> None:
        """Cancel or terminate the active transport exec.

        Called on interrupt and on client-side timeout; exceptions raised
        here are contained by the caller.
        """
Fixes and refactors enabled by recent updates to main. 2026-03-31 09:29:59 +09:00			`"""Shared Hermes-side execution flow for Modal transports.`

			`This module deliberately stops at the Hermes boundary:`
			`- command preparation`
			`- cwd/timeout normalization`
			`- stdin/sudo shell wrapping`
			`- common result shape`
			`- interrupt/cancel polling`

			`Direct Modal and managed Modal keep separate transport logic, persistence, and`
			`trust-boundary decisions in their own modules.`
			`"""`

			`from __future__ import annotations`

			`import shlex`
			`import time`
			`import uuid`
			`from abc import abstractmethod`
			`from dataclasses import dataclass`
			`from typing import Any`

			`from tools.environments.base import BaseEnvironment`
			`from tools.interrupt import is_interrupted`


			`@dataclass(frozen=True)`
			`class PreparedModalExec:`
			`"""Normalized command data passed to a transport-specific exec runner."""`

			`command: str`
			`cwd: str`
			`timeout: int`
			`stdin_data: str \| None = None`


			`@dataclass(frozen=True)`
			`class ModalExecStart:`
			`"""Transport response after starting an exec."""`

			`handle: Any \| None = None`
			`immediate_result: dict \| None = None`


			`def wrap_modal_stdin_heredoc(command: str, stdin_data: str) -> str:`
			`"""Append stdin as a shell heredoc for transports without stdin piping."""`
			`marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"`
			`while marker in stdin_data:`
			`marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"`
			`return f"{command} << '{marker}'\n{stdin_data}\n{marker}"`


			`def wrap_modal_sudo_pipe(command: str, sudo_stdin: str) -> str:`
			`"""Feed sudo via a shell pipe for transports without direct stdin piping."""`
			`return f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} \| {command}"`


			`class BaseModalExecutionEnvironment(BaseEnvironment):`
feat(environments): unified spawn-per-call execution layer Replace dual execution model (PersistentShellMixin + per-backend oneshot) with spawn-per-call + session snapshot for all backends except ManagedModal. Core changes: - Every command spawns a fresh bash process; session snapshot (env vars, functions, aliases) captured at init and re-sourced before each command - CWD persists via file-based read (local) or in-band stdout markers (remote) - ProcessHandle protocol + _ThreadedProcessHandle adapter for SDK backends - cancel_fn wired for Modal (sandbox.terminate) and Daytona (sandbox.stop) - Shared utilities extracted: _pipe_stdin, _popen_bash, _load_json_store, _save_json_store, _file_mtime_key, _SYNC_INTERVAL_SECONDS - Rate-limited file sync unified in base _before_execute() with _sync_files() hook - execute_oneshot() removed; all 11 call sites in code_execution_tool.py migrated to execute() - Daytona timeout wrapper replaced with SDK-native timeout parameter - persistent_shell.py deleted (291 lines) Backend-specific: - Local: process-group kill via os.killpg, file-based CWD read - Docker: -e env flags only on init_session, not per-command - SSH: shlex.quote transport, ControlMaster connection reuse - Singularity: apptainer exec with instance://, no forced --pwd - Modal: _AsyncWorker + _ThreadedProcessHandle, cancel_fn -> sandbox.terminate - Daytona: SDK-level timeout (not shell wrapper), cancel_fn -> sandbox.stop - ManagedModal: unchanged (gateway owns execution); docstring added explaining why 2026-04-08 13:38:04 -07:00			`"""Execution flow for the managed Modal transport (gateway-owned sandbox).`

			This deliberately overrides :meth:`BaseEnvironment.execute` because the
			`tool-gateway handles command preparation, CWD tracking, and env-snapshot`
			management on the server side. The base class's ``_wrap_command`` /
			``_wait_for_process`` / snapshot machinery does not apply here — the
			gateway owns that responsibility. See ``ManagedModalEnvironment`` for the
			`concrete subclass.`
			`"""`
Fixes and refactors enabled by recent updates to main. 2026-03-31 09:29:59 +09:00
			`_stdin_mode = "payload"`
			`_poll_interval_seconds = 0.25`
			`_client_timeout_grace_seconds: float \| None = None`
			`_interrupt_output = "[Command interrupted]"`
			`_unexpected_error_prefix = "Modal execution error"`

			`def execute(`
			`self,`
			`command: str,`
			`cwd: str = "",`
			`*,`
			`timeout: int \| None = None,`
			`stdin_data: str \| None = None,`
			`) -> dict:`
			`self._before_execute()`
			`prepared = self._prepare_modal_exec(`
			`command,`
			`cwd=cwd,`
			`timeout=timeout,`
			`stdin_data=stdin_data,`
			`)`

			`try:`
			`start = self._start_modal_exec(prepared)`
			`except Exception as exc:`
			`return self._error_result(f"{self._unexpected_error_prefix}: {exc}")`

			`if start.immediate_result is not None:`
			`return start.immediate_result`

			`if start.handle is None:`
			`return self._error_result(`
			`f"{self._unexpected_error_prefix}: transport did not return an exec handle"`
			`)`

			`deadline = None`
			`if self._client_timeout_grace_seconds is not None:`
			`deadline = time.monotonic() + prepared.timeout + self._client_timeout_grace_seconds`

fix: add activity heartbeats to prevent false gateway inactivity timeouts (#10501) Multiple gaps in activity tracking could cause the gateway's inactivity timeout to fire while the agent is actively working: 1. Streaming wait loop had no periodic heartbeat — the outer thread only touched activity when the stale-stream detector fired (180-300s), and for local providers (Ollama) the stale timeout was infinity, meaning zero heartbeats. Now touches activity every 30s. 2. Concurrent tool execution never set the activity callback on worker threads (threading.local invisible across threads) and never set _current_tool. Workers now set the callback, and the concurrent wait uses a polling loop with 30s heartbeats. 3. Modal backend's execute() override had its own polling loop without any activity callback. Now matches _wait_for_process cadence (10s). 2026-04-15 13:29:05 -07:00			`_last_activity_touch = time.monotonic()`
			`_modal_exec_start = time.monotonic()`
			`_ACTIVITY_INTERVAL = 10.0 # match _wait_for_process cadence`

Fixes and refactors enabled by recent updates to main. 2026-03-31 09:29:59 +09:00			`while True:`
			`if is_interrupted():`
			`try:`
			`self._cancel_modal_exec(start.handle)`
			`except Exception:`
			`pass`
			`return self._result(self._interrupt_output, 130)`

			`try:`
			`result = self._poll_modal_exec(start.handle)`
			`except Exception as exc:`
			`return self._error_result(f"{self._unexpected_error_prefix}: {exc}")`

			`if result is not None:`
			`return result`

			`if deadline is not None and time.monotonic() >= deadline:`
			`try:`
			`self._cancel_modal_exec(start.handle)`
			`except Exception:`
			`pass`
			`return self._timeout_result_for_modal(prepared.timeout)`

fix: add activity heartbeats to prevent false gateway inactivity timeouts (#10501) Multiple gaps in activity tracking could cause the gateway's inactivity timeout to fire while the agent is actively working: 1. Streaming wait loop had no periodic heartbeat — the outer thread only touched activity when the stale-stream detector fired (180-300s), and for local providers (Ollama) the stale timeout was infinity, meaning zero heartbeats. Now touches activity every 30s. 2. Concurrent tool execution never set the activity callback on worker threads (threading.local invisible across threads) and never set _current_tool. Workers now set the callback, and the concurrent wait uses a polling loop with 30s heartbeats. 3. Modal backend's execute() override had its own polling loop without any activity callback. Now matches _wait_for_process cadence (10s). 2026-04-15 13:29:05 -07:00			`# Periodic activity touch so the gateway knows we're alive`
			`_now = time.monotonic()`
			`if _now - _last_activity_touch >= _ACTIVITY_INTERVAL:`
			`_last_activity_touch = _now`
			`try:`
			`from tools.environments.base import _get_activity_callback`
			`_cb = _get_activity_callback()`
			`except Exception:`
			`_cb = None`
			`if _cb:`
			`try:`
			`_elapsed = int(_now - _modal_exec_start)`
			`_cb(f"modal command running ({_elapsed}s elapsed)")`
			`except Exception:`
			`pass`

Fixes and refactors enabled by recent updates to main. 2026-03-31 09:29:59 +09:00			`time.sleep(self._poll_interval_seconds)`

			`def _before_execute(self) -> None:`
			`"""Hook for backends that need pre-exec sync or validation."""`
feat(environments): unified spawn-per-call execution layer Replace dual execution model (PersistentShellMixin + per-backend oneshot) with spawn-per-call + session snapshot for all backends except ManagedModal. Core changes: - Every command spawns a fresh bash process; session snapshot (env vars, functions, aliases) captured at init and re-sourced before each command - CWD persists via file-based read (local) or in-band stdout markers (remote) - ProcessHandle protocol + _ThreadedProcessHandle adapter for SDK backends - cancel_fn wired for Modal (sandbox.terminate) and Daytona (sandbox.stop) - Shared utilities extracted: _pipe_stdin, _popen_bash, _load_json_store, _save_json_store, _file_mtime_key, _SYNC_INTERVAL_SECONDS - Rate-limited file sync unified in base _before_execute() with _sync_files() hook - execute_oneshot() removed; all 11 call sites in code_execution_tool.py migrated to execute() - Daytona timeout wrapper replaced with SDK-native timeout parameter - persistent_shell.py deleted (291 lines) Backend-specific: - Local: process-group kill via os.killpg, file-based CWD read - Docker: -e env flags only on init_session, not per-command - SSH: shlex.quote transport, ControlMaster connection reuse - Singularity: apptainer exec with instance://, no forced --pwd - Modal: _AsyncWorker + _ThreadedProcessHandle, cancel_fn -> sandbox.terminate - Daytona: SDK-level timeout (not shell wrapper), cancel_fn -> sandbox.stop - ManagedModal: unchanged (gateway owns execution); docstring added explaining why 2026-04-08 13:38:04 -07:00			`pass`
Fixes and refactors enabled by recent updates to main. 2026-03-31 09:29:59 +09:00
			`def _prepare_modal_exec(`
			`self,`
			`command: str,`
			`*,`
			`cwd: str = "",`
			`timeout: int \| None = None,`
			`stdin_data: str \| None = None,`
			`) -> PreparedModalExec:`
			`effective_cwd = cwd or self.cwd`
			`effective_timeout = timeout or self.timeout`

			`exec_command = command`
			`exec_stdin = stdin_data if self._stdin_mode == "payload" else None`
			`if stdin_data is not None and self._stdin_mode == "heredoc":`
			`exec_command = wrap_modal_stdin_heredoc(exec_command, stdin_data)`

			`exec_command, sudo_stdin = self._prepare_command(exec_command)`
			`if sudo_stdin is not None:`
			`exec_command = wrap_modal_sudo_pipe(exec_command, sudo_stdin)`

			`return PreparedModalExec(`
			`command=exec_command,`
			`cwd=effective_cwd,`
			`timeout=effective_timeout,`
			`stdin_data=exec_stdin,`
			`)`

			`def _result(self, output: str, returncode: int) -> dict:`
			`return {`
			`"output": output,`
			`"returncode": returncode,`
			`}`

			`def _error_result(self, output: str) -> dict:`
			`return self._result(output, 1)`

			`def _timeout_result_for_modal(self, timeout: int) -> dict:`
			`return self._result(f"Command timed out after {timeout}s", 124)`

			`@abstractmethod`
			`def _start_modal_exec(self, prepared: PreparedModalExec) -> ModalExecStart:`
			`"""Begin a transport-specific exec."""`

			`@abstractmethod`
			`def _poll_modal_exec(self, handle: Any) -> dict \| None:`
			"""Return a final result dict when complete, else ``None``."""

			`@abstractmethod`
			`def _cancel_modal_exec(self, handle: Any) -> None:`
			`"""Cancel or terminate the active transport exec."""`