pytest don't load plugins

dedupe work, faster docker tests
wip comments
2026-06-26 20:03:22 +08:00 · 2026-06-23 12:25:30 -04:00 · 2026-06-23 12:25:30 -04:00 · 2026-06-23 12:25:30 -04:00 · 2026-06-23 12:25:30 -04:00 · 2026-06-22 16:25:44 -04:00
35 changed files with 1230 additions and 1113 deletions
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -109,13 +109,11 @@ jobs:

      - name: Install Python dependencies (for docker tests)
        run: |
-          uv venv .venv --python 3.11
-          source .venv/bin/activate
          # ``dev`` extra pulls in pytest, pytest-asyncio —
          # everything tests/docker/ needs.  We deliberately avoid ``all``
          # here because the docker tests only drive the container via
          # subprocess and don't import hermes_agent's optional deps.
-          uv pip install -e ".[dev]"
+          uv sync --locked --python 3.11 --extra dev

      - name: Run docker integration tests
        env:
@@ -128,7 +126,7 @@ jobs:
          NOUS_API_KEY: ""
        run: |
          source .venv/bin/activate
-          python -m pytest tests/docker/ -v --tb=short
+          python scripts/run_tests_parallel.py tests/docker/ --file-timeout 300 -- -v --tb=short

      - name: Log in to Docker Hub
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
--- a/26
+++ b/26
@@ -189,7 +189,13 @@ RUN cd web && npm run build && \

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
-COPY . .
+# --link decouples this layer from parents for cache purposes; --chmod bakes
+# the final read-only permissions at copy time so we skip the separate
+# `chmod -R` pass that previously walked ~30k files across the venv +
+# node_modules + source (21s amd64 / 222s arm64 — #49113).  `a+rX,go-w`
+# gives the non-root hermes user read + traverse but no write; root retains
+# write so the build steps below don't need chmod u+w dances.
+COPY --link --chmod=a+rX,go-w . .

 # ---------- Permissions ----------
 # Link hermes-agent itself (editable). Deps are already installed in the
@@ -197,19 +203,15 @@ COPY . .
 # resolution or downloads.
 RUN uv pip install --no-cache-dir --no-deps -e "."

-# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
-# instances must not be able to self-edit the installed source or venv; user
-# data, skills, plugins, config, logs, and dashboard uploads live under
-# /opt/data instead. Root can still repair the image during build/boot, but
-# supervised Hermes processes drop to the non-root hermes user.
+# Wire the exec shim and install-method stamp.  Files under /opt/hermes are
+# already root-owned (COPY, uv sync, npm install all run as root) and
+# read-only for the hermes user (go-w from the --chmod above).
+
 USER root
 RUN mkdir -p /opt/hermes/bin && \
    cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
    chmod 0755 /opt/hermes/bin/hermes && \
-    printf 'docker\n' > /opt/hermes/.install_method && \
-    chown -R root:root /opt/hermes && \
-    chmod -R a+rX /opt/hermes && \
-    chmod -R a-w /opt/hermes
+    printf 'docker\n' > /opt/hermes/.install_method
 # The ``.install_method`` stamp is baked next to the running code (the install
 # tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
 # volume that is commonly bind-mounted from the host and even shared with a
@@ -240,9 +242,7 @@ RUN mkdir -p /opt/hermes/bin && \
 # every published image has it.
 ARG HERMES_GIT_SHA=
 RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
-        chmod u+w /opt/hermes && \
-        printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
-        chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
+        printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha; \
    fi

 # ---------- s6-overlay service wiring ----------
--- a/scripts/run_tests.sh
+++ b/scripts/run_tests.sh
@@ -74,6 +74,7 @@ exec env -i \
  LC_ALL=C.UTF-8 \
  PYTHONHASHSEED=0 \
  PYTHONDONTWRITEBYTECODE=1 \
+  PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 \
  ${EXTRA_PYTHONPATH:+PYTHONPATH="$EXTRA_PYTHONPATH"} \
  ${EXTRA_PYTEST_PLUGINS:+PYTEST_PLUGINS="$EXTRA_PYTEST_PLUGINS"} \
  "$PYTHON" "$SCRIPT_DIR/run_tests_parallel.py" "$@"
--- a/tests/docker/conftest.py
+++ b/tests/docker/conftest.py
@@ -8,15 +8,13 @@ Override the image with ``HERMES_TEST_IMAGE`` env var to point at a pre-built
 image (faster local iteration); otherwise the ``built_image`` fixture builds
 the repo's Dockerfile once per session.

-Docker tests need longer timeouts than the suite default (30s), so every
-test under this directory is granted a 180s default via
-``pytest.mark.timeout`` applied at collection time.
 """
 from __future__ import annotations

 import os
 import shutil
 import subprocess
+import time
 from collections.abc import Iterator

 import pytest
@@ -43,11 +41,9 @@ def pytest_collection_modifyitems(config, items):  # noqa: D401 - pytest hook
    skip_docker = pytest.mark.skip(
        reason="Docker not available or daemon not running",
    )
-    extend_timeout = pytest.mark.timeout(180)
    for item in items:
        if "tests/docker/" not in str(item.fspath).replace(os.sep, "/"):
            continue
-        item.add_marker(extend_timeout)
        if not docker_ok:
            item.add_marker(skip_docker)

@@ -137,3 +133,151 @@ def docker_exec_sh(
    return docker_exec(
        container, "sh", "-c", command, user=user, timeout=timeout,
    )
+
+
+def wait_for_container_ready(
+    container: str,
+    *,
+    deadline_s: float = 30.0,
+    interval_s: float = 0.25,
+) -> None:
+    """Poll until the container has finished s6 cont-init (stage2 + reconcile).
+
+    The readiness signal is ``profile=default`` appearing in
+    ``/opt/data/logs/container-boot.log``, which the 02-reconcile-profiles
+    cont-init script writes on every boot. That log entry fires AFTER
+    stage2-hook.sh completes, so by the time it appears the full
+    cont-init chain (UID remap, chown, config seeding, skills sync,
+    browser discovery, config migration) has run.
+
+    Raises ``TimeoutError`` if the container never becomes ready — much
+    better than a fixed ``time.sleep()`` that either wastes time on fast
+    machines or flakes on slow ones.
+    """
+    end = time.monotonic() + deadline_s
+    while time.monotonic() < end:
+        r = docker_exec(
+            container,
+            "sh", "-c",
+            "cat /opt/data/logs/container-boot.log 2>/dev/null",
+            timeout=5,
+        )
+        if r.returncode == 0 and "profile=default" in r.stdout:
+            return
+        time.sleep(interval_s)
+    raise TimeoutError(
+        f"container {container} did not finish cont-init within {deadline_s}s"
+    )
+
+
+def start_container(
+    image: str,
+    name: str,
+    *env: str,
+    cmd: str = "sleep infinity",
+    timeout: int = 60,
+) -> str:
+    """Start a detached container and wait for cont-init to finish.
+
+    Args:
+        image: Docker image to run.
+        name: Container name (cleanup is the caller's responsibility —
+            typically handled by the ``container_name`` fixture).
+        env: Env vars as ``KEY=VALUE`` strings, each passed via ``-e``.
+        cmd: Container CMD (default ``sleep infinity``).
+        timeout: ``docker run`` subprocess timeout.
+
+    Returns the container name. Raises on ``docker run`` failure or if
+    the container never finishes cont-init within 30s.
+    """
+    args = ["docker", "run", "-d", "--name", name]
+    for e in env:
+        args.extend(["-e", e])
+    args.extend([image, *cmd.split()])
+    subprocess.run(args, check=True, capture_output=True, timeout=timeout)
+    wait_for_container_ready(name)
+    return name
+
+
+def restart_container(container: str, timeout: int = 60) -> None:
+    """Restart a container and wait for cont-init to finish.
+
+    Equivalent to ``docker restart <container>`` followed by
+    :func:`wait_for_container_ready`.
+    """
+    subprocess.run(
+        ["docker", "restart", container],
+        check=True, capture_output=True, timeout=timeout,
+    )
+    wait_for_container_ready(container)
+
+
+def poll_container(
+    container: str,
+    probe: str,
+    *,
+    deadline_s: float = 30.0,
+    interval_s: float = 0.5,
+    user: str = "hermes",
+) -> tuple[bool, str]:
+    """Repeatedly run ``probe`` inside the container until it exits 0 or
+    ``deadline_s`` elapses.
+
+    Returns ``(success, last_stdout)``. Useful for waiting on a process
+    to appear, a port to open, a file to contain a string, etc.
+    """
+    end = time.monotonic() + deadline_s
+    last = ""
+    while time.monotonic() < end:
+        r = docker_exec_sh(container, probe, user=user, timeout=10)
+        last = r.stdout
+        if r.returncode == 0:
+            return True, last
+        time.sleep(interval_s)
+    return False, last
+
+
+def wait_for_path(
+    container: str,
+    path: str,
+    *,
+    kind: str = "f",
+    deadline_s: float = 30.0,
+    interval_s: float = 0.25,
+) -> bool:
+    """Poll ``test -<kind> <path>`` inside the container until success or timeout.
+
+    ``kind`` is the ``test`` flag: ``'f'`` for file, ``'d'`` for directory,
+    ``'e'`` for existence. Returns ``True`` on success, ``False`` on timeout.
+    """
+    return poll_container(
+        container, f"test -{kind} {path}",
+        deadline_s=deadline_s, interval_s=interval_s,
+    )[0]
+
+
+def wait_for_log(
+    container: str,
+    log_path: str,
+    needle: str,
+    *,
+    deadline_s: float = 30.0,
+    interval_s: float = 0.25,
+) -> str:
+    """Poll until a log file inside the container contains ``needle``.
+
+    Returns the matching log content on success, or the last observed
+    contents on timeout (so the caller can render a meaningful diagnostic).
+    """
+    end = time.monotonic() + deadline_s
+    last = ""
+    while time.monotonic() < end:
+        r = docker_exec_sh(
+            container, f"cat {log_path} 2>/dev/null", timeout=5,
+        )
+        if r.returncode == 0:
+            last = r.stdout
+            if needle in last:
+                return last
+        time.sleep(interval_s)
+    return last
--- a/tests/docker/test_config_migration.py
+++ b/tests/docker/test_config_migration.py
@@ -0,0 +1,69 @@
+"""Runtime smoke test for Docker config-schema migration on boot.
+
+Build the real image and verify: a config.yaml present in $HERMES_HOME
+is migrated by docker_config_migrate.py on boot, running as the hermes
+user.
+"""
+from __future__ import annotations
+
+from tests.docker.conftest import docker_exec, docker_exec_sh, start_container
+
+
+def test_config_migration_runs_on_boot(
+    built_image: str, container_name: str,
+) -> None:
+    """A config.yaml in $HERMES_HOME must be migrated on boot by
+    docker_config_migrate.py, running as the hermes user."""
+    # Start container
+    start_container(built_image, container_name)
+
+    # Verify config.yaml exists (should be seeded by stage2 if not present)
+    r = docker_exec_sh(
+        container_name,
+        "test -f /opt/data/config.yaml && echo EXISTS || echo MISSING",
+        timeout=10,
+    )
+    assert "EXISTS" in r.stdout, (
+        f"config.yaml not found in $HERMES_HOME: {r.stdout}"
+    )
+
+    # Verify the migration script exists in the image
+    r = docker_exec_sh(
+        container_name,
+        "test -f /opt/hermes/scripts/docker_config_migrate.py && "
+        "echo SCRIPT_EXISTS || echo SCRIPT_MISSING",
+        timeout=10,
+    )
+    assert "SCRIPT_EXISTS" in r.stdout, (
+        f"docker_config_migrate.py not found in image: {r.stdout}"
+    )
+
+    # Verify config.yaml is owned by hermes (migration ran as hermes)
+    r = docker_exec_sh(
+        container_name,
+        'stat -c "%U" /opt/data/config.yaml',
+        timeout=10,
+    )
+    assert r.stdout.strip() == "hermes", (
+        f"config.yaml not owned by hermes (migration may have run as root): "
+        f"{r.stdout.strip()}"
+    )
+
+
+def test_config_migration_opt_out_env_var_respected(
+    built_image: str, container_name: str,
+) -> None:
+    """HERMES_SKIP_CONFIG_MIGRATION=1 must skip the migration."""
+    start_container(
+        built_image, container_name, "HERMES_SKIP_CONFIG_MIGRATION=1",
+    )
+
+    # config.yaml should still be seeded (seeding is separate from migration)
+    r = docker_exec_sh(
+        container_name,
+        "test -f /opt/data/config.yaml && echo EXISTS || echo MISSING",
+        timeout=10,
+    )
+    assert "EXISTS" in r.stdout, (
+        f"config.yaml should be seeded even with migration skipped: {r.stdout}"
+    )
--- a/tests/docker/test_container_restart.py
+++ b/tests/docker/test_container_restart.py
@@ -32,13 +32,6 @@ def _docker(*args: str, **kw) -> subprocess.CompletedProcess[str]:
    )


-def _exec(container: str, *args: str, timeout: int = 30) -> subprocess.CompletedProcess[str]:
-    return docker_exec(container, *args, timeout=timeout)
-
-
-def _sh(container: str, cmd: str, timeout: int = 30) -> subprocess.CompletedProcess[str]:
-    return docker_exec_sh(container, cmd, timeout=timeout)
-

 def _wait_for_path(
    container: str,
@@ -61,7 +54,7 @@ def _wait_for_path(
    """
    end = time.monotonic() + deadline_s
    while time.monotonic() < end:
-        r = _sh(container, f"test -{kind} {path}", timeout=5)
+        r = docker_exec_sh(container, f"test -{kind} {path}", timeout=5)
        if r.returncode == 0:
            return True
        time.sleep(interval_s)
@@ -86,7 +79,7 @@ def _wait_for_reconcile_log_mention(
    end = time.monotonic() + deadline_s
    last = ""
    while time.monotonic() < end:
-        r = _sh(container, "cat /opt/data/logs/container-boot.log", timeout=5)
+        r = docker_exec_sh(container, "cat /opt/data/logs/container-boot.log", timeout=5)
        if r.returncode == 0:
            last = r.stdout
            if f"profile={profile}" in last:
@@ -145,16 +138,16 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
    # Create the profile + start its gateway. The Phase 4 hooks
    # register the s6 service slot during create and the dispatch
    # path brings it up via s6-svc -u.
-    r = _exec(container, "hermes", "profile", "create", "coder")
+    r = docker_exec(container, "hermes", "profile", "create", "coder")
    assert r.returncode == 0, f"profile create failed: {r.stderr}"

-    r = _exec(container, "hermes", "-p", "coder", "gateway", "start", timeout=60)
+    r = docker_exec(container, "hermes", "-p", "coder", "gateway", "start", timeout=60)
    assert r.returncode == 0, f"gateway start failed: {r.stderr}"

    # Give the service time to actually come up under supervision.
    deadline = time.monotonic() + 15.0
    while time.monotonic() < deadline:
-        r = _sh(container, "/command/s6-svstat /run/service/gateway-coder")
+        r = docker_exec_sh(container, "/command/s6-svstat /run/service/gateway-coder")
        if r.returncode == 0 and "up " in r.stdout:
            break
        time.sleep(0.5)
@@ -170,7 +163,7 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
        "p = pathlib.Path('/opt/data/profiles/coder/gateway_state.json'); "
        "p.write_text(json.dumps({'gateway_state': 'running', 'timestamp': 1}))"
    )
-    _exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
+    docker_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()

    # Restart. After this, /run/service/ is empty until cont-init.d
    # runs the reconciler. We need to wait long enough for the
@@ -190,14 +183,14 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
    ), "slot not recreated after restart"

    # No `down` marker — we asked for auto-start.
-    r = _sh(container, "test -f /run/service/gateway-coder/down")
+    r = docker_exec_sh(container, "test -f /run/service/gateway-coder/down")
    assert r.returncode != 0, "down marker present despite prior_state=running"


 def test_stopped_gateway_stays_stopped_after_restart(restart_container: str) -> None:
    container = restart_container

-    _exec(container, "hermes", "profile", "create", "writer").check_returncode()
+    docker_exec(container, "hermes", "profile", "create", "writer").check_returncode()

    # Write 'stopped' directly so we don't have to race against the
    # gateway's own state writes.
@@ -206,7 +199,7 @@ def test_stopped_gateway_stays_stopped_after_restart(restart_container: str) ->
        "p = pathlib.Path('/opt/data/profiles/writer/gateway_state.json'); "
        "p.write_text(json.dumps({'gateway_state': 'stopped', 'timestamp': 1}))"
    )
-    _exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
+    docker_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()

    _docker("restart", container, timeout=60).check_returncode()
    log = _wait_for_reconcile_log_mention(container, "writer", deadline_s=30.0)
@@ -218,7 +211,7 @@ def test_stopped_gateway_stays_stopped_after_restart(restart_container: str) ->
    )

    # Down marker present.
-    r = _sh(container, "test -f /run/service/gateway-writer/down")
+    r = docker_exec_sh(container, "test -f /run/service/gateway-writer/down")
    assert r.returncode == 0, "down marker missing despite prior_state=stopped"


@@ -229,7 +222,7 @@ def test_stale_gateway_pid_cleaned_up_on_restart(restart_container: str) -> None
    process-mismatch checks."""
    container = restart_container

-    _exec(container, "hermes", "profile", "create", "ghost").check_returncode()
+    docker_exec(container, "hermes", "profile", "create", "ghost").check_returncode()

    # Stamp stale runtime files alongside a 'running' state so the
    # reconciler walks this profile.
@@ -240,15 +233,15 @@ def test_stale_gateway_pid_cleaned_up_on_restart(restart_container: str) -> None
        "(p / 'gateway.pid').write_text(json.dumps({'pid': 99999, 'host': 'old'})); "
        "(p / 'processes.json').write_text('[]')"
    )
-    _exec(container, "python3", "-c", stamp, timeout=10).check_returncode()
+    docker_exec(container, "python3", "-c", stamp, timeout=10).check_returncode()

    _docker("restart", container, timeout=60).check_returncode()
    _wait_for_reconcile_log_mention(container, "ghost", deadline_s=30.0)

    # Stale runtime files swept.
-    r = _sh(container, "test -f /opt/data/profiles/ghost/gateway.pid")
+    r = docker_exec_sh(container, "test -f /opt/data/profiles/ghost/gateway.pid")
    assert r.returncode != 0, "stale gateway.pid survived restart"
-    r = _sh(container, "test -f /opt/data/profiles/ghost/processes.json")
+    r = docker_exec_sh(container, "test -f /opt/data/profiles/ghost/processes.json")
    assert r.returncode != 0, "stale processes.json survived restart"


@@ -271,15 +264,15 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
    """
    container = restart_container

-    _exec(container, "hermes", "profile", "create", "live").check_returncode()
-    r = _exec(container, "hermes", "-p", "live", "gateway", "start", timeout=60)
+    docker_exec(container, "hermes", "profile", "create", "live").check_returncode()
+    r = docker_exec(container, "hermes", "-p", "live", "gateway", "start", timeout=60)
    assert r.returncode == 0, f"gateway start failed: {r.stderr}"

    # Wait for the gateway to actually come up under supervision AND write
    # its own gateway_state=running (we do NOT stamp it ourselves).
    deadline = time.monotonic() + 20.0
    while time.monotonic() < deadline:
-        r = _sh(container, "/command/s6-svstat /run/service/gateway-live")
+        r = docker_exec_sh(container, "/command/s6-svstat /run/service/gateway-live")
        if r.returncode == 0 and "up " in r.stdout:
            break
        time.sleep(0.5)
@@ -290,7 +283,7 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
    deadline = time.monotonic() + 15.0
    state = ""
    while time.monotonic() < deadline:
-        r = _sh(
+        r = docker_exec_sh(
            container,
            "cat /opt/data/profiles/live/gateway_state.json 2>/dev/null",
        )
@@ -322,7 +315,7 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
    assert _wait_for_path(
        container, "/run/service/gateway-live", kind="d", deadline_s=10.0,
    ), "slot not recreated after restart"
-    r = _sh(container, "test -f /run/service/gateway-live/down")
+    r = docker_exec_sh(container, "test -f /run/service/gateway-live/down")
    assert r.returncode != 0, (
        "down marker present despite a live gateway being restarted — "
        "the signal-initiated shutdown wrongly persisted 'stopped' (#42675)"
--- a/tests/docker/test_dashboard.py
+++ b/tests/docker/test_dashboard.py
@@ -16,36 +16,14 @@ import json
 import subprocess
 import time

-from tests.docker.conftest import docker_exec, docker_exec_sh
-
-
-def _poll(container: str, probe: str, *, deadline_s: float = 30.0,
-          interval_s: float = 0.5) -> tuple[bool, str]:
-    """Repeatedly run ``probe`` inside the container until it exits 0 or
-    ``deadline_s`` elapses. Returns (success, last stdout)."""
-    end = time.monotonic() + deadline_s
-    last = ""
-    while time.monotonic() < end:
-        r = docker_exec_sh(container, probe, timeout=10)
-        last = r.stdout
-        if r.returncode == 0:
-            return True, last
-        time.sleep(interval_s)
-    return False, last
+from tests.docker.conftest import docker_exec, docker_exec_sh, start_container, poll_container


 def test_dashboard_not_running_by_default(
    built_image: str, container_name: str,
 ) -> None:
    """Without HERMES_DASHBOARD, no dashboard process should be running."""
-    subprocess.run(
-        ["docker", "run", "-d", "--name", container_name, built_image,
-         "sleep", "60"],
-        check=True, capture_output=True, timeout=30,
-    )
-    # Give the entrypoint enough time to finish bootstrap; if a dashboard
-    # were going to start it'd be visible by now.
-    time.sleep(5)
+    start_container(built_image, container_name, cmd="sleep 60")
    r = docker_exec(container_name, "pgrep", "-f", "hermes dashboard")
    # pgrep exits non-zero when no match found
    assert r.returncode != 0, (
@@ -64,12 +42,7 @@ def test_dashboard_slot_reports_down_when_disabled(
    writes a `down` marker file in the live service-dir when
    HERMES_DASHBOARD is unset, so the slot reflects reality.
    """
-    subprocess.run(
-        ["docker", "run", "-d", "--name", container_name, built_image,
-         "sleep", "60"],
-        check=True, capture_output=True, timeout=30,
-    )
-    time.sleep(5)
+    start_container(built_image, container_name, cmd="sleep 60")
    # /command/ isn't on PATH for docker-exec sessions, so call by
    # absolute path.
    r = docker_exec(
@@ -135,7 +108,7 @@ def test_dashboard_opt_in_starts(
    # Poll for the dashboard subprocess to appear — the entrypoint
    # backgrounds it and bootstrap (skills sync etc.) can take a few
    # seconds before the python process actually launches.
-    ok, _ = _poll(
+    ok, _ = poll_container(
        container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
    )
    assert ok, "Dashboard should be running with HERMES_DASHBOARD=1"
@@ -160,7 +133,7 @@ def test_dashboard_port_override(
    # to the port yet — uvicorn takes another second or two to come up.
    # The image doesn't ship ss/netstat, so probe /proc/net/tcp directly:
    # port 9120 = 0x23A0, state 0A = LISTEN.
-    ok, stdout = _poll(
+    ok, stdout = poll_container(
        container_name,
        "grep -E ' 0+:23A0 .* 0A ' /proc/net/tcp /proc/net/tcp6 "
        "2>/dev/null",
@@ -193,7 +166,7 @@ def test_dashboard_restarts_after_crash(
        check=True, capture_output=True, timeout=30,
    )
    # Wait for the first dashboard to come up.
-    ok, _ = _poll(
+    ok, _ = poll_container(
        container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
    )
    assert ok, "Dashboard never started initially"
@@ -409,7 +382,7 @@ def test_dashboard_insecure_env_var_no_longer_bypasses_gate(
    # Fail-closed: the dashboard process must NOT successfully serve. Probe
    # for a few seconds; /api/status should never become reachable because
    # start_server raised SystemExit before binding.
-    ok, _ = _poll(
+    ok, _ = poll_container(
        container_name,
        "curl -fsS -m 2 http://127.0.0.1:9119/api/status >/dev/null 2>&1",
        deadline_s=12.0,
--- a/tests/docker/test_docker_exec_privilege_drop.py
+++ b/tests/docker/test_docker_exec_privilege_drop.py
@@ -287,4 +287,4 @@ def test_e2e_login_then_supervised_gateway_can_read_auth(
        "Files written by `docker exec` are unreadable to the hermes user "
        f"(supervised gateway UID): {unreadable}. The shim failed to drop "
        "privileges before the write."
-    )
+    )
--- a/tests/docker/test_gateway_bootstrap_state.py
+++ b/tests/docker/test_gateway_bootstrap_state.py
@@ -0,0 +1,157 @@
+"""Runtime smoke tests for Docker gateway_state.json bootstrap seeding.
+
+Build the real image and verify the actual runtime behavior:
+
+  1. HERMES_GATEWAY_BOOTSTRAP_STATE=running on a fresh volume seeds
+     gateway_state.json with running state
+  2. An existing gateway_state.json is never clobbered (first-boot-only)
+  3. No env var = no seed (default down-on-first-boot preserved)
+  4. Only literal "running" is honored; other values are ignored
+"""
+from __future__ import annotations
+
+import json
+import subprocess
+
+from tests.docker.conftest import docker_exec, docker_exec_sh, wait_for_container_ready
+
+
+def _start_container(
+    built_image: str, name: str, *env: str,
+) -> str:
+    """Start a container with given env vars, return its name."""
+    args = ["docker", "run", "-d", "--name", name]
+    for e in env:
+        args.extend(["-e", e])
+    args.extend([built_image, "sleep", "infinity"])
+    subprocess.run(args, check=True, capture_output=True, timeout=60)
+    wait_for_container_ready(name)
+    return name
+
+
+def test_seeds_running_state_on_blank_volume(
+    built_image: str, container_name: str,
+) -> None:
+    """HERMES_GATEWAY_BOOTSTRAP_STATE=running on a fresh volume must
+    seed gateway_state.json with a valid running state."""
+    _start_container(
+        built_image, container_name,
+        "HERMES_GATEWAY_BOOTSTRAP_STATE=running",
+    )
+
+    r = docker_exec_sh(
+        container_name,
+        "cat /opt/data/gateway_state.json 2>/dev/null || echo NONE",
+        timeout=10,
+    )
+    assert r.stdout.strip() != "NONE", (
+        f"gateway_state.json not seeded on fresh volume: {r.stdout}"
+    )
+    state = json.loads(r.stdout.strip())
+    assert state.get("gateway_state") == "running", (
+        f"expected gateway_state=running, got: {state}"
+    )
+
+
+def test_does_not_clobber_existing_state(
+    built_image: str, container_name: str,
+) -> None:
+    """An existing gateway_state.json must never be overwritten by the
+    seed, even when the bootstrap env var says running.
+
+    We use a named volume so we can pre-create the state file before
+    the container boots. The [ ! -f ] guard in stage2 must skip seeding
+    because the file already exists. We check the file immediately after
+    boot — before the gateway service has a chance to write its own
+    state — by reading it as fast as possible after container start.
+    """
+    import json as _json
+
+    volume = f"{container_name}-vol"
+    subprocess.run(
+        ["docker", "volume", "create", volume],
+        check=True, capture_output=True, timeout=10,
+    )
+
+    # Pre-create the state file via a throwaway container
+    existing = _json.dumps({"gateway_state": "stopped", "pid": 123})
+    subprocess.run(
+        ["docker", "run", "--rm", "-v", f"{volume}:/opt/data",
+         "--entrypoint", "sh", built_image,
+         "-c", f"printf '{existing}\\n' > /opt/data/gateway_state.json"],
+        check=True, capture_output=True, timeout=30,
+    )
+
+    # Boot with the env var set — stage2 must NOT clobber the existing file
+    subprocess.run(
+        ["docker", "run", "-d", "--name", container_name,
+         "-v", f"{volume}:/opt/data",
+         "-e", "HERMES_GATEWAY_BOOTSTRAP_STATE=running",
+         built_image, "sleep", "infinity"],
+        check=True, capture_output=True, timeout=60,
+    )
+    # Read the file as quickly as possible — the gateway service may
+    # start and write its own state, but the stage2 [ ! -f ] guard runs
+    # during cont-init (before any service starts), so the file must
+    # still be our "stopped" state at this point.
+    wait_for_container_ready(container_name)
+    r = docker_exec_sh(
+        container_name, "cat /opt/data/gateway_state.json", timeout=10,
+    )
+    state = _json.loads(r.stdout.strip())
+    assert state.get("gateway_state") == "stopped", (
+        f"existing state was clobbered by bootstrap seed: {state}"
+    )
+
+    # Cleanup
+    subprocess.run(
+        ["docker", "rm", "-f", container_name],
+        capture_output=True, timeout=10,
+    )
+    subprocess.run(
+        ["docker", "volume", "rm", "-f", volume],
+        capture_output=True, timeout=10,
+    )
+
+
+def test_no_seed_when_env_unset(
+    built_image: str, container_name: str,
+) -> None:
+    """No HERMES_GATEWAY_BOOTSTRAP_STATE = no seed file written."""
+    _start_container(built_image, container_name)
+
+    r = docker_exec_sh(
+        container_name,
+        "test -f /opt/data/gateway_state.json && "
+        "echo EXISTS || echo ABSENT",
+        timeout=10,
+    )
+    assert "ABSENT" in r.stdout, (
+        f"gateway_state.json was seeded without the env var: {r.stdout}"
+    )
+
+
+def test_non_running_value_ignored(
+    built_image: str, container_name: str,
+) -> None:
+    """Only literal 'running' is honored; any other value is ignored."""
+    for bogus in ("stopped", "Running", "1", "true", "starting"):
+        # Need a fresh container per iteration
+        name = f"{container_name}-{bogus}"
+        _start_container(
+            built_image, name,
+            f"HERMES_GATEWAY_BOOTSTRAP_STATE={bogus}",
+        )
+        r = docker_exec_sh(
+            name,
+            "test -f /opt/data/gateway_state.json && "
+            "echo EXISTS || echo ABSENT",
+            timeout=10,
+        )
+        assert "ABSENT" in r.stdout, (
+            f"bogus value {bogus!r} should not seed a state file: {r.stdout}"
+        )
+        subprocess.run(
+            ["docker", "rm", "-f", name],
+            capture_output=True, timeout=10,
+        )
--- a/tests/docker/test_gateway_run_supervised.py
+++ b/tests/docker/test_gateway_run_supervised.py
@@ -26,12 +26,8 @@ import time
 from tests.docker.conftest import docker_exec_sh


-def _sh(container: str, command: str, timeout: int = 30):
-    return docker_exec_sh(container, command, timeout=timeout)
-
-
 def _svstat(container: str, slot: str = "gateway-default") -> str:
-    r = _sh(container, f"/command/s6-svstat /run/service/{slot}")
+    r = docker_exec_sh(container, f"/command/s6-svstat /run/service/{slot}")
    return r.stdout if r.returncode == 0 else ""


@@ -98,7 +94,7 @@ def test_gateway_run_redirects_to_supervised(
    # The CMD process (PID under /init that the wrapper exec'd into)
    # should be sleeping, not the gateway. We grep `ps` for the
    # `sleep infinity` heartbeat.
-    r = _sh(container_name, "ps -eo pid,cmd | grep -v grep | grep 'sleep infinity'")
+    r = docker_exec_sh(container_name, "ps -eo pid,cmd | grep -v grep | grep 'sleep infinity'")
    assert r.returncode == 0 and "sleep infinity" in r.stdout, (
        f"expected `sleep infinity` heartbeat process; got ps:\n{r.stdout}\n"
        f"stderr: {r.stderr}"
@@ -175,7 +171,7 @@ def test_gateway_run_no_supervise_flag_preserves_legacy_behavior(
    if status == "running":
        # Gateway running in foreground — the CMD process should be
        # the gateway itself, NOT a sleep-infinity heartbeat.
-        r = _sh(
+        r = docker_exec_sh(
            container_name,
            "ps -eo pid,ppid,cmd | grep -v grep | awk '/main-wrapper.sh|rc.init top/ { wrapper_pid=$1 } "
            "$3==\"sleep\" && $4==\"infinity\" && $2==wrapper_pid { c++ } END { print c+0 }'",
@@ -186,7 +182,7 @@ def test_gateway_run_no_supervise_flag_preserves_legacy_behavior(
            f"--no-supervise: expected NO `sleep infinity` parented to "
            f"the CMD wrapper (foreground gateway should be the CMD), "
            f"found {redirected_sleeps}. "
-            f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
+            f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
        )

        # The gateway-default s6 slot exists (the cont-init.d
@@ -271,14 +267,14 @@ def test_supervised_gateway_does_not_recurse(
    # recursion guard fails, s6 would respawn fresh `gateway run`
    # processes on every cycle, leaving multiple Python-process
    # descendants under the gateway-default supervise tree.
-    r = _sh(container_name, "ps -eo pid,cmd | grep -v grep | grep -E 'python.*hermes.*gateway run' | wc -l")
+    r = docker_exec_sh(container_name, "ps -eo pid,cmd | grep -v grep | grep -E 'python.*hermes.*gateway run' | wc -l")
    assert r.returncode == 0
    n = int(r.stdout.strip() or 0)
    assert n <= 1, (
        f"expected at most one supervised python `hermes gateway run` "
        f"process (the legitimately-supervised gateway); found {n}. "
        f"Recursion guard may have failed. "
-        f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
+        f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
    )

    # Stronger positive assertion: there should be exactly one
@@ -286,7 +282,7 @@ def test_supervised_gateway_does_not_recurse(
    # CMD process (PID 17 typically). The static `main-hermes`
    # service has its own `sleep infinity` child; THAT one is fine
    # and unrelated to our redirect.
-    r = _sh(
+    r = docker_exec_sh(
        container_name,
        # Find PID of the CMD process (main-wrapper.sh or its sh
        # parent), then count `sleep infinity` children.
@@ -298,7 +294,7 @@ def test_supervised_gateway_does_not_recurse(
    assert redirected == 1, (
        f"expected exactly one `sleep infinity` parented to the CMD "
        f"wrapper (the redirect heartbeat); found {redirected}. "
-        f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
+        f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
    )


@@ -377,19 +373,18 @@ def test_supervised_gateway_stdout_reaches_docker_logs(
        "This means the `1` action directive in _render_log_run isn't "
        "forwarding stdout to /init. "
        f"docker logs (last 2000 chars):\n{combined[-2000:]}\n"
-        f"file contents:\n{_sh(container_name, 'cat /opt/data/logs/gateways/default/current').stdout}"
+        f"file contents:\n{docker_exec_sh(container_name, 'cat /opt/data/logs/gateways/default/current').stdout}"
    )

    # Cross-check: the same banner must also be in the rotated log
    # file (we kept the file destination, just added stdout). The
    # file version has s6-log's ISO 8601 timestamp prefix; the
    # docker logs version is raw.
-    file_contents = _sh(
+    file_contents = docker_exec_sh(
        container_name, "cat /opt/data/logs/gateways/default/current",
    ).stdout
    assert "⚕" in file_contents or "Hermes Gateway Starting" in file_contents, (
        "Banner also missing from rotated log file — the file "
        "destination may have been dropped by the new s6-log script. "
        f"File contents:\n{file_contents}"
-    )
-
+    )
--- a/tests/docker/test_home_override_scripts.py
+++ b/tests/docker/test_home_override_scripts.py
@@ -0,0 +1,169 @@
+"""Runtime smoke tests for Docker HOME overrides and script behavior.
+
+Build the real image and verify the actual runtime behavior:
+
+  1. main-wrapper preserves the Docker ``-w`` working directory
+  2. dashboard service resets HOME to /opt/data before privilege drop
+  3. dashboard does not auto-add ``--insecure`` from a non-loopback bind host
+  4. stage2 hook repairs profiles/ and cron/ ownership on every boot
+"""
+from __future__ import annotations
+
+import subprocess
+
+from tests.docker.conftest import docker_exec, docker_exec_sh, start_container, restart_container
+
+
+def test_main_wrapper_preserves_docker_workdir(
+    built_image: str, container_name: str,
+) -> None:
+    """The main-wrapper MUST save and restore the original working directory
+    so the container starts in the Docker ``-w`` directory, not /opt/data.
+
+    Regression test for #35472. We pass ``-w /tmp`` and a command that
+    prints its cwd; the output must be ``/tmp``, proving the wrapper
+    restored the cwd after its internal ``cd /opt/data``.
+    """
+    r = subprocess.run(
+        ["docker", "run", "--rm", "-w", "/tmp",
+         built_image, "sh", "-c", "pwd"],
+        capture_output=True, text=True, timeout=60,
+    )
+    assert r.returncode == 0, f"container failed: {r.stderr[-1000:]}"
+    # The stage2 hook emits boot logs (config migration, skills sync)
+    # to stdout before the CMD runs. The actual pwd output is the LAST
+    # line of stdout.
+    last_line = r.stdout.strip().split("\n")[-1].strip()
+    assert last_line == "/tmp", (
+        f"expected cwd /tmp, got {last_line!r} — "
+        f"main-wrapper did not preserve the Docker -w directory"
+    )
+
+
+def test_dashboard_service_resets_home(
+    built_image: str, container_name: str,
+) -> None:
+    """The dashboard run script must export HOME=/opt/data before dropping
+    privileges, so HOME-anchored state (discord lockfile, XDG dirs) doesn't
+    try to write to /root (the /init context's HOME).
+
+    We check this by inspecting the environment of the dashboard service
+    process if it's running, or by verifying the run script sets HOME
+    before the exec. At runtime, the cleanest check is: start the
+    container with HERMES_DASHBOARD=1 and verify the dashboard process
+    (if it starts) has HOME=/opt/data.
+
+    Since the dashboard requires an auth provider on non-loopback binds,
+    we bind to 127.0.0.1 where the auth gate doesn't engage, and check
+    the process env.
+    """
+    start_container(built_image, container_name, "HERMES_DASHBOARD=1", "HERMES_DASHBOARD_HOST=127.0.0.1")
+
+    # Check if the dashboard process is running and inspect its HOME.
+    r = docker_exec_sh(
+        container_name,
+        # Find the dashboard process (hermes dashboard) and read its HOME
+        # from /proc/<pid>/environ. If not running, verify the run script
+        # itself exports HOME=/opt/data by grepping the script source.
+        'pid=$(pgrep -f "hermes dashboard" | head -1); '
+        'if [ -n "$pid" ]; then '
+        '  tr "\\0" "\\n" < /proc/$pid/environ | grep "^HOME="; '
+        'else '
+        '  grep -q "export HOME=/opt/data" '
+        '    /opt/hermes/docker/s6-rc.d/dashboard/run && '
+        '  echo "HOME=/opt/data"; '
+        'fi',
+        timeout=15,
+    )
+    assert "HOME=/opt/data" in r.stdout, (
+        f"dashboard process or run script does not set HOME=/opt/data: "
+        f"stdout={r.stdout!r} stderr={r.stderr!r}"
+    )
+
+
+def test_dashboard_does_not_auto_insecure_from_host(
+    built_image: str, container_name: str,
+) -> None:
+    """The dashboard MUST NOT auto-add ``--insecure`` based on
+    HERMES_DASHBOARD_HOST. The auth gate is the authority now.
+
+    The auth gate is the authority on whether non-loopback binds are
+    safe; ``--insecure`` must never be auto-derived from the bind host.
+
+    We start the container with a non-loopback bind host and verify
+    the dashboard process does NOT receive ``--insecure`` in its
+    command line. If the dashboard fails to start (because the auth
+    gate correctly blocks an unauthenticated non-loopback bind), that's
+    also acceptable — the point is no auto-insecure.
+    """
+    start_container(built_image, container_name, "HERMES_DASHBOARD=1", "HERMES_DASHBOARD_HOST=0.0.0.0")
+
+    # Check the dashboard process command line for --insecure.
+    r = docker_exec_sh(
+        container_name,
+        'pid=$(pgrep -f "hermes dashboard" | head -1); '
+        'if [ -n "$pid" ]; then '
+        '  tr "\\0" " " < /proc/$pid/cmdline; '
+        'fi',
+        timeout=10,
+    )
+    cmdline = r.stdout.strip()
+    # If the process is running, it must NOT have --insecure.
+    if cmdline:
+        assert "--insecure" not in cmdline, (
+            f"dashboard process has --insecure in cmdline (auto-derived "
+            f"from host): {cmdline!r}"
+        )
+
+
+def test_stage2_repairs_profiles_and_cron_ownership(
+    built_image: str, container_name: str,
+) -> None:
+    """profiles/ and cron/ must both be reclaimed after root-context writes.
+
+    The stage2 hook chowns these dirs to hermes:hermes on every boot.
+    We simulate a root-owned file in each, then restart the container
+    and verify ownership is repaired.
+    """
+    start_container(built_image, container_name)
+
+    # Create root-owned files in profiles/ and cron/ to simulate
+    # docker exec (root) writes.
+    docker_exec(
+        container_name, "mkdir", "-p", "/opt/data/profiles/testprof",
+        user="root", timeout=5,
+    )
+    docker_exec(
+        container_name, "touch", "/opt/data/profiles/testprof/marker",
+        user="root", timeout=5,
+    )
+    docker_exec(
+        container_name, "touch", "/opt/data/cron/root_owned.json",
+        user="root", timeout=5,
+    )
+
+    # Verify they're root-owned before restart.
+    r = docker_exec_sh(
+        container_name,
+        'stat -c "%U" /opt/data/profiles/testprof/marker '
+        '/opt/data/cron/root_owned.json',
+        timeout=5,
+    )
+    assert "root" in r.stdout, (
+        f"expected root-owned files before restart, got: {r.stdout!r}"
+    )
+
+    # Restart — stage2 hook runs again and repairs ownership.
+    restart_container(container_name)
+
+    # Verify files are now owned by hermes.
+    r = docker_exec_sh(
+        container_name,
+        'stat -c "%U" /opt/data/profiles/testprof/marker '
+        '/opt/data/cron/root_owned.json',
+        timeout=5,
+    )
+    assert "hermes" in r.stdout, (
+        f"expected hermes-owned files after restart, got: {r.stdout!r} — "
+        f"stage2 hook did not repair profiles/ and cron/ ownership"
+    )
--- a/tests/docker/test_immutable_install.py
+++ b/tests/docker/test_immutable_install.py
@@ -0,0 +1,140 @@
+"""Runtime smoke tests for Docker immutable install tree and install-method stamp.
+
+Build the real image and verify at runtime:
+
+  1. /opt/hermes is not writable by the hermes user (immutable install tree)
+  2. PYTHONDONTWRITEBYTECODE and HERMES_DISABLE_LAZY_INSTALLS are set
+  3. /opt/hermes/.install_method contains "docker" (code-scoped stamp)
+  4. $HERMES_HOME/.install_method is NOT stamped as "docker" by stage2
+  5. A stale "docker" stamp in $HERMES_HOME is healed (removed) on boot
+"""
+from __future__ import annotations
+
+from tests.docker.conftest import (
+    docker_exec,
+    docker_exec_sh,
+    restart_container,
+    start_container,
+)
+
+
+def test_install_tree_not_writable_by_hermes(
+    built_image: str, container_name: str,
+) -> None:
+    """The hermes user must not be able to modify /opt/hermes.
+
+    The install tree (source, venv, TUI bundle, node_modules) must remain
+    root-owned and non-writable so an agent session cannot self-modify
+    the installation and brick the gateway.
+    """
+    start_container(built_image, container_name)
+
+    r = docker_exec_sh(
+        container_name,
+        # Try to create a file under /opt/hermes as the hermes user
+        "touch /opt/hermes/test_write 2>&1 && "
+        "echo WRITE_SUCCEEDED || echo WRITE_FAILED",
+        timeout=10,
+    )
+    assert "WRITE_FAILED" in r.stdout, (
+        f"hermes user can write to /opt/hermes (install tree not immutable): "
+        f"{r.stdout}"
+    )
+
+    # Also check a key subdirectory
+    r = docker_exec_sh(
+        container_name,
+        "touch /opt/hermes/.venv/test_write 2>&1 && "
+        "echo WRITE_SUCCEEDED || echo WRITE_FAILED",
+        timeout=10,
+    )
+    assert "WRITE_FAILED" in r.stdout, (
+        f"hermes user can write to /opt/hermes/.venv: {r.stdout}"
+    )
+
+
+def test_hermes_disable_lazy_installs_and_dont_write_bytecode(
+    built_image: str, container_name: str,
+) -> None:
+    """The container must set PYTHONDONTWRITEBYTECODE and
+    HERMES_DISABLE_LAZY_INSTALLS=1 so no .pyc files are written to the
+    immutable install tree and no lazy installs attempt to modify it."""
+    start_container(built_image, container_name)
+
+    r = docker_exec_sh(
+        container_name,
+        'test "$PYTHONDONTWRITEBYTECODE" = "1" && '
+        'test "$HERMES_DISABLE_LAZY_INSTALLS" = "1" && '
+        'echo ENV_OK || echo ENV_MISSING',
+        timeout=10,
+    )
+    assert "ENV_OK" in r.stdout, (
+        f"expected PYTHONDONTWRITEBYTECODE=1 and "
+        f"HERMES_DISABLE_LAZY_INSTALLS=1, got: {r.stdout} stderr={r.stderr}"
+    )
+
+
+def test_install_method_stamp_is_code_scoped(
+    built_image: str, container_name: str,
+) -> None:
+    """The 'docker' install-method stamp must be baked at
+    /opt/hermes/.install_method (code-scoped), NOT in $HERMES_HOME."""
+    start_container(built_image, container_name)
+
+    # Code-scoped stamp must exist and say "docker"
+    r = docker_exec_sh(
+        container_name,
+        "cat /opt/hermes/.install_method",
+        timeout=10,
+    )
+    assert r.returncode == 0, (
+        f"/opt/hermes/.install_method not found: {r.stderr}"
+    )
+    assert r.stdout.strip() == "docker", (
+        f"expected 'docker' stamp, got: {r.stdout.strip()!r}"
+    )
+
+    # $HERMES_HOME must NOT have a 'docker' stamp
+    r = docker_exec_sh(
+        container_name,
+        "cat /opt/data/.install_method 2>/dev/null || echo NONE",
+        timeout=10,
+    )
+    assert r.stdout.strip() != "docker", (
+        f"$HERMES_HOME/.install_method is stamped 'docker' - stage2 must "
+        f"not stamp the data volume (shared with host installs)"
+    )
+
+
+def test_stale_docker_stamp_in_home_is_healed_on_boot(
+    built_image: str, container_name: str,
+) -> None:
+    """A stale 'docker' stamp left in $HERMES_HOME by an older image
+    must be removed on boot so shared homes self-heal."""
+    # Start container, write a stale stamp
+    start_container(built_image, container_name)
+
+    # Write a stale 'docker' stamp as root
+    docker_exec(
+        container_name, "sh", "-c",
+        "printf 'docker\\n' > /opt/data/.install_method",
+        user="root", timeout=5,
+    )
+    # Verify it exists
+    r = docker_exec_sh(container_name, "cat /opt/data/.install_method", timeout=5)
+    assert r.stdout.strip() == "docker"
+
+    # Restart - stage2 should heal it
+    restart_container(container_name)
+
+    # The stale stamp must be gone
+    r = docker_exec_sh(
+        container_name,
+        "test -f /opt/data/.install_method && "
+        "cat /opt/data/.install_method || echo HEALED",
+        timeout=10,
+    )
+    assert "HEALED" in r.stdout or r.stdout.strip() != "docker", (
+        f"stale 'docker' stamp in $HERMES_HOME was not healed on boot: "
+        f"{r.stdout}"
+    )
--- a/tests/docker/test_license_file_present.py
+++ b/tests/docker/test_license_file_present.py
@@ -0,0 +1,26 @@
+"""Runtime smoke test for Docker image license-file presence.
+
+Build the real image and verify the LICENSE file is present inside the
+container (PEP 639 license-files metadata must resolve inside the
+Docker image).
+"""
+from __future__ import annotations
+
+import subprocess
+
+
+def test_docker_image_contains_license_file(built_image: str) -> None:
+    """The LICENSE file must be present inside the built Docker image.
+
+    PEP 639 license-files metadata references LICENSE, and the Docker
+    build context must not exclude it.
+    """
+    r = subprocess.run(
+        ["docker", "run", "--rm", "--entrypoint", "test",
+         built_image, "-f", "/opt/hermes/LICENSE"],
+        capture_output=True, text=True, timeout=60,
+    )
+    assert r.returncode == 0, (
+        f"LICENSE file not found at /opt/hermes/LICENSE inside the Docker "
+        f"image: {r.stderr[-500:]}"
+    )
--- a/tests/docker/test_log_dir_seed.py
+++ b/tests/docker/test_log_dir_seed.py
@@ -0,0 +1,47 @@
+"""Runtime smoke test for Docker $HERMES_HOME/logs/gateways seeding.
+
+Build the real image and verify logs/ and logs/gateways/ exist and are
+owned by the hermes user after container boot.
+
+Regression guard for #45258: if the first gateway log service runs in
+root context, logs/gateways/ is created root-owned; every profile
+registered later runs its log service as the dropped hermes user and
+s6-log crash-loops on mkdir: Permission denied.
+"""
+from __future__ import annotations
+
+from tests.docker.conftest import docker_exec_sh, start_container
+
+
+def test_logs_gateways_seeded_and_hermes_owned(
+    built_image: str, container_name: str,
+) -> None:
+    """logs/ and logs/gateways/ must exist and be owned by hermes after boot."""
+    start_container(built_image, container_name)
+
+    # Both directories must exist
+    r = docker_exec_sh(
+        container_name,
+        "test -d /opt/data/logs && "
+        "test -d /opt/data/logs/gateways && "
+        "echo DIRS_OK || echo DIRS_MISSING",
+        timeout=10,
+    )
+    assert "DIRS_OK" in r.stdout, (
+        f"logs/ or logs/gateways/ not seeded: {r.stdout}"
+    )
+
+    # Both must be owned by hermes
+    r = docker_exec_sh(
+        container_name,
+        'logs_owner=$(stat -c "%U" /opt/data/logs); '
+        'gateways_owner=$(stat -c "%U" /opt/data/logs/gateways); '
+        'echo "logs=$logs_owner gateways=$gateways_owner"',
+        timeout=10,
+    )
+    assert "logs=hermes" in r.stdout, (
+        f"logs/ not owned by hermes: {r.stdout}"
+    )
+    assert "gateways=hermes" in r.stdout, (
+        f"logs/gateways/ not owned by hermes: {r.stdout}"
+    )
--- a/tests/docker/test_profile_gateway.py
+++ b/tests/docker/test_profile_gateway.py
@@ -69,12 +69,7 @@ def _svstat_wants_up(container: str) -> bool:
 def test_profile_create_then_gateway_start(
    built_image: str, container_name: str,
 ) -> None:
-    subprocess.run(
-        ["docker", "run", "-d", "--name", container_name, built_image,
-         "sleep", "120"],
-        check=True, capture_output=True, timeout=30,
-    )
-    time.sleep(3)
+    start_container(built_image, container_name, cmd="sleep 120")

    r = _sh(container_name, f"hermes profile create {PROFILE}")
    assert r.returncode == 0, f"profile create failed: {r.stderr}"
@@ -114,12 +109,7 @@ def test_profile_delete_stops_gateway(
 ) -> None:
    """Deleting a profile should stop its gateway and remove the s6
    service slot."""
-    subprocess.run(
-        ["docker", "run", "-d", "--name", container_name, built_image,
-         "sleep", "120"],
-        check=True, capture_output=True, timeout=30,
-    )
-    time.sleep(3)
+    start_container(built_image, container_name, cmd="sleep 120")

    _sh(container_name, f"hermes profile create {PROFILE}")
    _sh(container_name, f"hermes -p {PROFILE} gateway start", timeout=60)
@@ -135,4 +125,4 @@ def test_profile_delete_stops_gateway(
    time.sleep(2)
    # Service slot should be gone.
    r = _sh(container_name, f"test -d /run/service/gateway-{PROFILE}")
-    assert r.returncode != 0, "s6 service slot still present after profile delete"
+    assert r.returncode != 0, "s6 service slot still present after profile delete"
--- a/tests/docker/test_puid_pgid_remap.py
+++ b/tests/docker/test_puid_pgid_remap.py
@@ -0,0 +1,88 @@
+"""Runtime smoke tests for Docker PUID/PGID and UID/GID remap.
+
+Build the real image and verify the actual runtime behavior:
+
+  1. PUID/PGID env vars remap the hermes user UID/GID at boot
+  2. HERMES_UID/HERMES_GID take precedence over PUID/PGID aliases
+  3. NAS-style low UIDs (99:100) are accepted and remapped
+  4. Invalid UIDs are rejected
+  5. The remapped user can write to the data volume
+"""
+from __future__ import annotations
+
+from tests.docker.conftest import docker_exec_sh, start_container
+
+
+def test_puid_pgid_remaps_hermes_user(
+    built_image: str, container_name: str,
+) -> None:
+    """PUID=1000 PGID=1000 must remap the hermes user to UID 1000."""
+    start_container(built_image, container_name, "PUID=1000", "PGID=1000")
+
+    r = docker_exec_sh(
+        container_name,
+        "id -u hermes",
+        timeout=10,
+    )
+    assert r.stdout.strip() == "1000", (
+        f"expected hermes UID 1000 after PUID remap, got: {r.stdout.strip()}"
+    )
+
+    r = docker_exec_sh(
+        container_name,
+        "id -g hermes",
+        timeout=10,
+    )
+    assert r.stdout.strip() == "1000", (
+        f"expected hermes GID 1000 after PGID remap, got: {r.stdout.strip()}"
+    )
+
+
+def test_hermes_uid_gid_take_precedence_over_aliases(
+    built_image: str, container_name: str,
+) -> None:
+    """HERMES_UID/HERMES_GID must win over PUID/PGID when both are set."""
+    start_container(built_image, container_name, "HERMES_UID=2000", "HERMES_GID=2001", "PUID=1000", "PGID=1000")
+
+    r = docker_exec_sh(container_name, "id -u hermes", timeout=10)
+    assert r.stdout.strip() == "2000", (
+        f"expected hermes UID 2000 (HERMES_UID wins), got: {r.stdout.strip()}"
+    )
+
+    r = docker_exec_sh(container_name, "id -g hermes", timeout=10)
+    assert r.stdout.strip() == "2001", (
+        f"expected hermes GID 2001 (HERMES_GID wins), got: {r.stdout.strip()}"
+    )
+
+
+def test_nas_low_uid_accepted(
+    built_image: str, container_name: str,
+) -> None:
+    """NAS-style low UIDs (99:100, common on Unraid) must be accepted."""
+    start_container(built_image, container_name, "PUID=99", "PGID=100")
+
+    r = docker_exec_sh(container_name, "id -u hermes", timeout=10)
+    assert r.stdout.strip() == "99", (
+        f"expected hermes UID 99, got: {r.stdout.strip()}"
+    )
+
+    r = docker_exec_sh(container_name, "id -g hermes", timeout=10)
+    assert r.stdout.strip() == "100", (
+        f"expected hermes GID 100, got: {r.stdout.strip()}"
+    )
+
+
+def test_remap_enables_data_volume_writes(
+    built_image: str, container_name: str,
+) -> None:
+    """After remap, the hermes user must be able to write to /opt/data."""
+    start_container(built_image, container_name, "PUID=1000", "PGID=1000")
+
+    r = docker_exec_sh(
+        container_name,
+        "touch /opt/data/test_write && echo WRITE_OK || echo WRITE_FAIL",
+        timeout=10,
+    )
+    assert "WRITE_OK" in r.stdout, (
+        f"hermes user cannot write to /opt/data after remap: {r.stdout}"
+    )
--- a/tests/docker/test_s6_profile_gateway_integration.py
+++ b/tests/docker/test_s6_profile_gateway_integration.py
@@ -22,7 +22,7 @@ from __future__ import annotations
 import subprocess
 import time

-from tests.docker.conftest import docker_exec
+from tests.docker.conftest import docker_exec, start_container


 _REGISTER_SCRIPT = """
@@ -45,49 +45,39 @@ print("UNREGISTERED")
 """


-def _exec(container: str, *args: str, timeout: int = 30) -> subprocess.CompletedProcess:
-    return docker_exec(container, *args, timeout=timeout)
-
-
 def test_s6_register_creates_service_dir_in_live_container(
    built_image: str, container_name: str,
 ) -> None:
    """S6ServiceManager.register_profile_gateway must create
    ``/run/service/gateway-<profile>/`` and trigger s6-svscan rescan
    against the real s6 supervision tree."""
-    subprocess.run(
-        ["docker", "run", "-d", "--name", container_name, built_image,
-         "sleep", "120"],
-        check=True, capture_output=True, timeout=30,
-    )
-    # Give the supervision tree a moment to come up.
-    time.sleep(3)
+    start_container(built_image, container_name, cmd="sleep 120")

-    r = _exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
+    r = docker_exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
    assert "REGISTERED" in r.stdout, (
        f"register failed: stderr={r.stderr!r} stdout={r.stdout!r}"
    )

    # Service directory exists with the expected structure.
-    r = _exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
+    r = docker_exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
    assert r.returncode == 0, "service directory not created"

-    r = _exec(container_name, "test", "-f", "/run/service/gateway-phase3test/run")
+    r = docker_exec(container_name, "test", "-f", "/run/service/gateway-phase3test/run")
    assert r.returncode == 0, "run script not created"

-    r = _exec(container_name, "test", "-f",
+    r = docker_exec(container_name, "test", "-f",
              "/run/service/gateway-phase3test/log/run")
    assert r.returncode == 0, "log/run script not created"

    # s6-svscan picked it up — s6-svstat works against the dir.
    # `docker exec` doesn't put /command/ on PATH (only the supervision
    # tree does), so call s6-svstat by absolute path.
-    r = _exec(container_name, "/command/s6-svstat",
+    r = docker_exec(container_name, "/command/s6-svstat",
              "/run/service/gateway-phase3test")
    assert r.returncode == 0, f"s6-svstat failed: {r.stderr or r.stdout}"

    # list_profile_gateways picks it up.
-    r = _exec(container_name, "python3", "-c", (
+    r = docker_exec(container_name, "python3", "-c", (
        "from hermes_cli.service_manager import S6ServiceManager;"
        "print(S6ServiceManager().list_profile_gateways())"
    ))
@@ -108,22 +98,22 @@ def test_s6_unregister_removes_service_dir_in_live_container(
    time.sleep(3)

    # First register so we have something to unregister.
-    r = _exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
+    r = docker_exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
    assert "REGISTERED" in r.stdout

    # Then unregister.
-    r = _exec(container_name, "python3", "-c", _UNREGISTER_SCRIPT, timeout=30)
+    r = docker_exec(container_name, "python3", "-c", _UNREGISTER_SCRIPT, timeout=30)
    assert "UNREGISTERED" in r.stdout, (
        f"unregister failed: stderr={r.stderr!r} stdout={r.stdout!r}"
    )

    # Directory is gone.
-    r = _exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
+    r = docker_exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
    assert r.returncode != 0, "service directory still exists after unregister"

    # list_profile_gateways no longer includes it.
-    r = _exec(container_name, "python3", "-c", (
+    r = docker_exec(container_name, "python3", "-c", (
        "from hermes_cli.service_manager import S6ServiceManager;"
        "print(S6ServiceManager().list_profile_gateways())"
    ))
-    assert "phase3test" not in r.stdout
+    assert "phase3test" not in r.stdout
--- a/tests/docker/test_stage2_browser_discovery.py
+++ b/tests/docker/test_stage2_browser_discovery.py
@@ -0,0 +1,82 @@
+"""Runtime smoke tests for Docker stage2 browser executable discovery.
+
+Build the real image and verify the chromium binary is actually
+discovered at boot: ``AGENT_BROWSER_EXECUTABLE_PATH`` is set, points to
+a real executable, and is a browser binary (not a shared library picked
+up by a broad ``find | grep``).
+"""
+from __future__ import annotations
+
+from tests.docker.conftest import docker_exec_sh, start_container
+
+
+def test_stage2_discovers_chromium_binary(
+    built_image: str, container_name: str,
+) -> None:
+    """The stage2 hook must discover the Playwright chromium binary and
+    export AGENT_BROWSER_EXECUTABLE_PATH so the browser tool can find it.
+
+    The discovery uses filename matching, not a broad ``find | grep``:
+    shared libraries (libGLESv2.so etc.) inherit the executable bit from
+    Playwright's tarball but must not be picked up. This test verifies the
+    discovered binary is a real browser, not a .so.
+    """
+    start_container(built_image, container_name)
+
+    # AGENT_BROWSER_EXECUTABLE_PATH must be set via s6 container_environment.
+    r = docker_exec_sh(
+        container_name,
+        "cat /run/s6/container_environment/AGENT_BROWSER_EXECUTABLE_PATH",
+        timeout=10,
+    )
+    assert r.returncode == 0, (
+        f"AGENT_BROWSER_EXECUTABLE_PATH not set by stage2 hook: {r.stderr}"
+    )
+    browser_path = r.stdout.strip()
+    assert browser_path, "AGENT_BROWSER_EXECUTABLE_PATH is empty"
+
+    # Must be a real file and executable.
+    r = docker_exec_sh(
+        container_name,
+        f'test -x "{browser_path}"',
+        timeout=5,
+    )
+    assert r.returncode == 0, (
+        f"discovered browser path is not executable: {browser_path}"
+    )
+
+    # Must be a browser binary by basename — NOT a shared library.
+    accepted_names = (
+        "chrome", "chromium", "chrome-headless-shell",
+        "headless_shell", "chromium-browser",
+    )
+    r = docker_exec_sh(
+        container_name,
+        f'basename "{browser_path}"',
+        timeout=5,
+    )
+    basename = r.stdout.strip()
+    assert basename in accepted_names, (
+        f"discovered binary basename {basename!r} is not a recognized "
+        f"browser name (accepted: {accepted_names}) — the discovery may "
+        f"have picked up a shared library (.so) instead of the real browser"
+    )
+
+
+def test_stage2_browser_path_accessible_to_hermes_user(
+    built_image: str, container_name: str,
+) -> None:
+    """The discovered browser binary must be accessible to the
+    unprivileged hermes user (UID 10000), since that's who runs
+    agent-browser subprocesses."""
+    start_container(built_image, container_name)
+
+    r = docker_exec_sh(
+        container_name,
+        'path="$(cat /run/s6/container_environment/AGENT_BROWSER_EXECUTABLE_PATH)" '
+        '&& test -r "$path" && test -x "$path"',
+        timeout=10,
+    )
+    assert r.returncode == 0, (
+        f"browser binary not readable+executable by hermes user: {r.stderr}"
+    )
--- a/tests/docker/test_tini_compat_shim.py
+++ b/tests/docker/test_tini_compat_shim.py
@@ -0,0 +1,54 @@
+"""Runtime smoke test for the Docker tini compatibility shim (#34192).
+
+Build the real image and verify:
+
+  1. /usr/bin/tini exists and is a symlink to /init (the compat shim
+     for orchestration templates that still reference /usr/bin/tini)
+  2. The actual ENTRYPOINT is /init (s6-overlay), not /usr/bin/tini
+"""
+from __future__ import annotations
+
+import subprocess
+
+
+def test_tini_compat_symlink_exists(built_image: str) -> None:
+    """/usr/bin/tini must exist as a symlink to /init.
+
+    Regression for #34192: orchestration templates (e.g. Hostinger's
+    'Hermes WebUI' catalog) still pin /usr/bin/tini as the entrypoint.
+    The shim symlinks it to /init so legacy wrappers exec the right
+    PID-1 reaper without behavior change.
+    """
+    r = subprocess.run(
+        ["docker", "run", "--rm", "--entrypoint", "sh",
+         built_image, "-c",
+         'test -L /usr/bin/tini && '
+         'test "$(readlink -f /usr/bin/tini)" = "/init"'],
+        capture_output=True, text=True, timeout=60,
+    )
+    assert r.returncode == 0, (
+        f"/usr/bin/tini is not a symlink to /init: {r.stderr[-500:]}"
+    )
+
+
+def test_entrypoint_is_init_not_tini(built_image: str) -> None:
+    """The image's actual ENTRYPOINT must be /init (s6-overlay).
+
+    The tini shim is only for legacy external wrappers; the image's own
+    runtime must continue to use the canonical /init.
+    """
+    r = subprocess.run(
+        ["docker", "inspect", built_image,
+         "--format", "{{json .Config.Entrypoint}}"],
+        capture_output=True, text=True, timeout=30,
+    )
+    assert r.returncode == 0, f"docker inspect failed: {r.stderr}"
+    entrypoint = r.stdout.strip()
+    assert "/init" in entrypoint, (
+        f"ENTRYPOINT is not /init: {entrypoint!r}"
+    )
+    # The entrypoint array should be ["/init", "/opt/hermes/docker/main-wrapper.sh"]
+    # /usr/bin/tini should NOT be in the entrypoint.
+    assert "tini" not in entrypoint.lower(), (
+        f"ENTRYPOINT references tini instead of /init: {entrypoint!r}"
+    )
--- a/tests/docker/test_toplevel_chown.py
+++ b/tests/docker/test_toplevel_chown.py
@@ -0,0 +1,93 @@
+"""Runtime smoke tests for Docker top-level state-file ownership repair.
+
+Build the real image and verify the actual runtime behavior:
+
+  1. Root-owned top-level state files (auth.json, state.db, gateway.lock,
+     gateway_state.json) are chowned to hermes on boot
+  2. Non-allowlisted host-owned files are NOT touched (targeted, not
+     blanket find -user root sweep)
+"""
+from __future__ import annotations
+
+from tests.docker.conftest import (
+    docker_exec,
+    docker_exec_sh,
+    restart_container,
+    start_container,
+)
+
+
+# The files the stage2 hook should repair (mirrors the allowlist in
+# stage2-hook.sh). We test a representative subset.
+ALLOWLISTED_FILES = ("auth.json", "state.db", "gateway.lock", "gateway_state.json")
+
+
+def test_root_owned_state_files_repaired_on_boot(
+    built_image: str, container_name: str,
+) -> None:
+    """Root-owned top-level state files must be chowned to hermes on boot."""
+    start_container(built_image, container_name)
+
+    # Create root-owned state files to simulate docker exec (root) writes
+    for f in ALLOWLISTED_FILES:
+        docker_exec(
+            container_name, "touch", f"/opt/data/{f}",
+            user="root", timeout=5,
+        )
+
+    # Verify they're root-owned
+    r = docker_exec_sh(
+        container_name,
+        " ".join(f'stat -c %U /opt/data/{f}' for f in ALLOWLISTED_FILES),
+        timeout=5,
+    )
+    for line in r.stdout.split():
+        assert line == "root", f"expected root-owned, got: {line}"
+
+    # Restart - stage2 should repair ownership
+    restart_container(container_name)
+
+    # Verify files are now hermes-owned
+    r = docker_exec_sh(
+        container_name,
+        " ".join(f'stat -c %U /opt/data/{f}' for f in ALLOWLISTED_FILES),
+        timeout=5,
+    )
+    for line in r.stdout.split():
+        assert line == "hermes", (
+            f"expected hermes-owned after restart, got: {line}"
+        )
+
+
+def test_non_allowlisted_host_file_not_touched(
+    built_image: str, container_name: str,
+) -> None:
+    """A non-allowlisted host-owned file must NOT be chowned, even if
+    root-owned. Regression guard for #19788 / #19795: a bind-mounted
+    $HERMES_HOME may contain host-owned files Hermes does not manage."""
+    start_container(built_image, container_name)
+
+    # Create a non-allowlisted file as root
+    docker_exec(
+        container_name, "touch", "/opt/data/host_secret.json",
+        user="root", timeout=5,
+    )
+    # Make it root-owned explicitly (it already is, but be sure)
+    docker_exec(
+        container_name, "chown", "root:root", "/opt/data/host_secret.json",
+        user="root", timeout=5,
+    )
+
+    # Restart
+    restart_container(container_name)
+
+    # The file must STILL be root-owned (not touched by stage2)
+    r = docker_exec_sh(
+        container_name,
+        "stat -c %U /opt/data/host_secret.json",
+        timeout=5,
+    )
+    assert r.stdout.strip() == "root", (
+        f"non-allowlisted host file was chowned by stage2 (should be "
+        f"preserved): {r.stdout.strip()}"
+    )
--- a/tests/docker/test_user_flag_guard.py
+++ b/tests/docker/test_user_flag_guard.py
@@ -0,0 +1,66 @@
+"""Runtime smoke tests for Docker --user flag guard.
+
+Build the real image and verify the actual runtime behavior:
+
+  1. docker run --user <arbitrary-uid> is rejected with actionable guidance
+  2. Root start (default) works fine
+  3. --user <hermes-uid> (10000) is allowed (supported non-root start)
+"""
+from __future__ import annotations
+
+import subprocess
+
+
+def test_arbitrary_user_uid_rejected(
+    built_image: str,
+) -> None:
+    """docker run --user 1000 must be rejected with actionable guidance."""
+    r = subprocess.run(
+        ["docker", "run", "--rm", "--user", "1000:1000",
+         built_image, "echo", "should_not_reach"],
+        capture_output=True, text=True, timeout=60,
+    )
+    assert r.returncode != 0, (
+        f"container started with arbitrary --user UID unexpectedly: {r.stdout}"
+    )
+    assert "should_not_reach" not in r.stdout, (
+        f"container ran despite --user rejection: {r.stdout}"
+    )
+    combined = r.stdout + r.stderr
+    assert "not supported" in combined.lower(), (
+        f"rejection message missing 'not supported': {combined[-500:]}"
+    )
+    # Must mention the remediation env vars
+    assert "HERMES_UID" in combined or "PUID" in combined, (
+        f"rejection message missing remediation guidance: {combined[-500:]}"
+    )
+
+
+def test_root_start_works(
+    built_image: str,
+) -> None:
+    """Root start (the default) must work without issues."""
+    r = subprocess.run(
+        ["docker", "run", "--rm", built_image, "sh", "-c", "echo OK"],
+        capture_output=True, text=True, timeout=60,
+    )
+    assert r.returncode == 0, f"root start failed: {r.stderr[-500:]}"
+    assert "OK" in r.stdout
+
+
+def test_user_pinned_to_hermes_uid_works(
+    built_image: str,
+) -> None:
+    """docker run --user 10000:10000 (the hermes UID) must be allowed.
+
+    This is the supported non-root start from #34648 / #34837.
+    """
+    r = subprocess.run(
+        ["docker", "run", "--rm", "--user", "10000:10000",
+         built_image, "sh", "-c", "echo OK"],
+        capture_output=True, text=True, timeout=60,
+    )
+    assert r.returncode == 0, (
+        f"--user 10000:10000 (hermes UID) was rejected: {r.stderr[-500:]}"
+    )
+    assert "OK" in r.stdout
--- a/tests/docker/test_zombie_reaping.py
+++ b/tests/docker/test_zombie_reaping.py
@@ -12,22 +12,16 @@ docstring.
 """
 from __future__ import annotations

-import subprocess
 import time

-from tests.docker.conftest import docker_exec, docker_exec_sh
+from tests.docker.conftest import docker_exec, docker_exec_sh, start_container, start_container


 def test_orphan_zombies_reaped(
    built_image: str, container_name: str,
 ) -> None:
    """Spawn an orphan child that exits immediately. PID 1 must reap it."""
-    subprocess.run(
-        ["docker", "run", "-d", "--name", container_name, built_image,
-         "sleep", "60"],
-        check=True, capture_output=True, timeout=30,
-    )
-    time.sleep(2)
+    start_container(built_image, container_name, cmd="sleep 60")

    # `( ( sleep 0.1 & ) & ); sleep 1` creates a grandchild detached from
    # the original docker exec session — it becomes an orphan reparented
@@ -42,4 +36,4 @@ def test_orphan_zombies_reaped(
        line for line in r.stdout.split("\n")
        if line.strip().startswith("Z")
    ]
-    assert not zombies, f"Zombies not reaped by PID 1: {zombies}"
+    assert not zombies, f"Zombies not reaped by PID 1: {zombies}"
--- a/tests/test_docker_home_override_scripts.py
+++ b/tests/test_docker_home_override_scripts.py
@@ -1,91 +0,0 @@
-"""Regression tests for Docker HOME overrides under s6/with-contenv."""
-
-from pathlib import Path
-
-
-REPO_ROOT = Path(__file__).resolve().parent.parent
-DASHBOARD_RUN = REPO_ROOT / "docker" / "s6-rc.d" / "dashboard" / "run"
-MAIN_WRAPPER = REPO_ROOT / "docker" / "main-wrapper.sh"
-STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
-
-
-def test_main_wrapper_preserves_docker_workdir() -> None:
-    """The main-wrapper MUST save and restore the original working
-    directory so the container starts in the Docker ``-w`` directory,
-    not /opt/data.  Regression test for #35472.
-    """
-    text = MAIN_WRAPPER.read_text(encoding="utf-8")
-
-    # Must save original cwd before cd /opt/data.
-    assert "_hermes_orig_cwd" in text, (
-        "main-wrapper.sh must save the original cwd before cd /opt/data"
-    )
-    assert 'HERMES_ORIG_CWD:-$PWD' in text, (
-        "main-wrapper.sh must capture PWD as the fallback original cwd"
-    )
-
-    # Must cd to /opt/data for init (existing behaviour preserved).
-    assert "cd /opt/data" in text
-
-    # Must restore original cwd before exec'ing the user command.
-    # The restore cd must appear AFTER venv activation but BEFORE the
-    # first exec / if-block.
-    activate_idx = text.index("/opt/hermes/.venv/bin/activate")
-    restore_idx = text.index('cd "$_hermes_orig_cwd"')
-    exec_idx = text.index("if [ $# -eq 0 ]")
-    assert activate_idx < restore_idx < exec_idx, (
-        "cd $_hermes_orig_cwd must appear after venv activation and "
-        "before the exec routing block"
-    )
-
-
-def test_dashboard_run_resets_home_before_dropping_privileges() -> None:
-    text = DASHBOARD_RUN.read_text(encoding="utf-8")
-
-    assert "#!/command/with-contenv sh" in text
-    assert "export HOME=/opt/data" in text
-    assert "exec s6-setuidgid hermes hermes dashboard" in text
-
-
-def test_dashboard_run_does_not_derive_insecure_from_bind_host() -> None:
-    """The s6 dashboard run script MUST NOT auto-add ``--insecure`` based on
-    ``HERMES_DASHBOARD_HOST``. Doing so disables the OAuth auth gate on
-    every non-loopback bind even when an auth provider is registered —
-    the exact regression that exposed every wildcard-subdomain agent
-    dashboard publicly until early 2026.
-
-    The opt-in is now explicit: ``HERMES_DASHBOARD_INSECURE=1`` (truthy).
-    The auth gate is the authority on whether non-loopback binds are safe.
-    """
-    text = DASHBOARD_RUN.read_text(encoding="utf-8")
-
-    # No legacy host-derived flip.
-    assert '127.0.0.1|localhost' not in text, (
-        "Run script still derives --insecure from the bind host. The gate "
-        "is the authority now — opt in via HERMES_DASHBOARD_INSECURE instead."
-    )
-    assert 'case "$dash_host" in' not in text, (
-        "Legacy host-derived --insecure case-statement is back."
-    )
-
-    # New opt-in env var present.
-    assert "HERMES_DASHBOARD_INSECURE" in text, (
-        "Explicit HERMES_DASHBOARD_INSECURE opt-in is missing."
-    )
-    # Truthy values aligned with the rest of the s6 scripts
-    # (e.g. HERMES_DASHBOARD).
-    for truthy in ("1", "true", "TRUE", "True", "yes", "YES", "Yes"):
-        assert truthy in text, (
-            f"HERMES_DASHBOARD_INSECURE should accept truthy value {truthy!r}"
-        )
-
-
-def test_stage2_hook_repairs_profiles_and_cron_ownership_on_every_boot() -> None:
-    """profiles/ and cron/ must both be reclaimed after root-context writes."""
-    text = STAGE2_HOOK.read_text(encoding="utf-8")
-
-    assert 'if [ -d "$HERMES_HOME/profiles" ]; then' in text
-    assert 'chown -R hermes:hermes "$HERMES_HOME/profiles" 2>/dev/null || true' in text
-
-    assert 'if [ -d "$HERMES_HOME/cron" ]; then' in text
-    assert 'chown -R hermes:hermes "$HERMES_HOME/cron" 2>/dev/null || true' in text
--- a/tests/test_docker_stage2_browser_discovery.py
+++ b/tests/test_docker_stage2_browser_discovery.py
@@ -1,19 +0,0 @@
-"""Regression tests for Docker stage2 browser executable discovery."""
-
-from pathlib import Path
-
-
-def test_stage2_discovers_playwright_arm64_headless_shell() -> None:
-    """Playwright's --only-shell layout may use a headless_shell basename."""
-    script = Path("docker/stage2-hook.sh").read_text()
-
-    assert "-name 'headless_shell'" in script
-
-
-def test_stage2_discovery_stays_filename_matched() -> None:
-    """Avoid broad path grep that can pick executable shared libraries."""
-    script = Path("docker/stage2-hook.sh").read_text()
-
-    discovery_block = script.split("browser_bin=$(", 1)[1].split(")\n    if", 1)[0]
-    assert "find \"$PLAYWRIGHT_BROWSERS_PATH\" -type f -executable" in discovery_block
-    assert "grep" not in discovery_block
--- a/tests/test_dockerfile_tini_compat_shim.py
+++ b/tests/test_dockerfile_tini_compat_shim.py
@@ -1,49 +0,0 @@
-"""Regression test for #34192 — Dockerfile must keep the tini compat shim
-for orchestration templates that still reference /usr/bin/tini.
-
-This is a documentation-as-test guard: removing the shim is a real
-choice, but it should be done deliberately (e.g. once Hostinger's
-'Hermes WebUI' catalog updates to /init) and not by accident.
-"""
-
-from __future__ import annotations
-
-from pathlib import Path
-
-
-def _dockerfile_text() -> str:
-    return (Path(__file__).parent.parent / "Dockerfile").read_text(encoding="utf-8")
-
-
-def test_tini_compat_symlink_present():
-    """The /usr/bin/tini -> /init symlink line must exist for #34192."""
-    df = _dockerfile_text()
-    assert "ln -sf /init /usr/bin/tini" in df, (
-        "Dockerfile must keep the tini compat symlink (#34192). "
-        "Removing it breaks orchestration templates that still pin "
-        "/usr/bin/tini as the entrypoint (Hostinger 'Hermes WebUI' "
-        "catalog as of v0.14.x)."
-    )
-
-
-def test_tini_compat_comment_explains_why():
-    """The symlink line is comment-anchored to #34192 so a future reader
-    knows why it exists. Removing the comment makes it look like dead
-    code worth deleting."""
-    df = _dockerfile_text()
-    assert "#34192" in df, (
-        "The Dockerfile tini compat shim must keep its #34192 anchor "
-        "comment so future maintainers know why the symlink is there."
-    )
-
-
-def test_entrypoint_still_init_not_tini():
-    """Sanity check: the actual ENTRYPOINT is still /init (s6-overlay).
-    The shim is for legacy external wrappers, not for the image's own
-    runtime — that path must continue to use the canonical /init."""
-    df = _dockerfile_text()
-    assert 'ENTRYPOINT [ "/init"' in df, (
-        "Dockerfile ENTRYPOINT must remain /init (s6-overlay). The "
-        "tini shim is only for external wrappers that haven't been "
-        "updated yet."
-    )
--- a/tests/test_docker_webui_install_surface.py
+++ b/tests/test_docker_webui_install_surface.py
@@ -1,5 +1,6 @@
-"""Guards for the multi-container Hermes WebUI install surface."""
-
+"""Test that setup.py uses temporary output directories when the source
+tree is read-only (as it is inside the Docker WebUI install surface).
+"""
 from __future__ import annotations

 from pathlib import Path
@@ -20,18 +21,6 @@ def _is_under(path: str, root: Path) -> bool:
    return True


-def test_docker_context_includes_license_file() -> None:
-    """PEP 639 license-files metadata must resolve inside the Docker image."""
-    dockerignore = (REPO_ROOT / ".dockerignore").read_text(encoding="utf-8")
-    active_lines = [
-        line.strip()
-        for line in dockerignore.splitlines()
-        if line.strip() and not line.lstrip().startswith("#")
-    ]
-
-    assert "LICENSE" not in active_lines
-
-
 def test_setup_uses_temporary_outputs_when_source_tree_is_read_only(
    monkeypatch,
 ) -> None:
--- a/tests/tools/test_dockerfile_immutable_install.py
+++ b/tests/tools/test_dockerfile_immutable_install.py
@@ -12,22 +12,16 @@ def _dockerfile_text() -> str:
    return DOCKERFILE.read_text()


-def test_dockerfile_makes_opt_hermes_root_owned_and_non_writable() -> None:
+def test_dockerfile_makes_opt_hermes_readonly_for_hermes_user() -> None:
    text = _dockerfile_text()

-    assert "COPY --chown=hermes:hermes . ." not in text
-    assert "COPY . ." in text
-    assert "chown -R root:root /opt/hermes" in text
-    assert "chmod -R a+rX /opt/hermes" in text
-    assert "chmod -R a-w /opt/hermes" in text
-
-    immutable_block = re.search(
-        r"RUN mkdir -p /opt/hermes/bin && \\\n"
-        r"(?:.*\\\n)+?"
-        r"\s+chmod -R a-w /opt/hermes",
-        text,
-    )
-    assert immutable_block, "Dockerfile must lock /opt/hermes after installing code/deps"
+    # --chmod on the source COPY bakes read-only perms at copy time instead
+    # of a separate chmod -R pass (which walked ~30k files — #49113).
+    assert "COPY --link --chmod=a+rX,go-w . ." in text
+    # The old tree-walking passes must not be present.
+    assert "chown -R root:root /opt/hermes" not in text
+    assert "chmod -R a+rX /opt/hermes" not in text
+    assert "chmod -R a-w /opt/hermes" not in text


 def test_dockerfile_keeps_mutable_state_under_opt_data() -> None:
@@ -68,19 +62,17 @@ def test_dockerfile_bakes_code_scoped_install_method_stamp() -> None:
    (/opt/hermes/.install_method) first; baking it at build time keeps the
    published image self-identifying as 'docker' WITHOUT writing into the
    shared $HERMES_HOME data volume (which a host install may also use).
-    It must live inside the immutable block so the runtime user can't alter it.
+    The stamp is created by root in the shim-wiring RUN block; the hermes
+    user can't modify it (go-w from the --chmod on the source COPY).
    """
    text = _dockerfile_text()
    assert "printf 'docker\\n' > /opt/hermes/.install_method" in text

-    immutable_block = re.search(
+    # The stamp must be in the RUN block that wires the exec shim.
+    shim_block = re.search(
        r"RUN mkdir -p /opt/hermes/bin && \\\n"
        r"(?:.*\\\n)+?"
-        r"\s+chmod -R a-w /opt/hermes",
+        r"\s+printf 'docker\\n' > /opt/hermes/\.install_method",
        text,
    )
-    assert immutable_block, "immutable block must exist"
-    assert ".install_method" in immutable_block.group(0), (
-        "the code-scoped install-method stamp must be baked inside the "
-        "immutable /opt/hermes block"
-    )
+    assert shim_block, "install-method stamp must be in the shim-wiring RUN block"
--- a/tests/tools/test_stage2_hook_gateway_bootstrap_state.py
+++ b/tests/tools/test_stage2_hook_gateway_bootstrap_state.py
@@ -1,152 +0,0 @@
-"""Contract test: the s6-overlay stage2 hook seeds gateway_state.json from
-HERMES_GATEWAY_BOOTSTRAP_STATE on first boot, so a freshly-provisioned
-container can come up with the gateway already running.
-
-Background. On a blank volume there is no gateway_state.json, so the boot
-reconciler (cont-init.d/02-reconcile-profiles ->
-container_boot.reconcile_profile_gateways) registers the gateway-default s6
-slot but leaves it DOWN — it only auto-starts when the last recorded state was
-"running". A container provisioned on a fresh volume therefore comes up with
-the gateway down until something starts it.
-
-An orchestrator that wants the gateway running from first boot sets
-HERMES_GATEWAY_BOOTSTRAP_STATE=running; stage2-hook.sh (installed as
-/etc/cont-init.d/01-hermes-setup, which runs lexicographically BEFORE
-02-reconcile-profiles) seeds the state file so the reconciler sees
-prior_state=running and brings the slot up on the very first boot.
-
-This mirrors the existing HERMES_AUTH_JSON_BOOTSTRAP env-seed pattern: it seeds
-the SAME gateway_state.json the reconciler already consults, guarded by
-``[ ! -f ]`` so persisted runtime state always wins on subsequent boots (a
-deliberately-stopped gateway must stay stopped across restarts).
-"""
-from __future__ import annotations
-
-import json
-import re
-import shutil
-import subprocess
-import tempfile
-from pathlib import Path
-
-import pytest
-
-REPO_ROOT = Path(__file__).resolve().parents[2]
-STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
-
-
-@pytest.fixture(scope="module")
-def stage2_text() -> str:
-    if not STAGE2_HOOK.exists():
-        pytest.skip("docker/stage2-hook.sh not present in this checkout")
-    return STAGE2_HOOK.read_text()
-
-
-def _seed_block(text: str) -> str:
-    """Extract the ``if [ ! -f "$HERMES_HOME/gateway_state.json" ] && … fi``
-    block that seeds the gateway state file from the bootstrap env var."""
-    m = re.search(
-        r'(if \[ ! -f "\$HERMES_HOME/gateway_state\.json" \] && \\\n'
-        r"(?:.*\n)*?fi)",
-        text,
-    )
-    assert m, (
-        "stage2-hook.sh must contain the gateway_state.json bootstrap-seed block "
-        "guarded on HERMES_GATEWAY_BOOTSTRAP_STATE"
-    )
-    return m.group(1)
-
-
-def test_seed_block_present_and_guarded(stage2_text: str) -> None:
-    block = _seed_block(stage2_text)
-    # Must be a first-boot-only seed (the [ ! -f ] guard) keyed on the env var.
-    assert '[ ! -f "$HERMES_HOME/gateway_state.json" ]' in block, (
-        "seed must be guarded by [ ! -f ] so persisted state wins on restart"
-    )
-    assert "HERMES_GATEWAY_BOOTSTRAP_STATE" in block
-    assert "gateway_state" in block
-
-
-def _run_seed(
-    text: str, *, env_value: str | None, preexisting: str | None
-) -> str | None:
-    """Run the extracted seed block in a sandbox $HERMES_HOME.
-
-    ``env_value`` is the HERMES_GATEWAY_BOOTSTRAP_STATE value (None = unset).
-    ``preexisting`` is the contents of a gateway_state.json placed before the
-    block runs (None = no file). Returns the file's contents afterwards, or
-    None if it doesn't exist. ``chown``/``chmod`` are stubbed so the block
-    runs without real root.
-    """
-    bash = shutil.which("bash")
-    if bash is None:
-        pytest.skip("bash not available")
-    block = _seed_block(text)
-
-    with tempfile.TemporaryDirectory() as d:
-        dpath = Path(d)
-        home = dpath / "home"
-        home.mkdir()
-        state_file = home / "gateway_state.json"
-        if preexisting is not None:
-            state_file.write_text(preexisting)
-
-        env_line = (
-            f'export HERMES_GATEWAY_BOOTSTRAP_STATE="{env_value}"\n'
-            if env_value is not None
-            else "unset HERMES_GATEWAY_BOOTSTRAP_STATE\n"
-        )
-        script = (
-            "set -e\n"
-            f'HERMES_HOME="{home}"\n'
-            # Stub privilege ops — the sandbox isn't root.
-            "chown() { :; }\n"
-            "chmod() { :; }\n"
-            + env_line
-            + block
-        )
-        script_path = dpath / "harness.sh"
-        script_path.write_text(script)
-
-        proc = subprocess.run(
-            [bash, str(script_path)], capture_output=True, text=True
-        )
-        assert proc.returncode == 0, proc.stderr
-
-        if not state_file.exists():
-            return None
-        return state_file.read_text()
-
-
-def test_seeds_running_state_on_blank_volume(stage2_text: str) -> None:
-    """env=running + no pre-existing file -> writes a valid running state."""
-    out = _run_seed(stage2_text, env_value="running", preexisting=None)
-    assert out is not None, "seed must create gateway_state.json"
-    assert json.loads(out).get("gateway_state") == "running"
-
-
-def test_does_not_clobber_existing_state(stage2_text: str) -> None:
-    """The [ ! -f ] guard: an existing state file is never overwritten, even
-    when the bootstrap env var says running. A deliberately-stopped gateway
-    must stay stopped across restarts."""
-    existing = json.dumps({"gateway_state": "stopped", "pid": 123})
-    out = _run_seed(stage2_text, env_value="running", preexisting=existing)
-    assert out == existing, "seed must not clobber a persisted state file"
-
-
-def test_no_seed_when_env_unset(stage2_text: str) -> None:
-    """No env var -> no file written (preserves the default down-on-first-boot
-    behaviour for orchestrators that don't opt in)."""
-    out = _run_seed(stage2_text, env_value=None, preexisting=None)
-    assert out is None, "seed must not run when HERMES_GATEWAY_BOOTSTRAP_STATE is unset"
-
-
-def test_non_running_value_ignored(stage2_text: str) -> None:
-    """Only a literal "running" is honoured; any other value is ignored so a
-    typo can't write a bogus state. (The reconciler's _AUTOSTART_STATES is
-    exactly {"running"}.)"""
-    for bogus in ("stopped", "Running", "1", "true", "starting"):
-        out = _run_seed(stage2_text, env_value=bogus, preexisting=None)
-        assert out is None, (
-            f"only 'running' should seed a state file, not {bogus!r}"
-        )
--- a/tests/tools/test_stage2_hook_immutable_install.py
+++ b/tests/tools/test_stage2_hook_immutable_install.py
@@ -1,48 +0,0 @@
-"""Contract tests for the Docker stage2 immutable install-tree policy.
-
-Hosted/container Hermes keeps user-writable state under HERMES_HOME
-(/opt/data). The installed source, venv, TUI bundle, and node_modules under
-/opt/hermes must remain root-owned/non-writable by the runtime hermes user so
-an agent session cannot self-modify the installation and brick the gateway.
-"""
-from __future__ import annotations
-
-from pathlib import Path
-
-import pytest
-
-REPO_ROOT = Path(__file__).resolve().parents[2]
-STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
-
-
-@pytest.fixture(scope="module")
-def stage2_text() -> str:
-    if not STAGE2_HOOK.exists():
-        pytest.skip("docker/stage2-hook.sh not present in this checkout")
-    return STAGE2_HOOK.read_text()
-
-
-def test_stage2_does_not_chown_install_tree_to_hermes(stage2_text: str) -> None:
-    assert "Fixing ownership of build trees under $INSTALL_DIR" not in stage2_text
-    assert 'chown -R hermes:hermes \\\n        "$INSTALL_DIR/.venv"' not in stage2_text
-
-    assert "venv_owner=$(stat -c %u \"$INSTALL_DIR/.venv\"" not in stage2_text
-    assert "chown of build trees failed" not in stage2_text
-    for install_tree in (
-        '"$INSTALL_DIR/.venv" \\',
-        '"$INSTALL_DIR/ui-tui" \\',
-        '"$INSTALL_DIR/gateway" \\',
-        '"$INSTALL_DIR/node_modules" \\',
-    ):
-        assert install_tree not in stage2_text, (
-            f"stage2 must not chown {install_tree} back to hermes; "
-            "the Dockerfile keeps /opt/hermes immutable and writable state "
-            "belongs under HERMES_HOME"
-        )
-
-
-def test_stage2_documents_immutable_install_contract(stage2_text: str) -> None:
-    assert "Immutable install tree" in stage2_text
-    assert "PYTHONDONTWRITEBYTECODE" in stage2_text
-    assert "HERMES_DISABLE_LAZY_INSTALLS=1" in stage2_text
-    assert "/opt/hermes" in stage2_text
--- a/tests/tools/test_stage2_hook_install_method_stamp.py
+++ b/tests/tools/test_stage2_hook_install_method_stamp.py
@@ -1,61 +0,0 @@
-"""Contract test: the s6-overlay stage2 hook must NOT stamp the install method
-into the shared $HERMES_HOME, and must heal a stale 'docker' stamp left there
-by older images.
-
-Background (shared-$HERMES_HOME bug)
------------------------------------
-$HERMES_HOME (/opt/data) is a DATA volume that users commonly bind-mount from
-the host (``~/.hermes:/opt/data``) and sometimes share with a host-side
-Desktop/CLI install. Older images wrote ``printf 'docker' > $HERMES_HOME/.install_method``
-at boot, which clobbered the host install's own marker — so the host's in-app
-updater read 'docker' and refused to run ``hermes update`` ("doesn't apply
-inside the Docker container").
-
-The fix scopes the stamp to the install tree (baked at
-``/opt/hermes/.install_method`` in the Dockerfile, read first by
-``detect_install_method``). stage2 must therefore:
-
-  * NOT write the 'docker' stamp into $HERMES_HOME any more, and
-  * proactively remove a stale 'docker' stamp from $HERMES_HOME so homes
-    already poisoned by an older image self-heal on the next boot.
-"""
-from __future__ import annotations
-
-import re
-from pathlib import Path
-
-import pytest
-
-REPO_ROOT = Path(__file__).resolve().parents[2]
-STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
-
-
-@pytest.fixture(scope="module")
-def stage2_text() -> str:
-    if not STAGE2_HOOK.exists():
-        pytest.skip("docker/stage2-hook.sh not present in this checkout")
-    return STAGE2_HOOK.read_text()
-
-
-def test_stage2_does_not_write_install_method_into_home(stage2_text: str) -> None:
-    # No write/tee of the home-scoped install-method stamp anywhere.
-    assert not re.search(
-        r"(tee|>)\s*\"?\$HERMES_HOME/\.install_method", stage2_text
-    ), (
-        "stage2 must not stamp $HERMES_HOME/.install_method — that data dir "
-        "may be shared with a host install whose marker would be clobbered"
-    )
-
-
-def test_stage2_heals_stale_docker_home_stamp(stage2_text: str) -> None:
-    # It must remove a stale 'docker' stamp from $HERMES_HOME so already
-    # poisoned shared homes recover.
-    assert 'rm -f "$HERMES_HOME/.install_method"' in stage2_text, (
-        "stage2 must remove a stale 'docker' stamp from $HERMES_HOME to heal "
-        "homes poisoned by older images"
-    )
-    # The removal must be guarded on the value being 'docker' so we never
-    # delete a legitimately-different stamp a user/host install put there.
-    assert re.search(r'\[\s*"\$stamped"\s*=\s*"docker"\s*\]', stage2_text), (
-        "the stale-stamp removal must be guarded on the value == 'docker'"
-    )
--- a/tests/tools/test_stage2_hook_log_dir_seed.py
+++ b/tests/tools/test_stage2_hook_log_dir_seed.py
@@ -1,60 +0,0 @@
-"""Contract test: the s6-overlay stage2 hook seeds $HERMES_HOME/logs/gateways
-as the hermes user.
-
-Regression guard for #45258: the per-profile gateway log service
-(`gateway-<profile>/log/run`) creates `logs/gateways/` via `mkdir -p` but only
-chowns the leaf `logs/gateways/<profile>`. If the first log service to boot
-runs in root context, the `gateways/` parent is created root-owned and stays
-that way; every profile registered later runs its log service as the dropped
-hermes user and s6-log crash-loops on `mkdir: Permission denied`.
-
-Seeding `logs/gateways` in stage2 (cont-init runs before any service starts)
-guarantees the parent already exists hermes-owned by the time the first
-log/run executes its `mkdir -p`.
-"""
-from __future__ import annotations
-
-import re
-from pathlib import Path
-
-import pytest
-
-REPO_ROOT = Path(__file__).resolve().parents[2]
-STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
-
-
-@pytest.fixture(scope="module")
-def stage2_text() -> str:
-    if not STAGE2_HOOK.exists():
-        pytest.skip("docker/stage2-hook.sh not present in this checkout")
-    return STAGE2_HOOK.read_text()
-
-
-def _seed_mkdir_block(text: str) -> str:
-    """Extract the `as_hermes mkdir -p \\ ...` seed block."""
-    m = re.search(r"as_hermes mkdir -p \\\n(?:[^\n]*\\\n)*[^\n]*\n", text)
-    assert m, "stage2-hook.sh must contain the as_hermes mkdir -p seed block"
-    return m.group(0)
-
-
-def test_logs_gateways_is_seeded(stage2_text: str) -> None:
-    block = _seed_mkdir_block(stage2_text)
-    assert '"$HERMES_HOME/logs/gateways"' in block, (
-        "logs/gateways must be seeded hermes-owned in stage2 so profiles "
-        "added after first boot can create their log dirs (#45258)"
-    )
-    # The parent must also be seeded so mkdir -p inside the block never
-    # creates logs/ implicitly with surprising ownership.
-    assert '"$HERMES_HOME/logs"' in block
-
-
-def test_logs_subtree_is_healed_when_chown_needed(stage2_text: str) -> None:
-    """The needs_chown repair loop must cover the logs subtree recursively —
-    that is what makes the seed entry above sufficient (no separate
-    logs/gateways loop entry needed)."""
-    m = re.search(r"for sub in ([^;]*); do", stage2_text)
-    assert m, "stage2-hook.sh must contain the needs_chown subdir repair loop"
-    assert "logs" in m.group(1).split(), (
-        "the needs_chown loop must recursively chown logs/ — it covers "
-        "logs/gateways, so the seed list does not need a loop twin"
-    )
--- a/tests/tools/test_stage2_hook_puid_pgid.py
+++ b/tests/tools/test_stage2_hook_puid_pgid.py
@@ -1,110 +0,0 @@
-"""Contract test: the s6-overlay stage2 hook accepts PUID/PGID as aliases for
-HERMES_UID/HERMES_GID.
-
-Regression guard for #15290.  NAS platforms (UGOS, Synology, unRAID) bind-mount
-/opt/data from a host directory owned by the user's own UID and expect the
-LinuxServer.io PUID/PGID convention.  Without the alias those vars are silently
-ignored, the s6-setuidgid drop lands on UID 10000, and the runtime cannot read
-the volume.  HERMES_UID/HERMES_GID must still take precedence when both are
-set.
-
-The s6-overlay rework moved bootstrap from docker/entrypoint.sh (now a shim)
-to docker/stage2-hook.sh, which is installed as /etc/cont-init.d/01-hermes-setup
-by the Dockerfile.  This test targets the post-rework location.
-"""
-from __future__ import annotations
-
-import os
-import shutil
-import subprocess
-from pathlib import Path
-
-import pytest
-
-REPO_ROOT = Path(__file__).resolve().parents[2]
-STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
-
-
-@pytest.fixture(scope="module")
-def stage2_text() -> str:
-    if not STAGE2_HOOK.exists():
-        pytest.skip("docker/stage2-hook.sh not present in this checkout")
-    return STAGE2_HOOK.read_text()
-
-
-def _alias_lines(text: str) -> list[str]:
-    """The stage2 hook lines that resolve HERMES_UID/HERMES_GID from aliases."""
-    return [
-        line.strip()
-        for line in text.splitlines()
-        if line.strip().startswith(("HERMES_UID=", "HERMES_GID="))
-    ]
-
-
-def test_stage2_hook_resolves_puid_pgid_aliases(stage2_text: str) -> None:
-    alias_lines = _alias_lines(stage2_text)
-    assert any("PUID" in line for line in alias_lines), (
-        "docker/stage2-hook.sh must resolve HERMES_UID from a PUID alias; see #15290"
-    )
-    assert any("PGID" in line for line in alias_lines), (
-        "docker/stage2-hook.sh must resolve HERMES_GID from a PGID alias; see #15290"
-    )
-
-
-def _resolve(stage2_text: str, env: dict[str, str]) -> str:
-    """Run the stage2 hook's alias-resolution lines in isolation and report the
-    resolved ``HERMES_UID:HERMES_GID`` pair."""
-    bash = shutil.which("bash")
-    if bash is None:
-        pytest.skip("bash not available")
-    script = "\n".join(_alias_lines(stage2_text))
-    script += '\necho "${HERMES_UID:-}:${HERMES_GID:-}"\n'
-    proc = subprocess.run(
-        [bash, "-ec", script],
-        env={"PATH": os.environ.get("PATH", "")} | env,
-        capture_output=True,
-        text=True,
-    )
-    assert proc.returncode == 0, proc.stderr
-    return proc.stdout.strip()
-
-
-def test_puid_pgid_populate_hermes_uid_gid(stage2_text: str) -> None:
-    assert _resolve(stage2_text, {"PUID": "1000", "PGID": "10"}) == "1000:10"
-
-
-def test_hermes_uid_gid_take_precedence_over_aliases(stage2_text: str) -> None:
-    resolved = _resolve(
-        stage2_text,
-        {"HERMES_UID": "2000", "HERMES_GID": "2001", "PUID": "1000", "PGID": "10"},
-    )
-    assert resolved == "2000:2001"
-
-
-def test_no_uid_vars_leaves_values_empty(stage2_text: str) -> None:
-    # An empty resolution means the stage2 hook keeps the default hermes user.
-    assert _resolve(stage2_text, {}) == ":"
-
-
-def test_stage2_hook_creates_s6_envdir_before_writing_browser_path(stage2_text: str) -> None:
-    """Regression guard for browser-path export on runtimes where the
-    s6 container_environment directory is absent when the cont-init hook runs.
-    """
-    mkdir_line = "mkdir -p /run/s6/container_environment"
-    write_line = (
-        "printf '%s' \"$browser_bin\" > "
-        "/run/s6/container_environment/AGENT_BROWSER_EXECUTABLE_PATH"
-    )
-
-    assert mkdir_line in stage2_text
-    assert write_line in stage2_text
-    assert stage2_text.index(mkdir_line) < stage2_text.index(write_line)
-
-
-def test_stage2_hook_runs_config_migration_as_hermes(stage2_text: str) -> None:
-    assert "scripts/docker_config_migrate.py" in stage2_text
-    assert 's6-setuidgid hermes "$INSTALL_DIR/.venv/bin/python"' in stage2_text
-
-
-def test_stage2_hook_documents_config_migration_opt_out(stage2_text: str) -> None:
-    assert "HERMES_SKIP_CONFIG_MIGRATION" in stage2_text
--- a/tests/tools/test_stage2_hook_toplevel_chown.py
+++ b/tests/tools/test_stage2_hook_toplevel_chown.py
@@ -1,138 +0,0 @@
-"""Contract test: the s6-overlay stage2 hook resets ownership of hermes-owned
-top-level state files in $HERMES_HOME — but only those, never arbitrary
-host-owned files.
-
-Regression guard for the gateway restart loop reported in #35098: files such
-as gateway.lock / state.db / auth.json live directly under $HERMES_HOME (not in
-a subdir), so the targeted subdir chown misses them. When created or rewritten
-by `docker exec <container> hermes …` (root unless `-u` is passed) they land
-root-owned and the unprivileged hermes runtime then hits PermissionError on next
-startup.
-
-The fix uses an explicit allowlist rather than a blanket `find -user root`
-sweep, preserving the targeted-ownership contract from #19788 / PR #19795: a
-bind-mounted $HERMES_HOME may contain host-owned files Hermes does not manage,
-and those must never be chowned.
-
-The s6-overlay rework moved bootstrap from docker/entrypoint.sh (now a shim) to
-docker/stage2-hook.sh, installed as /etc/cont-init.d/01-hermes-setup. This test
-targets that location.
-"""
-from __future__ import annotations
-
-import os
-import re
-import shutil
-import subprocess
-from pathlib import Path
-
-import pytest
-
-REPO_ROOT = Path(__file__).resolve().parents[2]
-STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
-
-
-@pytest.fixture(scope="module")
-def stage2_text() -> str:
-    if not STAGE2_HOOK.exists():
-        pytest.skip("docker/stage2-hook.sh not present in this checkout")
-    return STAGE2_HOOK.read_text()
-
-
-def _toplevel_chown_loop(text: str) -> str:
-    """Extract the `for f in … chown hermes:hermes "$HERMES_HOME/$f" … done`
-    block that repairs top-level state-file ownership."""
-    m = re.search(
-        r"(for f in \\\n(?:.*\\\n)*?.*; do\n(?:.*\n)*?done)",
-        text,
-    )
-    assert m, "stage2-hook.sh must contain the top-level-file chown for-loop (#35098)"
-    block = m.group(1)
-    assert 'chown hermes:hermes "$HERMES_HOME/$f"' in block, (
-        "the top-level-file loop must chown each allowlisted file to hermes"
-    )
-    return block
-
-
-def test_toplevel_chown_loop_present(stage2_text: str) -> None:
-    block = _toplevel_chown_loop(stage2_text)
-    # The reported-broken files must be covered.
-    for required in ("auth.json", "state.db", "gateway.lock", "gateway_state.json"):
-        assert required in block, (
-            f"top-level chown allowlist must include {required!r} (#35098)"
-        )
-
-
-def test_no_blanket_find_user_root_sweep(stage2_text: str) -> None:
-    """The fix must NOT reintroduce a blanket `find … -user root` chown of
-    $HERMES_HOME contents — that would clobber host-owned files in a bind mount
-    (#19788 / PR #19795)."""
-    assert not re.search(r"find\s+\"?\$\{?HERMES_HOME\}?\"?[^\n]*-user\s+root", stage2_text), (
-        "stage2-hook.sh must not blanket-chown root-owned files under "
-        "$HERMES_HOME via `find -user root`; use the targeted allowlist instead "
-        "so host-owned bind-mounted files are preserved (#19788, #19795)."
-    )
-
-
-def _run_loop(text: str, present_files: list[str]) -> list[str]:
-    """Run the extracted chown loop in a sandbox $HERMES_HOME, with `chown`
-    stubbed to record which paths it was asked to touch. Returns the basenames
-    the loop attempted to chown."""
-    bash = shutil.which("bash")
-    if bash is None:
-        pytest.skip("bash not available")
-    block = _toplevel_chown_loop(text)
-
-    import tempfile
-
-    with tempfile.TemporaryDirectory() as d:
-        dpath = Path(d)
-        home = dpath / "home"
-        home.mkdir()
-        for f in present_files:
-            (home / f).touch()
-        # A non-allowlisted, "host-owned" file that must never be chowned.
-        (home / "host_secret.json").touch()
-
-        # Stub chown to record the basename of its last argument (the path),
-        # so we observe exactly which files the allowlist loop selected
-        # without needing real root privileges.
-        script = (
-            "set -e\n"
-            f'HERMES_HOME="{home}"\n'
-            f'chown() {{ for a in "$@"; do :; done; echo "${{a##*/}}" >> "{dpath}/chown.log"; }}\n'
-            + block
-        )
-        script_path = dpath / "harness.sh"
-        script_path.write_text(script)
-
-        proc = subprocess.run([bash, str(script_path)], capture_output=True, text=True)
-        assert proc.returncode == 0, proc.stderr
-
-        log = dpath / "chown.log"
-        if not log.exists():
-            return []
-        return [ln for ln in log.read_text().splitlines() if ln]
-
-
-def test_loop_chowns_present_allowlisted_files(stage2_text: str) -> None:
-    touched = _run_loop(stage2_text, ["auth.json", "state.db", "gateway.lock"])
-    assert "auth.json" in touched
-    assert "state.db" in touched
-    assert "gateway.lock" in touched
-
-
-def test_loop_skips_nonallowlisted_host_file(stage2_text: str) -> None:
-    """A file NOT on the allowlist (e.g. a host-owned file in a bind mount) must
-    never be chowned, even if present."""
-    touched = _run_loop(stage2_text, ["auth.json"])
-    assert "host_secret.json" not in touched, (
-        "the allowlist loop must not touch non-allowlisted files (#19788)"
-    )
-
-
-def test_loop_skips_absent_files(stage2_text: str) -> None:
-    """Allowlisted files that don't exist are skipped (no spurious chown)."""
-    touched = _run_loop(stage2_text, ["auth.json"])
-    # state.db wasn't created, so it must not appear.
-    assert "state.db" not in touched
--- a/tests/tools/test_stage2_hook_unraid_uid.py
+++ b/tests/tools/test_stage2_hook_unraid_uid.py
@@ -1,86 +0,0 @@
-"""Regression tests for Docker stage2 UID/GID handling on NAS hosts.
-
-Unraid commonly runs appdata as nobody:users (99:100). The stage2 hook must
-accept those non-root numeric IDs and keep legacy/new pairing stores writable
-after targeted ownership reconciliation.
-"""
-from __future__ import annotations
-
-import os
-import re
-import shutil
-import subprocess
-from pathlib import Path
-
-import pytest
-
-REPO_ROOT = Path(__file__).resolve().parents[2]
-STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
-
-
-@pytest.fixture(scope="module")
-def stage2_text() -> str:
-    if not STAGE2_HOOK.exists():
-        pytest.skip("docker/stage2-hook.sh not present in this checkout")
-    return STAGE2_HOOK.read_text()
-
-
-def _uid_gid_validator(text: str) -> str:
-    marker = "# --- UID/GID remap ---"
-    before_marker = text.split(marker, 1)[0]
-    start = before_marker.index("validate_uid_gid()")
-    return before_marker[start:]
-
-
-def _validate_uid_gid(text: str, value: str) -> bool:
-    bash = shutil.which("bash")
-    if bash is None:
-        pytest.skip("bash not available")
-    script = _uid_gid_validator(text) + '\nvalidate_uid_gid "$CANDIDATE"\n'
-    proc = subprocess.run(
-        [bash, "-c", script],
-        env={"PATH": os.environ.get("PATH", ""), "CANDIDATE": value},
-        capture_output=True,
-        text=True,
-    )
-    return proc.returncode == 0
-
-
-@pytest.mark.parametrize("value", ["1", "99", "100", "1000", "65534"])
-def test_uid_gid_validator_accepts_non_root_nas_ids(stage2_text: str, value: str) -> None:
-    assert _validate_uid_gid(stage2_text, value), (
-        f"stage2 hook must accept NAS UID/GID {value}; Unraid uses 99:100 (#38070)"
-    )
-
-
-@pytest.mark.parametrize("value", ["", "0", "abc", "99x", "65535"])
-def test_uid_gid_validator_rejects_root_invalid_and_out_of_range(
-    stage2_text: str,
-    value: str,
-) -> None:
-    assert not _validate_uid_gid(stage2_text, value)
-
-
-def _targeted_chown_subdirs(text: str) -> list[str]:
-    m = re.search(
-        r"for sub in (?P<items>.*?); do\n\s*if \[ -e \"\$HERMES_HOME/\$sub\" \]",
-        text,
-        re.DOTALL,
-    )
-    assert m, "stage2-hook.sh must contain the targeted subdir chown loop"
-    return m.group("items").split()
-
-
-def test_targeted_chown_covers_legacy_and_new_pairing_dirs(stage2_text: str) -> None:
-    subdirs = _targeted_chown_subdirs(stage2_text)
-    assert "pairing" in subdirs
-    assert "platforms/pairing" in subdirs
-
-
-def test_seeded_directory_list_covers_legacy_and_new_pairing_dirs(stage2_text: str) -> None:
-    seed_block = stage2_text.split("as_hermes mkdir -p \\", 1)[1].split(
-        "# --- Install-method stamp",
-        1,
-    )[0]
-    assert '"$HERMES_HOME/pairing"' in seed_block
-    assert '"$HERMES_HOME/platforms/pairing"' in seed_block
--- a/tests/tools/test_stage2_hook_user_flag_guard.py
+++ b/tests/tools/test_stage2_hook_user_flag_guard.py
@@ -1,119 +0,0 @@
-"""Contract test: the s6-overlay stage2 hook and main-wrapper reject an
-unsupported `docker run --user <arbitrary-uid>:<gid>` start with actionable
-guidance, while still allowing:
-
-  - root start (id -u == 0)
-  - `--user <hermes-uid>` (the supported non-root start, #34648 / #34837)
-
-Background: in the tini era `docker run --user $(id -u):$(id -g)` was used to
-make container-written files match the host user. Under s6-overlay this can't
-work — the bootstrap (UID remap, volume/build-tree chown, config seeding) needs
-root, and the baked image dirs are owned by the hermes build UID, so an
-arbitrary pinned UID can't write them (EACCES on a bind mount, hard crash on a
-named volume). The supported path is root start + HERMES_UID/HERMES_GID (or the
-PUID/PGID aliases), which remaps the hermes user and chowns the volume.
-
-The guard fires only when the current UID is neither root NOR the hermes UID,
-so the #34648 `--user 10000:10000` case (pinning to the hermes UID itself) is
-unaffected.
-
-Extraction + stubbed-shell-run mirrors
-tests/tools/test_stage2_hook_toplevel_chown.py.
-"""
-from __future__ import annotations
-
-import re
-import shutil
-import subprocess
-import tempfile
-from pathlib import Path
-
-import pytest
-
-REPO_ROOT = Path(__file__).resolve().parents[2]
-STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
-MAIN_WRAPPER = REPO_ROOT / "docker" / "main-wrapper.sh"
-
-
-def _read(p: Path) -> str:
-    if not p.exists():
-        pytest.skip(f"{p} not present in this checkout")
-    return p.read_text()
-
-
-def _guard_block(text: str) -> str:
-    """Extract the `cur_uid=...; if [ ... ]; then ... exit 1; fi` guard."""
-    m = re.search(
-        r"(cur_uid=\"\$\(id -u\)\"\nif \[ \"\$cur_uid\" != 0 \](?:.*\n)*?fi)",
-        text,
-    )
-    assert m, "expected the --user guard block (cur_uid + non-root/non-hermes check)"
-    return m.group(1)
-
-
-@pytest.mark.parametrize("path", [STAGE2_HOOK, MAIN_WRAPPER])
-def test_guard_present_and_mentions_remediation(path: Path) -> None:
-    text = _read(path)
-    block = _guard_block(text)
-    # Must check non-root AND non-hermes-uid (so --user 10000:10000 is allowed).
-    assert '"$cur_uid" != 0' in block
-    assert '"$cur_uid" != "$(id -u hermes)"' in block
-    assert "exit 1" in block
-    # Must point users at the supported env vars.
-    assert "HERMES_UID" in block and "HERMES_GID" in block
-    assert "PUID" in block and "PGID" in block
-
-
-def _run_guard(text: str, *, cur_uid: int, hermes_uid: int = 10000) -> subprocess.CompletedProcess:
-    """Run the extracted guard with `id` stubbed. Returns the completed process
-    (rc 1 + stderr message when rejected, rc 0 when allowed through)."""
-    bash = shutil.which("bash")
-    if bash is None:
-        pytest.skip("bash not available")
-    block = _guard_block(text)
-    with tempfile.TemporaryDirectory() as d:
-        script = (
-            "set -e\n"
-            # Stub `id`: `id -u` -> cur_uid; `id -u hermes` -> hermes_uid.
-            f'id() {{ if [ "$2" = hermes ]; then echo {hermes_uid}; else echo {cur_uid}; fi; }}\n'
-            + block
-            + "\necho GUARD_PASSED\n"  # only reached when the guard allows through
-        )
-        sp = Path(d) / "h.sh"
-        sp.write_text(script)
-        return subprocess.run([bash, str(sp)], capture_output=True, text=True)
-
-
-def test_arbitrary_user_uid_is_rejected() -> None:
-    """An arbitrary host UID (1000), neither root nor hermes, is rejected."""
-    for text in (_read(STAGE2_HOOK), _read(MAIN_WRAPPER)):
-        proc = _run_guard(text, cur_uid=1000, hermes_uid=10000)
-        assert proc.returncode == 1, f"expected rejection, got rc={proc.returncode}"
-        assert "not supported" in proc.stderr
-        assert "GUARD_PASSED" not in proc.stdout
-
-
-def test_root_start_passes() -> None:
-    """Root start (uid 0) is never blocked."""
-    for text in (_read(STAGE2_HOOK), _read(MAIN_WRAPPER)):
-        proc = _run_guard(text, cur_uid=0, hermes_uid=10000)
-        assert proc.returncode == 0, proc.stderr
-        assert "GUARD_PASSED" in proc.stdout
-
-
-def test_user_pinned_to_hermes_uid_passes() -> None:
-    """`--user 10000:10000` (the hermes UID itself) is the supported non-root
-    start from #34648 / #34837 and must NOT be blocked."""
-    for text in (_read(STAGE2_HOOK), _read(MAIN_WRAPPER)):
-        proc = _run_guard(text, cur_uid=10000, hermes_uid=10000)
-        assert proc.returncode == 0, proc.stderr
-        assert "GUARD_PASSED" in proc.stdout
-
-
-def test_user_pinned_to_remapped_hermes_uid_passes() -> None:
-    """After a HERMES_UID remap the hermes UID is e.g. 4242; a container pinned
-    to that same UID must still pass (cur_uid == hermes_uid)."""
-    for text in (_read(STAGE2_HOOK), _read(MAIN_WRAPPER)):
-        proc = _run_guard(text, cur_uid=4242, hermes_uid=4242)
-        assert proc.returncode == 0, proc.stderr
-        assert "GUARD_PASSED" in proc.stdout
Author	SHA1	Message	Date
ethernet	3dec660a50	pytest don't load plugins	2026-06-23 12:25:30 -04:00
ethernet	b4d88a9e33	dedupe work, faster docker tests	2026-06-23 12:25:30 -04:00
ethernet	bb445b24ad	wip comments	2026-06-23 12:25:30 -04:00
ethernet	f519c1e083	refactor(ci): move tests for docker stuff into actual docker urntime tests, not dockefile assertions	2026-06-23 12:25:30 -04:00
ethernet	31628a0728	faster pippip	2026-06-22 16:25:44 -04:00
ethernet	41c33c390e	faster docker builds	2026-06-22 16:19:28 -04:00