mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-26 20:03:22 +08:00
Compare commits
6 Commits
dependabot
...
pytest-fas
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3dec660a50 | ||
|
|
b4d88a9e33 | ||
|
|
bb445b24ad | ||
|
|
f519c1e083 | ||
|
|
31628a0728 | ||
|
|
41c33c390e |
6
.github/workflows/docker-publish.yml
vendored
6
.github/workflows/docker-publish.yml
vendored
@@ -109,13 +109,11 @@ jobs:
|
||||
|
||||
- name: Install Python dependencies (for docker tests)
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
# ``dev`` extra pulls in pytest, pytest-asyncio —
|
||||
# everything tests/docker/ needs. We deliberately avoid ``all``
|
||||
# here because the docker tests only drive the container via
|
||||
# subprocess and don't import hermes_agent's optional deps.
|
||||
uv pip install -e ".[dev]"
|
||||
uv sync --locked --python 3.11 --extra dev
|
||||
|
||||
- name: Run docker integration tests
|
||||
env:
|
||||
@@ -128,7 +126,7 @@ jobs:
|
||||
NOUS_API_KEY: ""
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/docker/ -v --tb=short
|
||||
python scripts/run_tests_parallel.py tests/docker/ --file-timeout 300 -- -v --tb=short
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
|
||||
26
Dockerfile
26
Dockerfile
@@ -189,7 +189,13 @@ RUN cd web && npm run build && \
|
||||
|
||||
# ---------- Source code ----------
|
||||
# .dockerignore excludes node_modules, so the installs above survive.
|
||||
COPY . .
|
||||
# --link decouples this layer from parents for cache purposes; --chmod bakes
|
||||
# the final read-only permissions at copy time so we skip the separate
|
||||
# `chmod -R` pass that previously walked ~30k files across the venv +
|
||||
# node_modules + source (21s amd64 / 222s arm64 — #49113). `a+rX,go-w`
|
||||
# gives the non-root hermes user read + traverse but no write; root retains
|
||||
# write so the build steps below don't need chmod u+w dances.
|
||||
COPY --link --chmod=a+rX,go-w . .
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Link hermes-agent itself (editable). Deps are already installed in the
|
||||
@@ -197,19 +203,15 @@ COPY . .
|
||||
# resolution or downloads.
|
||||
RUN uv pip install --no-cache-dir --no-deps -e "."
|
||||
|
||||
# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
|
||||
# instances must not be able to self-edit the installed source or venv; user
|
||||
# data, skills, plugins, config, logs, and dashboard uploads live under
|
||||
# /opt/data instead. Root can still repair the image during build/boot, but
|
||||
# supervised Hermes processes drop to the non-root hermes user.
|
||||
# Wire the exec shim and install-method stamp. Files under /opt/hermes are
|
||||
# already root-owned (COPY, uv sync, npm install all run as root) and
|
||||
# read-only for the hermes user (go-w from the --chmod above).
|
||||
|
||||
USER root
|
||||
RUN mkdir -p /opt/hermes/bin && \
|
||||
cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
|
||||
chmod 0755 /opt/hermes/bin/hermes && \
|
||||
printf 'docker\n' > /opt/hermes/.install_method && \
|
||||
chown -R root:root /opt/hermes && \
|
||||
chmod -R a+rX /opt/hermes && \
|
||||
chmod -R a-w /opt/hermes
|
||||
printf 'docker\n' > /opt/hermes/.install_method
|
||||
# The ``.install_method`` stamp is baked next to the running code (the install
|
||||
# tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
|
||||
# volume that is commonly bind-mounted from the host and even shared with a
|
||||
@@ -240,9 +242,7 @@ RUN mkdir -p /opt/hermes/bin && \
|
||||
# every published image has it.
|
||||
ARG HERMES_GIT_SHA=
|
||||
RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
|
||||
chmod u+w /opt/hermes && \
|
||||
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
|
||||
chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
|
||||
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha; \
|
||||
fi
|
||||
|
||||
# ---------- s6-overlay service wiring ----------
|
||||
|
||||
@@ -74,6 +74,7 @@ exec env -i \
|
||||
LC_ALL=C.UTF-8 \
|
||||
PYTHONHASHSEED=0 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 \
|
||||
${EXTRA_PYTHONPATH:+PYTHONPATH="$EXTRA_PYTHONPATH"} \
|
||||
${EXTRA_PYTEST_PLUGINS:+PYTEST_PLUGINS="$EXTRA_PYTEST_PLUGINS"} \
|
||||
"$PYTHON" "$SCRIPT_DIR/run_tests_parallel.py" "$@"
|
||||
|
||||
@@ -8,15 +8,13 @@ Override the image with ``HERMES_TEST_IMAGE`` env var to point at a pre-built
|
||||
image (faster local iteration); otherwise the ``built_image`` fixture builds
|
||||
the repo's Dockerfile once per session.
|
||||
|
||||
Docker tests need longer timeouts than the suite default (30s), so every
|
||||
test under this directory is granted a 180s default via
|
||||
``pytest.mark.timeout`` applied at collection time.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from collections.abc import Iterator
|
||||
|
||||
import pytest
|
||||
@@ -43,11 +41,9 @@ def pytest_collection_modifyitems(config, items): # noqa: D401 - pytest hook
|
||||
skip_docker = pytest.mark.skip(
|
||||
reason="Docker not available or daemon not running",
|
||||
)
|
||||
extend_timeout = pytest.mark.timeout(180)
|
||||
for item in items:
|
||||
if "tests/docker/" not in str(item.fspath).replace(os.sep, "/"):
|
||||
continue
|
||||
item.add_marker(extend_timeout)
|
||||
if not docker_ok:
|
||||
item.add_marker(skip_docker)
|
||||
|
||||
@@ -137,3 +133,151 @@ def docker_exec_sh(
|
||||
return docker_exec(
|
||||
container, "sh", "-c", command, user=user, timeout=timeout,
|
||||
)
|
||||
|
||||
|
||||
def wait_for_container_ready(
    container: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> None:
    """Block until the container's s6 cont-init chain has completed.

    Readiness is detected by the string ``profile=default`` showing up in
    ``/opt/data/logs/container-boot.log``. The 02-reconcile-profiles
    cont-init script writes that entry on every boot, and it only fires
    once stage2-hook.sh is done — so seeing it means the whole cont-init
    chain (UID remap, chown, config seeding, skills sync, browser
    discovery, config migration) has already run.

    Args:
        container: Name of the container to probe.
        deadline_s: Total time budget, in seconds, before giving up.
        interval_s: Pause, in seconds, between two consecutive probes.

    Raises:
        TimeoutError: The marker never appeared within ``deadline_s``.
            Polling against a deadline beats a fixed ``time.sleep()``,
            which is either wasteful on fast hosts or flaky on slow ones.
    """
    read_boot_log = (
        "sh", "-c",
        "cat /opt/data/logs/container-boot.log 2>/dev/null",
    )
    give_up_at = time.monotonic() + deadline_s
    while True:
        # The deadline is checked before each probe, so an already-expired
        # budget raises without touching docker at all.
        if time.monotonic() >= give_up_at:
            raise TimeoutError(
                f"container {container} did not finish cont-init within {deadline_s}s"
            )
        probe = docker_exec(container, *read_boot_log, timeout=5)
        if probe.returncode == 0 and "profile=default" in probe.stdout:
            return
        time.sleep(interval_s)
|
||||
|
||||
|
||||
def start_container(
    image: str,
    name: str,
    *env: str,
    cmd: str = "sleep infinity",
    timeout: int = 60,
) -> str:
    """Start a detached container and wait for cont-init to finish.

    Args:
        image: Docker image to run.
        name: Container name (cleanup is the caller's responsibility —
            typically handled by the ``container_name`` fixture).
        env: Env vars as ``KEY=VALUE`` strings, each passed via ``-e``.
        cmd: Container CMD (default ``sleep infinity``). Parsed with
            shell-style quoting rules, so ``sh -c 'sleep 1; exit 0'``
            becomes three argv entries rather than five.
        timeout: ``docker run`` subprocess timeout, in seconds.

    Returns:
        The container name, unchanged, for call-chaining convenience.

    Raises:
        subprocess.CalledProcessError: ``docker run`` exited non-zero.
        subprocess.TimeoutExpired: ``docker run`` exceeded ``timeout``.
        TimeoutError: The container never finished cont-init within the
            default 30s deadline of :func:`wait_for_container_ready`.
    """
    # Imported locally so this helper does not depend on a module-level
    # import being added elsewhere in the file.
    import shlex

    run_args = ["docker", "run", "-d", "--name", name]
    for assignment in env:
        run_args += ["-e", assignment]
    # shlex.split (instead of str.split) keeps quoted arguments intact;
    # for plain whitespace-separated commands the result is identical.
    run_args += [image, *shlex.split(cmd)]
    subprocess.run(run_args, check=True, capture_output=True, timeout=timeout)
    wait_for_container_ready(name)
    return name
|
||||
|
||||
|
||||
def restart_container(container: str, timeout: int = 60) -> None:
    """Run ``docker restart <container>`` and block until cont-init is done.

    Args:
        container: Name of the container to restart.
        timeout: Timeout, in seconds, for the ``docker restart`` subprocess.

    A failing ``docker restart`` raises (``check=True``); afterwards
    readiness is awaited through :func:`wait_for_container_ready`.
    """
    restart_cmd = ["docker", "restart", container]
    subprocess.run(restart_cmd, timeout=timeout, capture_output=True, check=True)
    wait_for_container_ready(container)
|
||||
|
||||
|
||||
def poll_container(
    container: str,
    probe: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.5,
    user: str = "hermes",
) -> tuple[bool, str]:
    """Re-run a shell probe in the container until it succeeds or time is up.

    Handy for waiting on a process to appear, a port to open, a file to
    contain a string, and similar conditions.

    Args:
        container: Name of the container to run the probe in.
        probe: Shell command; exit status 0 counts as success.
        deadline_s: Total time budget, in seconds.
        interval_s: Pause, in seconds, after each failed attempt.
        user: User the probe is executed as inside the container.

    Returns:
        ``(success, last_stdout)`` — ``last_stdout`` is the stdout of the
        most recent attempt, or ``""`` if no attempt was made.
    """
    cutoff = time.monotonic() + deadline_s
    latest_stdout = ""
    while time.monotonic() < cutoff:
        attempt = docker_exec_sh(container, probe, user=user, timeout=10)
        latest_stdout = attempt.stdout
        if attempt.returncode != 0:
            # Not there yet: back off briefly, then try again.
            time.sleep(interval_s)
            continue
        return True, latest_stdout
    return False, latest_stdout
|
||||
|
||||
|
||||
def wait_for_path(
    container: str,
    path: str,
    *,
    kind: str = "f",
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> bool:
    """Poll ``test -<kind> <path>`` inside the container until success or timeout.

    Args:
        container: Name of the container to probe.
        path: Filesystem path inside the container. It is shell-quoted
            before use, so paths containing spaces or shell
            metacharacters are tested literally.
        kind: The ``test`` flag: ``'f'`` for file, ``'d'`` for directory,
            ``'e'`` for existence.
        deadline_s: Total time budget, in seconds.
        interval_s: Pause, in seconds, between attempts.

    Returns:
        ``True`` on success, ``False`` on timeout.
    """
    # Imported locally so this helper does not depend on a module-level
    # import being added elsewhere in the file.
    import shlex

    # Without quoting, a path such as "/opt/data/my file" would be split
    # by the shell into two operands and ``test`` would always fail.
    probe = f"test -{kind} {shlex.quote(path)}"
    found, _stdout = poll_container(
        container, probe,
        deadline_s=deadline_s, interval_s=interval_s,
    )
    return found
|
||||
|
||||
|
||||
def wait_for_log(
    container: str,
    log_path: str,
    needle: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> str:
    """Poll until a log file inside the container contains ``needle``.

    Args:
        container: Name of the container to probe.
        log_path: Path of the log file inside the container. It is
            shell-quoted before use, so paths with spaces or shell
            metacharacters are read literally.
        needle: Substring to look for in the log contents.
        deadline_s: Total time budget, in seconds.
        interval_s: Pause, in seconds, between attempts.

    Returns:
        The matching log content on success, or the last successfully
        read contents on timeout (``""`` if the file was never readable)
        so the caller can render a meaningful diagnostic.
    """
    # Imported locally so this helper does not depend on a module-level
    # import being added elsewhere in the file.
    import shlex

    # The command never changes between attempts, so build it once.
    read_log = f"cat {shlex.quote(log_path)} 2>/dev/null"
    end = time.monotonic() + deadline_s
    last = ""
    while time.monotonic() < end:
        r = docker_exec_sh(container, read_log, timeout=5)
        if r.returncode == 0:
            # Only a successful read updates ``last``; a missing file must
            # not wipe out content observed on an earlier attempt.
            last = r.stdout
            if needle in last:
                return last
        time.sleep(interval_s)
    return last
|
||||
|
||||
69
tests/docker/test_config_migration.py
Normal file
69
tests/docker/test_config_migration.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""Runtime smoke test for Docker config-schema migration on boot.
|
||||
|
||||
Build the real image and verify: a config.yaml present in $HERMES_HOME
|
||||
is migrated by docker_config_migrate.py on boot, running as the hermes
|
||||
user.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container
|
||||
|
||||
|
||||
def test_config_migration_runs_on_boot(
    built_image: str, container_name: str,
) -> None:
    """Boot the image and check the artefacts of config migration.

    Three things are asserted after the container has started:
    ``/opt/data/config.yaml`` exists, the migration script ships in the
    image at ``/opt/hermes/scripts/docker_config_migrate.py``, and the
    config file is owned by the ``hermes`` user (taken as evidence that
    the migration did not run as root).
    """
    start_container(built_image, container_name)

    def probe(shell_command: str) -> str:
        """Run ``shell_command`` in the container and return its stdout."""
        return docker_exec_sh(container_name, shell_command, timeout=10).stdout

    # config.yaml should be seeded by stage2 when it is not already present.
    seeded = probe("test -f /opt/data/config.yaml && echo EXISTS || echo MISSING")
    assert "EXISTS" in seeded, (
        f"config.yaml not found in $HERMES_HOME: {seeded}"
    )

    # The migration script itself must be part of the image.
    script = probe(
        "test -f /opt/hermes/scripts/docker_config_migrate.py && "
        "echo SCRIPT_EXISTS || echo SCRIPT_MISSING"
    )
    assert "SCRIPT_EXISTS" in script, (
        f"docker_config_migrate.py not found in image: {script}"
    )

    # Ownership by hermes indicates the migration ran as hermes.
    owner = probe('stat -c "%U" /opt/data/config.yaml').strip()
    assert owner == "hermes", (
        f"config.yaml not owned by hermes (migration may have run as root): "
        f"{owner}"
    )
|
||||
|
||||
|
||||
def test_config_migration_opt_out_env_var_respected(
    built_image: str, container_name: str,
) -> None:
    """Boot with HERMES_SKIP_CONFIG_MIGRATION=1 and check seeding still works.

    Seeding of config.yaml is separate from migration, so the file must
    exist even when the migration is opted out of.
    """
    opt_out = "HERMES_SKIP_CONFIG_MIGRATION=1"
    start_container(built_image, container_name, opt_out)

    check_cmd = "test -f /opt/data/config.yaml && echo EXISTS || echo MISSING"
    outcome = docker_exec_sh(container_name, check_cmd, timeout=10)
    reported = outcome.stdout
    assert "EXISTS" in reported, (
        f"config.yaml should be seeded even with migration skipped: {reported}"
    )
|
||||
@@ -32,13 +32,6 @@ def _docker(*args: str, **kw) -> subprocess.CompletedProcess[str]:
|
||||
)
|
||||
|
||||
|
||||
def _exec(container: str, *args: str, timeout: int = 30) -> subprocess.CompletedProcess[str]:
|
||||
return docker_exec(container, *args, timeout=timeout)
|
||||
|
||||
|
||||
def _sh(container: str, cmd: str, timeout: int = 30) -> subprocess.CompletedProcess[str]:
|
||||
return docker_exec_sh(container, cmd, timeout=timeout)
|
||||
|
||||
|
||||
def _wait_for_path(
|
||||
container: str,
|
||||
@@ -61,7 +54,7 @@ def _wait_for_path(
|
||||
"""
|
||||
end = time.monotonic() + deadline_s
|
||||
while time.monotonic() < end:
|
||||
r = _sh(container, f"test -{kind} {path}", timeout=5)
|
||||
r = docker_exec_sh(container, f"test -{kind} {path}", timeout=5)
|
||||
if r.returncode == 0:
|
||||
return True
|
||||
time.sleep(interval_s)
|
||||
@@ -86,7 +79,7 @@ def _wait_for_reconcile_log_mention(
|
||||
end = time.monotonic() + deadline_s
|
||||
last = ""
|
||||
while time.monotonic() < end:
|
||||
r = _sh(container, "cat /opt/data/logs/container-boot.log", timeout=5)
|
||||
r = docker_exec_sh(container, "cat /opt/data/logs/container-boot.log", timeout=5)
|
||||
if r.returncode == 0:
|
||||
last = r.stdout
|
||||
if f"profile={profile}" in last:
|
||||
@@ -145,16 +138,16 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
|
||||
# Create the profile + start its gateway. The Phase 4 hooks
|
||||
# register the s6 service slot during create and the dispatch
|
||||
# path brings it up via s6-svc -u.
|
||||
r = _exec(container, "hermes", "profile", "create", "coder")
|
||||
r = docker_exec(container, "hermes", "profile", "create", "coder")
|
||||
assert r.returncode == 0, f"profile create failed: {r.stderr}"
|
||||
|
||||
r = _exec(container, "hermes", "-p", "coder", "gateway", "start", timeout=60)
|
||||
r = docker_exec(container, "hermes", "-p", "coder", "gateway", "start", timeout=60)
|
||||
assert r.returncode == 0, f"gateway start failed: {r.stderr}"
|
||||
|
||||
# Give the service time to actually come up under supervision.
|
||||
deadline = time.monotonic() + 15.0
|
||||
while time.monotonic() < deadline:
|
||||
r = _sh(container, "/command/s6-svstat /run/service/gateway-coder")
|
||||
r = docker_exec_sh(container, "/command/s6-svstat /run/service/gateway-coder")
|
||||
if r.returncode == 0 and "up " in r.stdout:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
@@ -170,7 +163,7 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
|
||||
"p = pathlib.Path('/opt/data/profiles/coder/gateway_state.json'); "
|
||||
"p.write_text(json.dumps({'gateway_state': 'running', 'timestamp': 1}))"
|
||||
)
|
||||
_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
|
||||
docker_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
|
||||
|
||||
# Restart. After this, /run/service/ is empty until cont-init.d
|
||||
# runs the reconciler. We need to wait long enough for the
|
||||
@@ -190,14 +183,14 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
|
||||
), "slot not recreated after restart"
|
||||
|
||||
# No `down` marker — we asked for auto-start.
|
||||
r = _sh(container, "test -f /run/service/gateway-coder/down")
|
||||
r = docker_exec_sh(container, "test -f /run/service/gateway-coder/down")
|
||||
assert r.returncode != 0, "down marker present despite prior_state=running"
|
||||
|
||||
|
||||
def test_stopped_gateway_stays_stopped_after_restart(restart_container: str) -> None:
|
||||
container = restart_container
|
||||
|
||||
_exec(container, "hermes", "profile", "create", "writer").check_returncode()
|
||||
docker_exec(container, "hermes", "profile", "create", "writer").check_returncode()
|
||||
|
||||
# Write 'stopped' directly so we don't have to race against the
|
||||
# gateway's own state writes.
|
||||
@@ -206,7 +199,7 @@ def test_stopped_gateway_stays_stopped_after_restart(restart_container: str) ->
|
||||
"p = pathlib.Path('/opt/data/profiles/writer/gateway_state.json'); "
|
||||
"p.write_text(json.dumps({'gateway_state': 'stopped', 'timestamp': 1}))"
|
||||
)
|
||||
_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
|
||||
docker_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
|
||||
|
||||
_docker("restart", container, timeout=60).check_returncode()
|
||||
log = _wait_for_reconcile_log_mention(container, "writer", deadline_s=30.0)
|
||||
@@ -218,7 +211,7 @@ def test_stopped_gateway_stays_stopped_after_restart(restart_container: str) ->
|
||||
)
|
||||
|
||||
# Down marker present.
|
||||
r = _sh(container, "test -f /run/service/gateway-writer/down")
|
||||
r = docker_exec_sh(container, "test -f /run/service/gateway-writer/down")
|
||||
assert r.returncode == 0, "down marker missing despite prior_state=stopped"
|
||||
|
||||
|
||||
@@ -229,7 +222,7 @@ def test_stale_gateway_pid_cleaned_up_on_restart(restart_container: str) -> None
|
||||
process-mismatch checks."""
|
||||
container = restart_container
|
||||
|
||||
_exec(container, "hermes", "profile", "create", "ghost").check_returncode()
|
||||
docker_exec(container, "hermes", "profile", "create", "ghost").check_returncode()
|
||||
|
||||
# Stamp stale runtime files alongside a 'running' state so the
|
||||
# reconciler walks this profile.
|
||||
@@ -240,15 +233,15 @@ def test_stale_gateway_pid_cleaned_up_on_restart(restart_container: str) -> None
|
||||
"(p / 'gateway.pid').write_text(json.dumps({'pid': 99999, 'host': 'old'})); "
|
||||
"(p / 'processes.json').write_text('[]')"
|
||||
)
|
||||
_exec(container, "python3", "-c", stamp, timeout=10).check_returncode()
|
||||
docker_exec(container, "python3", "-c", stamp, timeout=10).check_returncode()
|
||||
|
||||
_docker("restart", container, timeout=60).check_returncode()
|
||||
_wait_for_reconcile_log_mention(container, "ghost", deadline_s=30.0)
|
||||
|
||||
# Stale runtime files swept.
|
||||
r = _sh(container, "test -f /opt/data/profiles/ghost/gateway.pid")
|
||||
r = docker_exec_sh(container, "test -f /opt/data/profiles/ghost/gateway.pid")
|
||||
assert r.returncode != 0, "stale gateway.pid survived restart"
|
||||
r = _sh(container, "test -f /opt/data/profiles/ghost/processes.json")
|
||||
r = docker_exec_sh(container, "test -f /opt/data/profiles/ghost/processes.json")
|
||||
assert r.returncode != 0, "stale processes.json survived restart"
|
||||
|
||||
|
||||
@@ -271,15 +264,15 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
|
||||
"""
|
||||
container = restart_container
|
||||
|
||||
_exec(container, "hermes", "profile", "create", "live").check_returncode()
|
||||
r = _exec(container, "hermes", "-p", "live", "gateway", "start", timeout=60)
|
||||
docker_exec(container, "hermes", "profile", "create", "live").check_returncode()
|
||||
r = docker_exec(container, "hermes", "-p", "live", "gateway", "start", timeout=60)
|
||||
assert r.returncode == 0, f"gateway start failed: {r.stderr}"
|
||||
|
||||
# Wait for the gateway to actually come up under supervision AND write
|
||||
# its own gateway_state=running (we do NOT stamp it ourselves).
|
||||
deadline = time.monotonic() + 20.0
|
||||
while time.monotonic() < deadline:
|
||||
r = _sh(container, "/command/s6-svstat /run/service/gateway-live")
|
||||
r = docker_exec_sh(container, "/command/s6-svstat /run/service/gateway-live")
|
||||
if r.returncode == 0 and "up " in r.stdout:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
@@ -290,7 +283,7 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
|
||||
deadline = time.monotonic() + 15.0
|
||||
state = ""
|
||||
while time.monotonic() < deadline:
|
||||
r = _sh(
|
||||
r = docker_exec_sh(
|
||||
container,
|
||||
"cat /opt/data/profiles/live/gateway_state.json 2>/dev/null",
|
||||
)
|
||||
@@ -322,7 +315,7 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
|
||||
assert _wait_for_path(
|
||||
container, "/run/service/gateway-live", kind="d", deadline_s=10.0,
|
||||
), "slot not recreated after restart"
|
||||
r = _sh(container, "test -f /run/service/gateway-live/down")
|
||||
r = docker_exec_sh(container, "test -f /run/service/gateway-live/down")
|
||||
assert r.returncode != 0, (
|
||||
"down marker present despite a live gateway being restarted — "
|
||||
"the signal-initiated shutdown wrongly persisted 'stopped' (#42675)"
|
||||
|
||||
@@ -16,36 +16,14 @@ import json
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh
|
||||
|
||||
|
||||
def _poll(container: str, probe: str, *, deadline_s: float = 30.0,
|
||||
interval_s: float = 0.5) -> tuple[bool, str]:
|
||||
"""Repeatedly run ``probe`` inside the container until it exits 0 or
|
||||
``deadline_s`` elapses. Returns (success, last stdout)."""
|
||||
end = time.monotonic() + deadline_s
|
||||
last = ""
|
||||
while time.monotonic() < end:
|
||||
r = docker_exec_sh(container, probe, timeout=10)
|
||||
last = r.stdout
|
||||
if r.returncode == 0:
|
||||
return True, last
|
||||
time.sleep(interval_s)
|
||||
return False, last
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container, poll_container
|
||||
|
||||
|
||||
def test_dashboard_not_running_by_default(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
"""Without HERMES_DASHBOARD, no dashboard process should be running."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "60"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
# Give the entrypoint enough time to finish bootstrap; if a dashboard
|
||||
# were going to start it'd be visible by now.
|
||||
time.sleep(5)
|
||||
start_container(built_image, container_name, cmd="sleep 60")
|
||||
r = docker_exec(container_name, "pgrep", "-f", "hermes dashboard")
|
||||
# pgrep exits non-zero when no match found
|
||||
assert r.returncode != 0, (
|
||||
@@ -64,12 +42,7 @@ def test_dashboard_slot_reports_down_when_disabled(
|
||||
writes a `down` marker file in the live service-dir when
|
||||
HERMES_DASHBOARD is unset, so the slot reflects reality.
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "60"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(5)
|
||||
start_container(built_image, container_name, cmd="sleep 60")
|
||||
# /command/ isn't on PATH for docker-exec sessions, so call by
|
||||
# absolute path.
|
||||
r = docker_exec(
|
||||
@@ -135,7 +108,7 @@ def test_dashboard_opt_in_starts(
|
||||
# Poll for the dashboard subprocess to appear — the entrypoint
|
||||
# backgrounds it and bootstrap (skills sync etc.) can take a few
|
||||
# seconds before the python process actually launches.
|
||||
ok, _ = _poll(
|
||||
ok, _ = poll_container(
|
||||
container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
|
||||
)
|
||||
assert ok, "Dashboard should be running with HERMES_DASHBOARD=1"
|
||||
@@ -160,7 +133,7 @@ def test_dashboard_port_override(
|
||||
# to the port yet — uvicorn takes another second or two to come up.
|
||||
# The image doesn't ship ss/netstat, so probe /proc/net/tcp directly:
|
||||
# port 9120 = 0x23A0, state 0A = LISTEN.
|
||||
ok, stdout = _poll(
|
||||
ok, stdout = poll_container(
|
||||
container_name,
|
||||
"grep -E ' 0+:23A0 .* 0A ' /proc/net/tcp /proc/net/tcp6 "
|
||||
"2>/dev/null",
|
||||
@@ -193,7 +166,7 @@ def test_dashboard_restarts_after_crash(
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
# Wait for the first dashboard to come up.
|
||||
ok, _ = _poll(
|
||||
ok, _ = poll_container(
|
||||
container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
|
||||
)
|
||||
assert ok, "Dashboard never started initially"
|
||||
@@ -409,7 +382,7 @@ def test_dashboard_insecure_env_var_no_longer_bypasses_gate(
|
||||
# Fail-closed: the dashboard process must NOT successfully serve. Probe
|
||||
# for a few seconds; /api/status should never become reachable because
|
||||
# start_server raised SystemExit before binding.
|
||||
ok, _ = _poll(
|
||||
ok, _ = poll_container(
|
||||
container_name,
|
||||
"curl -fsS -m 2 http://127.0.0.1:9119/api/status >/dev/null 2>&1",
|
||||
deadline_s=12.0,
|
||||
|
||||
@@ -287,4 +287,4 @@ def test_e2e_login_then_supervised_gateway_can_read_auth(
|
||||
"Files written by `docker exec` are unreadable to the hermes user "
|
||||
f"(supervised gateway UID): {unreadable}. The shim failed to drop "
|
||||
"privileges before the write."
|
||||
)
|
||||
)
|
||||
157
tests/docker/test_gateway_bootstrap_state.py
Normal file
157
tests/docker/test_gateway_bootstrap_state.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""Runtime smoke tests for Docker gateway_state.json bootstrap seeding.
|
||||
|
||||
Build the real image and verify the actual runtime behavior:
|
||||
|
||||
1. HERMES_GATEWAY_BOOTSTRAP_STATE=running on a fresh volume seeds
|
||||
gateway_state.json with running state
|
||||
2. An existing gateway_state.json is never clobbered (first-boot-only)
|
||||
3. No env var = no seed (default down-on-first-boot preserved)
|
||||
4. Only literal "running" is honored; other values are ignored
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh, wait_for_container_ready
|
||||
|
||||
|
||||
def _start_container(
    built_image: str, name: str, *env: str,
) -> str:
    """Start a ``sleep infinity`` container with the given env vars.

    Thin wrapper kept for the call sites in this module; the actual work
    (``docker run -d``, one ``-e`` per env entry, then waiting for
    cont-init to finish) lives in the shared ``start_container`` helper
    so the two code paths cannot drift apart.

    Args:
        built_image: Docker image to run.
        name: Container name.
        env: Env vars as ``KEY=VALUE`` strings.

    Returns:
        The container name.
    """
    # Imported locally so this wrapper does not depend on the module's
    # top-level import list being extended.
    from tests.docker.conftest import start_container

    return start_container(built_image, name, *env)
|
||||
|
||||
|
||||
def test_seeds_running_state_on_blank_volume(
    built_image: str, container_name: str,
) -> None:
    """Boot with HERMES_GATEWAY_BOOTSTRAP_STATE=running on a fresh volume.

    The container must end up with an ``/opt/data/gateway_state.json``
    that parses as JSON and carries ``gateway_state == "running"``.
    """
    bootstrap_env = "HERMES_GATEWAY_BOOTSTRAP_STATE=running"
    _start_container(built_image, container_name, bootstrap_env)

    # ``|| echo NONE`` turns a missing file into a recognisable sentinel.
    read_state = "cat /opt/data/gateway_state.json 2>/dev/null || echo NONE"
    result = docker_exec_sh(container_name, read_state, timeout=10)
    raw = result.stdout
    body = raw.strip()
    assert body != "NONE", (
        f"gateway_state.json not seeded on fresh volume: {raw}"
    )

    state = json.loads(body)
    seeded_value = state.get("gateway_state")
    assert seeded_value == "running", (
        f"expected gateway_state=running, got: {state}"
    )
|
||||
|
||||
|
||||
def test_does_not_clobber_existing_state(
    built_image: str, container_name: str,
) -> None:
    """An existing gateway_state.json must never be overwritten by the
    seed, even when the bootstrap env var says running.

    We use a named volume so we can pre-create the state file before
    the container boots. The [ ! -f ] guard in stage2 must skip seeding
    because the file already exists. We check the file immediately after
    boot — before the gateway service has a chance to write its own
    state — by reading it as fast as possible after container start.

    The container and the named volume are removed in a ``finally``
    block so a failing assertion does not leak the volume.
    """
    volume = f"{container_name}-vol"
    subprocess.run(
        ["docker", "volume", "create", volume],
        check=True, capture_output=True, timeout=10,
    )

    try:
        # Pre-create the state file via a throwaway container
        existing = json.dumps({"gateway_state": "stopped", "pid": 123})
        subprocess.run(
            ["docker", "run", "--rm", "-v", f"{volume}:/opt/data",
             "--entrypoint", "sh", built_image,
             "-c", f"printf '{existing}\\n' > /opt/data/gateway_state.json"],
            check=True, capture_output=True, timeout=30,
        )

        # Boot with the env var set — stage2 must NOT clobber the existing file
        subprocess.run(
            ["docker", "run", "-d", "--name", container_name,
             "-v", f"{volume}:/opt/data",
             "-e", "HERMES_GATEWAY_BOOTSTRAP_STATE=running",
             built_image, "sleep", "infinity"],
            check=True, capture_output=True, timeout=60,
        )
        # Read the file as quickly as possible — the gateway service may
        # start and write its own state, but the stage2 [ ! -f ] guard runs
        # during cont-init (before any service starts), so the file must
        # still be our "stopped" state at this point.
        wait_for_container_ready(container_name)
        r = docker_exec_sh(
            container_name, "cat /opt/data/gateway_state.json", timeout=10,
        )
        # Fail with the real cause instead of an opaque JSONDecodeError
        # when the file could not be read at all.
        assert r.returncode == 0, (
            f"could not read gateway_state.json: {r.stderr}"
        )
        state = json.loads(r.stdout.strip())
        assert state.get("gateway_state") == "stopped", (
            f"existing state was clobbered by bootstrap seed: {state}"
        )
    finally:
        # Cleanup — container first, since a volume that is still in use
        # cannot be removed. Both calls are best-effort (no ``check``).
        subprocess.run(
            ["docker", "rm", "-f", container_name],
            capture_output=True, timeout=10,
        )
        subprocess.run(
            ["docker", "volume", "rm", "-f", volume],
            capture_output=True, timeout=10,
        )
|
||||
|
||||
|
||||
def test_no_seed_when_env_unset(
    built_image: str, container_name: str,
) -> None:
    """Boot without HERMES_GATEWAY_BOOTSTRAP_STATE and expect no state file.

    With the env var unset, ``/opt/data/gateway_state.json`` must not
    exist once the container has started.
    """
    _start_container(built_image, container_name)

    presence_check = (
        "test -f /opt/data/gateway_state.json && "
        "echo EXISTS || echo ABSENT"
    )
    outcome = docker_exec_sh(container_name, presence_check, timeout=10)
    reported = outcome.stdout
    assert "ABSENT" in reported, (
        f"gateway_state.json was seeded without the env var: {reported}"
    )
|
||||
|
||||
|
||||
def test_non_running_value_ignored(
    built_image: str, container_name: str,
) -> None:
    """Only literal 'running' is honored; any other value is ignored.

    Each bogus value gets its own fresh container, named
    ``<container_name>-<value>``. That container is removed in a
    ``finally`` block so a failed start or assertion for one value does
    not leave it behind.
    """
    for bogus in ("stopped", "Running", "1", "true", "starting"):
        # Need a fresh container per iteration
        name = f"{container_name}-{bogus}"
        try:
            _start_container(
                built_image, name,
                f"HERMES_GATEWAY_BOOTSTRAP_STATE={bogus}",
            )
            r = docker_exec_sh(
                name,
                "test -f /opt/data/gateway_state.json && "
                "echo EXISTS || echo ABSENT",
                timeout=10,
            )
            assert "ABSENT" in r.stdout, (
                f"bogus value {bogus!r} should not seed a state file: {r.stdout}"
            )
        finally:
            # Best-effort removal (no ``check``): it also covers the case
            # where the container was created but never became ready.
            subprocess.run(
                ["docker", "rm", "-f", name],
                capture_output=True, timeout=10,
            )
|
||||
@@ -26,12 +26,8 @@ import time
|
||||
from tests.docker.conftest import docker_exec_sh
|
||||
|
||||
|
||||
def _sh(container: str, command: str, timeout: int = 30):
|
||||
return docker_exec_sh(container, command, timeout=timeout)
|
||||
|
||||
|
||||
def _svstat(container: str, slot: str = "gateway-default") -> str:
|
||||
r = _sh(container, f"/command/s6-svstat /run/service/{slot}")
|
||||
r = docker_exec_sh(container, f"/command/s6-svstat /run/service/{slot}")
|
||||
return r.stdout if r.returncode == 0 else ""
|
||||
|
||||
|
||||
@@ -98,7 +94,7 @@ def test_gateway_run_redirects_to_supervised(
|
||||
# The CMD process (PID under /init that the wrapper exec'd into)
|
||||
# should be sleeping, not the gateway. We grep `ps` for the
|
||||
# `sleep infinity` heartbeat.
|
||||
r = _sh(container_name, "ps -eo pid,cmd | grep -v grep | grep 'sleep infinity'")
|
||||
r = docker_exec_sh(container_name, "ps -eo pid,cmd | grep -v grep | grep 'sleep infinity'")
|
||||
assert r.returncode == 0 and "sleep infinity" in r.stdout, (
|
||||
f"expected `sleep infinity` heartbeat process; got ps:\n{r.stdout}\n"
|
||||
f"stderr: {r.stderr}"
|
||||
@@ -175,7 +171,7 @@ def test_gateway_run_no_supervise_flag_preserves_legacy_behavior(
|
||||
if status == "running":
|
||||
# Gateway running in foreground — the CMD process should be
|
||||
# the gateway itself, NOT a sleep-infinity heartbeat.
|
||||
r = _sh(
|
||||
r = docker_exec_sh(
|
||||
container_name,
|
||||
"ps -eo pid,ppid,cmd | grep -v grep | awk '/main-wrapper.sh|rc.init top/ { wrapper_pid=$1 } "
|
||||
"$3==\"sleep\" && $4==\"infinity\" && $2==wrapper_pid { c++ } END { print c+0 }'",
|
||||
@@ -186,7 +182,7 @@ def test_gateway_run_no_supervise_flag_preserves_legacy_behavior(
|
||||
f"--no-supervise: expected NO `sleep infinity` parented to "
|
||||
f"the CMD wrapper (foreground gateway should be the CMD), "
|
||||
f"found {redirected_sleeps}. "
|
||||
f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
)
|
||||
|
||||
# The gateway-default s6 slot exists (the cont-init.d
|
||||
@@ -271,14 +267,14 @@ def test_supervised_gateway_does_not_recurse(
|
||||
# recursion guard fails, s6 would respawn fresh `gateway run`
|
||||
# processes on every cycle, leaving multiple Python-process
|
||||
# descendants under the gateway-default supervise tree.
|
||||
r = _sh(container_name, "ps -eo pid,cmd | grep -v grep | grep -E 'python.*hermes.*gateway run' | wc -l")
|
||||
r = docker_exec_sh(container_name, "ps -eo pid,cmd | grep -v grep | grep -E 'python.*hermes.*gateway run' | wc -l")
|
||||
assert r.returncode == 0
|
||||
n = int(r.stdout.strip() or 0)
|
||||
assert n <= 1, (
|
||||
f"expected at most one supervised python `hermes gateway run` "
|
||||
f"process (the legitimately-supervised gateway); found {n}. "
|
||||
f"Recursion guard may have failed. "
|
||||
f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
)
|
||||
|
||||
# Stronger positive assertion: there should be exactly one
|
||||
@@ -286,7 +282,7 @@ def test_supervised_gateway_does_not_recurse(
|
||||
# CMD process (PID 17 typically). The static `main-hermes`
|
||||
# service has its own `sleep infinity` child; THAT one is fine
|
||||
# and unrelated to our redirect.
|
||||
r = _sh(
|
||||
r = docker_exec_sh(
|
||||
container_name,
|
||||
# Find PID of the CMD process (main-wrapper.sh or its sh
|
||||
# parent), then count `sleep infinity` children.
|
||||
@@ -298,7 +294,7 @@ def test_supervised_gateway_does_not_recurse(
|
||||
assert redirected == 1, (
|
||||
f"expected exactly one `sleep infinity` parented to the CMD "
|
||||
f"wrapper (the redirect heartbeat); found {redirected}. "
|
||||
f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
)
|
||||
|
||||
|
||||
@@ -377,19 +373,18 @@ def test_supervised_gateway_stdout_reaches_docker_logs(
|
||||
"This means the `1` action directive in _render_log_run isn't "
|
||||
"forwarding stdout to /init. "
|
||||
f"docker logs (last 2000 chars):\n{combined[-2000:]}\n"
|
||||
f"file contents:\n{_sh(container_name, 'cat /opt/data/logs/gateways/default/current').stdout}"
|
||||
f"file contents:\n{docker_exec_sh(container_name, 'cat /opt/data/logs/gateways/default/current').stdout}"
|
||||
)
|
||||
|
||||
# Cross-check: the same banner must also be in the rotated log
|
||||
# file (we kept the file destination, just added stdout). The
|
||||
# file version has s6-log's ISO 8601 timestamp prefix; the
|
||||
# docker logs version is raw.
|
||||
file_contents = _sh(
|
||||
file_contents = docker_exec_sh(
|
||||
container_name, "cat /opt/data/logs/gateways/default/current",
|
||||
).stdout
|
||||
assert "⚕" in file_contents or "Hermes Gateway Starting" in file_contents, (
|
||||
"Banner also missing from rotated log file — the file "
|
||||
"destination may have been dropped by the new s6-log script. "
|
||||
f"File contents:\n{file_contents}"
|
||||
)
|
||||
|
||||
)
|
||||
169
tests/docker/test_home_override_scripts.py
Normal file
169
tests/docker/test_home_override_scripts.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""Runtime smoke tests for Docker HOME overrides and script behavior.
|
||||
|
||||
Build the real image and verify the actual runtime behavior:
|
||||
|
||||
1. main-wrapper preserves the Docker ``-w`` working directory
|
||||
2. dashboard service resets HOME to /opt/data before privilege drop
|
||||
3. dashboard does not auto-add ``--insecure`` from a non-loopback bind host
|
||||
4. stage2 hook repairs profiles/ and cron/ ownership on every boot
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container, restart_container
|
||||
|
||||
|
||||
def test_main_wrapper_preserves_docker_workdir(
    built_image: str, container_name: str,
) -> None:
    """The container command must run in the Docker ``-w`` directory.

    Regression test for #35472: the main-wrapper changes into /opt/data
    internally and has to restore the caller's working directory before
    handing over to the command. The container is run with ``-w /tmp`` and
    a command that prints its cwd; the printed directory must be ``/tmp``.
    """
    run_cmd = [
        "docker", "run", "--rm", "-w", "/tmp",
        built_image, "sh", "-c", "pwd",
    ]
    proc = subprocess.run(run_cmd, capture_output=True, text=True, timeout=60)
    assert proc.returncode == 0, f"container failed: {proc.stderr[-1000:]}"

    # Boot logs from the stage2 hook (config migration, skills sync) land
    # on stdout ahead of the command output, so only the final line is
    # the `pwd` result.
    reported_cwd = proc.stdout.strip().rpartition("\n")[2].strip()
    assert reported_cwd == "/tmp", (
        f"expected cwd /tmp, got {reported_cwd!r} — "
        f"main-wrapper did not preserve the Docker -w directory"
    )
|
||||
|
||||
|
||||
def test_dashboard_service_resets_home(
    built_image: str, container_name: str,
) -> None:
    """The dashboard service must run with HOME=/opt/data.

    The run script has to export HOME=/opt/data before dropping
    privileges; otherwise HOME-anchored state (discord lockfile, XDG dirs)
    would target /root, the HOME of the /init context.

    The container is started with HERMES_DASHBOARD=1 bound to 127.0.0.1
    (the auth gate does not engage on loopback binds). If a dashboard
    process is found, its HOME is read from ``/proc/<pid>/environ``;
    if none is running, the check falls back to grepping the run script
    for the ``export HOME=/opt/data`` line.
    """
    start_container(
        built_image,
        container_name,
        "HERMES_DASHBOARD=1",
        "HERMES_DASHBOARD_HOST=127.0.0.1",
    )

    # Either branch of this script prints "HOME=/opt/data" on success:
    # the live process environment, or the marker echoed after the grep.
    home_probe = (
        'pid=$(pgrep -f "hermes dashboard" | head -1); '
        'if [ -n "$pid" ]; then '
        ' tr "\\0" "\\n" < /proc/$pid/environ | grep "^HOME="; '
        'else '
        ' grep -q "export HOME=/opt/data" '
        ' /opt/hermes/docker/s6-rc.d/dashboard/run && '
        ' echo "HOME=/opt/data"; '
        'fi'
    )
    result = docker_exec_sh(container_name, home_probe, timeout=15)

    assert "HOME=/opt/data" in result.stdout, (
        f"dashboard process or run script does not set HOME=/opt/data: "
        f"stdout={result.stdout!r} stderr={result.stderr!r}"
    )
|
||||
|
||||
|
||||
def test_dashboard_does_not_auto_insecure_from_host(
    built_image: str, container_name: str,
) -> None:
    """``--insecure`` must never be derived from HERMES_DASHBOARD_HOST.

    The auth gate is the authority on whether a non-loopback bind is
    safe. The container is started with a non-loopback bind host and the
    dashboard process command line is inspected for ``--insecure``.

    A dashboard that is not running at all (for example because the auth
    gate blocked an unauthenticated non-loopback bind) is also acceptable:
    the only thing under test is that the flag is not auto-added.
    """
    start_container(
        built_image,
        container_name,
        "HERMES_DASHBOARD=1",
        "HERMES_DASHBOARD_HOST=0.0.0.0",
    )

    # Prints the NUL-separated argv of the dashboard process with spaces,
    # or nothing when no such process exists.
    cmdline_probe = (
        'pid=$(pgrep -f "hermes dashboard" | head -1); '
        'if [ -n "$pid" ]; then '
        ' tr "\\0" " " < /proc/$pid/cmdline; '
        'fi'
    )
    cmdline = docker_exec_sh(container_name, cmdline_probe, timeout=10).stdout.strip()

    if not cmdline:
        # No dashboard process: nothing to inspect, and that is allowed.
        return

    assert "--insecure" not in cmdline, (
        f"dashboard process has --insecure in cmdline (auto-derived "
        f"from host): {cmdline!r}"
    )
|
||||
|
||||
|
||||
def test_stage2_repairs_profiles_and_cron_ownership(
    built_image: str, container_name: str,
) -> None:
    """profiles/ and cron/ must both be reclaimed after root-context writes.

    The stage2 hook chowns these dirs to hermes:hermes on every boot.
    We simulate a root-owned file in each, then restart the container
    and verify ownership is repaired.

    The owner of *each* file is checked individually. A substring match
    on the combined ``stat`` output would pass as soon as one of the two
    files had the expected owner, hiding a regression in the other
    directory (or a file that was never created).
    """
    start_container(built_image, container_name)

    # Create root-owned files in profiles/ and cron/ to simulate
    # docker exec (root) writes. Both parent directories are created
    # explicitly so neither `touch` depends on what the image pre-seeds.
    docker_exec(
        container_name, "mkdir", "-p", "/opt/data/profiles/testprof",
        user="root", timeout=5,
    )
    docker_exec(
        container_name, "touch", "/opt/data/profiles/testprof/marker",
        user="root", timeout=5,
    )
    docker_exec(
        container_name, "mkdir", "-p", "/opt/data/cron",
        user="root", timeout=5,
    )
    docker_exec(
        container_name, "touch", "/opt/data/cron/root_owned.json",
        user="root", timeout=5,
    )

    # Prints one owner name per file, in argument order.
    stat_cmd = (
        'stat -c "%U" /opt/data/profiles/testprof/marker '
        '/opt/data/cron/root_owned.json'
    )

    # Verify they're root-owned before restart.
    r = docker_exec_sh(container_name, stat_cmd, timeout=5)
    assert r.stdout.split() == ["root", "root"], (
        f"expected root-owned files before restart, got: {r.stdout!r}"
    )

    # Restart — stage2 hook runs again and repairs ownership.
    restart_container(container_name)

    # Verify both files are now owned by hermes.
    r = docker_exec_sh(container_name, stat_cmd, timeout=5)
    assert r.stdout.split() == ["hermes", "hermes"], (
        f"expected hermes-owned files after restart, got: {r.stdout!r} — "
        f"stage2 hook did not repair profiles/ and cron/ ownership"
    )
|
||||
140
tests/docker/test_immutable_install.py
Normal file
140
tests/docker/test_immutable_install.py
Normal file
@@ -0,0 +1,140 @@
|
||||
"""Runtime smoke tests for Docker immutable install tree and install-method stamp.
|
||||
|
||||
Build the real image and verify at runtime:
|
||||
|
||||
1. /opt/hermes is not writable by the hermes user (immutable install tree)
|
||||
2. PYTHONDONTWRITEBYTECODE and HERMES_DISABLE_LAZY_INSTALLS are set
|
||||
3. /opt/hermes/.install_method contains "docker" (code-scoped stamp)
|
||||
4. $HERMES_HOME/.install_method is NOT stamped as "docker" by stage2
|
||||
5. A stale "docker" stamp in $HERMES_HOME is healed (removed) on boot
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import (
|
||||
docker_exec,
|
||||
docker_exec_sh,
|
||||
restart_container,
|
||||
start_container,
|
||||
)
|
||||
|
||||
|
||||
def test_install_tree_not_writable_by_hermes(
    built_image: str, container_name: str,
) -> None:
    """Writes under /opt/hermes must fail from inside the container.

    The install tree (source, venv, TUI bundle, node_modules) has to stay
    root-owned and non-writable so an agent session cannot modify the
    installation and brick the gateway. Both the tree root and the
    ``.venv`` subdirectory are probed with ``touch``.
    """
    start_container(built_image, container_name)

    # (shell probe, failure-message prefix) — root of the tree first, then
    # a key subdirectory.
    # NOTE(review): presumably docker_exec_sh runs as the hermes user by
    # default — confirm in tests/docker/conftest.py.
    write_probes = (
        (
            "touch /opt/hermes/test_write 2>&1 && "
            "echo WRITE_SUCCEEDED || echo WRITE_FAILED",
            "hermes user can write to /opt/hermes (install tree not immutable): ",
        ),
        (
            "touch /opt/hermes/.venv/test_write 2>&1 && "
            "echo WRITE_SUCCEEDED || echo WRITE_FAILED",
            "hermes user can write to /opt/hermes/.venv: ",
        ),
    )
    for shell_cmd, complaint in write_probes:
        outcome = docker_exec_sh(container_name, shell_cmd, timeout=10)
        assert "WRITE_FAILED" in outcome.stdout, f"{complaint}{outcome.stdout}"
|
||||
|
||||
|
||||
def test_hermes_disable_lazy_installs_and_dont_write_bytecode(
    built_image: str, container_name: str,
) -> None:
    """Both immutability guards must be set to ``1`` in the container env.

    PYTHONDONTWRITEBYTECODE keeps .pyc files out of the immutable install
    tree; HERMES_DISABLE_LAZY_INSTALLS stops lazy installs from trying to
    modify it.
    """
    start_container(built_image, container_name)

    # A single marker is printed: ENV_OK only when both variables are "1".
    env_probe = (
        'test "$PYTHONDONTWRITEBYTECODE" = "1" && '
        'test "$HERMES_DISABLE_LAZY_INSTALLS" = "1" && '
        'echo ENV_OK || echo ENV_MISSING'
    )
    result = docker_exec_sh(container_name, env_probe, timeout=10)

    assert "ENV_OK" in result.stdout, (
        f"expected PYTHONDONTWRITEBYTECODE=1 and "
        f"HERMES_DISABLE_LAZY_INSTALLS=1, got: {result.stdout} stderr={result.stderr}"
    )
|
||||
|
||||
|
||||
def test_install_method_stamp_is_code_scoped(
    built_image: str, container_name: str,
) -> None:
    """The 'docker' stamp lives in /opt/hermes, never in $HERMES_HOME.

    ``/opt/hermes/.install_method`` must exist and contain ``docker``;
    ``/opt/data/.install_method`` must either be absent or hold some
    other value.
    """
    start_container(built_image, container_name)

    # Code-scoped stamp: must be readable and say exactly "docker".
    code_stamp = docker_exec_sh(
        container_name, "cat /opt/hermes/.install_method", timeout=10,
    )
    assert code_stamp.returncode == 0, (
        f"/opt/hermes/.install_method not found: {code_stamp.stderr}"
    )
    assert code_stamp.stdout.strip() == "docker", (
        f"expected 'docker' stamp, got: {code_stamp.stdout.strip()!r}"
    )

    # Data-volume stamp: a missing file is reported as NONE by the probe,
    # so only a literal "docker" value is a failure.
    home_stamp = docker_exec_sh(
        container_name,
        "cat /opt/data/.install_method 2>/dev/null || echo NONE",
        timeout=10,
    )
    assert home_stamp.stdout.strip() != "docker", (
        f"$HERMES_HOME/.install_method is stamped 'docker' - stage2 must "
        f"not stamp the data volume (shared with host installs)"
    )
|
||||
|
||||
|
||||
def test_stale_docker_stamp_in_home_is_healed_on_boot(
    built_image: str, container_name: str,
) -> None:
    """A stale 'docker' stamp left in $HERMES_HOME by an older image
    must be removed on boot so shared homes self-heal.

    The stamp is written as root, the container is restarted, and the
    stamp file is inspected again. The post-restart probe must itself
    succeed: an exec that fails (for example because the container did
    not come back up) produces empty stdout, which must not be mistaken
    for a healed stamp.
    """
    # Start container, write a stale stamp
    start_container(built_image, container_name)

    # Write a stale 'docker' stamp as root
    docker_exec(
        container_name, "sh", "-c",
        "printf 'docker\\n' > /opt/data/.install_method",
        user="root", timeout=5,
    )
    # Verify it exists
    r = docker_exec_sh(container_name, "cat /opt/data/.install_method", timeout=5)
    assert r.stdout.strip() == "docker"

    # Restart - stage2 should heal it
    restart_container(container_name)

    # The stale stamp must be gone. The probe prints the file content if
    # the file still exists and HEALED otherwise; in both cases the shell
    # exits 0, so a non-zero status means the exec itself failed.
    r = docker_exec_sh(
        container_name,
        "test -f /opt/data/.install_method && "
        "cat /opt/data/.install_method || echo HEALED",
        timeout=10,
    )
    assert r.returncode == 0, (
        f"could not inspect $HERMES_HOME/.install_method after restart: "
        f"stdout={r.stdout!r} stderr={r.stderr!r}"
    )
    assert r.stdout.strip() != "docker", (
        f"stale 'docker' stamp in $HERMES_HOME was not healed on boot: "
        f"{r.stdout}"
    )
|
||||
26
tests/docker/test_license_file_present.py
Normal file
26
tests/docker/test_license_file_present.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Runtime smoke test for Docker image license-file presence.
|
||||
|
||||
Build the real image and verify the LICENSE file is present inside the
|
||||
container (PEP 639 license-files metadata must resolve inside the
|
||||
Docker image).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
|
||||
def test_docker_image_contains_license_file(built_image: str) -> None:
    """``/opt/hermes/LICENSE`` must exist inside the built image.

    PEP 639 license-files metadata references LICENSE, so the Docker
    build context must not exclude it. The check overrides the image
    entrypoint with ``test`` and relies on its exit status.
    """
    license_check = [
        "docker", "run", "--rm", "--entrypoint", "test",
        built_image, "-f", "/opt/hermes/LICENSE",
    ]
    proc = subprocess.run(
        license_check, capture_output=True, text=True, timeout=60,
    )

    assert proc.returncode == 0, (
        f"LICENSE file not found at /opt/hermes/LICENSE inside the Docker "
        f"image: {proc.stderr[-500:]}"
    )
|
||||
47
tests/docker/test_log_dir_seed.py
Normal file
47
tests/docker/test_log_dir_seed.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""Runtime smoke test for Docker $HERMES_HOME/logs/gateways seeding.
|
||||
|
||||
Build the real image and verify logs/ and logs/gateways/ exist and are
|
||||
owned by the hermes user after container boot.
|
||||
|
||||
Regression guard for #45258: if the first gateway log service runs in
|
||||
root context, logs/gateways/ is created root-owned; every profile
|
||||
registered later runs its log service as the dropped hermes user and
|
||||
s6-log crash-loops on mkdir: Permission denied.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import docker_exec_sh, start_container
|
||||
|
||||
|
||||
def test_logs_gateways_seeded_and_hermes_owned(
    built_image: str, container_name: str,
) -> None:
    """After boot, logs/ and logs/gateways/ exist and belong to hermes."""
    start_container(built_image, container_name)

    # Step 1: both directories are present.
    existence_probe = (
        "test -d /opt/data/logs && "
        "test -d /opt/data/logs/gateways && "
        "echo DIRS_OK || echo DIRS_MISSING"
    )
    seeded = docker_exec_sh(container_name, existence_probe, timeout=10)
    assert "DIRS_OK" in seeded.stdout, (
        f"logs/ or logs/gateways/ not seeded: {seeded.stdout}"
    )

    # Step 2: both directories are owned by hermes. The probe reports the
    # two owners on one line as "logs=<user> gateways=<user>".
    owner_probe = (
        'logs_owner=$(stat -c "%U" /opt/data/logs); '
        'gateways_owner=$(stat -c "%U" /opt/data/logs/gateways); '
        'echo "logs=$logs_owner gateways=$gateways_owner"'
    )
    owners = docker_exec_sh(container_name, owner_probe, timeout=10)
    for expected_token, dir_label in (
        ("logs=hermes", "logs/"),
        ("gateways=hermes", "logs/gateways/"),
    ):
        assert expected_token in owners.stdout, (
            f"{dir_label} not owned by hermes: {owners.stdout}"
        )
|
||||
@@ -69,12 +69,7 @@ def _svstat_wants_up(container: str) -> bool:
|
||||
def test_profile_create_then_gateway_start(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(3)
|
||||
start_container(built_image, container_name, cmd="sleep 120")
|
||||
|
||||
r = _sh(container_name, f"hermes profile create {PROFILE}")
|
||||
assert r.returncode == 0, f"profile create failed: {r.stderr}"
|
||||
@@ -114,12 +109,7 @@ def test_profile_delete_stops_gateway(
|
||||
) -> None:
|
||||
"""Deleting a profile should stop its gateway and remove the s6
|
||||
service slot."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(3)
|
||||
start_container(built_image, container_name, cmd="sleep 120")
|
||||
|
||||
_sh(container_name, f"hermes profile create {PROFILE}")
|
||||
_sh(container_name, f"hermes -p {PROFILE} gateway start", timeout=60)
|
||||
@@ -135,4 +125,4 @@ def test_profile_delete_stops_gateway(
|
||||
time.sleep(2)
|
||||
# Service slot should be gone.
|
||||
r = _sh(container_name, f"test -d /run/service/gateway-{PROFILE}")
|
||||
assert r.returncode != 0, "s6 service slot still present after profile delete"
|
||||
assert r.returncode != 0, "s6 service slot still present after profile delete"
|
||||
88
tests/docker/test_puid_pgid_remap.py
Normal file
88
tests/docker/test_puid_pgid_remap.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""Runtime smoke tests for Docker PUID/PGID and UID/GID remap.
|
||||
|
||||
Build the real image and verify the actual runtime behavior:
|
||||
|
||||
1. PUID/PGID env vars remap the hermes user UID/GID at boot
|
||||
2. HERMES_UID/HERMES_GID take precedence over PUID/PGID aliases
|
||||
3. NAS-style low UIDs (99:100) are accepted and remapped
|
||||
4. Invalid UIDs are rejected (not yet exercised by a test in this module)
|
||||
5. The remapped user can write to the data volume
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import docker_exec_sh, start_container
|
||||
|
||||
|
||||
def test_puid_pgid_remaps_hermes_user(
    built_image: str, container_name: str,
) -> None:
    """With PUID=1000 and PGID=1000, hermes ends up as UID 1000 / GID 1000."""
    start_container(built_image, container_name, "PUID=1000", "PGID=1000")

    # (id invocation, description used in the failure message); the UID
    # is checked before the GID.
    id_checks = (
        ("id -u hermes", "UID 1000 after PUID remap"),
        ("id -g hermes", "GID 1000 after PGID remap"),
    )
    for id_cmd, expectation in id_checks:
        reported = docker_exec_sh(container_name, id_cmd, timeout=10)
        assert reported.stdout.strip() == "1000", (
            f"expected hermes {expectation}, got: {reported.stdout.strip()}"
        )
|
||||
|
||||
|
||||
def test_hermes_uid_gid_take_precedence_over_aliases(
    built_image: str, container_name: str,
) -> None:
    """When both spellings are set, HERMES_UID/HERMES_GID beat PUID/PGID."""
    start_container(
        built_image,
        container_name,
        "HERMES_UID=2000",
        "HERMES_GID=2001",
        "PUID=1000",
        "PGID=1000",
    )

    # (id invocation, expected value, description for the failure message)
    precedence_checks = (
        ("id -u hermes", "2000", "UID 2000 (HERMES_UID wins)"),
        ("id -g hermes", "2001", "GID 2001 (HERMES_GID wins)"),
    )
    for id_cmd, wanted, expectation in precedence_checks:
        reported = docker_exec_sh(container_name, id_cmd, timeout=10)
        assert reported.stdout.strip() == wanted, (
            f"expected hermes {expectation}, got: {reported.stdout.strip()}"
        )
|
||||
|
||||
|
||||
def test_nas_low_uid_accepted(
    built_image: str, container_name: str,
) -> None:
    """Low NAS-style IDs (99:100, common on Unraid) are accepted and applied."""
    start_container(built_image, container_name, "PUID=99", "PGID=100")

    # (id invocation, expected value, description for the failure message)
    low_id_checks = (
        ("id -u hermes", "99", "UID 99"),
        ("id -g hermes", "100", "GID 100"),
    )
    for id_cmd, wanted, expectation in low_id_checks:
        reported = docker_exec_sh(container_name, id_cmd, timeout=10)
        assert reported.stdout.strip() == wanted, (
            f"expected hermes {expectation}, got: {reported.stdout.strip()}"
        )
|
||||
|
||||
|
||||
def test_remap_enables_data_volume_writes(
    built_image: str, container_name: str,
) -> None:
    """After a PUID/PGID remap, a file can still be created under /opt/data."""
    start_container(built_image, container_name, "PUID=1000", "PGID=1000")

    # Prints WRITE_OK when the touch succeeds, WRITE_FAIL otherwise.
    write_probe = "touch /opt/data/test_write && echo WRITE_OK || echo WRITE_FAIL"
    outcome = docker_exec_sh(container_name, write_probe, timeout=10)

    assert "WRITE_OK" in outcome.stdout, (
        f"hermes user cannot write to /opt/data after remap: {outcome.stdout}"
    )
|
||||
@@ -22,7 +22,7 @@ from __future__ import annotations
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from tests.docker.conftest import docker_exec
|
||||
from tests.docker.conftest import docker_exec, start_container
|
||||
|
||||
|
||||
_REGISTER_SCRIPT = """
|
||||
@@ -45,49 +45,39 @@ print("UNREGISTERED")
|
||||
"""
|
||||
|
||||
|
||||
def _exec(container: str, *args: str, timeout: int = 30) -> subprocess.CompletedProcess:
|
||||
return docker_exec(container, *args, timeout=timeout)
|
||||
|
||||
|
||||
def test_s6_register_creates_service_dir_in_live_container(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
"""S6ServiceManager.register_profile_gateway must create
|
||||
``/run/service/gateway-<profile>/`` and trigger s6-svscan rescan
|
||||
against the real s6 supervision tree."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
# Give the supervision tree a moment to come up.
|
||||
time.sleep(3)
|
||||
start_container(built_image, container_name, cmd="sleep 120")
|
||||
|
||||
r = _exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
|
||||
r = docker_exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
|
||||
assert "REGISTERED" in r.stdout, (
|
||||
f"register failed: stderr={r.stderr!r} stdout={r.stdout!r}"
|
||||
)
|
||||
|
||||
# Service directory exists with the expected structure.
|
||||
r = _exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
|
||||
r = docker_exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
|
||||
assert r.returncode == 0, "service directory not created"
|
||||
|
||||
r = _exec(container_name, "test", "-f", "/run/service/gateway-phase3test/run")
|
||||
r = docker_exec(container_name, "test", "-f", "/run/service/gateway-phase3test/run")
|
||||
assert r.returncode == 0, "run script not created"
|
||||
|
||||
r = _exec(container_name, "test", "-f",
|
||||
r = docker_exec(container_name, "test", "-f",
|
||||
"/run/service/gateway-phase3test/log/run")
|
||||
assert r.returncode == 0, "log/run script not created"
|
||||
|
||||
# s6-svscan picked it up — s6-svstat works against the dir.
|
||||
# `docker exec` doesn't put /command/ on PATH (only the supervision
|
||||
# tree does), so call s6-svstat by absolute path.
|
||||
r = _exec(container_name, "/command/s6-svstat",
|
||||
r = docker_exec(container_name, "/command/s6-svstat",
|
||||
"/run/service/gateway-phase3test")
|
||||
assert r.returncode == 0, f"s6-svstat failed: {r.stderr or r.stdout}"
|
||||
|
||||
# list_profile_gateways picks it up.
|
||||
r = _exec(container_name, "python3", "-c", (
|
||||
r = docker_exec(container_name, "python3", "-c", (
|
||||
"from hermes_cli.service_manager import S6ServiceManager;"
|
||||
"print(S6ServiceManager().list_profile_gateways())"
|
||||
))
|
||||
@@ -108,22 +98,22 @@ def test_s6_unregister_removes_service_dir_in_live_container(
|
||||
time.sleep(3)
|
||||
|
||||
# First register so we have something to unregister.
|
||||
r = _exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
|
||||
r = docker_exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
|
||||
assert "REGISTERED" in r.stdout
|
||||
|
||||
# Then unregister.
|
||||
r = _exec(container_name, "python3", "-c", _UNREGISTER_SCRIPT, timeout=30)
|
||||
r = docker_exec(container_name, "python3", "-c", _UNREGISTER_SCRIPT, timeout=30)
|
||||
assert "UNREGISTERED" in r.stdout, (
|
||||
f"unregister failed: stderr={r.stderr!r} stdout={r.stdout!r}"
|
||||
)
|
||||
|
||||
# Directory is gone.
|
||||
r = _exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
|
||||
r = docker_exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
|
||||
assert r.returncode != 0, "service directory still exists after unregister"
|
||||
|
||||
# list_profile_gateways no longer includes it.
|
||||
r = _exec(container_name, "python3", "-c", (
|
||||
r = docker_exec(container_name, "python3", "-c", (
|
||||
"from hermes_cli.service_manager import S6ServiceManager;"
|
||||
"print(S6ServiceManager().list_profile_gateways())"
|
||||
))
|
||||
assert "phase3test" not in r.stdout
|
||||
assert "phase3test" not in r.stdout
|
||||
82
tests/docker/test_stage2_browser_discovery.py
Normal file
82
tests/docker/test_stage2_browser_discovery.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""Runtime smoke tests for Docker stage2 browser executable discovery.
|
||||
|
||||
Build the real image and verify the chromium binary is actually
|
||||
discovered at boot: ``AGENT_BROWSER_EXECUTABLE_PATH`` is set, points to
|
||||
a real executable, and is a browser binary (not a shared library picked
|
||||
up by a broad ``find | grep``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import docker_exec_sh, start_container
|
||||
|
||||
|
||||
def test_stage2_discovers_chromium_binary(
    built_image: str, container_name: str,
) -> None:
    """stage2 must publish a real browser as AGENT_BROWSER_EXECUTABLE_PATH.

    The hook is expected to find the Playwright chromium binary by file
    name rather than with a broad ``find | grep``: shared libraries
    (libGLESv2.so etc.) inherit the executable bit from Playwright's
    tarball and must not be selected. Three things are verified: the
    variable is published and non-empty, the path is executable, and its
    basename is one of the known browser binary names.
    """
    start_container(built_image, container_name)

    # 1) The value is published through s6's container_environment dir.
    env_read = docker_exec_sh(
        container_name,
        "cat /run/s6/container_environment/AGENT_BROWSER_EXECUTABLE_PATH",
        timeout=10,
    )
    assert env_read.returncode == 0, (
        f"AGENT_BROWSER_EXECUTABLE_PATH not set by stage2 hook: {env_read.stderr}"
    )
    browser_path = env_read.stdout.strip()
    assert browser_path, "AGENT_BROWSER_EXECUTABLE_PATH is empty"

    # 2) The path points at something executable.
    exec_check = docker_exec_sh(
        container_name, f'test -x "{browser_path}"', timeout=5,
    )
    assert exec_check.returncode == 0, (
        f"discovered browser path is not executable: {browser_path}"
    )

    # 3) The basename is a browser binary, not a shared library.
    accepted_names = (
        "chrome", "chromium", "chrome-headless-shell",
        "headless_shell", "chromium-browser",
    )
    name_lookup = docker_exec_sh(
        container_name, f'basename "{browser_path}"', timeout=5,
    )
    basename = name_lookup.stdout.strip()
    assert basename in accepted_names, (
        f"discovered binary basename {basename!r} is not a recognized "
        f"browser name (accepted: {accepted_names}) — the discovery may "
        f"have picked up a shared library (.so) instead of the real browser"
    )
|
||||
|
||||
|
||||
def test_stage2_browser_path_accessible_to_hermes_user(
    built_image: str, container_name: str,
) -> None:
    """The browser binary named by AGENT_BROWSER_EXECUTABLE_PATH must be
    readable and executable for the unprivileged hermes user (UID 10000),
    because that user runs the agent-browser subprocesses.

    NOTE(review): relies on ``docker_exec_sh`` running the probe as the
    hermes user — confirm against the conftest helper.
    """
    start_container(built_image, container_name)

    # Resolve the path inside the container, then probe read + execute bits.
    probe = (
        'path="$(cat /run/s6/container_environment/AGENT_BROWSER_EXECUTABLE_PATH)" '
        '&& test -r "$path" && test -x "$path"'
    )
    outcome = docker_exec_sh(container_name, probe, timeout=10)

    assert outcome.returncode == 0, (
        f"browser binary not readable+executable by hermes user: {outcome.stderr}"
    )
|
||||
54
tests/docker/test_tini_compat_shim.py
Normal file
54
tests/docker/test_tini_compat_shim.py
Normal file
@@ -0,0 +1,54 @@
|
||||
"""Runtime smoke test for the Docker tini compatibility shim (#34192).
|
||||
|
||||
Build the real image and verify:
|
||||
|
||||
1. /usr/bin/tini exists and is a symlink to /init (the compat shim
|
||||
for orchestration templates that still reference /usr/bin/tini)
|
||||
2. The actual ENTRYPOINT is /init (s6-overlay), not /usr/bin/tini
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
|
||||
def test_tini_compat_symlink_exists(built_image: str) -> None:
    """/usr/bin/tini must be a symlink that resolves to /init.

    Regression for #34192: orchestration templates (e.g. Hostinger's
    'Hermes WebUI' catalog) still pin /usr/bin/tini as the entrypoint.
    The shim symlinks it to /init so legacy wrappers exec the right
    PID-1 reaper without behavior change.
    """
    # Run with the entrypoint overridden to sh so only the filesystem layout
    # is inspected.
    shell_check = (
        'test -L /usr/bin/tini && '
        'test "$(readlink -f /usr/bin/tini)" = "/init"'
    )
    argv = [
        "docker", "run", "--rm", "--entrypoint", "sh",
        built_image, "-c", shell_check,
    ]
    proc = subprocess.run(argv, capture_output=True, text=True, timeout=60)

    assert proc.returncode == 0, (
        f"/usr/bin/tini is not a symlink to /init: {proc.stderr[-500:]}"
    )
|
||||
|
||||
|
||||
def test_entrypoint_is_init_not_tini(built_image: str) -> None:
    """The image's actual ENTRYPOINT must be /init (s6-overlay).

    The tini shim is only for legacy external wrappers; the image's own
    runtime must continue to use the canonical /init.
    """
    # Imported locally so this test does not depend on the file-level imports.
    import json

    r = subprocess.run(
        ["docker", "inspect", built_image,
         "--format", "{{json .Config.Entrypoint}}"],
        capture_output=True, text=True, timeout=30,
    )
    assert r.returncode == 0, f"docker inspect failed: {r.stderr}"
    raw = r.stdout.strip()

    # The template renders a JSON array, or ``null`` when no entrypoint is
    # configured.  Parse it instead of substring-matching the raw text, which
    # would also accept e.g. "/sbin/init".
    entrypoint = json.loads(raw) or []

    # The entrypoint array should be ["/init", "/opt/hermes/docker/main-wrapper.sh"]
    assert entrypoint and entrypoint[0] == "/init", (
        f"ENTRYPOINT is not /init: {raw!r}"
    )
    # /usr/bin/tini should NOT be in the entrypoint.
    assert not any("tini" in str(part).lower() for part in entrypoint), (
        f"ENTRYPOINT references tini instead of /init: {raw!r}"
    )
|
||||
93
tests/docker/test_toplevel_chown.py
Normal file
93
tests/docker/test_toplevel_chown.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""Runtime smoke tests for Docker top-level state-file ownership repair.
|
||||
|
||||
Build the real image and verify the actual runtime behavior:
|
||||
|
||||
1. Root-owned top-level state files (auth.json, state.db, gateway.lock,
|
||||
gateway_state.json) are chowned to hermes on boot
|
||||
2. Non-allowlisted host-owned files are NOT touched (targeted, not
|
||||
blanket find -user root sweep)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import (
|
||||
docker_exec,
|
||||
docker_exec_sh,
|
||||
restart_container,
|
||||
start_container,
|
||||
)
|
||||
|
||||
|
||||
# The files the stage2 hook should repair (mirrors the allowlist in
|
||||
# stage2-hook.sh). We test a representative subset.
|
||||
ALLOWLISTED_FILES = ("auth.json", "state.db", "gateway.lock", "gateway_state.json")
|
||||
|
||||
|
||||
def test_root_owned_state_files_repaired_on_boot(
    built_image: str, container_name: str,
) -> None:
    """Root-owned top-level state files must be chowned to hermes on boot."""
    start_container(built_image, container_name)

    paths = [f"/opt/data/{name}" for name in ALLOWLISTED_FILES]
    # A single ``stat`` call with every path as an operand prints one owner
    # name per line.  (Joining complete ``stat -c %U <path>`` commands with
    # spaces would feed the words "stat", "-c" and "%U" back in as operands.)
    stat_cmd = "stat -c %U " + " ".join(paths)

    # Create root-owned state files to simulate docker exec (root) writes
    for path in paths:
        docker_exec(
            container_name, "touch", path,
            user="root", timeout=5,
        )

    # Verify they're root-owned
    r = docker_exec_sh(container_name, stat_cmd, timeout=5)
    assert r.returncode == 0, f"stat failed before restart: {r.stderr}"
    owners = r.stdout.split()
    # Guard against a vacuous pass: every file must have reported an owner.
    assert len(owners) == len(paths), (
        f"expected {len(paths)} owners before restart, got: {owners}"
    )
    for line in owners:
        assert line == "root", f"expected root-owned, got: {line}"

    # Restart - stage2 should repair ownership
    restart_container(container_name)

    # Verify files are now hermes-owned
    r = docker_exec_sh(container_name, stat_cmd, timeout=5)
    assert r.returncode == 0, f"stat failed after restart: {r.stderr}"
    owners = r.stdout.split()
    assert len(owners) == len(paths), (
        f"expected {len(paths)} owners after restart, got: {owners}"
    )
    for line in owners:
        assert line == "hermes", (
            f"expected hermes-owned after restart, got: {line}"
        )
|
||||
|
||||
|
||||
def test_non_allowlisted_host_file_not_touched(
    built_image: str, container_name: str,
) -> None:
    """A root-owned file outside the allowlist must keep its owner across a
    restart.

    Regression guard for #19788 / #19795: a bind-mounted $HERMES_HOME may
    contain host-owned files Hermes does not manage.
    """
    host_file = "/opt/data/host_secret.json"
    start_container(built_image, container_name)

    # Create the file as root, then chown it explicitly so the precondition
    # does not depend on touch's default ownership.
    setup_steps = (
        ("touch", host_file),
        ("chown", "root:root", host_file),
    )
    for argv in setup_steps:
        docker_exec(container_name, *argv, user="root", timeout=5)

    restart_container(container_name)

    # The file must STILL be root-owned (not touched by stage2).
    stat_result = docker_exec_sh(
        container_name,
        "stat -c %U /opt/data/host_secret.json",
        timeout=5,
    )
    owner = stat_result.stdout.strip()
    assert owner == "root", (
        f"non-allowlisted host file was chowned by stage2 (should be "
        f"preserved): {owner}"
    )
|
||||
66
tests/docker/test_user_flag_guard.py
Normal file
66
tests/docker/test_user_flag_guard.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Runtime smoke tests for Docker --user flag guard.
|
||||
|
||||
Build the real image and verify the actual runtime behavior:
|
||||
|
||||
1. docker run --user <arbitrary-uid> is rejected with actionable guidance
|
||||
2. Root start (default) works fine
|
||||
3. --user <hermes-uid> (10000) is allowed (supported non-root start)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
|
||||
def test_arbitrary_user_uid_rejected(
    built_image: str,
) -> None:
    """``docker run --user 1000:1000`` must fail and explain how to fix it."""
    argv = [
        "docker", "run", "--rm", "--user", "1000:1000",
        built_image, "echo", "should_not_reach",
    ]
    proc = subprocess.run(argv, capture_output=True, text=True, timeout=60)
    stdout = proc.stdout

    # The container must neither start successfully nor run the command.
    assert proc.returncode != 0, (
        f"container started with arbitrary --user UID unexpectedly: {stdout}"
    )
    assert "should_not_reach" not in stdout, (
        f"container ran despite --user rejection: {stdout}"
    )

    combined = stdout + proc.stderr
    tail = combined[-500:]
    assert "not supported" in combined.lower(), (
        f"rejection message missing 'not supported': {tail}"
    )
    # Must mention at least one of the remediation env vars.
    mentions_fix = any(var in combined for var in ("HERMES_UID", "PUID"))
    assert mentions_fix, (
        f"rejection message missing remediation guidance: {tail}"
    )
|
||||
|
||||
|
||||
def test_root_start_works(
    built_image: str,
) -> None:
    """Starting the image without ``--user`` (the default) must succeed."""
    argv = ["docker", "run", "--rm", built_image, "sh", "-c", "echo OK"]
    proc = subprocess.run(argv, capture_output=True, text=True, timeout=60)

    assert proc.returncode == 0, f"root start failed: {proc.stderr[-500:]}"
    assert "OK" in proc.stdout
|
||||
|
||||
|
||||
def test_user_pinned_to_hermes_uid_works(
    built_image: str,
) -> None:
    """``docker run --user 10000:10000`` (the hermes UID) must be accepted.

    This is the supported non-root start from #34648 / #34837.
    """
    argv = [
        "docker", "run", "--rm", "--user", "10000:10000",
        built_image, "sh", "-c", "echo OK",
    ]
    proc = subprocess.run(argv, capture_output=True, text=True, timeout=60)

    assert proc.returncode == 0, (
        f"--user 10000:10000 (hermes UID) was rejected: {proc.stderr[-500:]}"
    )
    assert "OK" in proc.stdout
|
||||
@@ -12,22 +12,16 @@ docstring.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container, start_container
|
||||
|
||||
|
||||
def test_orphan_zombies_reaped(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
"""Spawn an orphan child that exits immediately. PID 1 must reap it."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "60"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(2)
|
||||
start_container(built_image, container_name, cmd="sleep 60")
|
||||
|
||||
# `( ( sleep 0.1 & ) & ); sleep 1` creates a grandchild detached from
|
||||
# the original docker exec session — it becomes an orphan reparented
|
||||
@@ -42,4 +36,4 @@ def test_orphan_zombies_reaped(
|
||||
line for line in r.stdout.split("\n")
|
||||
if line.strip().startswith("Z")
|
||||
]
|
||||
assert not zombies, f"Zombies not reaped by PID 1: {zombies}"
|
||||
assert not zombies, f"Zombies not reaped by PID 1: {zombies}"
|
||||
@@ -1,91 +0,0 @@
|
||||
"""Regression tests for Docker HOME overrides under s6/with-contenv."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
DASHBOARD_RUN = REPO_ROOT / "docker" / "s6-rc.d" / "dashboard" / "run"
|
||||
MAIN_WRAPPER = REPO_ROOT / "docker" / "main-wrapper.sh"
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
def test_main_wrapper_preserves_docker_workdir() -> None:
    """main-wrapper.sh must save the original working directory and restore
    it, so the container starts in the Docker ``-w`` directory rather than
    /opt/data. Regression test for #35472.
    """
    script = MAIN_WRAPPER.read_text(encoding="utf-8")

    # The original cwd has to be captured before the cd into /opt/data.
    assert "_hermes_orig_cwd" in script, (
        "main-wrapper.sh must save the original cwd before cd /opt/data"
    )
    assert 'HERMES_ORIG_CWD:-$PWD' in script, (
        "main-wrapper.sh must capture PWD as the fallback original cwd"
    )

    # Init still happens in /opt/data (existing behaviour preserved).
    assert "cd /opt/data" in script

    # Ordering: venv activation, then the restoring cd, then the routing
    # block that execs the user command.
    ordered_markers = (
        "/opt/hermes/.venv/bin/activate",
        'cd "$_hermes_orig_cwd"',
        "if [ $# -eq 0 ]",
    )
    activate_at, restore_at, exec_at = (
        script.index(marker) for marker in ordered_markers
    )
    assert activate_at < restore_at < exec_at, (
        "cd $_hermes_orig_cwd must appear after venv activation and "
        "before the exec routing block"
    )
|
||||
|
||||
|
||||
def test_dashboard_run_resets_home_before_dropping_privileges() -> None:
    """The dashboard run script must contain the with-contenv shebang, the
    HOME export and the privilege-dropping exec line."""
    script = DASHBOARD_RUN.read_text(encoding="utf-8")

    required_snippets = (
        "#!/command/with-contenv sh",
        "export HOME=/opt/data",
        "exec s6-setuidgid hermes hermes dashboard",
    )
    for snippet in required_snippets:
        assert snippet in script
|
||||
|
||||
|
||||
def test_dashboard_run_does_not_derive_insecure_from_bind_host() -> None:
    """The s6 dashboard run script MUST NOT auto-add ``--insecure`` based on
    ``HERMES_DASHBOARD_HOST``. Doing so disables the OAuth auth gate on
    every non-loopback bind even when an auth provider is registered —
    the exact regression that exposed every wildcard-subdomain agent
    dashboard publicly until early 2026.

    The opt-in is now explicit: ``HERMES_DASHBOARD_INSECURE=1`` (truthy).
    The auth gate is the authority on whether non-loopback binds are safe.
    """
    script = DASHBOARD_RUN.read_text(encoding="utf-8")

    # Legacy host-derived flip: neither marker may reappear.
    forbidden = (
        (
            '127.0.0.1|localhost',
            "Run script still derives --insecure from the bind host. The gate "
            "is the authority now — opt in via HERMES_DASHBOARD_INSECURE instead.",
        ),
        (
            'case "$dash_host" in',
            "Legacy host-derived --insecure case-statement is back.",
        ),
    )
    for marker, complaint in forbidden:
        assert marker not in script, complaint

    # The explicit opt-in env var has to be referenced.
    assert "HERMES_DASHBOARD_INSECURE" in script, (
        "Explicit HERMES_DASHBOARD_INSECURE opt-in is missing."
    )

    # Truthy spellings aligned with the rest of the s6 scripts
    # (e.g. HERMES_DASHBOARD).
    truthy_values = ("1", "true", "TRUE", "True", "yes", "YES", "Yes")
    for truthy in truthy_values:
        assert truthy in script, (
            f"HERMES_DASHBOARD_INSECURE should accept truthy value {truthy!r}"
        )
|
||||
|
||||
|
||||
def test_stage2_hook_repairs_profiles_and_cron_ownership_on_every_boot() -> None:
    """profiles/ and cron/ must both be reclaimed after root-context writes."""
    hook = STAGE2_HOOK.read_text(encoding="utf-8")

    # Each directory needs its existence guard and its recursive chown.
    for subdir in ("profiles", "cron"):
        guard = f'if [ -d "$HERMES_HOME/{subdir}" ]; then'
        repair = f'chown -R hermes:hermes "$HERMES_HOME/{subdir}" 2>/dev/null || true'
        assert guard in hook
        assert repair in hook
|
||||
@@ -1,19 +0,0 @@
|
||||
"""Regression tests for Docker stage2 browser executable discovery."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def test_stage2_discovers_playwright_arm64_headless_shell() -> None:
    """Playwright's --only-shell layout may use a headless_shell basename."""
    # Path is relative, so this reads from the current working directory.
    hook_path = Path("docker/stage2-hook.sh")
    hook_source = hook_path.read_text()

    assert "-name 'headless_shell'" in hook_source
|
||||
|
||||
|
||||
def test_stage2_discovery_stays_filename_matched() -> None:
    """Avoid broad path grep that can pick executable shared libraries."""
    hook_source = Path("docker/stage2-hook.sh").read_text()

    # Isolate the command substitution assigned to browser_bin: everything
    # after its opening marker, up to the closing paren before the next `if`.
    after_open = hook_source.split("browser_bin=$(", 1)[1]
    discovery_block = after_open.split(")\n if", 1)[0]

    expected_find = 'find "$PLAYWRIGHT_BROWSERS_PATH" -type f -executable'
    assert expected_find in discovery_block
    assert "grep" not in discovery_block
|
||||
@@ -1,49 +0,0 @@
|
||||
"""Regression test for #34192 — Dockerfile must keep the tini compat shim
|
||||
for orchestration templates that still reference /usr/bin/tini.
|
||||
|
||||
This is a documentation-as-test guard: removing the shim is a real
|
||||
choice, but it should be done deliberately (e.g. once Hostinger's
|
||||
'Hermes WebUI' catalog updates to /init) and not by accident.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _dockerfile_text() -> str:
    """Return the UTF-8 text of the Dockerfile located one directory above
    the directory containing this test module."""
    dockerfile = Path(__file__).parent.parent / "Dockerfile"
    return dockerfile.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def test_tini_compat_symlink_present():
    """The /usr/bin/tini -> /init symlink line must exist for #34192."""
    symlink_cmd = "ln -sf /init /usr/bin/tini"
    dockerfile = _dockerfile_text()

    assert symlink_cmd in dockerfile, (
        "Dockerfile must keep the tini compat symlink (#34192). "
        "Removing it breaks orchestration templates that still pin "
        "/usr/bin/tini as the entrypoint (Hostinger 'Hermes WebUI' "
        "catalog as of v0.14.x)."
    )
|
||||
|
||||
|
||||
def test_tini_compat_comment_explains_why():
    """The Dockerfile must still mention #34192, the anchor that tells a
    future reader why the tini symlink exists. Without it the symlink looks
    like dead code worth deleting."""
    issue_anchor = "#34192"
    dockerfile = _dockerfile_text()

    assert issue_anchor in dockerfile, (
        "The Dockerfile tini compat shim must keep its #34192 anchor "
        "comment so future maintainers know why the symlink is there."
    )
|
||||
|
||||
|
||||
def test_entrypoint_still_init_not_tini():
    """Sanity check that the Dockerfile's ENTRYPOINT still starts with /init
    (s6-overlay). The shim serves legacy external wrappers only; the image's
    own runtime must keep using the canonical /init."""
    entrypoint_prefix = 'ENTRYPOINT [ "/init"'
    dockerfile = _dockerfile_text()

    assert entrypoint_prefix in dockerfile, (
        "Dockerfile ENTRYPOINT must remain /init (s6-overlay). The "
        "tini shim is only for external wrappers that haven't been "
        "updated yet."
    )
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Guards for the multi-container Hermes WebUI install surface."""
|
||||
|
||||
"""Test that setup.py uses temporary output directories when the source
|
||||
tree is read-only (as it is inside the Docker WebUI install surface).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
@@ -20,18 +21,6 @@ def _is_under(path: str, root: Path) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def test_docker_context_includes_license_file() -> None:
    """PEP 639 license-files metadata must resolve inside the Docker image."""
    ignore_text = (REPO_ROOT / ".dockerignore").read_text(encoding="utf-8")

    # Walk the active (non-blank, non-comment) entries; none may be exactly
    # "LICENSE", which would drop the file from the build context.
    for raw_line in ignore_text.splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue
        assert entry != "LICENSE"
|
||||
|
||||
|
||||
def test_setup_uses_temporary_outputs_when_source_tree_is_read_only(
|
||||
monkeypatch,
|
||||
) -> None:
|
||||
@@ -12,22 +12,16 @@ def _dockerfile_text() -> str:
|
||||
return DOCKERFILE.read_text()
|
||||
|
||||
|
||||
def test_dockerfile_makes_opt_hermes_root_owned_and_non_writable() -> None:
|
||||
def test_dockerfile_makes_opt_hermes_readonly_for_hermes_user() -> None:
|
||||
text = _dockerfile_text()
|
||||
|
||||
assert "COPY --chown=hermes:hermes . ." not in text
|
||||
assert "COPY . ." in text
|
||||
assert "chown -R root:root /opt/hermes" in text
|
||||
assert "chmod -R a+rX /opt/hermes" in text
|
||||
assert "chmod -R a-w /opt/hermes" in text
|
||||
|
||||
immutable_block = re.search(
|
||||
r"RUN mkdir -p /opt/hermes/bin && \\\n"
|
||||
r"(?:.*\\\n)+?"
|
||||
r"\s+chmod -R a-w /opt/hermes",
|
||||
text,
|
||||
)
|
||||
assert immutable_block, "Dockerfile must lock /opt/hermes after installing code/deps"
|
||||
# --chmod on the source COPY bakes read-only perms at copy time instead
|
||||
# of a separate chmod -R pass (which walked ~30k files — #49113).
|
||||
assert "COPY --link --chmod=a+rX,go-w . ." in text
|
||||
# The old tree-walking passes must not be present.
|
||||
assert "chown -R root:root /opt/hermes" not in text
|
||||
assert "chmod -R a+rX /opt/hermes" not in text
|
||||
assert "chmod -R a-w /opt/hermes" not in text
|
||||
|
||||
|
||||
def test_dockerfile_keeps_mutable_state_under_opt_data() -> None:
|
||||
@@ -68,19 +62,17 @@ def test_dockerfile_bakes_code_scoped_install_method_stamp() -> None:
|
||||
(/opt/hermes/.install_method) first; baking it at build time keeps the
|
||||
published image self-identifying as 'docker' WITHOUT writing into the
|
||||
shared $HERMES_HOME data volume (which a host install may also use).
|
||||
It must live inside the immutable block so the runtime user can't alter it.
|
||||
The stamp is created by root in the shim-wiring RUN block; the hermes
|
||||
user can't modify it (go-w from the --chmod on the source COPY).
|
||||
"""
|
||||
text = _dockerfile_text()
|
||||
assert "printf 'docker\\n' > /opt/hermes/.install_method" in text
|
||||
|
||||
immutable_block = re.search(
|
||||
# The stamp must be in the RUN block that wires the exec shim.
|
||||
shim_block = re.search(
|
||||
r"RUN mkdir -p /opt/hermes/bin && \\\n"
|
||||
r"(?:.*\\\n)+?"
|
||||
r"\s+chmod -R a-w /opt/hermes",
|
||||
r"\s+printf 'docker\\n' > /opt/hermes/\.install_method",
|
||||
text,
|
||||
)
|
||||
assert immutable_block, "immutable block must exist"
|
||||
assert ".install_method" in immutable_block.group(0), (
|
||||
"the code-scoped install-method stamp must be baked inside the "
|
||||
"immutable /opt/hermes block"
|
||||
)
|
||||
assert shim_block, "install-method stamp must be in the shim-wiring RUN block"
|
||||
|
||||
@@ -1,152 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook seeds gateway_state.json from
|
||||
HERMES_GATEWAY_BOOTSTRAP_STATE on first boot, so a freshly-provisioned
|
||||
container can come up with the gateway already running.
|
||||
|
||||
Background. On a blank volume there is no gateway_state.json, so the boot
|
||||
reconciler (cont-init.d/02-reconcile-profiles ->
|
||||
container_boot.reconcile_profile_gateways) registers the gateway-default s6
|
||||
slot but leaves it DOWN — it only auto-starts when the last recorded state was
|
||||
"running". A container provisioned on a fresh volume therefore comes up with
|
||||
the gateway down until something starts it.
|
||||
|
||||
An orchestrator that wants the gateway running from first boot sets
|
||||
HERMES_GATEWAY_BOOTSTRAP_STATE=running; stage2-hook.sh (installed as
|
||||
/etc/cont-init.d/01-hermes-setup, which runs lexicographically BEFORE
|
||||
02-reconcile-profiles) seeds the state file so the reconciler sees
|
||||
prior_state=running and brings the slot up on the very first boot.
|
||||
|
||||
This mirrors the existing HERMES_AUTH_JSON_BOOTSTRAP env-seed pattern: it seeds
|
||||
the SAME gateway_state.json the reconciler already consults, guarded by
|
||||
``[ ! -f ]`` so persisted runtime state always wins on subsequent boots (a
|
||||
deliberately-stopped gateway must stay stopped across restarts).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Module-scoped fixture returning the text of docker/stage2-hook.sh;
    skips when the script is not present in this checkout."""
    if STAGE2_HOOK.exists():
        return STAGE2_HOOK.read_text()
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip("docker/stage2-hook.sh not present in this checkout")
|
||||
|
||||
|
||||
def _seed_block(text: str) -> str:
|
||||
"""Extract the ``if [ ! -f "$HERMES_HOME/gateway_state.json" ] && … fi``
|
||||
block that seeds the gateway state file from the bootstrap env var."""
|
||||
m = re.search(
|
||||
r'(if \[ ! -f "\$HERMES_HOME/gateway_state\.json" \] && \\\n'
|
||||
r"(?:.*\n)*?fi)",
|
||||
text,
|
||||
)
|
||||
assert m, (
|
||||
"stage2-hook.sh must contain the gateway_state.json bootstrap-seed block "
|
||||
"guarded on HERMES_GATEWAY_BOOTSTRAP_STATE"
|
||||
)
|
||||
return m.group(1)
|
||||
|
||||
|
||||
def test_seed_block_present_and_guarded(stage2_text: str) -> None:
    """The seed block must exist, carry the first-boot-only ``[ ! -f ]``
    guard, and reference the bootstrap env var and the state key."""
    seed = _seed_block(stage2_text)

    guard = '[ ! -f "$HERMES_HOME/gateway_state.json" ]'
    assert guard in seed, (
        "seed must be guarded by [ ! -f ] so persisted state wins on restart"
    )
    for token in ("HERMES_GATEWAY_BOOTSTRAP_STATE", "gateway_state"):
        assert token in seed
|
||||
|
||||
|
||||
def _run_seed(
    text: str, *, env_value: str | None, preexisting: str | None
) -> str | None:
    """Execute the extracted seed block with bash against a throwaway
    $HERMES_HOME and report the resulting state file.

    ``env_value`` is the HERMES_GATEWAY_BOOTSTRAP_STATE value (None = unset).
    ``preexisting`` is the contents of a gateway_state.json placed before the
    block runs (None = no file). Returns the file's contents afterwards, or
    None if it doesn't exist. ``chown``/``chmod`` are replaced by no-op shell
    functions so the block runs without real root. Skips when bash is absent.
    """
    bash = shutil.which("bash")
    if bash is None:
        pytest.skip("bash not available")
    block = _seed_block(text)

    if env_value is None:
        env_line = "unset HERMES_GATEWAY_BOOTSTRAP_STATE\n"
    else:
        env_line = f'export HERMES_GATEWAY_BOOTSTRAP_STATE="{env_value}"\n'

    with tempfile.TemporaryDirectory() as tmp:
        sandbox = Path(tmp)
        home = sandbox / "home"
        home.mkdir()
        state_file = home / "gateway_state.json"
        if preexisting is not None:
            state_file.write_text(preexisting)

        harness = "".join(
            [
                "set -e\n",
                f'HERMES_HOME="{home}"\n',
                # Stub privilege ops — the sandbox isn't root.
                "chown() { :; }\n",
                "chmod() { :; }\n",
                env_line,
                block,
            ]
        )
        script_path = sandbox / "harness.sh"
        script_path.write_text(harness)

        proc = subprocess.run(
            [bash, str(script_path)], capture_output=True, text=True
        )
        assert proc.returncode == 0, proc.stderr

        # Read before the temporary directory is cleaned up.
        return state_file.read_text() if state_file.exists() else None
|
||||
|
||||
|
||||
def test_seeds_running_state_on_blank_volume(stage2_text: str) -> None:
    """env=running + no pre-existing file -> writes a valid running state."""
    written = _run_seed(stage2_text, env_value="running", preexisting=None)

    assert written is not None, "seed must create gateway_state.json"
    state = json.loads(written)
    assert state.get("gateway_state") == "running"
|
||||
|
||||
|
||||
def test_does_not_clobber_existing_state(stage2_text: str) -> None:
    """An existing state file is never overwritten (the ``[ ! -f ]`` guard),
    even when the bootstrap env var says running. A deliberately-stopped
    gateway must stay stopped across restarts."""
    persisted = json.dumps({"gateway_state": "stopped", "pid": 123})

    after = _run_seed(stage2_text, env_value="running", preexisting=persisted)

    assert after == persisted, "seed must not clobber a persisted state file"
|
||||
|
||||
|
||||
def test_no_seed_when_env_unset(stage2_text: str) -> None:
    """With the env var unset no file is written, preserving the default
    down-on-first-boot behaviour for orchestrators that don't opt in."""
    result = _run_seed(stage2_text, env_value=None, preexisting=None)

    assert result is None, "seed must not run when HERMES_GATEWAY_BOOTSTRAP_STATE is unset"
|
||||
|
||||
|
||||
def test_non_running_value_ignored(stage2_text: str) -> None:
    """Only a literal "running" is honoured; any other value is ignored so a
    typo can't write a bogus state. (The reconciler's _AUTOSTART_STATES is
    exactly {"running"}.)"""
    rejected_values = ("stopped", "Running", "1", "true", "starting")

    for bogus in rejected_values:
        written = _run_seed(stage2_text, env_value=bogus, preexisting=None)
        assert written is None, (
            f"only 'running' should seed a state file, not {bogus!r}"
        )
|
||||
@@ -1,48 +0,0 @@
|
||||
"""Contract tests for the Docker stage2 immutable install-tree policy.
|
||||
|
||||
Hosted/container Hermes keeps user-writable state under HERMES_HOME
|
||||
(/opt/data). The installed source, venv, TUI bundle, and node_modules under
|
||||
/opt/hermes must remain root-owned/non-writable by the runtime hermes user so
|
||||
an agent session cannot self-modify the installation and brick the gateway.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Module-scoped fixture returning the text of docker/stage2-hook.sh;
    skips when the script is not present in this checkout."""
    if STAGE2_HOOK.exists():
        return STAGE2_HOOK.read_text()
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip("docker/stage2-hook.sh not present in this checkout")
|
||||
|
||||
|
||||
def test_stage2_does_not_chown_install_tree_to_hermes(stage2_text: str) -> None:
    """stage2 must contain none of the old build-tree chown logic or its
    per-directory operands."""
    # Fragments of the removed "fix ownership of build trees" logic.
    removed_fragments = (
        "Fixing ownership of build trees under $INSTALL_DIR",
        'chown -R hermes:hermes \\\n "$INSTALL_DIR/.venv"',
        'venv_owner=$(stat -c %u "$INSTALL_DIR/.venv"',
        "chown of build trees failed",
    )
    for fragment in removed_fragments:
        assert fragment not in stage2_text

    # Continuation-line operands of a multi-line chown over the install tree.
    install_trees = (
        '"$INSTALL_DIR/.venv" \\',
        '"$INSTALL_DIR/ui-tui" \\',
        '"$INSTALL_DIR/gateway" \\',
        '"$INSTALL_DIR/node_modules" \\',
    )
    for install_tree in install_trees:
        assert install_tree not in stage2_text, (
            f"stage2 must not chown {install_tree} back to hermes; "
            "the Dockerfile keeps /opt/hermes immutable and writable state "
            "belongs under HERMES_HOME"
        )
|
||||
|
||||
|
||||
def test_stage2_documents_immutable_install_contract(stage2_text: str) -> None:
    """stage2 must mention each marker of the immutable-install contract."""
    contract_markers = (
        "Immutable install tree",
        "PYTHONDONTWRITEBYTECODE",
        "HERMES_DISABLE_LAZY_INSTALLS=1",
        "/opt/hermes",
    )
    for marker in contract_markers:
        assert marker in stage2_text
|
||||
@@ -1,61 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook must NOT stamp the install method
|
||||
into the shared $HERMES_HOME, and must heal a stale 'docker' stamp left there
|
||||
by older images.
|
||||
|
||||
Background (shared-$HERMES_HOME bug)
|
||||
------------------------------------
|
||||
$HERMES_HOME (/opt/data) is a DATA volume that users commonly bind-mount from
|
||||
the host (``~/.hermes:/opt/data``) and sometimes share with a host-side
|
||||
Desktop/CLI install. Older images wrote ``printf 'docker' > $HERMES_HOME/.install_method``
|
||||
at boot, which clobbered the host install's own marker — so the host's in-app
|
||||
updater read 'docker' and refused to run ``hermes update`` ("doesn't apply
|
||||
inside the Docker container").
|
||||
|
||||
The fix scopes the stamp to the install tree (baked at
|
||||
``/opt/hermes/.install_method`` in the Dockerfile, read first by
|
||||
``detect_install_method``). stage2 must therefore:
|
||||
|
||||
* NOT write the 'docker' stamp into $HERMES_HOME any more, and
|
||||
* proactively remove a stale 'docker' stamp from $HERMES_HOME so homes
|
||||
already poisoned by an older image self-heal on the next boot.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Module-scoped fixture returning the text of docker/stage2-hook.sh;
    skips when the script is not present in this checkout."""
    if STAGE2_HOOK.exists():
        return STAGE2_HOOK.read_text()
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip("docker/stage2-hook.sh not present in this checkout")
|
||||
|
||||
|
||||
def test_stage2_does_not_write_install_method_into_home(stage2_text: str) -> None:
    """stage2 must not redirect or tee into $HERMES_HOME/.install_method."""
    # Matches `tee` or `>` followed by the (optionally quoted) home-scoped
    # install-method path.
    write_pattern = r"(tee|>)\s*\"?\$HERMES_HOME/\.install_method"
    offending = re.search(write_pattern, stage2_text)

    assert not offending, (
        "stage2 must not stamp $HERMES_HOME/.install_method — that data dir "
        "may be shared with a host install whose marker would be clobbered"
    )
|
||||
|
||||
|
||||
def test_stage2_heals_stale_docker_home_stamp(stage2_text: str) -> None:
    """The hook must delete a stale stamp, but only when its value is 'docker'."""
    # Poisoned shared homes recover only if the hook removes the stale stamp.
    removal_command = 'rm -f "$HERMES_HOME/.install_method"'
    removal_present = removal_command in stage2_text
    assert removal_present, (
        "stage2 must remove a stale 'docker' stamp from $HERMES_HOME to heal "
        "homes poisoned by older images"
    )

    # A stamp with any other value may belong to a user/host install, so the
    # removal has to sit behind an equality check against "docker".
    guard_pattern = r'\[\s*"\$stamped"\s*=\s*"docker"\s*\]'
    value_guard = re.search(guard_pattern, stage2_text)
    assert value_guard is not None, (
        "the stale-stamp removal must be guarded on the value == 'docker'"
    )
|
||||
@@ -1,60 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook seeds $HERMES_HOME/logs/gateways
|
||||
as the hermes user.
|
||||
|
||||
Regression guard for #45258: the per-profile gateway log service
|
||||
(`gateway-<profile>/log/run`) creates `logs/gateways/` via `mkdir -p` but only
|
||||
chowns the leaf `logs/gateways/<profile>`. If the first log service to boot
|
||||
runs in root context, the `gateways/` parent is created root-owned and stays
|
||||
that way; every profile registered later runs its log service as the dropped
|
||||
hermes user and s6-log crash-loops on `mkdir: Permission denied`.
|
||||
|
||||
Seeding `logs/gateways` in stage2 (cont-init runs before any service starts)
|
||||
guarantees the parent already exists hermes-owned by the time the first
|
||||
log/run executes its `mkdir -p`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Load ``docker/stage2-hook.sh`` once per module for the contract tests.

    Skips the requesting tests if the script does not exist in this checkout.
    """
    hook_path = STAGE2_HOOK
    if hook_path.exists():
        return hook_path.read_text()
    pytest.skip("docker/stage2-hook.sh not present in this checkout")
|
||||
|
||||
|
||||
def _seed_mkdir_block(text: str) -> str:
    """Return the backslash-continued ``as_hermes mkdir -p`` command in *text*.

    The match spans the opening ``as_hermes mkdir -p`` line, every following
    line that ends in a continuation backslash, and the first line that does
    not (newline included). Fails the calling test when no such block exists.
    """
    seed_pattern = re.compile(r"as_hermes mkdir -p \\\n(?:[^\n]*\\\n)*[^\n]*\n")
    found = seed_pattern.search(text)
    assert found is not None, "stage2-hook.sh must contain the as_hermes mkdir -p seed block"
    return found[0]
|
||||
|
||||
|
||||
def test_logs_gateways_is_seeded(stage2_text: str) -> None:
    """Both ``logs`` and ``logs/gateways`` must appear in the seed mkdir block."""
    seed_block = _seed_mkdir_block(stage2_text)

    gateways_entry = '"$HERMES_HOME/logs/gateways"'
    assert gateways_entry in seed_block, (
        "logs/gateways must be seeded hermes-owned in stage2 so profiles "
        "added after first boot can create their log dirs (#45258)"
    )

    # Seeding the parent explicitly keeps `mkdir -p` from creating logs/
    # implicitly with surprising ownership.
    logs_entry = '"$HERMES_HOME/logs"'
    assert logs_entry in seed_block
|
||||
|
||||
|
||||
def test_logs_subtree_is_healed_when_chown_needed(stage2_text: str) -> None:
    """The ``for sub in ...`` repair loop must list ``logs`` among its entries.

    Because that loop covers the logs subtree, seeding ``logs/gateways`` is
    sufficient and no dedicated ``logs/gateways`` loop entry is required.
    """
    loop_header = re.search(r"for sub in ([^;]*); do", stage2_text)
    assert loop_header is not None, "stage2-hook.sh must contain the needs_chown subdir repair loop"

    repaired_subdirs = loop_header.group(1).split()
    assert "logs" in repaired_subdirs, (
        "the needs_chown loop must recursively chown logs/ — it covers "
        "logs/gateways, so the seed list does not need a loop twin"
    )
|
||||
@@ -1,110 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook accepts PUID/PGID as aliases for
|
||||
HERMES_UID/HERMES_GID.
|
||||
|
||||
Regression guard for #15290. NAS platforms (UGOS, Synology, unRAID) bind-mount
|
||||
/opt/data from a host directory owned by the user's own UID and expect the
|
||||
LinuxServer.io PUID/PGID convention. Without the alias those vars are silently
|
||||
ignored, the s6-setuidgid drop lands on UID 10000, and the runtime cannot read
|
||||
the volume. HERMES_UID/HERMES_GID must still take precedence when both are
|
||||
set.
|
||||
|
||||
The s6-overlay rework moved bootstrap from docker/entrypoint.sh (now a shim)
|
||||
to docker/stage2-hook.sh, which is installed as /etc/cont-init.d/01-hermes-setup
|
||||
by the Dockerfile. This test targets the post-rework location.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Return the stage2 hook script text, shared by every test in the module.

    When ``docker/stage2-hook.sh`` is missing the requesting tests are skipped.
    """
    hook_missing = not STAGE2_HOOK.exists()
    if hook_missing:
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    script_text = STAGE2_HOOK.read_text()
    return script_text
|
||||
|
||||
|
||||
def _alias_lines(text: str) -> list[str]:
    """Collect the hook lines that assign ``HERMES_UID`` or ``HERMES_GID``.

    Each line of *text* is stripped of surrounding whitespace; those whose
    stripped form begins with ``HERMES_UID=`` or ``HERMES_GID=`` are returned
    (stripped) in file order.
    """
    assignment_prefixes = ("HERMES_UID=", "HERMES_GID=")
    selected = []
    for raw_line in text.splitlines():
        candidate = raw_line.strip()
        if candidate.startswith(assignment_prefixes):
            selected.append(candidate)
    return selected
|
||||
|
||||
|
||||
def test_stage2_hook_resolves_puid_pgid_aliases(stage2_text: str) -> None:
    """The UID/GID assignment lines must reference the PUID and PGID aliases."""
    assignments = _alias_lines(stage2_text)

    puid_lines = [entry for entry in assignments if "PUID" in entry]
    assert puid_lines, (
        "docker/stage2-hook.sh must resolve HERMES_UID from a PUID alias; see #15290"
    )

    pgid_lines = [entry for entry in assignments if "PGID" in entry]
    assert pgid_lines, (
        "docker/stage2-hook.sh must resolve HERMES_GID from a PGID alias; see #15290"
    )
|
||||
|
||||
|
||||
def _resolve(stage2_text: str, env: dict[str, str]) -> str:
    """Execute only the hook's UID/GID assignment lines and report the result.

    The assignment lines are run under ``bash -ec`` with an environment made
    of the current ``PATH`` plus *env*, followed by an ``echo`` that prints
    ``HERMES_UID:HERMES_GID``. Returns that output stripped of whitespace.
    Skips the calling test when bash is unavailable and fails it when the
    script exits non-zero.
    """
    bash_path = shutil.which("bash")
    if bash_path is None:
        pytest.skip("bash not available")

    assignments = "\n".join(_alias_lines(stage2_text))
    snippet = assignments + '\necho "${HERMES_UID:-}:${HERMES_GID:-}"\n'

    # Keys in `env` override the inherited PATH entry when they collide.
    run_env = {"PATH": os.environ.get("PATH", ""), **env}
    completed = subprocess.run(
        [bash_path, "-ec", snippet],
        env=run_env,
        capture_output=True,
        text=True,
    )
    assert completed.returncode == 0, completed.stderr
    return completed.stdout.strip()
|
||||
|
||||
|
||||
def test_puid_pgid_populate_hermes_uid_gid(stage2_text: str) -> None:
    """With only PUID/PGID set, they become the resolved HERMES_UID/HERMES_GID."""
    alias_only_env = {"PUID": "1000", "PGID": "10"}
    resolved_pair = _resolve(stage2_text, alias_only_env)
    assert resolved_pair == "1000:10"
|
||||
|
||||
|
||||
def test_hermes_uid_gid_take_precedence_over_aliases(stage2_text: str) -> None:
    """Explicit HERMES_UID/HERMES_GID win when PUID/PGID are also set."""
    both_sets_env = {
        "HERMES_UID": "2000",
        "HERMES_GID": "2001",
        "PUID": "1000",
        "PGID": "10",
    }
    assert _resolve(stage2_text, both_sets_env) == "2000:2001"
|
||||
|
||||
|
||||
def test_no_uid_vars_leaves_values_empty(stage2_text: str) -> None:
    """With none of the four variables set, both resolved values are empty."""
    # An empty pair means the stage2 hook keeps the default hermes user.
    resolved_pair = _resolve(stage2_text, {})
    assert resolved_pair == ":"
|
||||
|
||||
|
||||
def test_stage2_hook_creates_s6_envdir_before_writing_browser_path(stage2_text: str) -> None:
    """The envdir ``mkdir -p`` must appear before the browser-path write.

    Regression guard for runtimes where the s6 container_environment
    directory is absent when the cont-init hook runs.
    """
    mkdir_line = "mkdir -p /run/s6/container_environment"
    write_line = (
        "printf '%s' \"$browser_bin\" > "
        "/run/s6/container_environment/AGENT_BROWSER_EXECUTABLE_PATH"
    )

    mkdir_offset = stage2_text.find(mkdir_line)
    write_offset = stage2_text.find(write_line)

    # Both commands must be present, and the directory creation comes first.
    assert mkdir_offset != -1
    assert write_offset != -1
    assert mkdir_offset < write_offset
|
||||
|
||||
|
||||
def test_stage2_hook_runs_config_migration_as_hermes(stage2_text: str) -> None:
    """The hook must reference the migration script and the hermes-user python call."""
    required_fragments = (
        "scripts/docker_config_migrate.py",
        's6-setuidgid hermes "$INSTALL_DIR/.venv/bin/python"',
    )
    for fragment in required_fragments:
        assert fragment in stage2_text
|
||||
|
||||
|
||||
def test_stage2_hook_documents_config_migration_opt_out(stage2_text: str) -> None:
    """The hook text must mention the ``HERMES_SKIP_CONFIG_MIGRATION`` variable."""
    opt_out_variable = "HERMES_SKIP_CONFIG_MIGRATION"
    assert opt_out_variable in stage2_text
|
||||
@@ -1,138 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook resets ownership of hermes-owned
|
||||
top-level state files in $HERMES_HOME — but only those, never arbitrary
|
||||
host-owned files.
|
||||
|
||||
Regression guard for the gateway restart loop reported in #35098: files such
|
||||
as gateway.lock / state.db / auth.json live directly under $HERMES_HOME (not in
|
||||
a subdir), so the targeted subdir chown misses them. When created or rewritten
|
||||
by `docker exec <container> hermes …` (root unless `-u` is passed) they land
|
||||
root-owned and the unprivileged hermes runtime then hits PermissionError on next
|
||||
startup.
|
||||
|
||||
The fix uses an explicit allowlist rather than a blanket `find -user root`
|
||||
sweep, preserving the targeted-ownership contract from #19788 / PR #19795: a
|
||||
bind-mounted $HERMES_HOME may contain host-owned files Hermes does not manage,
|
||||
and those must never be chowned.
|
||||
|
||||
The s6-overlay rework moved bootstrap from docker/entrypoint.sh (now a shim) to
|
||||
docker/stage2-hook.sh, installed as /etc/cont-init.d/01-hermes-setup. This test
|
||||
targets that location.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Read ``docker/stage2-hook.sh`` once and share it across the module.

    Tests that request this fixture are skipped when the script is absent.
    """
    script_available = STAGE2_HOOK.exists()
    if script_available:
        return STAGE2_HOOK.read_text()
    pytest.skip("docker/stage2-hook.sh not present in this checkout")
|
||||
|
||||
|
||||
def _toplevel_chown_loop(text: str) -> str:
    """Return the shell ``for f in ... done`` loop that chowns top-level files.

    The loop is located by its backslash-continued ``for f in`` header and
    extends to the first subsequent line starting with ``done``. The calling
    test fails when the loop is missing or when it does not contain the
    ``chown hermes:hermes "$HERMES_HOME/$f"`` command.
    """
    loop_pattern = re.compile(r"(for f in \\\n(?:.*\\\n)*?.*; do\n(?:.*\n)*?done)")
    found = loop_pattern.search(text)
    assert found is not None, "stage2-hook.sh must contain the top-level-file chown for-loop (#35098)"

    loop_source = found[1]
    chown_command = 'chown hermes:hermes "$HERMES_HOME/$f"'
    assert chown_command in loop_source, (
        "the top-level-file loop must chown each allowlisted file to hermes"
    )
    return loop_source
|
||||
|
||||
|
||||
def test_toplevel_chown_loop_present(stage2_text: str) -> None:
    """The chown loop must name every state file reported broken in #35098."""
    loop_source = _toplevel_chown_loop(stage2_text)
    reported_broken = ("auth.json", "state.db", "gateway.lock", "gateway_state.json")
    for required in reported_broken:
        is_covered = required in loop_source
        assert is_covered, (
            f"top-level chown allowlist must include {required!r} (#35098)"
        )
|
||||
|
||||
|
||||
def test_no_blanket_find_user_root_sweep(stage2_text: str) -> None:
    """Reject any single-line ``find $HERMES_HOME ... -user root`` in the hook.

    Such a blanket sweep would chown host-owned files in a bind mount
    (#19788 / PR #19795); the targeted allowlist must be used instead.
    """
    # Accepts $HERMES_HOME and ${HERMES_HOME}, optionally double-quoted.
    sweep_pattern = r"find\s+\"?\$\{?HERMES_HOME\}?\"?[^\n]*-user\s+root"
    blanket_sweep = re.search(sweep_pattern, stage2_text)
    assert blanket_sweep is None, (
        "stage2-hook.sh must not blanket-chown root-owned files under "
        "$HERMES_HOME via `find -user root`; use the targeted allowlist instead "
        "so host-owned bind-mounted files are preserved (#19788, #19795)."
    )
|
||||
|
||||
|
||||
def _run_loop(text: str, present_files: list[str]) -> list[str]:
    """Execute the extracted chown loop against a throwaway ``$HERMES_HOME``.

    A temporary home directory is populated with *present_files* plus one
    extra file, ``host_secret.json``, that stands in for a host-owned file.
    ``chown`` is replaced by a shell function that appends the basename of its
    last argument to a log file, so no real ownership change (or root
    privilege) is involved. Returns the logged basenames in call order; an
    empty list when nothing was logged. Skips the calling test without bash.
    """
    import tempfile

    bash_path = shutil.which("bash")
    if bash_path is None:
        pytest.skip("bash not available")
    loop_source = _toplevel_chown_loop(text)

    with tempfile.TemporaryDirectory() as tmp_name:
        sandbox = Path(tmp_name)
        home = sandbox / "home"
        home.mkdir()
        for filename in present_files:
            (home / filename).touch()
        # Not on the allowlist: the loop must leave this one alone.
        (home / "host_secret.json").touch()

        # The stub walks "$@" so that $a ends up holding the final argument
        # (the path), then records only its basename.
        harness = "".join(
            (
                "set -e\n",
                f'HERMES_HOME="{home}"\n',
                f'chown() {{ for a in "$@"; do :; done; echo "${{a##*/}}" >> "{sandbox}/chown.log"; }}\n',
                loop_source,
            )
        )
        harness_path = sandbox / "harness.sh"
        harness_path.write_text(harness)

        completed = subprocess.run(
            [bash_path, str(harness_path)], capture_output=True, text=True
        )
        assert completed.returncode == 0, completed.stderr

        chown_log = sandbox / "chown.log"
        if chown_log.exists():
            return [entry for entry in chown_log.read_text().splitlines() if entry]
        return []
|
||||
|
||||
|
||||
def test_loop_chowns_present_allowlisted_files(stage2_text: str) -> None:
    """Every allowlisted file that exists in the sandbox home gets chowned."""
    existing = ["auth.json", "state.db", "gateway.lock"]
    chowned = _run_loop(stage2_text, existing)
    for expected_name in ("auth.json", "state.db", "gateway.lock"):
        assert expected_name in chowned
|
||||
|
||||
|
||||
def test_loop_skips_nonallowlisted_host_file(stage2_text: str) -> None:
    """A present file that is not on the allowlist must never be chowned.

    ``host_secret.json`` (created by ``_run_loop``) plays the role of a
    host-owned file inside a bind mount.
    """
    chowned = _run_loop(stage2_text, ["auth.json"])
    host_file_touched = "host_secret.json" in chowned
    assert not host_file_touched, (
        "the allowlist loop must not touch non-allowlisted files (#19788)"
    )
|
||||
|
||||
|
||||
def test_loop_skips_absent_files(stage2_text: str) -> None:
    """An allowlisted file that does not exist produces no chown call."""
    chowned = _run_loop(stage2_text, ["auth.json"])
    # Only auth.json was created, so state.db must be missing from the log.
    state_db_touched = "state.db" in chowned
    assert not state_db_touched
|
||||
@@ -1,86 +0,0 @@
|
||||
"""Regression tests for Docker stage2 UID/GID handling on NAS hosts.
|
||||
|
||||
Unraid commonly runs appdata as nobody:users (99:100). The stage2 hook must
|
||||
accept those non-root numeric IDs and keep legacy/new pairing stores writable
|
||||
after targeted ownership reconciliation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Supply the stage2 hook source to the tests, reading it once per module.

    A checkout without ``docker/stage2-hook.sh`` causes the requesting tests
    to be skipped.
    """
    hook_script = STAGE2_HOOK
    if not hook_script.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    contents = hook_script.read_text()
    return contents
|
||||
|
||||
|
||||
def _uid_gid_validator(text: str) -> str:
    """Return the hook source from ``validate_uid_gid()`` up to the remap marker.

    The slice starts at the first ``validate_uid_gid()`` occurrence that
    precedes the ``# --- UID/GID remap ---`` section marker and ends just
    before that marker.

    Both anchors are required. Without the marker the slice would silently
    extend to the end of the hook, and callers that execute the returned text
    in a shell would then run the rest of the script; without the function
    there is nothing to extract. Either case fails the calling test with an
    explanatory message.
    """
    marker = "# --- UID/GID remap ---"
    before_marker, found_marker, _after_marker = text.partition(marker)
    assert found_marker, (
        "stage2-hook.sh must contain the '# --- UID/GID remap ---' section marker"
    )
    start = before_marker.find("validate_uid_gid()")
    assert start != -1, (
        "stage2-hook.sh must define validate_uid_gid() before the UID/GID remap section"
    )
    return before_marker[start:]
|
||||
|
||||
|
||||
def _validate_uid_gid(text: str, value: str) -> bool:
    """Run the hook's ``validate_uid_gid`` on *value* and report acceptance.

    The extracted validator source is executed with ``bash -c`` and *value*
    is passed through the ``CANDIDATE`` environment variable. Returns True
    when the shell exits with status 0. Skips the calling test without bash.
    """
    bash_path = shutil.which("bash")
    if bash_path is None:
        pytest.skip("bash not available")

    invocation = '\nvalidate_uid_gid "$CANDIDATE"\n'
    harness = _uid_gid_validator(text) + invocation
    run_env = {"PATH": os.environ.get("PATH", ""), "CANDIDATE": value}
    completed = subprocess.run(
        [bash_path, "-c", harness],
        env=run_env,
        capture_output=True,
        text=True,
    )
    accepted = completed.returncode == 0
    return accepted
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", ["1", "99", "100", "1000", "65534"])
def test_uid_gid_validator_accepts_non_root_nas_ids(stage2_text: str, value: str) -> None:
    """Each listed non-root numeric ID must pass the hook's validator."""
    accepted = _validate_uid_gid(stage2_text, value)
    assert accepted, (
        f"stage2 hook must accept NAS UID/GID {value}; Unraid uses 99:100 (#38070)"
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", ["", "0", "abc", "99x", "65535"])
def test_uid_gid_validator_rejects_root_invalid_and_out_of_range(
    stage2_text: str,
    value: str,
) -> None:
    """Empty, root (0), non-numeric and 65535 values must all be rejected."""
    accepted = _validate_uid_gid(stage2_text, value)
    assert not accepted
|
||||
|
||||
|
||||
def _targeted_chown_subdirs(text: str) -> list[str]:
    """Return the whitespace-separated entries of the targeted chown loop.

    The loop is the ``for sub in ...; do`` whose body begins with an
    ``if [ -e "$HERMES_HOME/$sub" ]`` existence check. The item list may span
    several lines; it is split on whitespace as-is. Fails the calling test
    when no such loop is found.
    """
    loop_pattern = re.compile(
        r"for sub in (?P<items>.*?); do\n\s*if \[ -e \"\$HERMES_HOME/\$sub\" \]",
        re.DOTALL,
    )
    found = loop_pattern.search(text)
    assert found is not None, "stage2-hook.sh must contain the targeted subdir chown loop"
    return found["items"].split()
|
||||
|
||||
|
||||
def test_targeted_chown_covers_legacy_and_new_pairing_dirs(stage2_text: str) -> None:
    """The targeted chown loop must list both pairing store locations."""
    loop_entries = _targeted_chown_subdirs(stage2_text)
    for pairing_dir in ("pairing", "platforms/pairing"):
        assert pairing_dir in loop_entries
|
||||
|
||||
|
||||
def test_seeded_directory_list_covers_legacy_and_new_pairing_dirs(stage2_text: str) -> None:
    """Both pairing directories must be named in the seed mkdir section.

    The section inspected runs from just after the ``as_hermes mkdir -p``
    line up to the ``# --- Install-method stamp`` comment.
    """
    pieces_after_start = stage2_text.split("as_hermes mkdir -p \\", 1)
    after_seed_start = pieces_after_start[1]
    seed_section = after_seed_start.split("# --- Install-method stamp", 1)[0]

    for seeded_entry in ('"$HERMES_HOME/pairing"', '"$HERMES_HOME/platforms/pairing"'):
        assert seeded_entry in seed_section
|
||||
@@ -1,119 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook and main-wrapper reject an
|
||||
unsupported `docker run --user <arbitrary-uid>:<gid>` start with actionable
|
||||
guidance, while still allowing:
|
||||
|
||||
- root start (id -u == 0)
|
||||
- `--user <hermes-uid>` (the supported non-root start, #34648 / #34837)
|
||||
|
||||
Background: in the tini era `docker run --user $(id -u):$(id -g)` was used to
|
||||
make container-written files match the host user. Under s6-overlay this can't
|
||||
work — the bootstrap (UID remap, volume/build-tree chown, config seeding) needs
|
||||
root, and the baked image dirs are owned by the hermes build UID, so an
|
||||
arbitrary pinned UID can't write them (EACCES on a bind mount, hard crash on a
|
||||
named volume). The supported path is root start + HERMES_UID/HERMES_GID (or the
|
||||
PUID/PGID aliases), which remaps the hermes user and chowns the volume.
|
||||
|
||||
The guard fires only when the current UID is neither root NOR the hermes UID,
|
||||
so the #34648 `--user 10000:10000` case (pinning to the hermes UID itself) is
|
||||
unaffected.
|
||||
|
||||
Extraction + stubbed-shell-run mirrors
|
||||
tests/tools/test_stage2_hook_toplevel_chown.py.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
MAIN_WRAPPER = REPO_ROOT / "docker" / "main-wrapper.sh"
|
||||
|
||||
|
||||
def _read(p: Path) -> str:
    """Return the text of *p*, skipping the calling test when *p* is absent."""
    if p.exists():
        return p.read_text()
    pytest.skip(f"{p} not present in this checkout")
|
||||
|
||||
|
||||
def _guard_block(text: str) -> str:
    """Return the ``--user`` guard: the ``cur_uid`` assignment plus its ``if``.

    The match begins at ``cur_uid="$(id -u)"`` immediately followed by an
    ``if [ "$cur_uid" != 0 ]`` line and ends at the first following line that
    starts with ``fi``. Fails the calling test when the guard is not found.
    """
    guard_pattern = re.compile(
        r"(cur_uid=\"\$\(id -u\)\"\nif \[ \"\$cur_uid\" != 0 \](?:.*\n)*?fi)"
    )
    found = guard_pattern.search(text)
    assert found is not None, "expected the --user guard block (cur_uid + non-root/non-hermes check)"
    return found[1]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("path", [STAGE2_HOOK, MAIN_WRAPPER])
def test_guard_present_and_mentions_remediation(path: Path) -> None:
    """Each script's guard must check both UIDs, exit 1, and name the env vars."""
    guard_source = _guard_block(_read(path))

    required_fragments = (
        # Non-root AND non-hermes-uid checks, so --user 10000:10000 is allowed.
        '"$cur_uid" != 0',
        '"$cur_uid" != "$(id -u hermes)"',
        "exit 1",
        # The message must point users at the supported env vars.
        "HERMES_UID",
        "HERMES_GID",
        "PUID",
        "PGID",
    )
    for fragment in required_fragments:
        assert fragment in guard_source
|
||||
|
||||
|
||||
def _run_guard(text: str, *, cur_uid: int, hermes_uid: int = 10000) -> subprocess.CompletedProcess:
    """Execute the extracted guard under bash with a stubbed ``id`` command.

    The stub prints *hermes_uid* when its second argument is ``hermes`` and
    *cur_uid* otherwise. ``echo GUARD_PASSED`` is appended after the guard, so
    that marker reaches stdout only when the guard does not exit. Returns the
    completed process; skips the calling test when bash is unavailable.
    """
    bash_path = shutil.which("bash")
    if bash_path is None:
        pytest.skip("bash not available")
    guard_source = _guard_block(text)

    with tempfile.TemporaryDirectory() as tmp_name:
        harness = "".join(
            (
                "set -e\n",
                # Stub `id`: `id -u` -> cur_uid; `id -u hermes` -> hermes_uid.
                f'id() {{ if [ "$2" = hermes ]; then echo {hermes_uid}; else echo {cur_uid}; fi; }}\n',
                guard_source,
                "\necho GUARD_PASSED\n",
            )
        )
        harness_path = Path(tmp_name) / "h.sh"
        harness_path.write_text(harness)
        completed = subprocess.run(
            [bash_path, str(harness_path)], capture_output=True, text=True
        )
        return completed
|
||||
|
||||
|
||||
def test_arbitrary_user_uid_is_rejected() -> None:
    """UID 1000 — neither root nor the hermes UID — is rejected by both scripts."""
    script_texts = [_read(script) for script in (STAGE2_HOOK, MAIN_WRAPPER)]
    for script_text in script_texts:
        result = _run_guard(script_text, cur_uid=1000, hermes_uid=10000)
        assert result.returncode == 1, f"expected rejection, got rc={result.returncode}"
        assert "not supported" in result.stderr
        assert "GUARD_PASSED" not in result.stdout
|
||||
|
||||
|
||||
def test_root_start_passes() -> None:
    """A root start (uid 0) passes the guard in both scripts."""
    script_texts = [_read(script) for script in (STAGE2_HOOK, MAIN_WRAPPER)]
    for script_text in script_texts:
        result = _run_guard(script_text, cur_uid=0, hermes_uid=10000)
        assert result.returncode == 0, result.stderr
        assert "GUARD_PASSED" in result.stdout
|
||||
|
||||
|
||||
def test_user_pinned_to_hermes_uid_passes() -> None:
    """A container pinned to the hermes UID itself must not be blocked.

    ``--user 10000:10000`` is the supported non-root start from
    #34648 / #34837.
    """
    script_texts = [_read(script) for script in (STAGE2_HOOK, MAIN_WRAPPER)]
    for script_text in script_texts:
        result = _run_guard(script_text, cur_uid=10000, hermes_uid=10000)
        assert result.returncode == 0, result.stderr
        assert "GUARD_PASSED" in result.stdout
|
||||
|
||||
|
||||
def test_user_pinned_to_remapped_hermes_uid_passes() -> None:
    """The guard passes whenever the current UID equals the hermes UID.

    Exercised with 4242 for both, as would be the case for a container
    pinned to the UID that a HERMES_UID remap assigned to hermes.
    """
    script_texts = [_read(script) for script in (STAGE2_HOOK, MAIN_WRAPPER)]
    for script_text in script_texts:
        result = _run_guard(script_text, cur_uid=4242, hermes_uid=4242)
        assert result.returncode == 0, result.stderr
        assert "GUARD_PASSED" in result.stdout
|
||||
Reference in New Issue
Block a user