mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 20:34:35 +08:00
Compare commits
18 Commits
thin-clien
...
ethie/ci-t
| Author | SHA1 | Date | |
|---|---|---|---|
| | ffe043998f | | |
| | 5cca2b1c2d | | |
| | 725ca2ab20 | | |
| | a7e32ca9c2 | | |
| | 901f107976 | | |
| | c73adbd91b | | |
| | 01a7dfc339 | | |
| | db03c207aa | | |
| | e74f230462 | | |
| | eb114af7f1 | | |
| | 2118bc5ab3 | | |
| | 9f51ec0280 | | |
| | 03046b9b9a | | |
| | 5e50b121ab | | |
| | 5a20177fc3 | | |
| | 3668c2c482 | | |
| | 2fcc3ad9cb | | |
| | a6d54c9bbe | | |
2
.envrc
2
.envrc
@@ -1,5 +1,5 @@
|
||||
watch_file pyproject.toml uv.lock
|
||||
watch_file package-lock.json package.json web/package.json ui-tui/package.json website/package.json apps/shared/package.json apps/desktop/package.json ui-tui/packages/hermes-ink/package.json
|
||||
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix
|
||||
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix nix/hermes-agent.nix nix/desktop.nix
|
||||
|
||||
use flake
|
||||
|
||||
50
.github/actions/hermes-smoke-test/action.yml
vendored
50
.github/actions/hermes-smoke-test/action.yml
vendored
@@ -1,50 +0,0 @@
|
||||
name: Hermes smoke test
|
||||
description: >
|
||||
Run the image's built-in entrypoint against `--help` and `dashboard --help`
|
||||
to catch basic runtime regressions before publishing. Requires the image
|
||||
to already be loaded into the local Docker daemon under `image`.
|
||||
|
||||
Works identically on amd64 and arm64 runners.
|
||||
|
||||
inputs:
|
||||
image:
|
||||
description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test)
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Ensure /tmp/hermes-test is hermes-writable
|
||||
shell: bash
|
||||
run: |
|
||||
# The image runs as the hermes user (UID 10000). GitHub Actions
|
||||
# creates /tmp/hermes-test root-owned by default, which hermes
|
||||
# can't write to — chown it to match the in-container UID before
|
||||
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
|
||||
# with their own UID hit the same issue and have their own
|
||||
# remediations (HERMES_UID env var, or chown locally).
|
||||
mkdir -p /tmp/hermes-test
|
||||
sudo chown -R 10000:10000 /tmp/hermes-test
|
||||
|
||||
- name: hermes --help
|
||||
shell: bash
|
||||
run: |
|
||||
# Use the image's real ENTRYPOINT (/init + main-wrapper.sh) so
|
||||
# this exercises the actual production startup path. PR #30136
|
||||
# review caught that an --entrypoint override here had been
|
||||
# silently neutered by the s6-overlay migration — stage2-hook
|
||||
# ignores its CMD args, so the smoke test was a no-op.
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
"${{ inputs.image }}" --help
|
||||
|
||||
- name: hermes dashboard --help
|
||||
shell: bash
|
||||
run: |
|
||||
# Regression guard for #9153: dashboard was present in source but
|
||||
# missing from the published image. If this fails, something in
|
||||
# the Dockerfile is excluding the dashboard subcommand from the
|
||||
# installed package.
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
"${{ inputs.image }}" dashboard --help
|
||||
86
.github/workflows/ci.yml
vendored
86
.github/workflows/ci.yml
vendored
@@ -20,6 +20,7 @@ permissions:
|
||||
pull-requests: write # needed by lint (PR comment) + supply-chain (PR comment)
|
||||
actions: read # needed by osv-scanner (SARIF upload)
|
||||
security-events: write # needed by osv-scanner (SARIF upload)
|
||||
packages: write # needed by docker build
|
||||
|
||||
concurrency:
|
||||
group: ci-${{ github.ref }}
|
||||
@@ -32,6 +33,7 @@ jobs:
|
||||
# (all lanes true) so post-merge validation is never weakened.
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
detect:
|
||||
name: Detect affected areas
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
python: ${{ steps.classify.outputs.python }}
|
||||
@@ -53,11 +55,15 @@ jobs:
|
||||
# Skipped workflows (if condition is false) don't spin up runners.
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
tests:
|
||||
name: Python tests
|
||||
needs: detect
|
||||
if: needs.detect.outputs.python == 'true'
|
||||
uses: ./.github/workflows/tests.yml
|
||||
with:
|
||||
slice_count: 8
|
||||
|
||||
lint:
|
||||
name: Python lints
|
||||
needs: detect
|
||||
if: needs.detect.outputs.python == 'true'
|
||||
uses: ./.github/workflows/lint.yml
|
||||
@@ -65,35 +71,48 @@ jobs:
|
||||
event_name: ${{ needs.detect.outputs.event_name }}
|
||||
|
||||
typecheck:
|
||||
name: TypeScript
|
||||
needs: detect
|
||||
if: needs.detect.outputs.frontend == 'true'
|
||||
uses: ./.github/workflows/typecheck.yml
|
||||
|
||||
docs-site:
|
||||
name: Docs Site
|
||||
needs: detect
|
||||
if: needs.detect.outputs.site == 'true'
|
||||
uses: ./.github/workflows/docs-site-checks.yml
|
||||
|
||||
history-check:
|
||||
name: Deny unrelated histories
|
||||
needs: detect
|
||||
if: needs.detect.outputs.event_name == 'pull_request'
|
||||
uses: ./.github/workflows/history-check.yml
|
||||
|
||||
contributor-check:
|
||||
name: Check contributors
|
||||
needs: detect
|
||||
if: needs.detect.outputs.python == 'true'
|
||||
uses: ./.github/workflows/contributor-check.yml
|
||||
|
||||
uv-lockfile:
|
||||
name: Check uv.lock
|
||||
needs: detect
|
||||
uses: ./.github/workflows/uv-lockfile-check.yml
|
||||
|
||||
docker-lint:
|
||||
name: Lint Docker scripts
|
||||
needs: detect
|
||||
if: needs.detect.outputs.docker_meta == 'true'
|
||||
uses: ./.github/workflows/docker-lint.yml
|
||||
|
||||
docker:
|
||||
name: Build&Test Docker image
|
||||
needs: detect
|
||||
if: needs.detect.outputs.python == 'true' || needs.detect.outputs.frontend == 'true' || needs.detect.outputs.docker_meta == 'true'
|
||||
uses: ./.github/workflows/docker.yml
|
||||
|
||||
supply-chain:
|
||||
name: Supply-chain scan
|
||||
needs: detect
|
||||
if: needs.detect.outputs.event_name == 'pull_request' && (needs.detect.outputs.scan == 'true' || needs.detect.outputs.deps == 'true' || needs.detect.outputs.mcp_catalog == 'true')
|
||||
uses: ./.github/workflows/supply-chain-audit.yml
|
||||
@@ -104,7 +123,7 @@ jobs:
|
||||
mcp_catalog: ${{ needs.detect.outputs.mcp_catalog == 'true' }}
|
||||
|
||||
osv-scanner:
|
||||
needs: detect
|
||||
name: OSV scan
|
||||
uses: ./.github/workflows/osv-scanner.yml
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
@@ -127,6 +146,7 @@ jobs:
|
||||
- docker-lint
|
||||
- supply-chain
|
||||
- osv-scanner
|
||||
- docker
|
||||
if: always()
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
@@ -143,3 +163,67 @@ jobs:
|
||||
sys.exit(1)
|
||||
print('All checks passed (or were skipped)')
|
||||
"
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# CI timing report: collect per-job/step durations from the GitHub API,
|
||||
# cache them on main (as a baseline), and on PRs generate an HTML diff
|
||||
# report with a gantt chart + per-step breakdown. The report is uploaded
|
||||
# as an artifact and a markdown summary is written to $GITHUB_STEP_SUMMARY.
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
ci-timings:
|
||||
name: CI timing report
|
||||
needs: all-checks-pass
|
||||
if: always()
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Restore baseline cache (PR only)
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ci-timings-baseline.json
|
||||
# Prefix-match: exact key will never hit (run_id differs), so
|
||||
# restore-keys finds the most recent baseline from main.
|
||||
key: ci-timings-baseline-never-exact
|
||||
restore-keys: |
|
||||
ci-timings-baseline-
|
||||
|
||||
- name: Collect timings and generate report
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
python3 scripts/ci/timings_report.py \
|
||||
--baseline ci-timings-baseline.json \
|
||||
--output ci-timings-report.html \
|
||||
--json-out ci-timings.json \
|
||||
--summary-out ci-timings-summary.md
|
||||
|
||||
- name: Upload HTML report
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
id: ci-timings-artifact
|
||||
with:
|
||||
name: ci-timings-report
|
||||
path: ci-timings-report.html
|
||||
retention-days: 14
|
||||
archive: false
|
||||
|
||||
- name: Output summary
|
||||
env:
|
||||
REPORT_URL: ${{ steps.ci-timings-artifact.outputs.artifact-url}}
|
||||
run: |
|
||||
echo "# CI Timing report" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "[View the full interactive report]($REPORT_URL)" >> "$GITHUB_STEP_SUMMARY"
|
||||
cat ci-timings-summary.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
- name: Save baseline cache (main only)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
run: cp ci-timings.json ci-timings-baseline.json
|
||||
|
||||
- name: Upload baseline to cache (main only)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ci-timings-baseline.json
|
||||
key: ci-timings-baseline-${{ github.run_id }}
|
||||
|
||||
2
.github/workflows/docker-lint.yml
vendored
2
.github/workflows/docker-lint.yml
vendored
@@ -2,7 +2,7 @@ name: Docker / shell lint
|
||||
|
||||
# Lints the container build inputs: Dockerfile (via hadolint) and any shell
|
||||
# scripts under docker/ (via shellcheck). These catch the class of regression
|
||||
# the behavioral docker-publish smoke test can't — unquoted variable
|
||||
# the behavioral docker smoke test can't — unquoted variable
|
||||
# expansions, silently-failing RUN commands, etc.
|
||||
#
|
||||
# Rules and ignores are documented in .hadolint.yaml at the repo root.
|
||||
|
||||
@@ -1,24 +1,9 @@
|
||||
name: Docker Build and Publish
|
||||
name: Docker Build, Test, and Publish
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- '**/*.py'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- 'Dockerfile'
|
||||
- 'docker/**'
|
||||
- '.github/workflows/docker-publish.yml'
|
||||
- '.github/actions/hermes-smoke-test/**'
|
||||
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
|
||||
release:
|
||||
types: [published]
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -39,11 +24,7 @@ env:
|
||||
IMAGE_NAME: nousresearch/hermes-agent
|
||||
|
||||
jobs:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build amd64 natively. This job also runs the smoke tests (basic --help
|
||||
# and the dashboard subcommand regression guard from #9153), because amd64
|
||||
# is the only arch we can `load` into the local daemon on an amd64 runner.
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build, test, and optionally push the amd64 image.
|
||||
build-amd64:
|
||||
# Only run on the upstream repository, not on forks
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
@@ -53,24 +34,19 @@ jobs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
# The image build + smoke test + integration tests run ONLY on
|
||||
# push-to-main and release — never on PRs. They are the heaviest jobs
|
||||
# in CI (~15-45 min) and a broken build surfaces on the main push (and
|
||||
# is gated pre-merge by docker-lint + uv-lockfile-check). Every step
|
||||
# below is skipped on PRs, so the job still reports green and the
|
||||
# required check never hangs.
|
||||
# The image build + integration tests run on every event
|
||||
# (PRs, push-to-main, release). Publish steps below are gated to
|
||||
# push-to-main / release only.
|
||||
- name: Set up Docker Buildx
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Build once, load into the local daemon for smoke testing. Cached
|
||||
# Build once, load into the local daemon for testing. Cached
|
||||
# to gha with a per-arch scope; the push step below reuses every
|
||||
# layer from this build.
|
||||
- name: Build image (amd64, smoke test)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
- name: Build image (amd64)
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -82,25 +58,12 @@ jobs:
|
||||
cache-from: type=gha,scope=docker-amd64
|
||||
cache-to: type=gha,mode=max,scope=docker-amd64
|
||||
|
||||
- name: Smoke test image
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Run the docker-integration test suite against the freshly-built
|
||||
# image already loaded into the local daemon (`:test`). These tests
|
||||
# are excluded from the sharded `tests.yml :: test` matrix on purpose
|
||||
# (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
|
||||
# shard would otherwise reach the session-scoped ``built_image``
|
||||
# fixture in ``tests/docker/conftest.py`` and start a 3-7min
|
||||
# ``docker build`` — guaranteed to
|
||||
# die in fixture setup.
|
||||
# image already loaded into the local daemon (`:test`).
|
||||
#
|
||||
# Piggybacking here avoids a second image build: the smoke test
|
||||
# already proved the image loads + runs, so the daemon has it under
|
||||
# `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
|
||||
# Piggybacking here avoids a second image build: the build step
|
||||
# already loaded the image into the daemon under
|
||||
# `${IMAGE_NAME}:test`, so we just point ``HERMES_TEST_IMAGE`` at
|
||||
# that. The fixture's ``HERMES_TEST_IMAGE`` branch (see
|
||||
# tests/docker/conftest.py:62-63) short-circuits the rebuild.
|
||||
#
|
||||
@@ -110,26 +73,20 @@ jobs:
|
||||
# cheapest path to coverage on every PR that touches docker code.
|
||||
# ---------------------------------------------------------------------
|
||||
- name: Install uv (for docker tests)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
|
||||
- name: Set up Python 3.11 (for docker tests)
|
||||
if: github.event_name != 'pull_request'
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install Python dependencies (for docker tests)
|
||||
if: github.event_name != 'pull_request'
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
# ``dev`` extra pulls in pytest, pytest-asyncio —
|
||||
# everything tests/docker/ needs. We deliberately avoid ``all``
|
||||
# here because the docker tests only drive the container via
|
||||
# subprocess and don't import hermes_agent's optional deps.
|
||||
uv pip install -e ".[dev]"
|
||||
uv sync --locked --python 3.11 --extra dev
|
||||
|
||||
- name: Run docker integration tests
|
||||
if: github.event_name != 'pull_request'
|
||||
env:
|
||||
# Skip rebuild; use the image already loaded by the build step.
|
||||
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
|
||||
@@ -140,11 +97,11 @@ jobs:
|
||||
NOUS_API_KEY: ""
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/docker/ -v --tb=short
|
||||
python -m pytest -m tests/docker/
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
@@ -155,7 +112,7 @@ jobs:
|
||||
- name: Push amd64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -179,7 +136,7 @@ jobs:
|
||||
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
|
||||
with:
|
||||
name: digest-amd64
|
||||
path: /tmp/digests/*
|
||||
@@ -187,10 +144,7 @@ jobs:
|
||||
retention-days: 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build arm64 natively on GitHub's free arm64 runner. This replaces the
|
||||
# previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
|
||||
# a cache scope with amd64. Matches the amd64 job's shape: build+load,
|
||||
# smoke test, then on push/release push by digest.
|
||||
# Build, test, and optionally push the arm64 image.
|
||||
# ---------------------------------------------------------------------------
|
||||
build-arm64:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
@@ -200,29 +154,26 @@ jobs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
# arm64 build runs only on push-to-main and release (see build-amd64).
|
||||
- name: Set up Docker Buildx
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Log in to ghcr.io so the registry-backed build cache below can be
|
||||
# read (cache-from) on every event and written (cache-to) on
|
||||
# push/release. Uses the workflow's GITHUB_TOKEN, which is valid for
|
||||
# the whole job — unlike the gha cache backend's short-lived Azure SAS
|
||||
# token, which expired mid-build on slow cold-cache arm64 runs and
|
||||
# crashed the build before the smoke test (the reason the gha cache
|
||||
# crashed the build before the tests ran (the reason the gha cache
|
||||
# was removed from arm64 PRs in the first place).
|
||||
- name: Log in to ghcr.io (build cache)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Build once, load into the local daemon for smoke testing, then push
|
||||
# Build once, load into the local daemon for testing, then push
|
||||
# by digest below. Reads AND writes the registry-backed cache so the
|
||||
# push reuses layers from this build and the next build starts warm.
|
||||
#
|
||||
@@ -230,9 +181,8 @@ jobs:
|
||||
# cache that previously broke here: its credential is the job-lifetime
|
||||
# GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives-
|
||||
# token failure mode cannot recur.
|
||||
- name: Build image (arm64, smoke test, cached publish)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
- name: Build image (arm64, cached publish)
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -244,15 +194,30 @@ jobs:
|
||||
cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
|
||||
cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max
|
||||
|
||||
- name: Smoke test image
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
- name: Install uv for docker tests
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
|
||||
- name: Set up Python 3.11 for docker tests
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install Python dependencies for docker tests
|
||||
run: |
|
||||
uv sync --locked --python 3.11 --extra dev
|
||||
|
||||
- name: Run docker tests
|
||||
env:
|
||||
# Skip rebuild; use the image already loaded by the build step.
|
||||
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest -m tests/docker/
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
@@ -260,7 +225,7 @@ jobs:
|
||||
- name: Push arm64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -282,7 +247,7 @@ jobs:
|
||||
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
|
||||
with:
|
||||
name: digest-arm64
|
||||
path: /tmp/digests/*
|
||||
@@ -304,17 +269,17 @@ jobs:
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: digest-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
44
.github/workflows/lint.yml
vendored
44
.github/workflows/lint.yml
vendored
@@ -37,7 +37,7 @@ jobs:
|
||||
fetch-depth: 0 # need full history for merge-base + worktree
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
|
||||
- name: Install ruff + ty
|
||||
uses: ./.github/actions/retry
|
||||
@@ -109,46 +109,6 @@ jobs:
|
||||
--output .lint-reports/summary.md
|
||||
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
- name: Upload reports as artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: lint-reports
|
||||
path: .lint-reports/
|
||||
retention-days: 14
|
||||
|
||||
- name: Post / update PR comment
|
||||
if: inputs.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
|
||||
continue-on-error: true
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const body = fs.readFileSync('.lint-reports/summary.md', 'utf8');
|
||||
const marker = '<!-- lint-diff-summary -->';
|
||||
const fullBody = marker + '\n' + body;
|
||||
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
});
|
||||
const existing = comments.find(c => c.body && c.body.includes(marker));
|
||||
if (existing) {
|
||||
await github.rest.issues.updateComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
comment_id: existing.id,
|
||||
body: fullBody,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: fullBody,
|
||||
});
|
||||
}
|
||||
|
||||
ruff-blocking:
|
||||
# Enforce the rules in pyproject.toml [tool.ruff.lint.select]. Currently
|
||||
# PLW1514 (unspecified-encoding) — catches bare ``open()`` /
|
||||
@@ -164,7 +124,7 @@ jobs:
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
|
||||
- name: Install ruff
|
||||
uses: ./.github/actions/retry
|
||||
|
||||
18
.github/workflows/skills-index.yml
vendored
18
.github/workflows/skills-index.yml
vendored
@@ -3,17 +3,17 @@ name: Build Skills Index
|
||||
on:
|
||||
schedule:
|
||||
# Run twice daily: 6 AM and 6 PM UTC
|
||||
- cron: '0 6,18 * * *'
|
||||
workflow_dispatch: # Manual trigger
|
||||
- cron: "0 6,18 * * *"
|
||||
workflow_dispatch: # Manual trigger
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'scripts/build_skills_index.py'
|
||||
- '.github/workflows/skills-index.yml'
|
||||
- "scripts/build_skills_index.py"
|
||||
- ".github/workflows/skills-index.yml"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
actions: write # to trigger deploy-site.yml on schedule
|
||||
actions: write # to trigger deploy-site.yml on schedule
|
||||
|
||||
jobs:
|
||||
build-index:
|
||||
@@ -21,11 +21,11 @@ jobs:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install httpx==0.28.1 pyyaml==6.0.2
|
||||
@@ -36,7 +36,7 @@ jobs:
|
||||
run: python scripts/build_skills_index.py
|
||||
|
||||
- name: Upload index artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
|
||||
with:
|
||||
name: skills-index
|
||||
path: website/static/api/skills-index.json
|
||||
|
||||
37
.github/workflows/tests.yml
vendored
37
.github/workflows/tests.yml
vendored
@@ -2,6 +2,11 @@ name: Tests
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
slice_count:
|
||||
description: Number of parallel test slices
|
||||
type: number
|
||||
default: 8
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -12,13 +17,29 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
generate:
|
||||
name: "Generate slices"
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
slices: ${{ steps.matrix.outputs.slices }}
|
||||
slice_count: ${{ steps.matrix.outputs.slice_count }}
|
||||
steps:
|
||||
- name: Generate test slices
|
||||
id: matrix
|
||||
run: |
|
||||
COUNT="${{ inputs.slice_count }}"
|
||||
SLICES=$(python3 -c "import json; print(json.dumps({'slice': list(range(1, $COUNT + 1))}))")
|
||||
echo "slices=$SLICES" >> "$GITHUB_OUTPUT"
|
||||
echo "slice_count=$COUNT" >> "$GITHUB_OUTPUT"
|
||||
|
||||
test:
|
||||
name: Run tests slice
|
||||
needs: generate
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
slice: [1, 2, 3, 4, 5, 6]
|
||||
matrix: ${{ fromJSON(needs.generate.outputs.slices) }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -49,7 +70,7 @@ jobs:
|
||||
rg --version
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
with:
|
||||
# Persist uv's download/wheel cache (~/.cache/uv) across runs.
|
||||
# Keyed on the dependency manifests, so the cache is reused until
|
||||
@@ -78,8 +99,8 @@ jobs:
|
||||
# re-download, keeping the persisted cache small and fast to restore.
|
||||
run: uv cache prune --ci
|
||||
|
||||
- name: Run tests (slice ${{ matrix.slice }}/6)
|
||||
# Per-file isolation via scripts/run_tests_parallel.py: discovers
|
||||
- name: Run tests (slice ${{ matrix.slice }}/${{ needs.generate.outputs.slice_count }})
|
||||
# Per-file isolation via scripts/run_tests.sh: discovers
|
||||
# every test_*.py file under tests/ (excluding integration/ + e2e/),
|
||||
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
|
||||
# with bounded parallelism. No xdist, no shared workers, no
|
||||
@@ -97,14 +118,14 @@ jobs:
|
||||
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
|
||||
# the job with cleaner semantics.
|
||||
#
|
||||
# Matrix slicing (--slice I/N): files are distributed across 6
|
||||
# Matrix slicing (--slice I/N): files are distributed across N
|
||||
# jobs by cached duration (LPT algorithm) so each job gets
|
||||
# roughly equal wall time. Without a cache, files default to 2s
|
||||
# estimate and get split roughly evenly by count — still correct,
|
||||
# just not perfectly balanced.
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python scripts/run_tests_parallel.py --slice ${{ matrix.slice }}/6
|
||||
scripts/run_tests.sh --slice ${{ matrix.slice }}/${{ needs.generate.outputs.slice_count }}
|
||||
env:
|
||||
# Ensure tests don't accidentally call real APIs
|
||||
OPENROUTER_API_KEY: ""
|
||||
@@ -173,7 +194,7 @@ jobs:
|
||||
rg --version
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
with:
|
||||
# Persist uv's download/wheel cache (~/.cache/uv) across runs.
|
||||
# Keyed on the dependency manifests, so the cache is reused until
|
||||
|
||||
8
.github/workflows/typecheck.yml
vendored
8
.github/workflows/typecheck.yml
vendored
@@ -6,6 +6,7 @@ on:
|
||||
|
||||
jobs:
|
||||
typecheck:
|
||||
name: Check TypeScript
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -22,8 +23,7 @@ jobs:
|
||||
# native builds. Skipping install scripts drops node-pty's node-gyp
|
||||
# header fetch — the transient flake that killed this job pre-`tsc` — and
|
||||
# is faster. retry covers the remaining registry blips.
|
||||
-
|
||||
uses: ./.github/actions/retry
|
||||
- uses: ./.github/actions/retry
|
||||
with:
|
||||
command: npm ci --ignore-scripts
|
||||
- run: npm run --prefix ${{ matrix.package }} typecheck
|
||||
@@ -35,6 +35,7 @@ jobs:
|
||||
# users build apps/desktop from source on install/update. Run the real
|
||||
# `vite build` here so that class of break fails in CI instead.
|
||||
desktop-build:
|
||||
name: Build desktop app
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -44,8 +45,7 @@ jobs:
|
||||
cache: npm
|
||||
# Keep install scripts here: the production build may need node-pty's
|
||||
# native binary. retry handles the transient install-time fetch flakes.
|
||||
-
|
||||
uses: ./.github/actions/retry
|
||||
- uses: ./.github/actions/retry
|
||||
with:
|
||||
command: npm ci
|
||||
- run: npm run --prefix apps/desktop build
|
||||
|
||||
32
.github/workflows/upload_to_pypi.yml
vendored
32
.github/workflows/upload_to_pypi.yml
vendored
@@ -5,11 +5,11 @@ name: Publish to PyPI
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v20*' # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
|
||||
- "v20*" # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
confirm_tag:
|
||||
description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
|
||||
description: "Tag to publish (e.g. v2026.5.15). Must already exist."
|
||||
required: true
|
||||
type: string
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
name: Build distribution 📦
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
# On workflow_dispatch, check out the confirmed tag.
|
||||
@@ -43,17 +43,17 @@ jobs:
|
||||
fi
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.13'
|
||||
python-version: "3.13"
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: '22'
|
||||
node-version: "22"
|
||||
|
||||
- name: Build web dashboard
|
||||
run: cd web && npm ci && npm run build
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
run: uv build --sdist --wheel
|
||||
|
||||
- name: Upload distribution artifacts
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -94,17 +94,17 @@ jobs:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/hermes-agent
|
||||
permissions:
|
||||
id-token: write # OIDC trusted publishing
|
||||
id-token: write # OIDC trusted publishing
|
||||
|
||||
steps:
|
||||
- name: Download distribution artifacts
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
|
||||
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
|
||||
with:
|
||||
skip-existing: true
|
||||
|
||||
@@ -116,12 +116,12 @@ jobs:
|
||||
needs: publish
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write # attach assets to the existing release
|
||||
id-token: write # sigstore signing
|
||||
contents: write # attach assets to the existing release
|
||||
id-token: write # sigstore signing
|
||||
|
||||
steps:
|
||||
- name: Download distribution artifacts
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -145,7 +145,7 @@ jobs:
|
||||
|
||||
- name: Sign with Sigstore
|
||||
if: env.skip_sign != 'true'
|
||||
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
|
||||
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
|
||||
with:
|
||||
inputs: >-
|
||||
./dist/*.tar.gz
|
||||
|
||||
6
.github/workflows/uv-lockfile-check.yml
vendored
6
.github/workflows/uv-lockfile-check.yml
vendored
@@ -4,7 +4,7 @@ name: uv.lock check
|
||||
# that modify pyproject.toml without regenerating uv.lock (or vice versa)
|
||||
# must not merge, because the Docker build's `uv sync --frozen` step will
|
||||
# fail on a stale lockfile and we'd rather catch it here than in the
|
||||
# docker-publish workflow on main.
|
||||
# docker workflow on main.
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# IMPORTANT: this check runs against the MERGED state, not just your branch
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
|
||||
# `uv lock --check` re-resolves the project from pyproject.toml and
|
||||
# compares the result to uv.lock, exiting non-zero if they disagree.
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
|
||||
This check is blocking because the Docker image build uses
|
||||
`uv sync --frozen --extra all`, which rejects stale lockfiles
|
||||
— catching it here avoids a ~15 min failed docker-publish run
|
||||
— catching it here avoids a ~15 min failed docker run
|
||||
on `main` post-merge.
|
||||
EOF
|
||||
echo "::error title=uv.lock out of sync::Run \`uv lock\` locally and commit the result. If on a PR, sync with main first."
|
||||
|
||||
28
Dockerfile
28
Dockerfile
@@ -189,7 +189,13 @@ RUN cd web && npm run build && \
|
||||
|
||||
# ---------- Source code ----------
|
||||
# .dockerignore excludes node_modules, so the installs above survive.
|
||||
COPY . .
|
||||
# --link decouples this layer from parents for cache purposes; --chmod bakes
|
||||
# the final read-only permissions at copy time so we skip the separate
|
||||
# `chmod -R` pass that previously walked ~30k files across the venv +
|
||||
# node_modules + source (21s amd64 / 222s arm64 — #49113). `a+rX,go-w`
|
||||
# gives the non-root hermes user read + traverse but no write; root retains
|
||||
# write so the build steps below don't need chmod u+w dances.
|
||||
COPY --link --chmod=a+rX,go-w . .
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Link hermes-agent itself (editable). Deps are already installed in the
|
||||
@@ -197,19 +203,15 @@ COPY . .
|
||||
# resolution or downloads.
|
||||
RUN uv pip install --no-cache-dir --no-deps -e "."
|
||||
|
||||
# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
|
||||
# instances must not be able to self-edit the installed source or venv; user
|
||||
# data, skills, plugins, config, logs, and dashboard uploads live under
|
||||
# /opt/data instead. Root can still repair the image during build/boot, but
|
||||
# supervised Hermes processes drop to the non-root hermes user.
|
||||
# Wire the exec shim and install-method stamp. Files under /opt/hermes are
|
||||
# already root-owned (COPY, uv sync, npm install all run as root) and
|
||||
# read-only for the hermes user (go-w from the --chmod above).
|
||||
|
||||
USER root
|
||||
RUN mkdir -p /opt/hermes/bin && \
|
||||
cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
|
||||
chmod 0755 /opt/hermes/bin/hermes && \
|
||||
printf 'docker\n' > /opt/hermes/.install_method && \
|
||||
chown -R root:root /opt/hermes && \
|
||||
chmod -R a+rX /opt/hermes && \
|
||||
chmod -R a-w /opt/hermes
|
||||
printf 'docker\n' > /opt/hermes/.install_method
|
||||
# The ``.install_method`` stamp is baked next to the running code (the install
|
||||
# tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
|
||||
# volume that is commonly bind-mounted from the host and even shared with a
|
||||
@@ -236,13 +238,11 @@ RUN mkdir -p /opt/hermes/bin && \
|
||||
#
|
||||
# The arg is optional — local `docker build` without --build-arg simply
|
||||
# omits the file, and the runtime falls back to live-git lookup. CI
|
||||
# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so
|
||||
# (.github/workflows/docker.yml) passes ${{ github.sha }} so
|
||||
# every published image has it.
|
||||
ARG HERMES_GIT_SHA=
|
||||
RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
|
||||
chmod u+w /opt/hermes && \
|
||||
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
|
||||
chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
|
||||
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha; \
|
||||
fi
|
||||
|
||||
# ---------- s6-overlay service wiring ----------
|
||||
|
||||
@@ -25,10 +25,12 @@
|
||||
in
|
||||
{
|
||||
devShells.default = pkgs.mkShell {
|
||||
inputsFrom = packages;
|
||||
packages = with pkgs; [
|
||||
uv
|
||||
];
|
||||
packages =
|
||||
with pkgs;
|
||||
[
|
||||
uv
|
||||
]
|
||||
++ self'.packages.default.passthru.devDeps;
|
||||
shellHook = ''
|
||||
echo "Hermes Agent dev shell"
|
||||
${combinedNonNpm}
|
||||
|
||||
@@ -37,10 +37,14 @@
|
||||
}:
|
||||
let
|
||||
nodejs = nodejs_22;
|
||||
hermesVenv = callPackage ./python.nix {
|
||||
inherit uv2nix pyproject-nix pyproject-build-systems;
|
||||
dependency-groups = [ "all" ] ++ extraDependencyGroups;
|
||||
};
|
||||
mkHermesVenv =
|
||||
extraDependencyGroups:
|
||||
callPackage ./python.nix {
|
||||
inherit uv2nix pyproject-nix pyproject-build-systems;
|
||||
dependency-groups = [ "all" ] ++ extraDependencyGroups;
|
||||
};
|
||||
|
||||
hermesVenv = mkHermesVenv extraDependencyGroups;
|
||||
|
||||
hermesNpmLib = callPackage ./lib.nix {
|
||||
inherit npm-lockfile-fix nodejs;
|
||||
@@ -106,12 +110,6 @@ let
|
||||
|
||||
pythonPath = lib.makeSearchPath sitePackagesPath allExtraPythonPackages;
|
||||
|
||||
pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
|
||||
uvLockHash =
|
||||
if builtins.pathExists ../uv.lock then
|
||||
builtins.hashString "sha256" (builtins.readFile ../uv.lock)
|
||||
else
|
||||
"none";
|
||||
checkPackageCollisions = ''
|
||||
import pathlib, sys, re
|
||||
|
||||
@@ -223,21 +221,10 @@ stdenv.mkDerivation (finalAttrs: {
|
||||
};
|
||||
|
||||
devShellHook = ''
|
||||
STAMP=".nix-stamps/hermes-agent"
|
||||
STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
|
||||
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
|
||||
echo "hermes-agent: installing Python dependencies..."
|
||||
uv venv .venv --python ${python312}/bin/python3 2>/dev/null || true
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all]"
|
||||
[ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
|
||||
mkdir -p .nix-stamps
|
||||
echo "$STAMP_VALUE" > "$STAMP"
|
||||
else
|
||||
source .venv/bin/activate
|
||||
export HERMES_PYTHON=${hermesVenv}/bin/python3
|
||||
fi
|
||||
export HERMES_PYTHON=${hermesVenv}/bin/python3
|
||||
'';
|
||||
|
||||
devDeps = runtimeDeps ++ [ (mkHermesVenv (extraDependencyGroups ++ [ "dev" ])) ];
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
|
||||
@@ -2,54 +2,62 @@
|
||||
{ inputs, ... }:
|
||||
{
|
||||
perSystem =
|
||||
{ pkgs, lib, inputs', ... }:
|
||||
{
|
||||
pkgs,
|
||||
lib,
|
||||
inputs',
|
||||
...
|
||||
}:
|
||||
let
|
||||
hermesAgent = pkgs.callPackage ./hermes-agent.nix {
|
||||
minimal = pkgs.callPackage ./hermes-agent.nix {
|
||||
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
|
||||
npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
|
||||
# Only embed clean revs — dirtyRev doesn't represent any upstream
|
||||
# commit, so comparing it would always claim "update available".
|
||||
rev = inputs.self.rev or null;
|
||||
};
|
||||
|
||||
# All platform-portable optional integrations pre-built.
|
||||
full = minimal.override {
|
||||
extraDependencyGroups = [
|
||||
"anthropic"
|
||||
"azure-identity"
|
||||
"bedrock"
|
||||
"daytona"
|
||||
"dingtalk"
|
||||
"edge-tts"
|
||||
"exa"
|
||||
"fal"
|
||||
"feishu"
|
||||
"firecrawl"
|
||||
"hindsight"
|
||||
"honcho"
|
||||
"messaging"
|
||||
"modal"
|
||||
"parallel-web"
|
||||
"tts-premium"
|
||||
"voice"
|
||||
]
|
||||
# matrix is Linux-only (oqs/liboqs lacks aarch64-darwin wheels).
|
||||
++ lib.optionals pkgs.stdenv.isLinux [ "matrix" ];
|
||||
};
|
||||
in
|
||||
{
|
||||
packages = {
|
||||
default = hermesAgent;
|
||||
default = full;
|
||||
|
||||
inherit minimal;
|
||||
|
||||
# Ships discord.py + python-telegram-bot + slack-sdk so a plain
|
||||
# `nix profile install .#messaging` connects to Discord/Telegram/Slack
|
||||
# on first run — lazy-install can't write to the read-only /nix/store.
|
||||
messaging = hermesAgent.override {
|
||||
messaging = minimal.override {
|
||||
extraDependencyGroups = [ "messaging" ];
|
||||
};
|
||||
|
||||
# All platform-portable optional integrations pre-built.
|
||||
# matrix is Linux-only (oqs/liboqs lacks aarch64-darwin wheels).
|
||||
full = hermesAgent.override {
|
||||
extraDependencyGroups = [
|
||||
"anthropic"
|
||||
"azure-identity"
|
||||
"bedrock"
|
||||
"daytona"
|
||||
"dingtalk"
|
||||
"edge-tts"
|
||||
"exa"
|
||||
"fal"
|
||||
"feishu"
|
||||
"firecrawl"
|
||||
"hindsight"
|
||||
"honcho"
|
||||
"messaging"
|
||||
"modal"
|
||||
"parallel-web"
|
||||
"tts-premium"
|
||||
"voice"
|
||||
] ++ lib.optionals pkgs.stdenv.isLinux [ "matrix" ];
|
||||
};
|
||||
|
||||
tui = hermesAgent.hermesTui;
|
||||
web = hermesAgent.hermesWeb;
|
||||
desktop = hermesAgent.hermesDesktop;
|
||||
tui = full.hermesTui;
|
||||
web = full.hermesWeb;
|
||||
desktop = full.hermesDesktop;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
782
scripts/ci/timings_report.py
Normal file
782
scripts/ci/timings_report.py
Normal file
@@ -0,0 +1,782 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Collect CI job/step timings from the GitHub API and generate an HTML diff report.
|
||||
|
||||
In CI, the script reads GITHUB_TOKEN, GITHUB_REPOSITORY, GITHUB_RUN_ID, and
|
||||
GITHUB_SHA from the environment to collect timings via the REST API.
|
||||
|
||||
If a baseline JSON file (ci-timings-baseline.json by default) exists, the
|
||||
report includes a diff with per-job and per-step deltas, plus a gantt chart
|
||||
overlaying current vs baseline bars.
|
||||
|
||||
Usage:
|
||||
# Collect from API (CI mode):
|
||||
python scripts/ci/timings_report.py
|
||||
|
||||
# Regenerate HTML from saved JSON (testing):
|
||||
python scripts/ci/timings_report.py --from-json ci-timings.json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from datetime import datetime
|
||||
from html import escape
|
||||
|
||||
API_BASE = "https://api.github.com"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GitHub API helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def api_get(path: str, token: str, params: dict | None = None,
            list_key: str | None = None) -> list | dict:
    """Authenticated GitHub API GET with automatic pagination.

    For list endpoints, pass list_key to extract items from the paginated
    wrapper response (e.g. list_key='jobs' for {'total_count': N, 'jobs': [...]}).
    When list_key is omitted, a non-list response is returned as-is (single object).

    Pagination follows the rel="next" URL advertised in the ``Link`` response
    header until no further page is offered.

    Args:
        path: API path appended to API_BASE (e.g. "/repos/o/r/actions/runs").
        token: Bearer token sent in the Authorization header.
        params: Optional query parameters for the first request.
        list_key: Key of the item list inside a wrapper object, if any.

    Returns:
        The accumulated list of items for list endpoints, or the decoded
        JSON object for single-object endpoints.

    Raises:
        urllib.error.URLError: on HTTP/network failures (HTTPError included).
        TimeoutError: if a response stalls for longer than the timeout.
    """
    # Bound every request: without a timeout urlopen blocks forever on a
    # stalled connection, which would hang the CI job until it is killed.
    timeout_s = 30

    url = f"{API_BASE}{path}"
    if params:
        url += "?" + urllib.parse.urlencode(params)

    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "User-Agent": "ci-timings-report",
    }

    results: list = []
    while url:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout_s) as resp:
            data = json.loads(resp.read())
            link_header = resp.headers.get("Link", "")

        if list_key:
            # `or []` tolerates a wrapper whose list value is JSON null.
            results.extend(data.get(list_key) or [])
        elif isinstance(data, list):
            results.extend(data)
        else:
            # Single-object endpoint: nothing to paginate.
            return data

        # Link header looks like: <url1>; rel="next", <url2>; rel="last"
        next_url = None
        for part in link_header.split(","):
            part = part.strip()
            if 'rel="next"' in part:
                next_url = part[part.find("<") + 1:part.find(">")]
                break
        url = next_url

    return results
|
||||
|
||||
|
||||
def parse_ts(ts: str | None) -> datetime | None:
    """Parse an ISO-8601 timestamp string into a datetime.

    A trailing "Z" is rewritten to "+00:00" before parsing. Returns None
    when ``ts`` is None or empty.
    """
    if ts:
        normalized = ts.replace("Z", "+00:00")
        return datetime.fromisoformat(normalized)
    return None
|
||||
|
||||
|
||||
def dur_s(started: str | None, completed: str | None) -> float | None:
    """Return the elapsed seconds between two timestamp strings.

    Both values are parsed with parse_ts; if either one is missing the
    duration is unknown and None is returned.
    """
    begin, finish = parse_ts(started), parse_ts(completed)
    if begin and finish:
        return (finish - begin).total_seconds()
    return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Timings collection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _normalize_job(raw: dict) -> dict:
    """Reduce a raw job payload to the fields the report uses.

    Each step and the job itself get a computed ``duration_s`` (via dur_s)
    next to their original start/completion timestamps. The workflow name
    is read from the ``_workflow_name`` key when present.
    """

    def _step_record(step: dict) -> dict:
        began = step.get("started_at")
        ended = step.get("completed_at")
        return {
            "name": step.get("name", ""),
            "number": step.get("number", 0),
            "status": step.get("status", ""),
            "conclusion": step.get("conclusion", ""),
            "started_at": began,
            "completed_at": ended,
            "duration_s": dur_s(began, ended),
        }

    job_began = raw.get("started_at")
    job_ended = raw.get("completed_at")
    return {
        "name": raw.get("name", "unknown"),
        "workflow_name": raw.get("_workflow_name", ""),
        "job_id": raw.get("id"),
        "status": raw.get("status", ""),
        "conclusion": raw.get("conclusion", ""),
        "started_at": job_began,
        "completed_at": job_ended,
        "duration_s": dur_s(job_began, job_ended),
        "html_url": raw.get("html_url", ""),
        # A missing or null "steps" entry yields an empty list.
        "steps": [_step_record(step) for step in (raw.get("steps") or [])],
    }
|
||||
|
||||
|
||||
def collect_timings(token: str, repo: str, run_id: str, head_sha: str) -> dict:
    """Gather job and step timings for one CI run through the GitHub API.

    Steps performed:
      1. Fetch the orchestrator run and its direct jobs, dropping
         workflow-call placeholder jobs (a step name starting with
         "Run ./.github/") and jobs that are still queued or in progress.
      2. List runs for head_sha with event=workflow_call, keeping those
         created at or after the orchestrator run.
      3. Fetch the jobs of each such sub-workflow run, tagging every job
         with the sub-run's name and id.

    Returns a dict with run_id, head_sha, created_at and the normalized
    jobs sorted by start timestamp.
    """
    owner, repo_name = repo.split("/")
    runs_path = f"/repos/{owner}/{repo_name}/actions/runs"
    unfinished = ("in_progress", "queued")

    def _is_placeholder(job: dict) -> bool:
        # Jobs that merely invoke a reusable workflow carry such a step.
        return any(
            step.get("name", "").startswith("Run ./.github/")
            for step in (job.get("steps") or [])
        )

    # Orchestrator run info
    run_info = api_get(f"{runs_path}/{run_id}", token)
    created_at = run_info.get("created_at", "")

    # Orchestrator direct jobs (excluding placeholders and self/unfinished)
    orch_jobs = api_get(f"{runs_path}/{run_id}/jobs", token, list_key="jobs")
    direct = [
        job for job in orch_jobs
        if not _is_placeholder(job) and job.get("status") not in unfinished
    ]

    # Sub-workflow runs for the same commit, created no earlier than this run
    query = {
        "head_sha": head_sha,
        "event": "workflow_call",
        "per_page": 100,
    }
    candidate_runs = api_get(runs_path, token, params=query, list_key="workflow_runs")
    sub_runs = [run for run in candidate_runs if run.get("created_at", "") >= created_at]

    sub_jobs_raw = []
    for sub_run in sub_runs:
        sub_id = sub_run["id"]
        sub_name = sub_run.get("name", "")
        for job in api_get(f"{runs_path}/{sub_id}/jobs", token, list_key="jobs"):
            job["_workflow_name"] = sub_name
            job["_workflow_run_id"] = sub_id
            sub_jobs_raw.append(job)

    # Normalize, drop anything still running, order by start time
    finished = [
        job for job in map(_normalize_job, direct + sub_jobs_raw)
        if job["status"] not in unfinished
    ]
    finished.sort(key=lambda job: job.get("started_at") or "")

    return {
        "run_id": run_id,
        "head_sha": head_sha,
        "created_at": created_at,
        "jobs": finished,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Formatting helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def fmt_dur(seconds: float | None) -> str:
    """Format a duration in seconds as a short human-readable string.

    Args:
        seconds: Duration in seconds, or None when unknown.

    Returns:
        "—" for None, "<x.y>s" for durations that display below one minute,
        otherwise "<m>m" or "<m>m<s>s" rounded to whole seconds.
    """
    if seconds is None:
        return "—"
    # Decide on the value as it will be *displayed*, so 59.96 is not
    # rendered as the nonsensical "60.0s".
    if round(seconds, 1) < 60:
        return f"{seconds:.1f}s"
    # Round to whole seconds BEFORE splitting into minutes/seconds;
    # splitting first turned 119.7 into "1m60s" instead of "2m".
    minutes, secs = divmod(round(seconds), 60)
    if secs == 0:
        return f"{minutes}m"
    return f"{minutes}m{secs}s"
|
||||
|
||||
|
||||
def fmt_delta(current: float | None, baseline: float | None) -> tuple[str, str]:
    """Describe the change from baseline to current.

    Returns a (text, css_class) pair. The text shows the signed difference
    in seconds followed by a relative figure in parentheses; the class is
    "neutral" for differences under one second, otherwise "slower" or
    "faster". A missing operand yields ("—", "neutral").
    """
    if current is None or baseline is None:
        return ("—", "neutral")

    delta = current - baseline

    # Relative change; a zero baseline has no meaningful percentage.
    if baseline == 0:
        pct_str = "new" if delta > 0 else "0%"
    else:
        pct_str = f"{(delta / baseline) * 100:+.1f}%"

    # Sub-second differences are treated as noise.
    if abs(delta) < 1.0:
        css_class = "neutral"
    else:
        css_class = "slower" if delta > 0 else "faster"

    prefix = "+" if delta >= 0 else ""
    text = f"{prefix}{delta:.1f}s ({pct_str})"
    return (text, css_class)
|
||||
|
||||
|
||||
def nice_ticks(max_seconds: float, num_ticks: int = 8) -> list[int]:
    """Pick evenly spaced axis tick positions (in seconds) from zero upward.

    The spacing is the smallest "round" interval that is at least
    max_seconds / num_ticks; when none of the predefined intervals is large
    enough, the truncated raw spacing is used (never below 3600).
    A non-positive max_seconds yields [0].
    """
    if max_seconds <= 0:
        return [0]

    round_intervals = (5, 10, 15, 30, 60, 120, 180, 300, 600, 900, 1800, 3600, 7200)
    raw = max_seconds / num_ticks

    step = next((interval for interval in round_intervals if interval >= raw), None)
    if step is None:
        step = max(int(raw), 3600)

    upper_bound = int(max_seconds) + step + 1
    return list(range(0, upper_bound, step))
|
||||
|
||||
|
||||
def fmt_tick(seconds: int) -> str:
    """Render an axis tick value as "<s>s", "<m>m" or "<m>m<s>s"."""
    if seconds < 60:
        return f"{seconds}s"
    minutes, remainder = divmod(seconds, 60)
    # Whole minutes drop the seconds part entirely.
    tail = "" if remainder == 0 else f"{remainder}s"
    return f"{minutes}m{tail}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stats computation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def compute_stats(timings: dict, baseline: dict | None = None) -> dict:
    """Summarize a run's timings, optionally compared against a baseline.

    Computes wall time (earliest job start to latest job completion) and
    total compute (sum of job durations) for the current run and, when a
    baseline is given, for the baseline too. Jobs are matched to baseline
    jobs by name and counted as faster, slower, unchanged (difference under
    one second) or having no baseline counterpart.
    """

    def _stamps(job_list: list, field: str) -> list:
        # Parsed timestamps for `field`, skipping jobs that lack one.
        parsed = (parse_ts(job.get(field)) for job in job_list)
        return [stamp for stamp in parsed if stamp is not None]

    def _total_duration(job_list: list) -> float:
        return sum(job.get("duration_s") or 0 for job in job_list)

    jobs = timings.get("jobs", [])
    baseline_by_name = {job["name"]: job for job in (baseline or {}).get("jobs", [])}

    # Current run: wall time and summed compute
    starts = _stamps(jobs, "started_at")
    ends = _stamps(jobs, "completed_at")
    wall = 0
    if starts and ends:
        wall = (max(ends) - min(starts)).total_seconds()
    compute = _total_duration(jobs)

    # Baseline run: same figures, left as None when unavailable
    bl_wall = None
    bl_compute = None
    if baseline:
        baseline_jobs = baseline.get("jobs", [])
        bl_starts = _stamps(baseline_jobs, "started_at")
        bl_ends = _stamps(baseline_jobs, "completed_at")
        if bl_starts and bl_ends:
            bl_wall = (max(bl_ends) - min(bl_starts)).total_seconds()
        bl_compute = _total_duration(baseline_jobs)

    # Classify each job against its same-named baseline job
    tally = {"faster": 0, "slower": 0, "unchanged": 0, "no_baseline": 0}
    for job in jobs:
        prior = baseline_by_name.get(job["name"])
        if not prior:
            tally["no_baseline"] += 1
            continue
        cur_d = job.get("duration_s") or 0
        bl_d = prior.get("duration_s") or 0
        if abs(cur_d - bl_d) < 1.0:
            bucket = "unchanged"
        elif cur_d > bl_d:
            bucket = "slower"
        else:
            bucket = "faster"
        tally[bucket] += 1

    return {
        "wall": wall,
        "compute": compute,
        "bl_wall": bl_wall,
        "bl_compute": bl_compute,
        "faster": tally["faster"],
        "slower": tally["slower"],
        "unchanged": tally["unchanged"],
        "no_baseline": tally["no_baseline"],
        "total_jobs": len(jobs),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTML generation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
CSS = """
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
|
||||
background: #0d1117; color: #e6edf3; line-height: 1.5; padding: 24px;
|
||||
}
|
||||
h1 { font-size: 24px; border-bottom: 1px solid #30363d; padding-bottom: 12px; margin-bottom: 8px; }
|
||||
.meta { color: #8b949e; font-size: 13px; margin-bottom: 24px; }
|
||||
h2 { font-size: 18px; margin: 32px 0 12px; }
|
||||
|
||||
/* Stats cards */
|
||||
.stats { display: flex; gap: 12px; flex-wrap: wrap; margin-bottom: 24px; }
|
||||
.stat-card {
|
||||
background: #161b22; border: 1px solid #30363d; border-radius: 8px;
|
||||
padding: 14px 18px; min-width: 140px;
|
||||
}
|
||||
.stat-label { font-size: 12px; color: #8b949e; text-transform: uppercase; letter-spacing: 0.5px; }
|
||||
.stat-value { font-size: 22px; font-weight: 600; margin: 4px 0; }
|
||||
.stat-delta { font-size: 13px; }
|
||||
.faster { color: #3fb950; }
|
||||
.slower { color: #f85149; }
|
||||
.neutral { color: #8b949e; }
|
||||
|
||||
/* Gantt */
|
||||
.gantt-wrap { overflow-x: auto; }
|
||||
.gantt { min-width: 700px; }
|
||||
.gantt-row { display: flex; align-items: center; height: 28px; }
|
||||
.gantt-label {
|
||||
width: 220px; padding-right: 12px; text-align: right;
|
||||
font-size: 12px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;
|
||||
}
|
||||
.gantt-track { flex: 1; position: relative; height: 100%; border-left: 1px solid #21262d; }
|
||||
.gantt-bar {
|
||||
position: absolute; height: 18px; border-radius: 3px;
|
||||
display: flex; align-items: center; justify-content: center;
|
||||
font-size: 10px; color: transparent; overflow: hidden;
|
||||
transition: color 0.15s;
|
||||
}
|
||||
.gantt-bar:hover { color: #fff; z-index: 10; }
|
||||
.gantt-bar.current { background: #1f6feb; top: 5px; z-index: 2; }
|
||||
.gantt-bar.baseline {
|
||||
background: transparent; border: 1px dashed #8b949e; top: 2px; height: 24px; z-index: 1;
|
||||
}
|
||||
.gantt-axis { display: flex; height: 20px; position: relative; border-top: 1px solid #30363d; margin-top: 4px; }
|
||||
.gantt-tick { position: absolute; font-size: 10px; color: #8b949e; transform: translateX(-50%); top: 4px; }
|
||||
.gantt-tick::before { content: ''; position: absolute; top: -4px; left: 50%; width: 1px; height: 4px; background: #30363d; }
|
||||
.legend { display: flex; gap: 16px; margin-top: 8px; font-size: 12px; color: #8b949e; }
|
||||
.legend-swatch { display: inline-block; width: 16px; height: 10px; border-radius: 2px; margin-right: 4px; vertical-align: middle; }
|
||||
|
||||
/* Tables */
|
||||
table { border-collapse: collapse; width: 100%; font-size: 13px; margin-bottom: 16px; }
|
||||
th, td { border: 1px solid #30363d; padding: 6px 10px; text-align: left; }
|
||||
th { background: #161b22; font-weight: 600; position: sticky; top: 0; }
|
||||
tr:hover td { background: #161b22; }
|
||||
.num { text-align: right; font-variant-numeric: tabular-nums; }
|
||||
.job-name { font-weight: 500; }
|
||||
|
||||
/* Step details */
|
||||
details { margin-bottom: 8px; background: #161b22; border: 1px solid #30363d; border-radius: 6px; }
|
||||
summary { padding: 8px 12px; cursor: pointer; font-weight: 500; font-size: 14px; user-select: none; }
|
||||
summary:hover { background: #21262d; }
|
||||
details[open] summary { border-bottom: 1px solid #30363d; }
|
||||
details table { border: none; margin: 0; }
|
||||
details td, details th { font-size: 12px; }
|
||||
|
||||
/* Worst regressions */
|
||||
.regressions { margin-bottom: 24px; }
|
||||
.regressions table { font-size: 13px; }
|
||||
.tag {
|
||||
display: inline-block; padding: 1px 6px; border-radius: 3px; font-size: 11px; font-weight: 500;
|
||||
}
|
||||
.tag.slow { background: rgba(248,81,73,0.15); color: #f85149; }
|
||||
.tag.fast { background: rgba(63,185,80,0.15); color: #3fb950; }
|
||||
"""
|
||||
|
||||
|
||||
def _gantt_bars(timings: dict, baseline: dict | None) -> str:
|
||||
"""Render the gantt chart HTML.
|
||||
|
||||
Both current and baseline timelines are normalized to start at t=0
|
||||
(relative to each run's earliest job start). The axis scale spans
|
||||
0..max_end across both runs so bars are directly comparable.
|
||||
"""
|
||||
jobs = [j for j in timings.get("jobs", []) if j.get("started_at") and j.get("completed_at")]
|
||||
bl_map = {j["name"]: j for j in (baseline or {}).get("jobs", [])}
|
||||
|
||||
# Current run: relative offsets from earliest start
|
||||
cur_starts = [s for s in (parse_ts(j.get("started_at")) for j in jobs) if s is not None]
|
||||
cur_ends = [e for e in (parse_ts(j.get("completed_at")) for j in jobs) if e is not None]
|
||||
if not cur_starts or not cur_ends:
|
||||
return '<p style="color:#8b949e">No timing data available.</p>'
|
||||
cur_t0 = min(cur_starts)
|
||||
cur_max = (max(cur_ends) - cur_t0).total_seconds()
|
||||
|
||||
# Baseline run: relative offsets from its earliest start
|
||||
bl_t0 = None
|
||||
bl_max = 0.0
|
||||
bl_jobs_timed = []
|
||||
for bl_j in bl_map.values():
|
||||
s = parse_ts(bl_j.get("started_at"))
|
||||
e = parse_ts(bl_j.get("completed_at"))
|
||||
if s is not None and e is not None:
|
||||
bl_jobs_timed.append((bl_j, s, e))
|
||||
if bl_t0 is None or s < bl_t0:
|
||||
bl_t0 = s
|
||||
rel_end = (e - s).total_seconds() + (s - (bl_t0 or s)).total_seconds()
|
||||
if bl_t0 is not None:
|
||||
bl_max = max((e - bl_t0).total_seconds() for _, _, e in bl_jobs_timed) if bl_jobs_timed else 0
|
||||
|
||||
total_s = max(cur_max, bl_max)
|
||||
if total_s <= 0:
|
||||
total_s = 1
|
||||
|
||||
rows = []
|
||||
for j in jobs:
|
||||
s = parse_ts(j.get("started_at"))
|
||||
e = parse_ts(j.get("completed_at"))
|
||||
if s is None or e is None:
|
||||
continue
|
||||
left = (s - cur_t0).total_seconds() / total_s * 100
|
||||
width = max((e - s).total_seconds() / total_s * 100, 0.5) # min 0.5% for visibility
|
||||
dur = j.get("duration_s") or 0
|
||||
|
||||
bl = bl_map.get(j["name"])
|
||||
bl_bar = ""
|
||||
if bl and bl_t0 is not None:
|
||||
bl_s = parse_ts(bl.get("started_at"))
|
||||
bl_e = parse_ts(bl.get("completed_at"))
|
||||
if bl_s is not None and bl_e is not None:
|
||||
bl_left = (bl_s - bl_t0).total_seconds() / total_s * 100
|
||||
bl_width = max((bl_e - bl_s).total_seconds() / total_s * 100, 0.5)
|
||||
bl_dur = bl.get("duration_s") or 0
|
||||
bl_bar = (
|
||||
f'<div class="gantt-bar baseline" '
|
||||
f'style="left:{bl_left:.2f}%;width:{bl_width:.2f}%" '
|
||||
f'title="baseline: {fmt_dur(bl_dur)}"></div>'
|
||||
)
|
||||
|
||||
name_display = escape(j["name"])
|
||||
if j.get("workflow_name"):
|
||||
name_display = f'{escape(j["workflow_name"])} / {escape(j["name"])}'
|
||||
|
||||
delta_info = ""
|
||||
if bl and bl.get("duration_s") is not None:
|
||||
d_text, d_cls = fmt_delta(dur, bl.get("duration_s"))
|
||||
delta_info = f' — {d_text}'
|
||||
|
||||
rows.append(
|
||||
f'<div class="gantt-row">'
|
||||
f'<div class="gantt-label" title="{escape(j["name"])}">{name_display}</div>'
|
||||
f'<div class="gantt-track">'
|
||||
f'{bl_bar}'
|
||||
f'<div class="gantt-bar current" '
|
||||
f'style="left:{left:.2f}%;width:{width:.2f}%" '
|
||||
f'title="{escape(j["name"])}: {fmt_dur(dur)}{delta_info}"></div>'
|
||||
f'</div></div>'
|
||||
)
|
||||
|
||||
# Axis
|
||||
ticks = nice_ticks(total_s)
|
||||
tick_html = "".join(
|
||||
f'<span class="gantt-tick" style="left:{(t / total_s * 100):.1f}%">{fmt_tick(t)}</span>'
|
||||
for t in ticks
|
||||
)
|
||||
axis = f'<div class="gantt-axis"><div class="gantt-track">{tick_html}</div></div>'
|
||||
|
||||
legend = (
|
||||
'<div class="legend">'
|
||||
'<span><span class="legend-swatch" style="background:#1f6feb"></span>Current</span>'
|
||||
)
|
||||
if baseline:
|
||||
legend += '<span><span class="legend-swatch" style="border:1px dashed #8b949e"></span>Baseline (main)</span>'
|
||||
legend += '</div>'
|
||||
|
||||
return f'<div class="gantt-wrap"><div class="gantt">{"".join(rows)}{axis}</div></div>{legend}'
|
||||
|
||||
|
||||
def _stats_cards(stats: dict) -> str:
    """Build the row of summary cards shown under "Global Stats".

    Args:
        stats: Mapping read for the keys ``wall``, ``bl_wall``, ``compute``,
            ``bl_compute``, ``faster``, ``slower``, ``unchanged`` and
            ``no_baseline``.

    Returns:
        A ``<div class="stats">`` element containing six stat cards.
    """

    def _delta_badge(cur_key: str, bl_key: str) -> str:
        # A delta is shown only when a baseline value exists.
        if stats[bl_key] is None:
            return ""
        text, css = fmt_delta(stats[cur_key], stats[bl_key])
        return f'<span class="stat-delta {css}">{text}</span>'

    def _card(label: str, value, modifier: str = "", badge: str = "") -> str:
        value_cls = f"stat-value {modifier}" if modifier else "stat-value"
        return (
            f'<div class="stat-card"><span class="stat-label">{label}</span>'
            f'<div class="{value_cls}">{value}</div>{badge}</div>'
        )

    pieces = []
    pieces.append(_card("Wall Time", fmt_dur(stats["wall"]), badge=_delta_badge("wall", "bl_wall")))
    pieces.append(
        _card("Total Compute", fmt_dur(stats["compute"]), badge=_delta_badge("compute", "bl_compute"))
    )
    pieces.append(_card("Jobs Faster", stats["faster"], "faster"))
    pieces.append(_card("Jobs Slower", stats["slower"], "slower"))
    pieces.append(_card("Unchanged", stats["unchanged"], "neutral"))
    pieces.append(_card("No Baseline", stats["no_baseline"], "neutral"))
    return '<div class="stats">' + "".join(pieces) + '</div>'
|
||||
|
||||
|
||||
def _job_table(timings: dict, baseline: dict | None) -> str:
|
||||
bl_map = {j["name"]: j for j in (baseline or {}).get("jobs", [])}
|
||||
rows = []
|
||||
for j in timings.get("jobs", []):
|
||||
dur = j.get("duration_s")
|
||||
bl = bl_map.get(j["name"])
|
||||
bl_dur = bl.get("duration_s") if bl else None
|
||||
delta_text, delta_cls = fmt_delta(dur, bl_dur)
|
||||
|
||||
name = escape(j["name"])
|
||||
if j.get("workflow_name"):
|
||||
name = f'{escape(j["workflow_name"])} / {escape(j["name"])}'
|
||||
|
||||
concl = j.get("conclusion", "")
|
||||
concl_icon = {"success": "✓", "failure": "✗", "skipped": "⊘"}.get(concl, "?")
|
||||
concl_cls = {"success": "faster", "failure": "slower", "skipped": "neutral"}.get(concl, "neutral")
|
||||
|
||||
rows.append(
|
||||
f'<tr>'
|
||||
f'<td class="job-name">{name}</td>'
|
||||
f'<td class="num">{fmt_dur(dur)}</td>'
|
||||
f'<td class="num">{fmt_dur(bl_dur)}</td>'
|
||||
f'<td class="num {delta_cls}">{delta_text}</td>'
|
||||
f'<td class="{concl_cls}" style="text-align:center">{concl_icon}</td>'
|
||||
f'</tr>'
|
||||
)
|
||||
|
||||
return (
|
||||
'<table><thead><tr>'
|
||||
'<th>Job</th><th class="num">Current</th><th class="num">Baseline</th>'
|
||||
'<th class="num">Delta</th><th>Status</th>'
|
||||
'</tr></thead><tbody>' + "".join(rows) + '</tbody></table>'
|
||||
)
|
||||
|
||||
|
||||
def _step_details(timings: dict, baseline: dict | None) -> str:
|
||||
bl_map = {j["name"]: j for j in (baseline or {}).get("jobs", [])}
|
||||
blocks = []
|
||||
for j in timings.get("jobs", []):
|
||||
if not j.get("steps"):
|
||||
continue
|
||||
bl = bl_map.get(j["name"], {})
|
||||
bl_steps = {s["name"]: s for s in bl.get("steps", [])}
|
||||
|
||||
dur = j.get("duration_s") or 0
|
||||
bl_dur = bl.get("duration_s") if bl else None
|
||||
delta_text, delta_cls = fmt_delta(dur, bl_dur)
|
||||
|
||||
summary_text = f'{escape(j["name"])} — {fmt_dur(dur)}'
|
||||
if bl_dur is not None:
|
||||
summary_text += f' <span class="{delta_cls}">({delta_text})</span>'
|
||||
|
||||
step_rows = []
|
||||
for s in j["steps"]:
|
||||
s_dur = s.get("duration_s")
|
||||
bl_s = bl_steps.get(s["name"])
|
||||
bl_s_dur = bl_s.get("duration_s") if bl_s else None
|
||||
s_delta, s_cls = fmt_delta(s_dur, bl_s_dur)
|
||||
|
||||
step_rows.append(
|
||||
f'<tr>'
|
||||
f'<td>{escape(s["name"])}</td>'
|
||||
f'<td class="num">{fmt_dur(s_dur)}</td>'
|
||||
f'<td class="num">{fmt_dur(bl_s_dur)}</td>'
|
||||
f'<td class="num {s_cls}">{s_delta}</td>'
|
||||
f'</tr>'
|
||||
)
|
||||
|
||||
blocks.append(
|
||||
f'<details><summary>{summary_text}</summary>'
|
||||
f'<table><thead><tr>'
|
||||
'<th>Step</th><th class="num">Current</th><th class="num">Baseline</th>'
|
||||
'<th class="num">Delta</th>'
|
||||
f'</tr></thead><tbody>{"".join(step_rows)}</tbody></table>'
|
||||
f'</details>'
|
||||
)
|
||||
|
||||
return "".join(blocks) if blocks else '<p style="color:#8b949e">No step data available.</p>'
|
||||
|
||||
|
||||
def _regressions(timings: dict, baseline: dict | None) -> str:
|
||||
"""Show top 10 biggest absolute regressions/improvements across all steps."""
|
||||
if not baseline:
|
||||
return ""
|
||||
bl_map = {j["name"]: j for j in baseline.get("jobs", [])}
|
||||
|
||||
deltas = [] # (abs_delta, job_name, step_name, current, baseline, is_slower)
|
||||
for j in timings.get("jobs", []):
|
||||
bl = bl_map.get(j["name"])
|
||||
if not bl:
|
||||
continue
|
||||
bl_steps = {s["name"]: s for s in bl.get("steps", [])}
|
||||
for s in j.get("steps", []):
|
||||
bl_s = bl_steps.get(s["name"])
|
||||
if not bl_s:
|
||||
continue
|
||||
cur = s.get("duration_s") or 0
|
||||
bl_d = bl_s.get("duration_s") or 0
|
||||
diff = cur - bl_d
|
||||
if abs(diff) < 1.0:
|
||||
continue
|
||||
deltas.append((abs(diff), diff, j["name"], s["name"], cur, bl_d))
|
||||
|
||||
deltas.sort(key=lambda x: x[0], reverse=True)
|
||||
top = deltas[:10]
|
||||
if not top:
|
||||
return ""
|
||||
|
||||
rows = []
|
||||
for _, diff, job, step, cur, bl_d in top:
|
||||
cls = "slower" if diff > 0 else "faster"
|
||||
tag = f'<span class="tag {"slow" if diff > 0 else "fast"}">{"+" if diff > 0 else ""}{diff:.1f}s</span>'
|
||||
rows.append(
|
||||
f'<tr>'
|
||||
f'<td class="job-name">{escape(job)}</td>'
|
||||
f'<td>{escape(step)}</td>'
|
||||
f'<td class="num">{fmt_dur(cur)}</td>'
|
||||
f'<td class="num">{fmt_dur(bl_d)}</td>'
|
||||
f'<td>{tag}</td>'
|
||||
f'</tr>'
|
||||
)
|
||||
|
||||
return (
|
||||
'<div class="regressions">'
|
||||
'<table><thead><tr>'
|
||||
'<th>Job</th><th>Step</th><th class="num">Current</th><th class="num">Baseline</th>'
|
||||
'<th>Delta</th>'
|
||||
'</tr></thead><tbody>' + "".join(rows) + '</tbody></table>'
|
||||
'</div>'
|
||||
)
|
||||
|
||||
|
||||
def generate_html(timings: dict, baseline: dict | None = None) -> str:
    """Assemble the full standalone HTML timing report.

    The page contains, in order: a metadata line, the global stat cards, the
    top regressions/improvements (only when a baseline is given and at least
    one step qualifies), the gantt chart, the per-job table and the per-step
    details.

    Args:
        timings: Current-run data; ``head_sha``, ``run_id`` and
            ``created_at`` are read for the metadata line.
        baseline: Baseline-run data, or ``None`` for a current-only report.

    Returns:
        The complete HTML document as a string.
    """
    stats = compute_stats(timings, baseline)

    # Values may come from a hand-edited or API-produced JSON file, so coerce
    # to str before escaping (escape() only accepts strings) and escape the
    # SHAs like every other interpolated value.
    sha_short = escape(str(timings.get("head_sha") or "")[:7])
    run_id = escape(str(timings.get("run_id", "?")))
    created = escape(str(timings.get("created_at") or ""))

    bl_info = ""
    if baseline:
        bl_sha = escape(str(baseline.get("head_sha") or "")[:7])
        bl_info = f' | Baseline: <code>{bl_sha}</code> (main)'

    html = (
        f'<!DOCTYPE html>\n<html lang="en">\n<head>\n'
        f'<meta charset="utf-8">\n'
        f'<meta name="viewport" content="width=device-width, initial-scale=1">\n'
        f'<title>CI Timing Report — {sha_short}</title>\n'
        f'<style>{CSS}</style>\n'
        f'</head>\n<body>\n'
        f'<h1>CI Timing Report</h1>\n'
        f'<div class="meta">Run <code>{run_id}</code> | SHA <code>{sha_short}</code>'
        f' | Generated {created}{bl_info}</div>\n'
    )

    html += '<h2>Global Stats</h2>\n'
    html += _stats_cards(stats)

    # Skip the heading when there is nothing to list, instead of leaving an
    # empty section in the report.
    regressions_html = _regressions(timings, baseline) if baseline else ""
    if regressions_html:
        html += '<h2>Top Regressions & Improvements</h2>\n'
        html += regressions_html

    html += '<h2>Gantt Chart</h2>\n'
    html += _gantt_bars(timings, baseline)

    html += '<h2>Per-Job Comparison</h2>\n'
    html += _job_table(timings, baseline)

    html += '<h2>Step Details</h2>\n'
    html += _step_details(timings, baseline)

    html += '</body>\n</html>\n'
    return html
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Markdown summary for $GITHUB_STEP_SUMMARY
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def generate_summary(timings: dict, baseline: dict | None = None) -> str:
    """Build the Markdown summary table of global timing stats.

    Args:
        timings: Current-run data.
        baseline: Baseline-run data, or ``None``.

    Returns:
        Markdown text: a heading followed by one table with wall time, total
        compute and the faster/slower/unchanged/no-baseline job counts. The
        delta column is left empty for rows without a baseline value.
    """
    stats = compute_stats(timings, baseline)

    lines = ["## CI Timing Summary\n"]

    # Global stats table
    lines.append("| Metric | Current | Baseline | Delta |")
    lines.append("|--------|---------|----------|-------|")

    wall_d = ""
    if stats["bl_wall"] is not None:
        wall_d, _ = fmt_delta(stats["wall"], stats["bl_wall"])
    lines.append(f"| Wall time | {fmt_dur(stats['wall'])} | {fmt_dur(stats['bl_wall'])} | {wall_d} |")

    compute_d = ""
    if stats["bl_compute"] is not None:
        compute_d, _ = fmt_delta(stats["compute"], stats["bl_compute"])
    lines.append(f"| Total compute | {fmt_dur(stats['compute'])} | {fmt_dur(stats['bl_compute'])} | {compute_d} |")

    lines.append(f"| Jobs faster | {stats['faster']} | — | — |")
    lines.append(f"| Jobs slower | {stats['slower']} | — | — |")
    lines.append(f"| Jobs unchanged | {stats['unchanged']} | — | — |")
    lines.append(f"| Jobs without baseline | {stats['no_baseline']} | — | — |")
    lines.append("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def expect_env(var: str) -> str:
    """Return the value of environment variable ``var``.

    Raises:
        ValueError: If the variable is unset or set to an empty string.
    """
    value = os.environ.get(var)
    if value:
        return value
    raise ValueError(f"missing environment variable {var}")
|
||||
|
||||
def main():
    """Command-line entry point: collect timings and write all report files.

    Timings are either loaded from ``--from-json`` or collected through
    ``collect_timings`` using ``GITHUB_TOKEN``, ``GITHUB_REPOSITORY``,
    ``GITHUB_RUN_ID`` and ``GITHUB_SHA``. The timings JSON, the HTML report
    and the Markdown summary are then written; the summary file is opened in
    append mode.

    NOTE(review): the timings JSON is written in both modes here; confirm
    against the original file that the "Save JSON" step is not meant to run
    only after an API collection.
    """
    cli = argparse.ArgumentParser(description="Collect CI timings and generate HTML report")
    cli.add_argument("--from-json", help="Read timings from JSON instead of API")
    path_options = (
        ("--baseline", "ci-timings-baseline.json",
         "Baseline JSON path (default: ci-timings-baseline.json)"),
        ("--output", "ci-timings-report.html",
         "HTML output path (default: ci-timings-report.html)"),
        ("--json-out", "ci-timings.json",
         "JSON output path (default: ci-timings.json)"),
        ("--summary-out", "ci-timings-summary.md",
         "Markdown summary output path (default: ci-timings-summary.md)"),
    )
    for flag, default_path, help_text in path_options:
        cli.add_argument(flag, default=default_path, help=help_text)
    args = cli.parse_args()

    # Collect or load timings
    if not args.from_json:
        token = expect_env("GITHUB_TOKEN")
        repo = expect_env("GITHUB_REPOSITORY")
        run_id = expect_env("GITHUB_RUN_ID")
        head_sha = expect_env("GITHUB_SHA")
        timings = collect_timings(token, repo, run_id, head_sha)
    else:
        with open(args.from_json, encoding="utf-8") as src:
            timings = json.load(src)

    # Save JSON
    with open(args.json_out, "w", encoding="utf-8") as dst:
        json.dump(timings, dst, indent=2)
    print(f"Saved timings to {args.json_out} ({len(timings.get('jobs', []))} jobs)")

    # Load baseline
    baseline = None
    if not os.path.exists(args.baseline):
        print(f"No baseline file at {args.baseline} — generating current-only report")
    else:
        with open(args.baseline, encoding="utf-8") as src:
            baseline = json.load(src)
        print(f"Loaded baseline from {args.baseline}")

    # Generate HTML
    report = generate_html(timings, baseline)
    with open(args.output, "w", encoding="utf-8") as dst:
        dst.write(report)
    print(f"Generated HTML report: {args.output}")

    # Write summary (append mode: existing content of the target is kept)
    markdown = generate_summary(timings, baseline)
    with open(args.summary_out, "a", encoding="utf-8") as dst:
        dst.write(markdown)
    print(f"Wrote summary to {args.summary_out}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -58,7 +58,7 @@ _DEFAULT_ROOTS = ["tests"]
|
||||
#
|
||||
# tests/e2e/ — .github/workflows/tests.yml :: e2e job
|
||||
# tests/integration/ — historical; legacy --ignore flags
|
||||
# tests/docker/ — .github/workflows/docker-publish.yml ::
|
||||
# tests/docker/ — .github/workflows/docker.yml ::
|
||||
# build-amd64 job (runs against the freshly-loaded
|
||||
# nousresearch/hermes-agent:test image, via
|
||||
# ``HERMES_TEST_IMAGE`` so the fixture skips
|
||||
@@ -81,7 +81,7 @@ _DURATIONS_FILE = "test_durations.json"
|
||||
|
||||
|
||||
def _count_tests(
|
||||
files: List[Path], repo_root: Path, pytest_passthrough: List[str]
|
||||
files: List[Path], repo_root: Path
|
||||
) -> dict[Path, int]:
|
||||
"""Run ``pytest --co -q`` once to count individual tests per file.
|
||||
|
||||
@@ -113,7 +113,6 @@ def _count_tests(
|
||||
"--co", "-q",
|
||||
*ignore_args,
|
||||
*[str(f) for f in files],
|
||||
*pytest_passthrough,
|
||||
]
|
||||
try:
|
||||
result = subprocess.run(
|
||||
@@ -697,7 +696,7 @@ def main() -> int:
|
||||
return 1
|
||||
|
||||
# Count individual tests per file via a single pytest --co pass.
|
||||
test_counts = _count_tests(files, repo_root, pytest_passthrough)
|
||||
test_counts = _count_tests(files, repo_root)
|
||||
total_tests = sum(test_counts.values())
|
||||
|
||||
# Apply slicing if requested — distribute files across CI jobs by
|
||||
|
||||
@@ -410,8 +410,8 @@ class TestSendUpdate:
|
||||
|
||||
assert created["coro"] is not None
|
||||
assert created["coro"].cr_frame is None
|
||||
# Only count warnings about THIS test's coroutine; other tests in the
|
||||
# same xdist worker (or stdlib mock internals) may emit unrelated
|
||||
# Only count warnings about THIS test's coroutine; other tests
|
||||
# may emit unrelated
|
||||
# "coroutine was never awaited" warnings that bleed through.
|
||||
runtime_warnings = [
|
||||
w for w in caught
|
||||
|
||||
@@ -20,8 +20,7 @@ def _no_unawaited_warnings(caught, *, coro_name: str = "") -> bool:
|
||||
"""Return True if no "X was never awaited" warning slipped through.
|
||||
|
||||
When *coro_name* is provided, only warnings naming that coroutine are
|
||||
counted — xdist workers may emit unrelated unawaited-coroutine warnings
|
||||
(e.g. ``AsyncMockMixin._execute_mock_call``) from concurrent tests.
|
||||
counted
|
||||
"""
|
||||
bad = [
|
||||
w for w in caught
|
||||
|
||||
@@ -39,10 +39,9 @@ def _write_skill(skills_dir: Path, name: str, description: str = "") -> Path:
|
||||
def hermes_home(monkeypatch):
|
||||
"""Isolate HERMES_HOME for ``reload_skills`` tests.
|
||||
|
||||
Rather than popping cache-bearing modules from ``sys.modules`` (which
|
||||
races against pytest-xdist's parallel workers), we monkeypatch the
|
||||
module-level ``HERMES_HOME`` / ``SKILLS_DIR`` constants in place so the
|
||||
isolation is local to this fixture's scope.
|
||||
Rather than popping cache-bearing modules from ``sys.modules``,
|
||||
we monkeypatch the module-level ``HERMES_HOME`` / ``SKILLS_DIR``
|
||||
constants in place so the isolation is local to this fixture's scope.
|
||||
"""
|
||||
td = tempfile.mkdtemp(prefix="hermes-reload-skills-")
|
||||
monkeypatch.setenv("HERMES_HOME", td)
|
||||
|
||||
@@ -13,7 +13,7 @@ from hermes_cli import main as hermes_main
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module isolation: _import_cli() wipes tools.* / cli / run_agent from
|
||||
# sys.modules so it can re-import cli fresh. Without cleanup the wiped
|
||||
# modules leak into subsequent tests on the same xdist worker, breaking
|
||||
# modules leak into subsequent tests, breaking
|
||||
# mock patches that target "tools.file_tools._get_file_ops" etc.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -184,8 +184,7 @@ class TestGatewayQuickCommands:
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
# Ensure redaction is active regardless of host HERMES_REDACT_SECRETS state
|
||||
# or test ordering (the module snapshots env at import time, so other
|
||||
# tests in the same xdist worker can flip the flag).
|
||||
# or test ordering
|
||||
monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)
|
||||
|
||||
runner = GatewayRunner.__new__(GatewayRunner)
|
||||
|
||||
@@ -8,15 +8,13 @@ Override the image with ``HERMES_TEST_IMAGE`` env var to point at a pre-built
|
||||
image (faster local iteration); otherwise the ``built_image`` fixture builds
|
||||
the repo's Dockerfile once per session.
|
||||
|
||||
Docker tests need longer timeouts than the suite default (30s), so every
|
||||
test under this directory is granted a 180s default via
|
||||
``pytest.mark.timeout`` applied at collection time.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from collections.abc import Iterator
|
||||
|
||||
import pytest
|
||||
@@ -43,11 +41,9 @@ def pytest_collection_modifyitems(config, items): # noqa: D401 - pytest hook
|
||||
skip_docker = pytest.mark.skip(
|
||||
reason="Docker not available or daemon not running",
|
||||
)
|
||||
extend_timeout = pytest.mark.timeout(180)
|
||||
for item in items:
|
||||
if "tests/docker/" not in str(item.fspath).replace(os.sep, "/"):
|
||||
continue
|
||||
item.add_marker(extend_timeout)
|
||||
if not docker_ok:
|
||||
item.add_marker(skip_docker)
|
||||
|
||||
@@ -137,3 +133,181 @@ def docker_exec_sh(
|
||||
return docker_exec(
|
||||
container, "sh", "-c", command, user=user, timeout=timeout,
|
||||
)
|
||||
|
||||
|
||||
def wait_for_container_ready(
    container: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> None:
    """Block until the container's boot log reports ``profile=default``.

    The probe reads ``/opt/data/logs/container-boot.log`` inside the
    container every ``interval_s`` seconds. Per the cont-init design, the
    02-reconcile-profiles script writes that entry on every boot, after
    stage2-hook.sh has completed, so seeing it means the whole cont-init
    chain has run.

    Polling replaces a fixed ``time.sleep()``, which either wastes time on
    fast machines or flakes on slow ones.

    Raises:
        TimeoutError: If the entry does not appear within ``deadline_s``.
    """
    probe = ("sh", "-c", "cat /opt/data/logs/container-boot.log 2>/dev/null")
    give_up_at = time.monotonic() + deadline_s
    while time.monotonic() < give_up_at:
        result = docker_exec(container, *probe, timeout=5)
        booted = result.returncode == 0 and "profile=default" in result.stdout
        if booted:
            return
        time.sleep(interval_s)
    raise TimeoutError(
        f"container {container} did not finish cont-init within {deadline_s}s"
    )
|
||||
|
||||
|
||||
def start_container(
    image: str,
    name: str,
    *env: str,
    cmd: str = "sleep infinity",
    timeout: int = 60,
) -> str:
    """Start a detached container and wait for cont-init to finish.

    Args:
        image: Docker image to run.
        name: Container name (cleanup is the caller's responsibility —
            typically handled by the ``container_name`` fixture).
        env: Env vars as ``KEY=VALUE`` strings, each passed via ``-e``.
        cmd: Container CMD (default ``sleep infinity``). Split with
            shell-like rules, so quoted arguments such as
            ``sh -c "echo hi"`` stay intact.
        timeout: ``docker run`` subprocess timeout.

    Returns the container name. Raises ``RuntimeError`` (chained from
    ``subprocess.CalledProcessError``) on ``docker run`` failure, or
    ``TimeoutError`` if the container never finishes cont-init within the
    default deadline of :func:`wait_for_container_ready`.
    """
    import shlex  # local import: only this helper needs it

    args = ["docker", "run", "-d", "--name", name]
    for e in env:
        args.extend(["-e", e])
    # shlex.split keeps quoted arguments together; str.split() would tear
    # 'sh -c "echo hi"' into four broken tokens.
    args.extend([image, *shlex.split(cmd)])
    try:
        subprocess.run(args, check=True, capture_output=True, timeout=timeout)
    except subprocess.CalledProcessError as exc:
        # The captured stderr is otherwise invisible in the test failure.
        stderr = exc.stderr.decode(errors="replace") if isinstance(exc.stderr, bytes) else (exc.stderr or "")
        raise RuntimeError(
            f"docker run failed for container {name} (exit {exc.returncode}): {stderr.strip()}"
        ) from exc
    wait_for_container_ready(name)
    return name
|
||||
|
||||
|
||||
def restart_container(container: str, timeout: int = 60) -> None:
    """Run ``docker restart`` on a container, then wait for cont-init.

    Readiness is detected by :func:`wait_for_container_ready`, which looks
    for ``profile=default`` in ``/opt/data/logs/container-boot.log``. That
    log is append-only and survives restarts, so it is emptied first;
    otherwise the line left by the previous boot would satisfy the wait
    before cont-init has run again.

    Args:
        container: Name of the container to restart.
        timeout: ``docker restart`` subprocess timeout in seconds.
    """
    # Best effort: "|| true" keeps this from failing when the log is absent.
    clear_boot_log = "truncate -s 0 /opt/data/logs/container-boot.log 2>/dev/null || true"
    docker_exec(container, "sh", "-c", clear_boot_log, user="root", timeout=5)

    restart_cmd = ["docker", "restart", container]
    subprocess.run(restart_cmd, check=True, capture_output=True, timeout=timeout)

    wait_for_container_ready(container)
|
||||
|
||||
|
||||
def poll_container(
    container: str,
    probe: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.5,
    user: str = "hermes",
) -> tuple[bool, str]:
    """Run the shell command ``probe`` in the container until it exits 0.

    The probe is retried every ``interval_s`` seconds until it succeeds or
    ``deadline_s`` seconds have passed. Handy for waiting on a process to
    appear, a port to open, a file to contain a string, and similar.

    Returns:
        ``(success, last_stdout)`` where ``last_stdout`` is the stdout of the
        most recent probe run (empty if the probe never ran).
    """
    stop_at = time.monotonic() + deadline_s
    output = ""
    while time.monotonic() < stop_at:
        attempt = docker_exec_sh(container, probe, user=user, timeout=10)
        output = attempt.stdout
        if attempt.returncode != 0:
            time.sleep(interval_s)
            continue
        return True, output
    return False, output
|
||||
|
||||
|
||||
def wait_for_path(
    container: str,
    path: str,
    *,
    kind: str = "f",
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> bool:
    """Wait until ``test -<kind> <path>`` succeeds inside the container.

    Args:
        container: Container to probe.
        path: Path checked inside the container; it is interpolated into the
            shell command as-is.
        kind: Flag handed to ``test``: ``'f'`` file, ``'d'`` directory,
            ``'e'`` any existing path.
        deadline_s: Maximum time to keep polling, in seconds.
        interval_s: Delay between probes, in seconds.

    Returns:
        ``True`` once the check passes, ``False`` if the deadline expires.
    """
    check = f"test -{kind} {path}"
    found, _stdout = poll_container(
        container, check, deadline_s=deadline_s, interval_s=interval_s,
    )
    return found
|
||||
|
||||
|
||||
def wait_for_log(
    container: str,
    log_path: str,
    needle: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> str:
    """Poll until a log file inside the container contains ``needle``.

    Args:
        container: Container to probe.
        log_path: Path of the log file inside the container.
        needle: Substring to look for.
        deadline_s: Maximum time to keep polling, in seconds.
        interval_s: Delay between reads, in seconds.

    Returns the full log on success.

    Raises:
        AssertionError: If ``needle`` has not appeared within ``deadline_s``
            seconds. The message ends with the last contents read from the
            log so the failure can be diagnosed without re-running.
    """
    end = time.monotonic() + deadline_s
    last = ""
    while time.monotonic() < end:
        r = docker_exec_sh(
            container, f"cat {log_path} 2>/dev/null", timeout=5,
        )
        if r.returncode == 0:
            last = r.stdout
            if needle in last:
                return last
        time.sleep(interval_s)
    # The first line is the original message, unchanged; diagnostics follow.
    raise AssertionError(
        f"Didn't see `{needle}` in {log_path} within {deadline_s} in container {container}"
        f"\n--- last contents read from {log_path} ---\n{last or '<nothing read>'}"
    )
|
||||
|
||||
|
||||
|
||||
def wait_for_docker_logs(
    container: str, needle: str, *, deadline_s: float = 30.0, interval_s: float = 0.5,
) -> str:
    """Poll ``docker logs`` until ``needle`` appears or deadline expires.

    Both stdout and stderr of ``docker logs`` are searched.

    Returns the full docker logs on success.

    Raises:
        AssertionError: If ``needle`` has not appeared within ``deadline_s``
            seconds. The message ends with the last output captured so the
            failure can be diagnosed without re-running.
    """
    end = time.monotonic() + deadline_s
    last = ""
    while time.monotonic() < end:
        r = subprocess.run(
            ["docker", "logs", container],
            capture_output=True, text=True, timeout=10,
        )
        last = r.stdout + r.stderr
        if needle in last:
            return last
        time.sleep(interval_s)
    # The first line is the original message, unchanged; diagnostics follow.
    raise AssertionError(
        f"Didn't see `{needle}` in docker logs within {deadline_s} in container {container}"
        f"\n--- last docker logs output ---\n{last or '<no output captured>'}"
    )
|
||||
|
||||
69
tests/docker/test_config_migration.py
Normal file
69
tests/docker/test_config_migration.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""Runtime smoke test for Docker config-schema migration on boot.
|
||||
|
||||
Build the real image and verify: a config.yaml present in $HERMES_HOME
|
||||
is migrated by docker_config_migrate.py on boot, running as the hermes
|
||||
user.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container
|
||||
|
||||
|
||||
def test_config_migration_runs_on_boot(
    built_image: str, container_name: str,
) -> None:
    """Boot the image and check the config-migration preconditions.

    After boot, ``/opt/data/config.yaml`` must exist, the migration script
    must be present in the image, and the config file must be owned by the
    ``hermes`` user (root ownership would suggest the migration ran as root).
    """
    start_container(built_image, container_name)

    def run(script: str) -> str:
        # Every probe in this test uses the same container and 10s timeout.
        return docker_exec_sh(container_name, script, timeout=10).stdout

    # config.yaml should be seeded by stage2 when it is not already present.
    seeded = run("test -f /opt/data/config.yaml && echo EXISTS || echo MISSING")
    assert "EXISTS" in seeded, (
        f"config.yaml not found in $HERMES_HOME: {seeded}"
    )

    # The migration script itself must ship in the image.
    script_probe = run(
        "test -f /opt/hermes/scripts/docker_config_migrate.py && "
        "echo SCRIPT_EXISTS || echo SCRIPT_MISSING"
    )
    assert "SCRIPT_EXISTS" in script_probe, (
        f"docker_config_migrate.py not found in image: {script_probe}"
    )

    # Ownership by hermes is the evidence that migration ran as hermes.
    owner = run('stat -c "%U" /opt/data/config.yaml').strip()
    assert owner == "hermes", (
        f"config.yaml not owned by hermes (migration may have run as root): "
        f"{owner}"
    )
|
||||
|
||||
|
||||
def test_config_migration_opt_out_env_var_respected(
    built_image: str, container_name: str,
) -> None:
    """Boot with HERMES_SKIP_CONFIG_MIGRATION=1 and check config seeding.

    Seeding is separate from migration, so ``/opt/data/config.yaml`` must
    still exist when the migration is opted out of.
    """
    opt_out = "HERMES_SKIP_CONFIG_MIGRATION=1"
    start_container(built_image, container_name, opt_out)

    probe = "test -f /opt/data/config.yaml && echo EXISTS || echo MISSING"
    result = docker_exec_sh(container_name, probe, timeout=10)
    assert "EXISTS" in result.stdout, (
        f"config.yaml should be seeded even with migration skipped: {result.stdout}"
    )
|
||||
@@ -21,7 +21,7 @@ import time
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh, wait_for_path, wait_for_log, wait_for_docker_logs, poll_container
|
||||
|
||||
|
||||
def _docker(*args: str, **kw) -> subprocess.CompletedProcess[str]:
|
||||
@@ -32,41 +32,8 @@ def _docker(*args: str, **kw) -> subprocess.CompletedProcess[str]:
|
||||
)
|
||||
|
||||
|
||||
def _exec(container: str, *args: str, timeout: int = 30) -> subprocess.CompletedProcess[str]:
|
||||
return docker_exec(container, *args, timeout=timeout)
|
||||
|
||||
|
||||
def _sh(container: str, cmd: str, timeout: int = 30) -> subprocess.CompletedProcess[str]:
|
||||
return docker_exec_sh(container, cmd, timeout=timeout)
|
||||
|
||||
|
||||
def _wait_for_path(
|
||||
container: str,
|
||||
path: str,
|
||||
*,
|
||||
kind: str = "f",
|
||||
deadline_s: float = 30.0,
|
||||
interval_s: float = 0.25,
|
||||
) -> bool:
|
||||
"""Poll `test -<kind> <path>` inside container until success or timeout.
|
||||
|
||||
`kind` is the `test` flag: 'f' for file, 'd' for directory, 'e' for
|
||||
existence. Returns True on success, False on timeout. Strictly
|
||||
better than a fixed `time.sleep()` because:
|
||||
|
||||
* we don't wait the full budget when the path appears early, and
|
||||
* the test fails with a precise "waited N seconds" assertion
|
||||
instead of a confusing one-line failure mid-test when the
|
||||
sleep was too short.
|
||||
"""
|
||||
end = time.monotonic() + deadline_s
|
||||
while time.monotonic() < end:
|
||||
r = _sh(container, f"test -{kind} {path}", timeout=5)
|
||||
if r.returncode == 0:
|
||||
return True
|
||||
time.sleep(interval_s)
|
||||
return False
|
||||
|
||||
|
||||
def _wait_for_reconcile_log_mention(
|
||||
container: str,
|
||||
@@ -76,23 +43,8 @@ def _wait_for_reconcile_log_mention(
|
||||
interval_s: float = 0.25,
|
||||
) -> str:
|
||||
"""Poll until /opt/data/logs/container-boot.log mentions `profile`.
|
||||
|
||||
Returns the matching log content on success. On timeout, returns
|
||||
the last observed contents so the assertion can render a
|
||||
meaningful diagnostic. The container-boot.log is the explicit
|
||||
signal that the reconciler has finished — much more reliable
|
||||
than a fixed sleep that hopes 8 seconds is enough.
|
||||
"""
|
||||
end = time.monotonic() + deadline_s
|
||||
last = ""
|
||||
while time.monotonic() < end:
|
||||
r = _sh(container, "cat /opt/data/logs/container-boot.log", timeout=5)
|
||||
if r.returncode == 0:
|
||||
last = r.stdout
|
||||
if f"profile={profile}" in last:
|
||||
return last
|
||||
time.sleep(interval_s)
|
||||
return last
|
||||
return wait_for_log(container, "/opt/data/logs/container-boot.log", f"profile={profile}")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -117,23 +69,7 @@ def restart_container(request, built_image: str):
|
||||
# it starts issuing commands. The reconciler always writes one
|
||||
# 'default' line on every boot (PR #30136 item I1) — that's our
|
||||
# readiness signal.
|
||||
deadline = time.monotonic() + 30.0
|
||||
while time.monotonic() < deadline:
|
||||
r = _docker(
|
||||
"exec", "-u", "hermes", name, "sh", "-c",
|
||||
"cat /opt/data/logs/container-boot.log 2>/dev/null",
|
||||
timeout=5,
|
||||
)
|
||||
if r.returncode == 0 and "profile=default" in r.stdout:
|
||||
break
|
||||
time.sleep(0.25)
|
||||
else:
|
||||
# Defensive: surface a timeout from the fixture itself so the
|
||||
# test failure points at "container never finished cont-init"
|
||||
# rather than mid-test where the symptom would be obscure.
|
||||
raise RuntimeError(
|
||||
f"container {name} did not finish cont-init within 30s"
|
||||
)
|
||||
wait_for_log(name, "/opt/data/logs/container-boot.log", "profile=default")
|
||||
yield name
|
||||
_docker("rm", "-f", name)
|
||||
_docker("volume", "rm", "-f", volume)
|
||||
@@ -145,20 +81,14 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
|
||||
# Create the profile + start its gateway. The Phase 4 hooks
|
||||
# register the s6 service slot during create and the dispatch
|
||||
# path brings it up via s6-svc -u.
|
||||
r = _exec(container, "hermes", "profile", "create", "coder")
|
||||
r = docker_exec(container, "hermes", "profile", "create", "coder")
|
||||
assert r.returncode == 0, f"profile create failed: {r.stderr}"
|
||||
|
||||
r = _exec(container, "hermes", "-p", "coder", "gateway", "start", timeout=60)
|
||||
r = docker_exec(container, "hermes", "-p", "coder", "gateway", "start", timeout=60)
|
||||
assert r.returncode == 0, f"gateway start failed: {r.stderr}"
|
||||
|
||||
# Give the service time to actually come up under supervision.
|
||||
deadline = time.monotonic() + 15.0
|
||||
while time.monotonic() < deadline:
|
||||
r = _sh(container, "/command/s6-svstat /run/service/gateway-coder")
|
||||
if r.returncode == 0 and "up " in r.stdout:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
assert "up " in r.stdout, f"gateway never came up pre-restart: {r.stdout!r}"
|
||||
poll_container(container, "/command/s6-svstat /run/service/gateway-coder | grep -q 'up '")
|
||||
|
||||
# Persist state so the reconciler will treat the slot as 'running'
|
||||
# post-restart. The gateway process itself writes gateway_state.json
|
||||
@@ -170,7 +100,7 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
|
||||
"p = pathlib.Path('/opt/data/profiles/coder/gateway_state.json'); "
|
||||
"p.write_text(json.dumps({'gateway_state': 'running', 'timestamp': 1}))"
|
||||
)
|
||||
_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
|
||||
docker_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
|
||||
|
||||
# Restart. After this, /run/service/ is empty until cont-init.d
|
||||
# runs the reconciler. We need to wait long enough for the
|
||||
@@ -179,25 +109,22 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
|
||||
# restored slot. Polling the boot log gives us the first signal.
|
||||
_docker("restart", container, timeout=60).check_returncode()
|
||||
log = _wait_for_reconcile_log_mention(container, "coder", deadline_s=30.0)
|
||||
assert "profile=coder" in log, (
|
||||
f"reconciler never logged coder after restart: {log!r}"
|
||||
)
|
||||
assert "action=started" in log
|
||||
|
||||
# Service slot exists.
|
||||
assert _wait_for_path(
|
||||
assert wait_for_path(
|
||||
container, "/run/service/gateway-coder", kind="d", deadline_s=10.0,
|
||||
), "slot not recreated after restart"
|
||||
|
||||
# No `down` marker — we asked for auto-start.
|
||||
r = _sh(container, "test -f /run/service/gateway-coder/down")
|
||||
r = docker_exec_sh(container, "test -f /run/service/gateway-coder/down")
|
||||
assert r.returncode != 0, "down marker present despite prior_state=running"
|
||||
|
||||
|
||||
def test_stopped_gateway_stays_stopped_after_restart(restart_container: str) -> None:
|
||||
container = restart_container
|
||||
|
||||
_exec(container, "hermes", "profile", "create", "writer").check_returncode()
|
||||
docker_exec(container, "hermes", "profile", "create", "writer").check_returncode()
|
||||
|
||||
# Write 'stopped' directly so we don't have to race against the
|
||||
# gateway's own state writes.
|
||||
@@ -206,19 +133,18 @@ def test_stopped_gateway_stays_stopped_after_restart(restart_container: str) ->
|
||||
"p = pathlib.Path('/opt/data/profiles/writer/gateway_state.json'); "
|
||||
"p.write_text(json.dumps({'gateway_state': 'stopped', 'timestamp': 1}))"
|
||||
)
|
||||
_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
|
||||
docker_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
|
||||
|
||||
_docker("restart", container, timeout=60).check_returncode()
|
||||
log = _wait_for_reconcile_log_mention(container, "writer", deadline_s=30.0)
|
||||
assert "profile=writer" in log
|
||||
_wait_for_reconcile_log_mention(container, "writer", deadline_s=30.0)
|
||||
|
||||
# Slot exists.
|
||||
assert _wait_for_path(
|
||||
assert wait_for_path(
|
||||
container, "/run/service/gateway-writer", kind="d", deadline_s=10.0,
|
||||
)
|
||||
|
||||
# Down marker present.
|
||||
r = _sh(container, "test -f /run/service/gateway-writer/down")
|
||||
r = docker_exec_sh(container, "test -f /run/service/gateway-writer/down")
|
||||
assert r.returncode == 0, "down marker missing despite prior_state=stopped"
|
||||
|
||||
|
||||
@@ -229,7 +155,7 @@ def test_stale_gateway_pid_cleaned_up_on_restart(restart_container: str) -> None
|
||||
process-mismatch checks."""
|
||||
container = restart_container
|
||||
|
||||
_exec(container, "hermes", "profile", "create", "ghost").check_returncode()
|
||||
docker_exec(container, "hermes", "profile", "create", "ghost").check_returncode()
|
||||
|
||||
# Stamp stale runtime files alongside a 'running' state so the
|
||||
# reconciler walks this profile.
|
||||
@@ -240,15 +166,15 @@ def test_stale_gateway_pid_cleaned_up_on_restart(restart_container: str) -> None
|
||||
"(p / 'gateway.pid').write_text(json.dumps({'pid': 99999, 'host': 'old'})); "
|
||||
"(p / 'processes.json').write_text('[]')"
|
||||
)
|
||||
_exec(container, "python3", "-c", stamp, timeout=10).check_returncode()
|
||||
docker_exec(container, "python3", "-c", stamp, timeout=10).check_returncode()
|
||||
|
||||
_docker("restart", container, timeout=60).check_returncode()
|
||||
_wait_for_reconcile_log_mention(container, "ghost", deadline_s=30.0)
|
||||
|
||||
# Stale runtime files swept.
|
||||
r = _sh(container, "test -f /opt/data/profiles/ghost/gateway.pid")
|
||||
r = docker_exec_sh(container, "test -f /opt/data/profiles/ghost/gateway.pid")
|
||||
assert r.returncode != 0, "stale gateway.pid survived restart"
|
||||
r = _sh(container, "test -f /opt/data/profiles/ghost/processes.json")
|
||||
r = docker_exec_sh(container, "test -f /opt/data/profiles/ghost/processes.json")
|
||||
assert r.returncode != 0, "stale processes.json survived restart"
|
||||
|
||||
|
||||
@@ -271,37 +197,20 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
|
||||
"""
|
||||
container = restart_container
|
||||
|
||||
_exec(container, "hermes", "profile", "create", "live").check_returncode()
|
||||
r = _exec(container, "hermes", "-p", "live", "gateway", "start", timeout=60)
|
||||
docker_exec(container, "hermes", "profile", "create", "live").check_returncode()
|
||||
r = docker_exec(container, "hermes", "-p", "live", "gateway", "start", timeout=60)
|
||||
assert r.returncode == 0, f"gateway start failed: {r.stderr}"
|
||||
|
||||
# Wait for the gateway to actually come up under supervision AND write
|
||||
# its own gateway_state=running (we do NOT stamp it ourselves).
|
||||
deadline = time.monotonic() + 20.0
|
||||
while time.monotonic() < deadline:
|
||||
r = _sh(container, "/command/s6-svstat /run/service/gateway-live")
|
||||
if r.returncode == 0 and "up " in r.stdout:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
assert "up " in r.stdout, f"gateway never came up pre-restart: {r.stdout!r}"
|
||||
poll_container(container, "/command/s6-svstat /run/service/gateway-live | grep -q 'up '")
|
||||
|
||||
# Confirm the gateway persisted its own 'running' state (sanity: we're
|
||||
# testing the real write path, not a stamped fixture).
|
||||
deadline = time.monotonic() + 15.0
|
||||
state = ""
|
||||
while time.monotonic() < deadline:
|
||||
r = _sh(
|
||||
container,
|
||||
"cat /opt/data/profiles/live/gateway_state.json 2>/dev/null",
|
||||
)
|
||||
if r.returncode == 0 and '"gateway_state"' in r.stdout:
|
||||
state = r.stdout
|
||||
if '"running"' in state:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
assert '"running"' in state, (
|
||||
f"gateway never persisted running state pre-restart: {state!r}"
|
||||
)
|
||||
# Confirm the gateway persisted its own 'running' state. The gateway has
|
||||
# to boot Python, discover ~50 plugins, construct GatewayRunner, and
|
||||
# reach write_runtime_status("running") at run.py start() — on a loaded
|
||||
# CI runner with parallel docker test containers competing for CPU, this
|
||||
# can take a while.
|
||||
wait_for_log(container, "/opt/data/profiles/live/gateway_state.json", '"running"', deadline_s=45, interval_s=1)
|
||||
|
||||
# Real restart — Docker sends SIGTERM to PID 1; s6 propagates it to the
|
||||
# supervised gateway. No planned-stop marker is written (this is not an
|
||||
@@ -309,9 +218,6 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
|
||||
_docker("restart", container, timeout=60).check_returncode()
|
||||
|
||||
log = _wait_for_reconcile_log_mention(container, "live", deadline_s=30.0)
|
||||
assert "profile=live" in log, (
|
||||
f"reconciler never logged live after restart: {log!r}"
|
||||
)
|
||||
# The crux: the reconciler must AUTO-START it, not register it down.
|
||||
assert "action=started" in log, (
|
||||
f"gateway did NOT auto-start after a real restart (issue #42675 "
|
||||
@@ -319,10 +225,10 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
|
||||
)
|
||||
|
||||
# Slot recreated, and NO down marker (we expect auto-start).
|
||||
assert _wait_for_path(
|
||||
assert wait_for_path(
|
||||
container, "/run/service/gateway-live", kind="d", deadline_s=10.0,
|
||||
), "slot not recreated after restart"
|
||||
r = _sh(container, "test -f /run/service/gateway-live/down")
|
||||
r = docker_exec_sh(container, "test -f /run/service/gateway-live/down")
|
||||
assert r.returncode != 0, (
|
||||
"down marker present despite a live gateway being restarted — "
|
||||
"the signal-initiated shutdown wrongly persisted 'stopped' (#42675)"
|
||||
|
||||
@@ -13,39 +13,16 @@ the realistic runtime context. See the conftest module docstring.
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh
|
||||
|
||||
|
||||
def _poll(container: str, probe: str, *, deadline_s: float = 30.0,
|
||||
interval_s: float = 0.5) -> tuple[bool, str]:
|
||||
"""Repeatedly run ``probe`` inside the container until it exits 0 or
|
||||
``deadline_s`` elapses. Returns (success, last stdout)."""
|
||||
end = time.monotonic() + deadline_s
|
||||
last = ""
|
||||
while time.monotonic() < end:
|
||||
r = docker_exec_sh(container, probe, timeout=10)
|
||||
last = r.stdout
|
||||
if r.returncode == 0:
|
||||
return True, last
|
||||
time.sleep(interval_s)
|
||||
return False, last
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container, poll_container
|
||||
|
||||
|
||||
def test_dashboard_not_running_by_default(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
"""Without HERMES_DASHBOARD, no dashboard process should be running."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "60"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
# Give the entrypoint enough time to finish bootstrap; if a dashboard
|
||||
# were going to start it'd be visible by now.
|
||||
time.sleep(5)
|
||||
start_container(built_image, container_name, cmd="sleep 60")
|
||||
r = docker_exec(container_name, "pgrep", "-f", "hermes dashboard")
|
||||
# pgrep exits non-zero when no match found
|
||||
assert r.returncode != 0, (
|
||||
@@ -64,12 +41,7 @@ def test_dashboard_slot_reports_down_when_disabled(
|
||||
writes a `down` marker file in the live service-dir when
|
||||
HERMES_DASHBOARD is unset, so the slot reflects reality.
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "60"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(5)
|
||||
start_container(built_image, container_name, cmd="sleep 60")
|
||||
# /command/ isn't on PATH for docker-exec sessions, so call by
|
||||
# absolute path.
|
||||
r = docker_exec(
|
||||
@@ -86,56 +58,42 @@ def test_dashboard_slot_reports_up_when_enabled(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
"""Symmetry: with HERMES_DASHBOARD=1, s6-svstat reports the slot as up."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1",
|
||||
# The default dashboard host is 0.0.0.0, which now engages the
|
||||
# OAuth auth gate. Without a provider registered (no
|
||||
# HERMES_DASHBOARD_OAUTH_CLIENT_ID in this test env), start_server
|
||||
# would fail closed and the slot would never come up. Pin the
|
||||
# explicit insecure opt-in to keep this test focused on the s6
|
||||
# supervision contract, not the auth gate.
|
||||
"-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
|
||||
"-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
|
||||
built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
# The default dashboard host is 0.0.0.0, which now engages the
|
||||
# OAuth auth gate. Without a provider registered (no
|
||||
# HERMES_DASHBOARD_OAUTH_CLIENT_ID in this test env), start_server
|
||||
# would fail closed and the slot would never come up. Pin the
|
||||
# explicit insecure opt-in to keep this test focused on the s6
|
||||
# supervision contract, not the auth gate.
|
||||
start_container(
|
||||
built_image, container_name,
|
||||
"HERMES_DASHBOARD=1",
|
||||
"HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
|
||||
"HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
|
||||
cmd="sleep 120",
|
||||
)
|
||||
# uvicorn takes a moment to bind; poll svstat.
|
||||
deadline = time.monotonic() + 30.0
|
||||
last = ""
|
||||
while time.monotonic() < deadline:
|
||||
r = docker_exec(
|
||||
container_name, "/command/s6-svstat", "/run/service/dashboard",
|
||||
)
|
||||
last = r.stdout
|
||||
if r.returncode == 0 and "up " in r.stdout:
|
||||
return # success
|
||||
time.sleep(0.5)
|
||||
raise AssertionError(
|
||||
f"Dashboard slot never reached up state; last svstat: {last!r}"
|
||||
)
|
||||
poll_container(container_name, "/command/s6-svstat /run/service/dashboard | grep -q 'up '")
|
||||
|
||||
|
||||
def test_dashboard_opt_in_starts(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
"""With HERMES_DASHBOARD=1, a dashboard process should be visible."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1",
|
||||
# Default bind is 0.0.0.0, which engages the auth gate. Register the
|
||||
# bundled basic password provider so the gate has a provider and the
|
||||
# dashboard binds (vs fail-closed). Keeps the test focused on s6
|
||||
# supervision, not auth.
|
||||
"-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
|
||||
"-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
|
||||
built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
# Default bind is 0.0.0.0, which engages the auth gate. Register the
|
||||
# bundled basic password provider so the gate has a provider and the
|
||||
# dashboard binds (vs fail-closed). Keeps the test focused on s6
|
||||
# supervision, not auth.
|
||||
start_container(
|
||||
built_image, container_name,
|
||||
"HERMES_DASHBOARD=1",
|
||||
"HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
|
||||
"HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
|
||||
cmd="sleep 120",
|
||||
)
|
||||
# Poll for the dashboard subprocess to appear — the entrypoint
|
||||
# backgrounds it and bootstrap (skills sync etc.) can take a few
|
||||
# seconds before the python process actually launches.
|
||||
ok, _ = _poll(
|
||||
ok, _ = poll_container(
|
||||
container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
|
||||
)
|
||||
assert ok, "Dashboard should be running with HERMES_DASHBOARD=1"
|
||||
@@ -145,22 +103,22 @@ def test_dashboard_port_override(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
"""HERMES_DASHBOARD_PORT changes the dashboard's listen port."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1", "-e", "HERMES_DASHBOARD_PORT=9120",
|
||||
# Default bind is 0.0.0.0; register the basic password provider so
|
||||
# the auth gate has a provider and the dashboard binds. See
|
||||
# test_dashboard_slot_reports_up_when_enabled for the full rationale.
|
||||
"-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
|
||||
"-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
|
||||
built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
# Default bind is 0.0.0.0; register the basic password provider so
|
||||
# the auth gate has a provider and the dashboard binds. See
|
||||
# test_dashboard_slot_reports_up_when_enabled for the full rationale.
|
||||
start_container(
|
||||
built_image, container_name,
|
||||
"HERMES_DASHBOARD=1",
|
||||
"HERMES_DASHBOARD_PORT=9120",
|
||||
"HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
|
||||
"HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
|
||||
cmd="sleep 120",
|
||||
)
|
||||
# The dashboard process appearing in pgrep doesn't mean it's bound
|
||||
# to the port yet — uvicorn takes another second or two to come up.
|
||||
# The image doesn't ship ss/netstat, so probe /proc/net/tcp directly:
|
||||
# port 9120 = 0x23A0, state 0A = LISTEN.
|
||||
ok, stdout = _poll(
|
||||
ok, stdout = poll_container(
|
||||
container_name,
|
||||
"grep -E ' 0+:23A0 .* 0A ' /proc/net/tcp /proc/net/tcp6 "
|
||||
"2>/dev/null",
|
||||
@@ -180,20 +138,19 @@ def test_dashboard_restarts_after_crash(
|
||||
dashboard runs as a longrun s6-rc service and s6-supervise restarts
|
||||
it after a ~1s backoff (the default).
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1",
|
||||
# Default bind is 0.0.0.0; register the basic password provider so
|
||||
# the auth gate has a provider and the supervised dashboard binds.
|
||||
# See test_dashboard_slot_reports_up_when_enabled for the full
|
||||
# rationale.
|
||||
"-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
|
||||
"-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
|
||||
built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
# Default bind is 0.0.0.0; register the basic password provider so
|
||||
# the auth gate has a provider and the supervised dashboard binds.
|
||||
# See test_dashboard_slot_reports_up_when_enabled for the full
|
||||
# rationale.
|
||||
start_container(
|
||||
built_image, container_name,
|
||||
"HERMES_DASHBOARD=1",
|
||||
"HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
|
||||
"HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
|
||||
cmd="sleep 120",
|
||||
)
|
||||
# Wait for the first dashboard to come up.
|
||||
ok, _ = _poll(
|
||||
ok, _ = poll_container(
|
||||
container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
|
||||
)
|
||||
assert ok, "Dashboard never started initially"
|
||||
@@ -338,13 +295,12 @@ def test_dashboard_oauth_gate_engages_on_non_loopback_bind(
|
||||
responds 200 without a cookie under both gates, so it cannot
|
||||
distinguish "gate on" from "gate off".
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1",
|
||||
"-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
|
||||
"-e", "HERMES_DASHBOARD_OAUTH_CLIENT_ID=agent:test-instance",
|
||||
built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
start_container(
|
||||
built_image, container_name,
|
||||
"HERMES_DASHBOARD=1",
|
||||
"HERMES_DASHBOARD_HOST=0.0.0.0",
|
||||
"HERMES_DASHBOARD_OAUTH_CLIENT_ID=agent:test-instance",
|
||||
cmd="sleep 120",
|
||||
)
|
||||
|
||||
# (1) Provider registry visible via the public bootstrap endpoint.
|
||||
@@ -398,18 +354,17 @@ def test_dashboard_insecure_env_var_no_longer_bypasses_gate(
|
||||
public-dashboard escape hatch is gone: there is no env that serves the
|
||||
dashboard on a public bind without an auth provider.
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1",
|
||||
"-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
|
||||
"-e", "HERMES_DASHBOARD_INSECURE=1",
|
||||
built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
start_container(
|
||||
built_image, container_name,
|
||||
"HERMES_DASHBOARD=1",
|
||||
"HERMES_DASHBOARD_HOST=0.0.0.0",
|
||||
"HERMES_DASHBOARD_INSECURE=1",
|
||||
cmd="sleep 120",
|
||||
)
|
||||
# Fail-closed: the dashboard process must NOT successfully serve. Probe
|
||||
# for a few seconds; /api/status should never become reachable because
|
||||
# start_server raised SystemExit before binding.
|
||||
ok, _ = _poll(
|
||||
ok, _ = poll_container(
|
||||
container_name,
|
||||
"curl -fsS -m 2 http://127.0.0.1:9119/api/status >/dev/null 2>&1",
|
||||
deadline_s=12.0,
|
||||
|
||||
@@ -22,6 +22,7 @@ These tests verify:
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from tests.docker.conftest import docker_exec
|
||||
|
||||
import subprocess
|
||||
import time
|
||||
@@ -36,8 +37,8 @@ _RUN_READY_TIMEOUT_S = 20
|
||||
|
||||
def _wait_for_init(container: str) -> None:
|
||||
"""Block until /init is up enough that `docker exec` is responsive."""
|
||||
deadline = time.time() + _RUN_READY_TIMEOUT_S
|
||||
while time.time() < deadline:
|
||||
deadline = time.monotonic() + _RUN_READY_TIMEOUT_S
|
||||
while time.monotonic() < deadline:
|
||||
r = subprocess.run(
|
||||
["docker", "exec", container, "true"],
|
||||
capture_output=True, timeout=5,
|
||||
@@ -287,4 +288,4 @@ def test_e2e_login_then_supervised_gateway_can_read_auth(
|
||||
"Files written by `docker exec` are unreadable to the hermes user "
|
||||
f"(supervised gateway UID): {unreadable}. The shim failed to drop "
|
||||
"privileges before the write."
|
||||
)
|
||||
)
|
||||
@@ -6,7 +6,7 @@ fails inside the published image and ``hermes dump`` used to report
|
||||
``$HERMES_GIT_SHA`` build-arg to ``/opt/hermes/.hermes_build_sha`` and
|
||||
``hermes_cli/build_info.py`` reads it as a fallback.
|
||||
|
||||
CI (``.github/workflows/docker-publish.yml``) always sets the build-arg
|
||||
CI (``.github/workflows/docker.yml``) always sets the build-arg
|
||||
to ``${{ github.sha }}``. Local ``docker build`` (the ``built_image``
|
||||
fixture in ``tests/docker/conftest.py``) does NOT — so locally the file
|
||||
is absent and ``hermes dump`` correctly falls back to ``(unknown)``.
|
||||
|
||||
157
tests/docker/test_gateway_bootstrap_state.py
Normal file
157
tests/docker/test_gateway_bootstrap_state.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""Runtime smoke tests for Docker gateway_state.json bootstrap seeding.
|
||||
|
||||
Build the real image and verify the actual runtime behavior:
|
||||
|
||||
1. HERMES_GATEWAY_BOOTSTRAP_STATE=running on a fresh volume seeds
|
||||
gateway_state.json with running state
|
||||
2. An existing gateway_state.json is never clobbered (first-boot-only)
|
||||
3. No env var = no seed (default down-on-first-boot preserved)
|
||||
4. Only literal "running" is honored; other values are ignored
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh, wait_for_container_ready
|
||||
|
||||
|
||||
def _start_container(
    built_image: str, name: str, *env: str,
) -> str:
    """Launch a detached ``sleep infinity`` container and block until ready.

    Args:
        built_image: Image reference handed to ``docker run``.
        name: Value passed to ``docker run --name``.
        *env: Environment entries; each one is forwarded verbatim as the
            argument of its own ``-e`` flag.

    Returns:
        ``name``, unchanged, once ``wait_for_container_ready`` has returned.

    Raises:
        subprocess.CalledProcessError: ``docker run`` exited non-zero.
        subprocess.TimeoutExpired: ``docker run`` took longer than 60s.
    """
    # Flatten ("A=1", "B=2") into ["-e", "A=1", "-e", "B=2"].
    env_flags = [token for entry in env for token in ("-e", entry)]
    command = [
        "docker", "run", "-d", "--name", name,
        *env_flags,
        built_image, "sleep", "infinity",
    ]
    subprocess.run(command, check=True, capture_output=True, timeout=60)
    wait_for_container_ready(name)
    return name
|
||||
|
||||
|
||||
def test_seeds_running_state_on_blank_volume(
    built_image: str, container_name: str,
) -> None:
    """With HERMES_GATEWAY_BOOTSTRAP_STATE=running and no pre-existing
    volume, gateway_state.json must exist after boot and decode to a JSON
    object whose ``gateway_state`` is ``"running"``."""
    _start_container(
        built_image, container_name,
        "HERMES_GATEWAY_BOOTSTRAP_STATE=running",
    )

    # The `|| echo NONE` fallback turns "file missing" into a sentinel we
    # can assert on, instead of an empty string that would fail JSON parsing.
    probe = "cat /opt/data/gateway_state.json 2>/dev/null || echo NONE"
    r = docker_exec_sh(container_name, probe, timeout=10)
    payload = r.stdout.strip()
    assert payload != "NONE", (
        f"gateway_state.json not seeded on fresh volume: {r.stdout}"
    )

    state = json.loads(payload)
    seeded = state.get("gateway_state")
    assert seeded == "running", (
        f"expected gateway_state=running, got: {state}"
    )
|
||||
|
||||
|
||||
def test_does_not_clobber_existing_state(
    built_image: str, container_name: str,
) -> None:
    """An existing gateway_state.json must never be overwritten by the
    seed, even when the bootstrap env var says running.

    We use a named volume so we can pre-create the state file before
    the container boots. The [ ! -f ] guard in stage2 must skip seeding
    because the file already exists. The file is read as soon as the
    container reports ready.

    The container and the named volume are removed in a ``finally`` block
    so a failing assertion (or a failing docker command) does not leave
    the volume behind on the host.
    """
    volume = f"{container_name}-vol"
    subprocess.run(
        ["docker", "volume", "create", volume],
        check=True, capture_output=True, timeout=10,
    )

    try:
        # Pre-create the state file via a throwaway container
        existing = json.dumps({"gateway_state": "stopped", "pid": 123})
        subprocess.run(
            ["docker", "run", "--rm", "-v", f"{volume}:/opt/data",
             "--entrypoint", "sh", built_image,
             "-c", f"printf '{existing}\\n' > /opt/data/gateway_state.json"],
            check=True, capture_output=True, timeout=30,
        )

        # Boot with the env var set — stage2 must NOT clobber the existing file
        subprocess.run(
            ["docker", "run", "-d", "--name", container_name,
             "-v", f"{volume}:/opt/data",
             "-e", "HERMES_GATEWAY_BOOTSTRAP_STATE=running",
             built_image, "sleep", "infinity"],
            check=True, capture_output=True, timeout=60,
        )
        # Read the file as quickly as possible — the gateway service may
        # start and write its own state, but the stage2 [ ! -f ] guard runs
        # during cont-init (before any service starts), so the file must
        # still be our "stopped" state at this point.
        wait_for_container_ready(container_name)
        r = docker_exec_sh(
            container_name, "cat /opt/data/gateway_state.json", timeout=10,
        )
        # Fail with the real cause rather than a JSONDecodeError on "".
        assert r.returncode == 0, (
            f"could not read gateway_state.json: {r.stderr}"
        )
        state = json.loads(r.stdout.strip())
        assert state.get("gateway_state") == "stopped", (
            f"existing state was clobbered by bootstrap seed: {state}"
        )
    finally:
        # Cleanup — best effort (no check=True); the container must go
        # first because a volume in use cannot be removed.
        subprocess.run(
            ["docker", "rm", "-f", container_name],
            capture_output=True, timeout=10,
        )
        subprocess.run(
            ["docker", "volume", "rm", "-f", volume],
            capture_output=True, timeout=10,
        )
|
||||
|
||||
|
||||
def test_no_seed_when_env_unset(
    built_image: str, container_name: str,
) -> None:
    """A container started without HERMES_GATEWAY_BOOTSTRAP_STATE must not
    have a gateway_state.json under /opt/data."""
    _start_container(built_image, container_name)

    # The shell prints exactly one of EXISTS / ABSENT depending on `test -f`.
    presence_probe = (
        "test -f /opt/data/gateway_state.json && "
        "echo EXISTS || echo ABSENT"
    )
    r = docker_exec_sh(container_name, presence_probe, timeout=10)
    assert "ABSENT" in r.stdout, (
        f"gateway_state.json was seeded without the env var: {r.stdout}"
    )
|
||||
|
||||
|
||||
def test_non_running_value_ignored(
    built_image: str, container_name: str,
) -> None:
    """Only literal 'running' is honored; any other value is ignored.

    Each candidate value gets its own container, named
    ``<container_name>-<value>``. That container is removed in a
    ``finally`` block so a failed assertion or a readiness timeout does
    not leak it.
    """
    for bogus in ("stopped", "Running", "1", "true", "starting"):
        # Need a fresh container per iteration
        name = f"{container_name}-{bogus}"
        try:
            _start_container(
                built_image, name,
                f"HERMES_GATEWAY_BOOTSTRAP_STATE={bogus}",
            )
            r = docker_exec_sh(
                name,
                "test -f /opt/data/gateway_state.json && "
                "echo EXISTS || echo ABSENT",
                timeout=10,
            )
            assert "ABSENT" in r.stdout, (
                f"bogus value {bogus!r} should not seed a state file: {r.stdout}"
            )
        finally:
            # Best-effort removal; `rm -f` also succeeds when the container
            # was never created.
            subprocess.run(
                ["docker", "rm", "-f", name],
                capture_output=True, timeout=10,
            )
|
||||
@@ -23,15 +23,15 @@ from __future__ import annotations
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from tests.docker.conftest import docker_exec_sh
|
||||
|
||||
|
||||
def _sh(container: str, command: str, timeout: int = 30):
|
||||
return docker_exec_sh(container, command, timeout=timeout)
|
||||
from tests.docker.conftest import (
|
||||
docker_exec_sh,
|
||||
start_container,
|
||||
wait_for_docker_logs,
|
||||
)
|
||||
|
||||
|
||||
def _svstat(container: str, slot: str = "gateway-default") -> str:
|
||||
r = _sh(container, f"/command/s6-svstat /run/service/{slot}")
|
||||
r = docker_exec_sh(container, f"/command/s6-svstat /run/service/{slot}")
|
||||
return r.stdout if r.returncode == 0 else ""
|
||||
|
||||
|
||||
@@ -46,6 +46,43 @@ def _svstat_wants_up(container: str, slot: str = "gateway-default") -> bool:
|
||||
return "want up" in state
|
||||
|
||||
|
||||
def _wait_for_gateway_or_exit(
    container: str,
    *,
    deadline_s: float = 60.0,
) -> str:
    """Poll until the container is either running a foreground gateway
    process or has exited.

    Used by the ``--no-supervise`` tests where the gateway runs as the
    CMD process (not supervised by s6). Under CI load the gateway can
    take well over 6s to finish Python imports and reach the gateway
    entrypoint — a fixed ``time.sleep(6)`` races. Polling for a process
    matching ``hermes.*gateway`` (the gateway is running) or
    ``docker inspect`` returning ``exited`` is both faster on quick
    machines and flake-free on slow ones.

    Args:
        container: Name of the container to watch.
        deadline_s: Maximum number of seconds to keep polling.

    Returns:
        ``"exited"`` as soon as the container has exited, ``"running"`` as
        soon as a gateway process is detected, otherwise the last status
        string ``docker inspect`` reported before the deadline (the empty
        string if no poll happened or the inspect printed nothing).
    """
    # Initialised up front so a non-positive deadline (loop body never
    # runs) returns "" instead of raising UnboundLocalError.
    status = ""
    end = time.monotonic() + deadline_s
    while time.monotonic() < end:
        r = subprocess.run(
            ["docker", "inspect", "-f", "{{.State.Status}}", container],
            capture_output=True, text=True, timeout=10,
        )
        status = r.stdout.strip()
        if status == "exited":
            return "exited"
        if status == "running":
            # Check if the gateway process is actually running in the
            # foreground (the no-supervise path). If it is, we're done.
            # The `[h]` bracket keeps the pattern from matching its own
            # text: if the helper wraps the command in a shell, that
            # shell's command line contains the literal pattern and a
            # plain `hermes.*gateway` would report a false positive.
            pgrep = docker_exec_sh(
                container, "pgrep -f '[h]ermes.*gateway' >/dev/null 2>&1",
            )
            if pgrep.returncode == 0:
                return "running"
        time.sleep(0.5)
    return status
|
||||
|
||||
|
||||
def test_gateway_run_redirects_to_supervised(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
@@ -64,15 +101,27 @@ def test_gateway_run_redirects_to_supervised(
|
||||
# exit immediately (which is what would happen pre-this-PR on the
|
||||
# s6 image — the foreground gateway would crash without config,
|
||||
# the CMD would exit, /init would shut down).
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"gateway", "run"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
start_container(built_image, container_name, cmd="gateway run")
|
||||
|
||||
# Give /init time to run cont-init.d, the wrapper time to dispatch
|
||||
# the redirect, and s6-supervise time to spin up the slot.
|
||||
time.sleep(5)
|
||||
# Wait for the redirect breadcrumb to appear in docker logs.
|
||||
# Under heavy parallel load (32-way docker test fan-out), the CMD
|
||||
# process (main-wrapper.sh → python → hermes gateway run) can take
|
||||
# well over 5s to reach the redirect logic. The breadcrumb is the
|
||||
# definitive signal that the redirect fired — polling for it is
|
||||
# both faster on quick machines and flake-free on slow ones.
|
||||
# Under heavy parallel docker load (32-way fan-out), the CMD process
|
||||
# (main-wrapper.sh → python → hermes gateway run) can take well over
|
||||
# 30s to import the codebase, load config, and reach the redirect
|
||||
# logic. 60s matches the deadline other boot-readiness polls use.
|
||||
logs = wait_for_docker_logs(
|
||||
container_name, "s6 supervision", deadline_s=60.0,
|
||||
)
|
||||
assert "s6 supervision" in logs, (
|
||||
f"expected loud breadcrumb in docker logs; got:\n{logs}"
|
||||
)
|
||||
assert "--no-supervise" in logs, (
|
||||
f"breadcrumb missing opt-out hint; got:\n{logs}"
|
||||
)
|
||||
|
||||
# Container should still be running. If the redirect didn't fire,
|
||||
# the foreground gateway would have crashed and the container
|
||||
@@ -83,7 +132,7 @@ def test_gateway_run_redirects_to_supervised(
|
||||
)
|
||||
assert r.returncode == 0 and r.stdout.strip() == "running", (
|
||||
f"container exited prematurely: {r.stdout!r}; "
|
||||
f"docker logs:\n{subprocess.run(['docker', 'logs', container_name], capture_output=True, text=True).stdout}"
|
||||
f"docker logs:\n{logs}"
|
||||
)
|
||||
|
||||
# s6's intent for the default-profile gateway slot should be up.
|
||||
@@ -96,26 +145,24 @@ def test_gateway_run_redirects_to_supervised(
|
||||
)
|
||||
|
||||
# The CMD process (PID under /init that the wrapper exec'd into)
|
||||
# should be sleeping, not the gateway. We grep `ps` for the
|
||||
# `sleep infinity` heartbeat.
|
||||
r = _sh(container_name, "ps -eo pid,cmd | grep -v grep | grep 'sleep infinity'")
|
||||
assert r.returncode == 0 and "sleep infinity" in r.stdout, (
|
||||
f"expected `sleep infinity` heartbeat process; got ps:\n{r.stdout}\n"
|
||||
f"stderr: {r.stderr}"
|
||||
# should be sleeping, not the gateway. We count `sleep infinity`
|
||||
# processes parented to the CMD wrapper (main-wrapper.sh / rc.init
|
||||
# top), NOT the static main-hermes service's sleep — a bare grep
|
||||
# for `sleep infinity` would false-positive on the main-hermes
|
||||
# sleep and pass even before the redirect fires.
|
||||
r = docker_exec_sh(
|
||||
container_name,
|
||||
"ps -eo pid,ppid,cmd | grep -v grep | awk "
|
||||
"'/main-wrapper.sh|rc.init top/ { wrapper_pid=$1 } "
|
||||
"$3==\"sleep\" && $4==\"infinity\" && $2==wrapper_pid { c++ } "
|
||||
"END { print c+0 }'",
|
||||
)
|
||||
|
||||
# And the loud breadcrumb should be in `docker logs` so users see
|
||||
# the upgrade explanation.
|
||||
r = subprocess.run(
|
||||
["docker", "logs", container_name],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
logs = r.stdout + r.stderr
|
||||
assert "s6 supervision" in logs, (
|
||||
f"expected loud breadcrumb in docker logs; got:\n{logs}"
|
||||
)
|
||||
assert "--no-supervise" in logs, (
|
||||
f"breadcrumb missing opt-out hint; got:\n{logs}"
|
||||
assert r.returncode == 0
|
||||
redirected_sleeps = int(r.stdout.strip() or 0)
|
||||
assert redirected_sleeps == 1, (
|
||||
f"expected one `sleep infinity` heartbeat parented to the CMD "
|
||||
f"wrapper (the redirect); found {redirected_sleeps}. "
|
||||
f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
)
|
||||
|
||||
|
||||
@@ -139,25 +186,13 @@ def test_gateway_run_no_supervise_flag_preserves_legacy_behavior(
|
||||
* The ``gateway-default`` s6 service slot is NOT created.
|
||||
* No supervision-redirect breadcrumb appears in docker logs.
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"gateway", "run", "--no-supervise"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
# Give startup time. The unconfigured-profile case used to fail
|
||||
# fast; with a config bind-mounted profile (and a real volume on
|
||||
# most realistic deployments) the gateway just runs.
|
||||
time.sleep(6)
|
||||
start_container(built_image, container_name, cmd="gateway run --no-supervise")
|
||||
|
||||
# Container should still be running OR have exited cleanly with
|
||||
# the gateway's status code. Either is correct for pre-s6
|
||||
# semantics — what's NOT correct is the supervised behavior
|
||||
# (sleep infinity heartbeat + supervised gateway slot).
|
||||
inspect = subprocess.run(
|
||||
["docker", "inspect", "-f", "{{.State.Status}}", container_name],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
status = inspect.stdout.strip()
|
||||
# Wait for the gateway to start in the foreground or the container
|
||||
# to exit (no-config crash is also valid pre-s6 semantics).
|
||||
# A fixed time.sleep(6) races under CI parallel docker load —
|
||||
# the gateway can take well over 6s to finish Python imports.
|
||||
status = _wait_for_gateway_or_exit(container_name, deadline_s=60.0)
|
||||
|
||||
# No redirect breadcrumb anywhere.
|
||||
logs = subprocess.run(
|
||||
@@ -175,7 +210,7 @@ def test_gateway_run_no_supervise_flag_preserves_legacy_behavior(
|
||||
if status == "running":
|
||||
# Gateway running in foreground — the CMD process should be
|
||||
# the gateway itself, NOT a sleep-infinity heartbeat.
|
||||
r = _sh(
|
||||
r = docker_exec_sh(
|
||||
container_name,
|
||||
"ps -eo pid,ppid,cmd | grep -v grep | awk '/main-wrapper.sh|rc.init top/ { wrapper_pid=$1 } "
|
||||
"$3==\"sleep\" && $4==\"infinity\" && $2==wrapper_pid { c++ } END { print c+0 }'",
|
||||
@@ -186,7 +221,7 @@ def test_gateway_run_no_supervise_flag_preserves_legacy_behavior(
|
||||
f"--no-supervise: expected NO `sleep infinity` parented to "
|
||||
f"the CMD wrapper (foreground gateway should be the CMD), "
|
||||
f"found {redirected_sleeps}. "
|
||||
f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
)
|
||||
|
||||
# The gateway-default s6 slot exists (the cont-init.d
|
||||
@@ -211,13 +246,15 @@ def test_gateway_run_no_supervise_env_var(
|
||||
Useful when users can't easily change their `docker run` args
|
||||
(orchestration templates, K8s manifests) but can set env vars.
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_GATEWAY_NO_SUPERVISE=1",
|
||||
built_image, "gateway", "run"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
start_container(
|
||||
built_image, container_name,
|
||||
"HERMES_GATEWAY_NO_SUPERVISE=1",
|
||||
cmd="gateway run",
|
||||
)
|
||||
time.sleep(6)
|
||||
|
||||
# Same as the CLI-flag test: wait for the gateway to start or
|
||||
# the container to exit, instead of a blind time.sleep(6).
|
||||
status = _wait_for_gateway_or_exit(container_name, deadline_s=60.0)
|
||||
|
||||
logs = subprocess.run(
|
||||
["docker", "logs", container_name],
|
||||
@@ -231,11 +268,7 @@ def test_gateway_run_no_supervise_env_var(
|
||||
|
||||
# Same as the CLI-flag test: the slot exists (reconciler creates
|
||||
# it) but should not have want-state up.
|
||||
inspect = subprocess.run(
|
||||
["docker", "inspect", "-f", "{{.State.Status}}", container_name],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
if inspect.stdout.strip() == "running":
|
||||
if status == "running":
|
||||
assert not _svstat_wants_up(container_name, "gateway-default"), (
|
||||
"HERMES_GATEWAY_NO_SUPERVISE=1: gateway-default has "
|
||||
"want-state up, implying the redirect dispatched `start` "
|
||||
@@ -260,25 +293,33 @@ def test_supervised_gateway_does_not_recurse(
|
||||
supervised gateway). Two or more would imply recursive spawning
|
||||
via the redirect → start → run → redirect → ... loop.
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"gateway", "run"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(6)
|
||||
start_container(built_image, container_name, cmd="gateway run")
|
||||
|
||||
# Count python processes running `hermes gateway run`. If the
|
||||
# recursion guard fails, s6 would respawn fresh `gateway run`
|
||||
# processes on every cycle, leaving multiple Python-process
|
||||
# descendants under the gateway-default supervise tree.
|
||||
r = _sh(container_name, "ps -eo pid,cmd | grep -v grep | grep -E 'python.*hermes.*gateway run' | wc -l")
|
||||
# Wait for the redirect to fire by polling for the breadcrumb.
|
||||
# Under CI parallel docker test fan-out, the CMD process
|
||||
# (main-wrapper.sh → python → hermes gateway run) can take well
|
||||
# over 6s to reach the redirect logic. A fixed sleep would race:
|
||||
# if we check too early, the CMD process hasn't exec'd into
|
||||
# `sleep infinity` yet and the s6-supervised gateway hasn't
|
||||
# started either — so we'd see the CMD's `hermes gateway run`
|
||||
# AND the supervised one (2 processes) and falsely conclude
|
||||
# recursion. Polling the breadcrumb is the definitive signal
|
||||
# that the redirect fired and the CMD process is now `sleep`.
|
||||
wait_for_docker_logs(container_name, "s6 supervision")
|
||||
|
||||
# Now that the redirect fired, count python processes running
|
||||
# `hermes gateway run`. If the recursion guard fails, s6 would
|
||||
# respawn fresh `gateway run` processes on every cycle, leaving
|
||||
# multiple Python-process descendants under the gateway-default
|
||||
# supervise tree.
|
||||
r = docker_exec_sh(container_name, "ps -eo pid,cmd | grep -v grep | grep -E 'python.*hermes.*gateway run' | wc -l")
|
||||
assert r.returncode == 0
|
||||
n = int(r.stdout.strip() or 0)
|
||||
assert n <= 1, (
|
||||
f"expected at most one supervised python `hermes gateway run` "
|
||||
f"process (the legitimately-supervised gateway); found {n}. "
|
||||
f"Recursion guard may have failed. "
|
||||
f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
)
|
||||
|
||||
# Stronger positive assertion: there should be exactly one
|
||||
@@ -286,7 +327,7 @@ def test_supervised_gateway_does_not_recurse(
|
||||
# CMD process (PID 17 typically). The static `main-hermes`
|
||||
# service has its own `sleep infinity` child; THAT one is fine
|
||||
# and unrelated to our redirect.
|
||||
r = _sh(
|
||||
r = docker_exec_sh(
|
||||
container_name,
|
||||
# Find PID of the CMD process (main-wrapper.sh or its sh
|
||||
# parent), then count `sleep infinity` children.
|
||||
@@ -298,7 +339,7 @@ def test_supervised_gateway_does_not_recurse(
|
||||
assert redirected == 1, (
|
||||
f"expected exactly one `sleep infinity` parented to the CMD "
|
||||
f"wrapper (the redirect heartbeat); found {redirected}. "
|
||||
f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
|
||||
)
|
||||
|
||||
|
||||
@@ -312,20 +353,47 @@ def test_dashboard_supervised_when_env_set(
|
||||
redirect: one container = supervised gateway + supervised
|
||||
dashboard, with zero extra user effort.
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1",
|
||||
built_image, "gateway", "run"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
start_container(
|
||||
built_image, container_name,
|
||||
"HERMES_DASHBOARD=1",
|
||||
cmd="gateway run",
|
||||
)
|
||||
time.sleep(5)
|
||||
|
||||
# Both slots should report want-up.
|
||||
assert _svstat_wants_up(container_name, "gateway-default"), (
|
||||
f"gateway-default slot not up: {_svstat(container_name)!r}"
|
||||
# Wait for the redirect to fire (the breadcrumb appears in docker
|
||||
# logs when the CMD process reaches the redirect logic). This is
|
||||
# the same signal the other gateway-run tests use.
|
||||
# A fixed time.sleep(5) was racing: start_container returns when
|
||||
# cont-init finishes, but the redirect (which creates the
|
||||
# gateway-default s6 slot) happens later in the CMD process.
|
||||
wait_for_docker_logs(
|
||||
container_name, "s6 supervision", deadline_s=60.0,
|
||||
)
|
||||
assert _svstat_wants_up(container_name, "dashboard"), (
|
||||
f"dashboard slot not up: {_svstat(container_name, 'dashboard')!r}"
|
||||
|
||||
# Poll for both slots to report want-up, using the same
|
||||
# _svstat_wants_up helper the other tests use. A simple
|
||||
# `grep 'want up'` is wrong: when the service is already up,
|
||||
# s6-svstat output is "up (pid ...) Ns" with no literal "want up"
|
||||
# — the want-up intent is implied by the absence of "want down".
|
||||
ok_gateway = False
|
||||
end = time.monotonic() + 30.0
|
||||
while time.monotonic() < end:
|
||||
if _svstat_wants_up(container_name, "gateway-default"):
|
||||
ok_gateway = True
|
||||
break
|
||||
time.sleep(0.5)
|
||||
assert ok_gateway, (
|
||||
f"gateway-default slot not want-up: {_svstat(container_name)!r}"
|
||||
)
|
||||
|
||||
ok_dash = False
|
||||
end = time.monotonic() + 30.0
|
||||
while time.monotonic() < end:
|
||||
if _svstat_wants_up(container_name, "dashboard"):
|
||||
ok_dash = True
|
||||
break
|
||||
time.sleep(0.5)
|
||||
assert ok_dash, (
|
||||
f"dashboard slot not want-up: {_svstat(container_name, 'dashboard')!r}"
|
||||
)
|
||||
|
||||
|
||||
@@ -354,14 +422,17 @@ def test_supervised_gateway_stdout_reaches_docker_logs(
|
||||
Python-logging output, so its presence in ``docker logs`` proves
|
||||
the stdout-tee is working.
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"gateway", "run"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
# Banner is printed during gateway startup — give it time to
|
||||
# initialize past the imports + config-load phase.
|
||||
time.sleep(8)
|
||||
start_container(built_image, container_name, cmd="gateway run")
|
||||
|
||||
# Poll docker logs for the banner glyph (⚕) or "Hermes Gateway
|
||||
# Starting" — the gateway's rich-console startup banner. A fixed
|
||||
# sleep(8) races under CI parallel docker test fan-out: the
|
||||
# supervised gateway can take well over 8s to finish imports +
|
||||
# config-load + banner print under load, and the assertion would
|
||||
# fail not because the stdout-tee is broken but because we checked
|
||||
# too early. Polling with a generous deadline is both faster on
|
||||
# quick machines and flake-free on slow ones.
|
||||
wait_for_docker_logs(container_name, "⚕", deadline_s=60.0)
|
||||
|
||||
logs = subprocess.run(
|
||||
["docker", "logs", container_name],
|
||||
@@ -377,14 +448,14 @@ def test_supervised_gateway_stdout_reaches_docker_logs(
|
||||
"This means the `1` action directive in _render_log_run isn't "
|
||||
"forwarding stdout to /init. "
|
||||
f"docker logs (last 2000 chars):\n{combined[-2000:]}\n"
|
||||
f"file contents:\n{_sh(container_name, 'cat /opt/data/logs/gateways/default/current').stdout}"
|
||||
f"file contents:\n{docker_exec_sh(container_name, 'cat /opt/data/logs/gateways/default/current').stdout}"
|
||||
)
|
||||
|
||||
# Cross-check: the same banner must also be in the rotated log
|
||||
# file (we kept the file destination, just added stdout). The
|
||||
# file version has s6-log's ISO 8601 timestamp prefix; the
|
||||
# docker logs version is raw.
|
||||
file_contents = _sh(
|
||||
file_contents = docker_exec_sh(
|
||||
container_name, "cat /opt/data/logs/gateways/default/current",
|
||||
).stdout
|
||||
assert "⚕" in file_contents or "Hermes Gateway Starting" in file_contents, (
|
||||
@@ -392,4 +463,3 @@ def test_supervised_gateway_stdout_reaches_docker_logs(
|
||||
"destination may have been dropped by the new s6-log script. "
|
||||
f"File contents:\n{file_contents}"
|
||||
)
|
||||
|
||||
|
||||
169
tests/docker/test_home_override_scripts.py
Normal file
169
tests/docker/test_home_override_scripts.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""Runtime smoke tests for Docker HOME overrides and script behavior.
|
||||
|
||||
Build the real image and verify the actual runtime behavior:
|
||||
|
||||
1. main-wrapper preserves the Docker ``-w`` working directory
|
||||
2. dashboard service resets HOME to /opt/data before privilege drop
|
||||
3. dashboard does not auto-add ``--insecure`` from a non-loopback bind host
|
||||
4. stage2 hook repairs profiles/ and cron/ ownership on every boot
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container, restart_container
|
||||
|
||||
|
||||
def test_main_wrapper_preserves_docker_workdir(
    built_image: str, container_name: str,
) -> None:
    """The container command must run in the Docker ``-w`` directory.

    Regression test for #35472. A throwaway container is started with
    ``-w /tmp`` and a command that prints its cwd; the last line of its
    stdout must be ``/tmp``, showing the main-wrapper restored the
    original working directory rather than leaving it at /opt/data.
    """
    docker_cmd = [
        "docker", "run", "--rm", "-w", "/tmp",
        built_image, "sh", "-c", "pwd",
    ]
    proc = subprocess.run(docker_cmd, capture_output=True, text=True, timeout=60)
    assert proc.returncode == 0, f"container failed: {proc.stderr[-1000:]}"

    # Boot logs from the stage2 hook (config migration, skills sync) land
    # on stdout ahead of the CMD output, so only the final line is the
    # `pwd` result.
    reported_cwd = proc.stdout.strip().rpartition("\n")[2].strip()
    assert reported_cwd == "/tmp", (
        f"expected cwd /tmp, got {reported_cwd!r} — "
        f"main-wrapper did not preserve the Docker -w directory"
    )
|
||||
|
||||
|
||||
def test_dashboard_service_resets_home(
    built_image: str, container_name: str,
) -> None:
    """The dashboard run script must export HOME=/opt/data before dropping
    privileges, so HOME-anchored state (discord lockfile, XDG dirs) doesn't
    try to write to /root (the /init context's HOME).

    The container is started with HERMES_DASHBOARD=1 bound to 127.0.0.1
    (where the auth gate doesn't engage). If a dashboard process is
    running, its HOME is read from ``/proc/<pid>/environ``; otherwise the
    test falls back to checking that the run script source contains
    ``export HOME=/opt/data``.
    """
    start_container(
        built_image, container_name,
        "HERMES_DASHBOARD=1", "HERMES_DASHBOARD_HOST=127.0.0.1",
    )

    # Check if the dashboard process is running and inspect its HOME.
    r = docker_exec_sh(
        container_name,
        # Find the dashboard process (hermes dashboard) and read its HOME
        # from /proc/<pid>/environ. If not running, verify the run script
        # itself exports HOME=/opt/data by grepping the script source.
        #
        # The pattern is written `[h]ermes dashboard` so it cannot match
        # the shell that runs this very script: that shell's command line
        # contains the pattern text, and a plain "hermes dashboard" would
        # make `pid` non-empty even with no dashboard running, so the test
        # would inspect the probe shell's HOME and never reach the
        # fallback branch.
        'pid=$(pgrep -f "[h]ermes dashboard" | head -1); '
        'if [ -n "$pid" ]; then '
        ' tr "\\0" "\\n" < /proc/$pid/environ | grep "^HOME="; '
        'else '
        ' grep -q "export HOME=/opt/data" '
        ' /opt/hermes/docker/s6-rc.d/dashboard/run && '
        ' echo "HOME=/opt/data"; '
        'fi',
        timeout=15,
    )
    assert "HOME=/opt/data" in r.stdout, (
        f"dashboard process or run script does not set HOME=/opt/data: "
        f"stdout={r.stdout!r} stderr={r.stderr!r}"
    )
|
||||
|
||||
|
||||
def test_dashboard_does_not_auto_insecure_from_host(
    built_image: str, container_name: str,
) -> None:
    """The dashboard MUST NOT auto-add ``--insecure`` based on
    HERMES_DASHBOARD_HOST.

    The auth gate is the authority on whether non-loopback binds are
    safe; ``--insecure`` must never be auto-derived from the bind host.

    We start the container with a non-loopback bind host and verify
    the dashboard process does NOT receive ``--insecure`` in its
    command line. If the dashboard fails to start (because the auth
    gate correctly blocks an unauthenticated non-loopback bind), that's
    also acceptable — the point is no auto-insecure.
    """
    start_container(
        built_image, container_name,
        "HERMES_DASHBOARD=1", "HERMES_DASHBOARD_HOST=0.0.0.0",
    )

    # Check the dashboard process command line for --insecure.
    r = docker_exec_sh(
        container_name,
        # `[h]ermes dashboard` instead of a plain pattern: the shell
        # running this script has the pattern text in its own command
        # line, so a plain "hermes dashboard" could select the probe
        # shell rather than the real dashboard process.
        'pid=$(pgrep -f "[h]ermes dashboard" | head -1); '
        'if [ -n "$pid" ]; then '
        ' tr "\\0" " " < /proc/$pid/cmdline; '
        'fi',
        timeout=10,
    )
    cmdline = r.stdout.strip()
    # If the process is running, it must NOT have --insecure.
    if cmdline:
        assert "--insecure" not in cmdline, (
            f"dashboard process has --insecure in cmdline (auto-derived "
            f"from host): {cmdline!r}"
        )
|
||||
|
||||
|
||||
def test_stage2_repairs_profiles_and_cron_ownership(
    built_image: str, container_name: str,
) -> None:
    """profiles/ and cron/ must both be reclaimed after root-context writes.

    The stage2 hook chowns these dirs to hermes:hermes on every boot.
    We simulate a root-owned file in each, then restart the container
    and verify ownership of BOTH files is repaired.
    """
    start_container(built_image, container_name)

    # Create root-owned files in profiles/ and cron/ to simulate
    # docker exec (root) writes. Both parent directories are created
    # explicitly so neither `touch` can fail silently on a missing dir.
    docker_exec(
        container_name, "mkdir", "-p", "/opt/data/profiles/testprof",
        user="root", timeout=5,
    )
    docker_exec(
        container_name, "mkdir", "-p", "/opt/data/cron",
        user="root", timeout=5,
    )
    docker_exec(
        container_name, "touch", "/opt/data/profiles/testprof/marker",
        user="root", timeout=5,
    )
    docker_exec(
        container_name, "touch", "/opt/data/cron/root_owned.json",
        user="root", timeout=5,
    )

    # Verify they're root-owned before restart. `stat` prints one owner
    # per file, so both entries are compared; a substring check would
    # pass with only one of the two files in the expected state.
    r = docker_exec_sh(
        container_name,
        'stat -c "%U" /opt/data/profiles/testprof/marker '
        '/opt/data/cron/root_owned.json',
        timeout=5,
    )
    assert r.stdout.split() == ["root", "root"], (
        f"expected root-owned files before restart, got: {r.stdout!r}"
    )

    # Restart — stage2 hook runs again and repairs ownership.
    restart_container(container_name)

    # Verify files are now owned by hermes (again: both of them).
    r = docker_exec_sh(
        container_name,
        'stat -c "%U" /opt/data/profiles/testprof/marker '
        '/opt/data/cron/root_owned.json',
        timeout=5,
    )
    assert r.stdout.split() == ["hermes", "hermes"], (
        f"expected hermes-owned files after restart, got: {r.stdout!r} — "
        f"stage2 hook did not repair profiles/ and cron/ ownership"
    )
|
||||
140
tests/docker/test_immutable_install.py
Normal file
140
tests/docker/test_immutable_install.py
Normal file
@@ -0,0 +1,140 @@
|
||||
"""Runtime smoke tests for Docker immutable install tree and install-method stamp.
|
||||
|
||||
Build the real image and verify at runtime:
|
||||
|
||||
1. /opt/hermes is not writable by the hermes user (immutable install tree)
|
||||
2. PYTHONDONTWRITEBYTECODE and HERMES_DISABLE_LAZY_INSTALLS are set
|
||||
3. /opt/hermes/.install_method contains "docker" (code-scoped stamp)
|
||||
4. $HERMES_HOME/.install_method is NOT stamped as "docker" by stage2
|
||||
5. A stale "docker" stamp in $HERMES_HOME is healed (removed) on boot
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import (
|
||||
docker_exec,
|
||||
docker_exec_sh,
|
||||
restart_container,
|
||||
start_container,
|
||||
)
|
||||
|
||||
|
||||
def test_install_tree_not_writable_by_hermes(
    built_image: str, container_name: str,
) -> None:
    """Writes under /opt/hermes must fail from inside the container.

    The install tree (source, venv, TUI bundle, node_modules) has to stay
    root-owned and non-writable so an agent session cannot self-modify
    the installation and brick the gateway. Both the tree root and the
    ``.venv`` subdirectory are probed with ``touch``.
    """
    start_container(built_image, container_name)

    # (directory to probe, how it is named in the failure message)
    # NOTE(review): the probe presumably executes as the hermes user via
    # docker_exec_sh's default — confirm against the helper.
    probes = (
        ("/opt/hermes", "/opt/hermes (install tree not immutable)"),
        ("/opt/hermes/.venv", "/opt/hermes/.venv"),
    )
    for directory, label in probes:
        attempt = docker_exec_sh(
            container_name,
            f"touch {directory}/test_write 2>&1 && "
            "echo WRITE_SUCCEEDED || echo WRITE_FAILED",
            timeout=10,
        )
        assert "WRITE_FAILED" in attempt.stdout, (
            f"hermes user can write to {label}: {attempt.stdout}"
        )
|
||||
|
||||
|
||||
def test_hermes_disable_lazy_installs_and_dont_write_bytecode(
    built_image: str, container_name: str,
) -> None:
    """Both hardening env vars must equal ``1`` inside the container.

    ``PYTHONDONTWRITEBYTECODE`` keeps .pyc files out of the immutable
    install tree and ``HERMES_DISABLE_LAZY_INSTALLS`` stops lazy installs
    from trying to modify it.
    """
    start_container(built_image, container_name)

    # The shell prints ENV_OK only when both variables are exactly "1".
    env_probe = (
        'test "$PYTHONDONTWRITEBYTECODE" = "1" && '
        'test "$HERMES_DISABLE_LAZY_INSTALLS" = "1" && '
        'echo ENV_OK || echo ENV_MISSING'
    )
    outcome = docker_exec_sh(container_name, env_probe, timeout=10)

    assert "ENV_OK" in outcome.stdout, (
        f"expected PYTHONDONTWRITEBYTECODE=1 and "
        f"HERMES_DISABLE_LAZY_INSTALLS=1, got: {outcome.stdout} stderr={outcome.stderr}"
    )
|
||||
|
||||
|
||||
def test_install_method_stamp_is_code_scoped(
    built_image: str, container_name: str,
) -> None:
    """The 'docker' stamp lives with the code, not with the data volume.

    ``/opt/hermes/.install_method`` must exist and contain ``docker``,
    while ``/opt/data/.install_method`` ($HERMES_HOME) must not be
    stamped ``docker``.
    """
    start_container(built_image, container_name)

    # Code-scoped stamp: present and equal to "docker".
    code_stamp = docker_exec_sh(
        container_name, "cat /opt/hermes/.install_method", timeout=10,
    )
    assert code_stamp.returncode == 0, (
        f"/opt/hermes/.install_method not found: {code_stamp.stderr}"
    )
    stamp_value = code_stamp.stdout.strip()
    assert stamp_value == "docker", (
        f"expected 'docker' stamp, got: {stamp_value!r}"
    )

    # Data-volume stamp: a missing file prints NONE; any value other
    # than "docker" is acceptable.
    home_stamp = docker_exec_sh(
        container_name,
        "cat /opt/data/.install_method 2>/dev/null || echo NONE",
        timeout=10,
    )
    assert home_stamp.stdout.strip() != "docker", (
        "$HERMES_HOME/.install_method is stamped 'docker' - stage2 must "
        "not stamp the data volume (shared with host installs)"
    )
|
||||
|
||||
|
||||
def test_stale_docker_stamp_in_home_is_healed_on_boot(
    built_image: str, container_name: str,
) -> None:
    """A stale 'docker' stamp left in $HERMES_HOME by an older image
    must be removed on boot so shared homes self-heal.

    The test plants ``docker`` in ``/opt/data/.install_method`` as root,
    restarts the container, and then requires that the file is either
    gone or no longer says ``docker``.
    """
    # Start container, write a stale stamp
    start_container(built_image, container_name)

    # Write a stale 'docker' stamp as root
    docker_exec(
        container_name, "sh", "-c",
        "printf 'docker\\n' > /opt/data/.install_method",
        user="root", timeout=5,
    )
    # Verify it exists
    r = docker_exec_sh(container_name, "cat /opt/data/.install_method", timeout=5)
    assert r.stdout.strip() == "docker", (
        f"could not plant the stale stamp: stdout={r.stdout!r} "
        f"stderr={r.stderr!r}"
    )

    # Restart - stage2 should heal it
    restart_container(container_name)

    # The stale stamp must be gone
    r = docker_exec_sh(
        container_name,
        "test -f /opt/data/.install_method && "
        "cat /opt/data/.install_method || echo HEALED",
        timeout=10,
    )
    # The probe exits 0 whether the file is absent or readable. A
    # non-zero exit means the exec itself failed, and its empty stdout
    # would otherwise pass the `!= "docker"` check without proving
    # anything.
    assert r.returncode == 0, (
        f"could not inspect $HERMES_HOME after restart: "
        f"stdout={r.stdout!r} stderr={r.stderr!r}"
    )
    # "HEALED" (file absent) and any other non-"docker" content both pass.
    assert r.stdout.strip() != "docker", (
        f"stale 'docker' stamp in $HERMES_HOME was not healed on boot: "
        f"{r.stdout}"
    )
|
||||
26
tests/docker/test_license_file_present.py
Normal file
26
tests/docker/test_license_file_present.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Runtime smoke test for Docker image license-file presence.
|
||||
|
||||
Build the real image and verify the LICENSE file is present inside the
|
||||
container (PEP 639 license-files metadata must resolve inside the
|
||||
Docker image).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
|
||||
def test_docker_image_contains_license_file(built_image: str) -> None:
    """``/opt/hermes/LICENSE`` must exist inside the built image.

    PEP 639 license-files metadata references LICENSE, so the Docker
    build context must not exclude it. The check overrides the image
    entrypoint with ``test`` and runs ``test -f`` on the path.
    """
    argv = [
        "docker", "run", "--rm", "--entrypoint", "test",
        built_image, "-f", "/opt/hermes/LICENSE",
    ]
    probe = subprocess.run(argv, capture_output=True, text=True, timeout=60)

    # `test -f` exits 0 only when the path is a regular file.
    assert probe.returncode == 0, (
        f"LICENSE file not found at /opt/hermes/LICENSE inside the Docker "
        f"image: {probe.stderr[-500:]}"
    )
|
||||
47
tests/docker/test_log_dir_seed.py
Normal file
47
tests/docker/test_log_dir_seed.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""Runtime smoke test for Docker $HERMES_HOME/logs/gateways seeding.
|
||||
|
||||
Build the real image and verify logs/ and logs/gateways/ exist and are
|
||||
owned by the hermes user after container boot.
|
||||
|
||||
Regression guard for #45258: if the first gateway log service runs in
|
||||
root context, logs/gateways/ is created root-owned; every profile
|
||||
registered later runs its log service as the dropped hermes user and
|
||||
s6-log crash-loops on mkdir: Permission denied.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import docker_exec_sh, start_container
|
||||
|
||||
|
||||
def test_logs_gateways_seeded_and_hermes_owned(
    built_image: str, container_name: str,
) -> None:
    """After boot, ``logs/`` and ``logs/gateways/`` exist and belong to hermes.

    Two probes are run inside the started container: one checks that both
    directories exist, the other prints the owning user name of each.
    """
    start_container(built_image, container_name)

    # Probe 1: existence. The shell prints a sentinel either way so the
    # assertion can be made on stdout alone.
    existence_probe = (
        "test -d /opt/data/logs && "
        "test -d /opt/data/logs/gateways && "
        "echo DIRS_OK || echo DIRS_MISSING"
    )
    existence = docker_exec_sh(container_name, existence_probe, timeout=10)
    assert "DIRS_OK" in existence.stdout, (
        f"logs/ or logs/gateways/ not seeded: {existence.stdout}"
    )

    # Probe 2: ownership, reported as "logs=<user> gateways=<user>".
    ownership_probe = (
        'logs_owner=$(stat -c "%U" /opt/data/logs); '
        'gateways_owner=$(stat -c "%U" /opt/data/logs/gateways); '
        'echo "logs=$logs_owner gateways=$gateways_owner"'
    )
    ownership = docker_exec_sh(container_name, ownership_probe, timeout=10)
    report = ownership.stdout
    assert "logs=hermes" in report, (
        f"logs/ not owned by hermes: {report}"
    )
    assert "gateways=hermes" in report, (
        f"logs/gateways/ not owned by hermes: {report}"
    )
|
||||
@@ -26,7 +26,7 @@ from __future__ import annotations
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from tests.docker.conftest import docker_exec_sh
|
||||
from tests.docker.conftest import docker_exec_sh, start_container
|
||||
|
||||
PROFILE = "test-harness-profile"
|
||||
|
||||
@@ -69,12 +69,7 @@ def _svstat_wants_up(container: str) -> bool:
|
||||
def test_profile_create_then_gateway_start(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(3)
|
||||
start_container(built_image, container_name, cmd="sleep 120")
|
||||
|
||||
r = _sh(container_name, f"hermes profile create {PROFILE}")
|
||||
assert r.returncode == 0, f"profile create failed: {r.stderr}"
|
||||
@@ -114,12 +109,7 @@ def test_profile_delete_stops_gateway(
|
||||
) -> None:
|
||||
"""Deleting a profile should stop its gateway and remove the s6
|
||||
service slot."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(3)
|
||||
start_container(built_image, container_name, cmd="sleep 120")
|
||||
|
||||
_sh(container_name, f"hermes profile create {PROFILE}")
|
||||
_sh(container_name, f"hermes -p {PROFILE} gateway start", timeout=60)
|
||||
@@ -135,4 +125,4 @@ def test_profile_delete_stops_gateway(
|
||||
time.sleep(2)
|
||||
# Service slot should be gone.
|
||||
r = _sh(container_name, f"test -d /run/service/gateway-{PROFILE}")
|
||||
assert r.returncode != 0, "s6 service slot still present after profile delete"
|
||||
assert r.returncode != 0, "s6 service slot still present after profile delete"
|
||||
88
tests/docker/test_puid_pgid_remap.py
Normal file
88
tests/docker/test_puid_pgid_remap.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""Runtime smoke tests for Docker PUID/PGID and UID/GID remap.
|
||||
|
||||
Build the real image and verify the actual runtime behavior:
|
||||
|
||||
1. PUID/PGID env vars remap the hermes user UID/GID at boot
|
||||
2. HERMES_UID/HERMES_GID take precedence over PUID/PGID aliases
|
||||
3. NAS-style low UIDs (99:100) are accepted and remapped
|
||||
4. Invalid UIDs are rejected (not yet exercised by a test in this module)
|
||||
5. The remapped user can write to the data volume
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import docker_exec_sh, start_container
|
||||
|
||||
|
||||
def test_puid_pgid_remaps_hermes_user(
    built_image: str, container_name: str,
) -> None:
    """Booting with PUID=1000 and PGID=1000 gives hermes UID 1000 and GID 1000."""
    start_container(built_image, container_name, "PUID=1000", "PGID=1000")

    # Numeric user id of the hermes account inside the container.
    uid_result = docker_exec_sh(container_name, "id -u hermes", timeout=10)
    uid_text = uid_result.stdout.strip()
    assert uid_text == "1000", (
        f"expected hermes UID 1000 after PUID remap, got: {uid_text}"
    )

    # Numeric primary group id of the hermes account.
    gid_result = docker_exec_sh(container_name, "id -g hermes", timeout=10)
    gid_text = gid_result.stdout.strip()
    assert gid_text == "1000", (
        f"expected hermes GID 1000 after PGID remap, got: {gid_text}"
    )
|
||||
|
||||
|
||||
def test_hermes_uid_gid_take_precedence_over_aliases(
    built_image: str, container_name: str,
) -> None:
    """When both pairs are set, HERMES_UID/HERMES_GID beat PUID/PGID.

    The container is booted with HERMES_UID=2000, HERMES_GID=2001 and the
    conflicting aliases PUID=1000, PGID=1000; the hermes account must end
    up with 2000:2001.
    """
    env_settings = ("HERMES_UID=2000", "HERMES_GID=2001", "PUID=1000", "PGID=1000")
    start_container(built_image, container_name, *env_settings)

    uid_text = docker_exec_sh(container_name, "id -u hermes", timeout=10).stdout.strip()
    assert uid_text == "2000", (
        f"expected hermes UID 2000 (HERMES_UID wins), got: {uid_text}"
    )

    gid_text = docker_exec_sh(container_name, "id -g hermes", timeout=10).stdout.strip()
    assert gid_text == "2001", (
        f"expected hermes GID 2001 (HERMES_GID wins), got: {gid_text}"
    )
|
||||
|
||||
|
||||
def test_nas_low_uid_accepted(
    built_image: str, container_name: str,
) -> None:
    """Low NAS-style ids (PUID=99, PGID=100, common on Unraid) are applied."""
    start_container(built_image, container_name, "PUID=99", "PGID=100")

    uid_text = docker_exec_sh(container_name, "id -u hermes", timeout=10).stdout.strip()
    assert uid_text == "99", (
        f"expected hermes UID 99, got: {uid_text}"
    )

    gid_text = docker_exec_sh(container_name, "id -g hermes", timeout=10).stdout.strip()
    assert gid_text == "100", (
        f"expected hermes GID 100, got: {gid_text}"
    )
|
||||
|
||||
|
||||
def test_remap_enables_data_volume_writes(
    built_image: str, container_name: str,
) -> None:
    """With PUID/PGID=1000 applied, creating a file under /opt/data succeeds.

    NOTE(review): this relies on docker_exec_sh running the command as the
    hermes user - confirm against conftest.
    """
    start_container(built_image, container_name, "PUID=1000", "PGID=1000")

    # The shell echoes a sentinel for either outcome so only stdout is read.
    write_probe = "touch /opt/data/test_write && echo WRITE_OK || echo WRITE_FAIL"
    outcome = docker_exec_sh(container_name, write_probe, timeout=10)

    assert "WRITE_OK" in outcome.stdout, (
        f"hermes user cannot write to /opt/data after remap: {outcome.stdout}"
    )
|
||||
@@ -19,10 +19,7 @@ operations work correctly under UID 10000.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from tests.docker.conftest import docker_exec
|
||||
from tests.docker.conftest import docker_exec, start_container
|
||||
|
||||
|
||||
_REGISTER_SCRIPT = """
|
||||
@@ -45,49 +42,39 @@ print("UNREGISTERED")
|
||||
"""
|
||||
|
||||
|
||||
def _exec(container: str, *args: str, timeout: int = 30) -> subprocess.CompletedProcess:
|
||||
return docker_exec(container, *args, timeout=timeout)
|
||||
|
||||
|
||||
def test_s6_register_creates_service_dir_in_live_container(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
"""S6ServiceManager.register_profile_gateway must create
|
||||
``/run/service/gateway-<profile>/`` and trigger s6-svscan rescan
|
||||
against the real s6 supervision tree."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
# Give the supervision tree a moment to come up.
|
||||
time.sleep(3)
|
||||
start_container(built_image, container_name, cmd="sleep 120")
|
||||
|
||||
r = _exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
|
||||
r = docker_exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
|
||||
assert "REGISTERED" in r.stdout, (
|
||||
f"register failed: stderr={r.stderr!r} stdout={r.stdout!r}"
|
||||
)
|
||||
|
||||
# Service directory exists with the expected structure.
|
||||
r = _exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
|
||||
r = docker_exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
|
||||
assert r.returncode == 0, "service directory not created"
|
||||
|
||||
r = _exec(container_name, "test", "-f", "/run/service/gateway-phase3test/run")
|
||||
r = docker_exec(container_name, "test", "-f", "/run/service/gateway-phase3test/run")
|
||||
assert r.returncode == 0, "run script not created"
|
||||
|
||||
r = _exec(container_name, "test", "-f",
|
||||
r = docker_exec(container_name, "test", "-f",
|
||||
"/run/service/gateway-phase3test/log/run")
|
||||
assert r.returncode == 0, "log/run script not created"
|
||||
|
||||
# s6-svscan picked it up — s6-svstat works against the dir.
|
||||
# `docker exec` doesn't put /command/ on PATH (only the supervision
|
||||
# tree does), so call s6-svstat by absolute path.
|
||||
r = _exec(container_name, "/command/s6-svstat",
|
||||
r = docker_exec(container_name, "/command/s6-svstat",
|
||||
"/run/service/gateway-phase3test")
|
||||
assert r.returncode == 0, f"s6-svstat failed: {r.stderr or r.stdout}"
|
||||
|
||||
# list_profile_gateways picks it up.
|
||||
r = _exec(container_name, "python3", "-c", (
|
||||
r = docker_exec(container_name, "python3", "-c", (
|
||||
"from hermes_cli.service_manager import S6ServiceManager;"
|
||||
"print(S6ServiceManager().list_profile_gateways())"
|
||||
))
|
||||
@@ -100,29 +87,24 @@ def test_s6_unregister_removes_service_dir_in_live_container(
|
||||
"""unregister_profile_gateway must stop the service, remove the
|
||||
directory, and trigger s6-svscan rescan so the supervise process
|
||||
is dropped."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(3)
|
||||
start_container(built_image, container_name, cmd="sleep 120")
|
||||
|
||||
# First register so we have something to unregister.
|
||||
r = _exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
|
||||
r = docker_exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
|
||||
assert "REGISTERED" in r.stdout
|
||||
|
||||
# Then unregister.
|
||||
r = _exec(container_name, "python3", "-c", _UNREGISTER_SCRIPT, timeout=30)
|
||||
r = docker_exec(container_name, "python3", "-c", _UNREGISTER_SCRIPT, timeout=30)
|
||||
assert "UNREGISTERED" in r.stdout, (
|
||||
f"unregister failed: stderr={r.stderr!r} stdout={r.stdout!r}"
|
||||
)
|
||||
|
||||
# Directory is gone.
|
||||
r = _exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
|
||||
r = docker_exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
|
||||
assert r.returncode != 0, "service directory still exists after unregister"
|
||||
|
||||
# list_profile_gateways no longer includes it.
|
||||
r = _exec(container_name, "python3", "-c", (
|
||||
r = docker_exec(container_name, "python3", "-c", (
|
||||
"from hermes_cli.service_manager import S6ServiceManager;"
|
||||
"print(S6ServiceManager().list_profile_gateways())"
|
||||
))
|
||||
|
||||
60
tests/docker/test_smoke.py
Normal file
60
tests/docker/test_smoke.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""Runtime smoke tests for the Docker image entrypoint and subcommands.
|
||||
|
||||
Converted from the former ``.github/actions/hermes-smoke-test`` composite
|
||||
action. These tests exercise the image's real ENTRYPOINT (``/init`` +
|
||||
``main-wrapper.sh``) via ``docker run --rm <image> --help`` and
|
||||
``docker run --rm <image> dashboard --help`` to catch basic runtime
|
||||
regressions before publishing.
|
||||
|
||||
The harness expects the ``built_image`` fixture from
|
||||
``tests/docker/conftest.py``. When Docker isn't available every test
|
||||
here is skipped at collection time.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
|
||||
def test_hermes_help(built_image: str) -> None:
    """``docker run --rm <image> --help`` exits 0 and prints no traceback.

    No ``--entrypoint`` override is passed, so the image's own ENTRYPOINT
    (``/init`` + ``main-wrapper.sh``) handles the ``--help`` argument and
    the real startup path is exercised. PR #30136 review found that the
    ``--entrypoint`` override used by the old composite action had turned
    the smoke test into a no-op after the s6-overlay migration, because
    ``stage2-hook`` ignores CMD args passed after an overridden entrypoint.
    """
    help_cmd = ["docker", "run", "--rm", built_image, "--help"]
    completed = subprocess.run(
        help_cmd, capture_output=True, text=True, timeout=60,
    )

    out, err = completed.stdout, completed.stderr
    assert completed.returncode == 0, (
        f"hermes --help failed (exit {completed.returncode}): "
        f"stdout={out[-2000:]!r} stderr={err[-2000:]!r}"
    )
    # A zero exit alone is not enough; a Python traceback on stderr still
    # counts as a failure.
    assert "Traceback" not in err, (
        f"hermes --help produced a traceback: {err[-2000:]!r}"
    )
|
||||
|
||||
|
||||
def test_dashboard_subcommand_present(built_image: str) -> None:
    """``docker run --rm <image> dashboard --help`` exits 0 with help text.

    Regression guard for #9153, where the ``dashboard`` subcommand existed
    in source but was missing from the published image.
    """
    dashboard_cmd = ["docker", "run", "--rm", built_image, "dashboard", "--help"]
    completed = subprocess.run(
        dashboard_cmd, capture_output=True, text=True, timeout=60,
    )

    out, err = completed.stdout, completed.stderr
    assert completed.returncode == 0, (
        f"hermes dashboard --help failed (exit {completed.returncode}): "
        f"stdout={out[-2000:]!r} stderr={err[-2000:]!r}"
    )

    # Help text may land on either stream; search both, case-insensitively.
    combined = (out + err).lower()
    assert any(marker in combined for marker in ("dashboard", "usage")), (
        f"dashboard --help output unexpected: {combined[-2000:]!r}"
    )
|
||||
82
tests/docker/test_stage2_browser_discovery.py
Normal file
82
tests/docker/test_stage2_browser_discovery.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""Runtime smoke tests for Docker stage2 browser executable discovery.
|
||||
|
||||
Build the real image and verify the chromium binary is actually
|
||||
discovered at boot: ``AGENT_BROWSER_EXECUTABLE_PATH`` is set, points to
|
||||
a real executable, and is a browser binary (not a shared library picked
|
||||
up by a broad ``find | grep``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import docker_exec_sh, start_container
|
||||
|
||||
|
||||
def test_stage2_discovers_chromium_binary(
    built_image: str, container_name: str,
) -> None:
    """The boot hook publishes a usable AGENT_BROWSER_EXECUTABLE_PATH.

    Checks, in order, that the s6 container-environment entry exists and
    is non-empty, that the path it names is executable, and that the file's
    basename is one of the known browser binary names. The last check
    guards against discovery picking up an executable-bit shared library
    (libGLESv2.so etc.) instead of the real browser.
    """
    # Basenames treated as a genuine browser binary.
    accepted_names = (
        "chrome", "chromium", "chrome-headless-shell",
        "headless_shell", "chromium-browser",
    )

    start_container(built_image, container_name)

    # The variable is read from the s6 container_environment directory.
    env_read = docker_exec_sh(
        container_name,
        "cat /run/s6/container_environment/AGENT_BROWSER_EXECUTABLE_PATH",
        timeout=10,
    )
    assert env_read.returncode == 0, (
        f"AGENT_BROWSER_EXECUTABLE_PATH not set by stage2 hook: {env_read.stderr}"
    )
    browser_path = env_read.stdout.strip()
    assert browser_path, "AGENT_BROWSER_EXECUTABLE_PATH is empty"

    # The recorded path must be executable.
    exec_check = docker_exec_sh(
        container_name, f'test -x "{browser_path}"', timeout=5,
    )
    assert exec_check.returncode == 0, (
        f"discovered browser path is not executable: {browser_path}"
    )

    # The basename is computed inside the container and matched exactly.
    name_lookup = docker_exec_sh(
        container_name, f'basename "{browser_path}"', timeout=5,
    )
    basename = name_lookup.stdout.strip()
    assert basename in accepted_names, (
        f"discovered binary basename {basename!r} is not a recognized "
        f"browser name (accepted: {accepted_names}) — the discovery may "
        f"have picked up a shared library (.so) instead of the real browser"
    )
|
||||
|
||||
|
||||
def test_stage2_browser_path_accessible_to_hermes_user(
    built_image: str, container_name: str,
) -> None:
    """The discovered browser binary is readable and executable in-container.

    The intent is to check access for the unprivileged hermes user that
    runs agent-browser subprocesses.
    NOTE(review): this assumes docker_exec_sh executes as hermes - confirm
    against conftest.
    """
    start_container(built_image, container_name)

    # Read the recorded path, then require both read and execute permission.
    access_probe = (
        'path="$(cat /run/s6/container_environment/AGENT_BROWSER_EXECUTABLE_PATH)" '
        '&& test -r "$path" && test -x "$path"'
    )
    outcome = docker_exec_sh(container_name, access_probe, timeout=10)

    assert outcome.returncode == 0, (
        f"browser binary not readable+executable by hermes user: {outcome.stderr}"
    )
|
||||
54
tests/docker/test_tini_compat_shim.py
Normal file
54
tests/docker/test_tini_compat_shim.py
Normal file
@@ -0,0 +1,54 @@
|
||||
"""Runtime smoke test for the Docker tini compatibility shim (#34192).
|
||||
|
||||
Build the real image and verify:
|
||||
|
||||
1. /usr/bin/tini exists and is a symlink to /init (the compat shim
|
||||
for orchestration templates that still reference /usr/bin/tini)
|
||||
2. The actual ENTRYPOINT is /init (s6-overlay), not /usr/bin/tini
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
|
||||
def test_tini_compat_symlink_exists(built_image: str) -> None:
    """``/usr/bin/tini`` is a symlink that resolves to ``/init``.

    Regression for #34192: orchestration templates (e.g. Hostinger's
    'Hermes WebUI' catalog) still pin /usr/bin/tini as the entrypoint, so
    the image ships a shim that points it at /init.
    """
    # Run under a plain ``sh`` entrypoint: the path must be a symlink and
    # its fully resolved target must be exactly /init.
    shim_script = (
        'test -L /usr/bin/tini && '
        'test "$(readlink -f /usr/bin/tini)" = "/init"'
    )
    shim_cmd = [
        "docker", "run", "--rm", "--entrypoint", "sh",
        built_image, "-c", shim_script,
    ]
    outcome = subprocess.run(
        shim_cmd, capture_output=True, text=True, timeout=60,
    )

    assert outcome.returncode == 0, (
        f"/usr/bin/tini is not a symlink to /init: {outcome.stderr[-500:]}"
    )
|
||||
|
||||
|
||||
def test_entrypoint_is_init_not_tini(built_image: str) -> None:
    """The image's configured ENTRYPOINT mentions ``/init`` and never tini.

    ``docker inspect`` renders ``.Config.Entrypoint`` as JSON text and the
    assertions are plain substring checks on that text. The tini shim only
    exists for legacy external wrappers; the image itself must keep using
    /init.
    """
    inspect_cmd = [
        "docker", "inspect", built_image,
        "--format", "{{json .Config.Entrypoint}}",
    ]
    inspected = subprocess.run(
        inspect_cmd, capture_output=True, text=True, timeout=30,
    )
    assert inspected.returncode == 0, f"docker inspect failed: {inspected.stderr}"

    entrypoint = inspected.stdout.strip()
    assert "/init" in entrypoint, (
        f"ENTRYPOINT is not /init: {entrypoint!r}"
    )
    # Expected array: ["/init", "/opt/hermes/docker/main-wrapper.sh"].
    # Any occurrence of "tini" (case-insensitive) fails the test.
    lowered = entrypoint.lower()
    assert "tini" not in lowered, (
        f"ENTRYPOINT references tini instead of /init: {entrypoint!r}"
    )
|
||||
93
tests/docker/test_toplevel_chown.py
Normal file
93
tests/docker/test_toplevel_chown.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""Runtime smoke tests for Docker top-level state-file ownership repair.
|
||||
|
||||
Build the real image and verify the actual runtime behavior:
|
||||
|
||||
1. Root-owned top-level state files (auth.json, state.db, gateway.lock,
|
||||
gateway_state.json) are chowned to hermes on boot
|
||||
2. Non-allowlisted host-owned files are NOT touched (targeted, not
|
||||
blanket find -user root sweep)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.docker.conftest import (
|
||||
docker_exec,
|
||||
docker_exec_sh,
|
||||
restart_container,
|
||||
start_container,
|
||||
)
|
||||
|
||||
|
||||
# The files the stage2 hook should repair (mirrors the allowlist in
|
||||
# stage2-hook.sh). We test a representative subset.
|
||||
ALLOWLISTED_FILES = ("auth.json", "state.db", "gateway.lock", "gateway_state.json")
|
||||
|
||||
|
||||
def test_root_owned_state_files_repaired_on_boot(
    built_image: str, container_name: str,
) -> None:
    """Root-owned top-level state files must be chowned to hermes on boot.

    Steps:
      1. Boot the container and create every file in ``ALLOWLISTED_FILES``
         under /opt/data as root (simulating ``docker exec`` root writes).
      2. Confirm each file is reported as root-owned.
      3. Restart the container so the boot-time repair runs again.
      4. Confirm each file is now reported as hermes-owned.

    Ownership is read with a single ``stat -c %U <path> <path> ...`` call,
    which prints one owner per path. The number of owners returned is
    asserted before they are inspected, so an empty or truncated stat
    output fails the test instead of passing vacuously.
    """
    start_container(built_image, container_name)

    # Create root-owned state files to simulate docker exec (root) writes.
    for f in ALLOWLISTED_FILES:
        docker_exec(
            container_name, "touch", f"/opt/data/{f}",
            user="root", timeout=5,
        )

    # One well-formed stat command covering every allowlisted path.
    stat_cmd = "stat -c %U " + " ".join(
        f"/opt/data/{f}" for f in ALLOWLISTED_FILES
    )

    def _owners() -> list[str]:
        """Return the owner names printed by stat, one per allowlisted file."""
        r = docker_exec_sh(container_name, stat_cmd, timeout=5)
        owners = r.stdout.split()
        assert len(owners) == len(ALLOWLISTED_FILES), (
            f"expected {len(ALLOWLISTED_FILES)} owners from stat, got "
            f"stdout={r.stdout!r} stderr={r.stderr!r}"
        )
        return owners

    # Verify they're root-owned before the restart.
    for name, owner in zip(ALLOWLISTED_FILES, _owners()):
        assert owner == "root", f"{name}: expected root-owned, got: {owner}"

    # Restart - stage2 should repair ownership.
    restart_container(container_name)

    # Verify files are now hermes-owned.
    for name, owner in zip(ALLOWLISTED_FILES, _owners()):
        assert owner == "hermes", (
            f"{name}: expected hermes-owned after restart, got: {owner}"
        )
|
||||
|
||||
|
||||
def test_non_allowlisted_host_file_not_touched(
    built_image: str, container_name: str,
) -> None:
    """A root-owned file outside the allowlist keeps its owner across restart.

    Regression guard for #19788 / #19795: a bind-mounted $HERMES_HOME may
    contain host-owned files Hermes does not manage, so the boot-time
    ownership repair must leave them alone.
    """
    target = "/opt/data/host_secret.json"
    start_container(built_image, container_name)

    # Create the file as root, then chown it explicitly so the starting
    # ownership is unambiguous.
    docker_exec(container_name, "touch", target, user="root", timeout=5)
    docker_exec(
        container_name, "chown", "root:root", target,
        user="root", timeout=5,
    )

    restart_container(container_name)

    # After the restart the owner must still be root.
    owner_lookup = docker_exec_sh(
        container_name, "stat -c %U /opt/data/host_secret.json", timeout=5,
    )
    owner = owner_lookup.stdout.strip()
    assert owner == "root", (
        f"non-allowlisted host file was chowned by stage2 (should be "
        f"preserved): {owner}"
    )
|
||||
66
tests/docker/test_user_flag_guard.py
Normal file
66
tests/docker/test_user_flag_guard.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Runtime smoke tests for Docker --user flag guard.
|
||||
|
||||
Build the real image and verify the actual runtime behavior:
|
||||
|
||||
1. docker run --user <arbitrary-uid> is rejected with actionable guidance
|
||||
2. Root start (default) works fine
|
||||
3. --user <hermes-uid> (10000) is allowed (supported non-root start)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
|
||||
def test_arbitrary_user_uid_rejected(
    built_image: str,
) -> None:
    """``docker run --user 1000:1000`` fails with actionable guidance.

    The run must exit non-zero, the requested ``echo`` must never execute,
    and the combined output must say "not supported" and name at least one
    remediation variable (HERMES_UID or PUID).
    """
    run_cmd = [
        "docker", "run", "--rm", "--user", "1000:1000",
        built_image, "echo", "should_not_reach",
    ]
    outcome = subprocess.run(
        run_cmd, capture_output=True, text=True, timeout=60,
    )

    assert outcome.returncode != 0, (
        f"container started with arbitrary --user UID unexpectedly: {outcome.stdout}"
    )
    assert "should_not_reach" not in outcome.stdout, (
        f"container ran despite --user rejection: {outcome.stdout}"
    )

    combined = outcome.stdout + outcome.stderr
    assert "not supported" in combined.lower(), (
        f"rejection message missing 'not supported': {combined[-500:]}"
    )
    # Must mention the remediation env vars.
    assert any(var in combined for var in ("HERMES_UID", "PUID")), (
        f"rejection message missing remediation guidance: {combined[-500:]}"
    )
|
||||
|
||||
|
||||
def test_root_start_works(
    built_image: str,
) -> None:
    """A default start (no ``--user`` flag) runs the given command and exits 0."""
    run_cmd = ["docker", "run", "--rm", built_image, "sh", "-c", "echo OK"]
    outcome = subprocess.run(
        run_cmd, capture_output=True, text=True, timeout=60,
    )

    assert outcome.returncode == 0, f"root start failed: {outcome.stderr[-500:]}"
    assert "OK" in outcome.stdout
|
||||
|
||||
|
||||
def test_user_pinned_to_hermes_uid_works(
    built_image: str,
) -> None:
    """``docker run --user 10000:10000`` (the hermes UID) is accepted.

    This is the supported non-root start from #34648 / #34837.
    """
    run_cmd = [
        "docker", "run", "--rm", "--user", "10000:10000",
        built_image, "sh", "-c", "echo OK",
    ]
    outcome = subprocess.run(
        run_cmd, capture_output=True, text=True, timeout=60,
    )

    assert outcome.returncode == 0, (
        f"--user 10000:10000 (hermes UID) was rejected: {outcome.stderr[-500:]}"
    )
    assert "OK" in outcome.stdout
|
||||
@@ -12,22 +12,16 @@ docstring.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from tests.docker.conftest import docker_exec, docker_exec_sh
|
||||
# Helpers shared by the Docker runtime tests (each name imported once).
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container
|
||||
|
||||
|
||||
def test_orphan_zombies_reaped(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
"""Spawn an orphan child that exits immediately. PID 1 must reap it."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "60"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(2)
|
||||
start_container(built_image, container_name, cmd="sleep 60")
|
||||
|
||||
# `( ( sleep 0.1 & ) & ); sleep 1` creates a grandchild detached from
|
||||
# the original docker exec session — it becomes an orphan reparented
|
||||
@@ -42,4 +36,4 @@ def test_orphan_zombies_reaped(
|
||||
line for line in r.stdout.split("\n")
|
||||
if line.strip().startswith("Z")
|
||||
]
|
||||
assert not zombies, f"Zombies not reaped by PID 1: {zombies}"
|
||||
assert not zombies, f"Zombies not reaped by PID 1: {zombies}"
|
||||
@@ -27,7 +27,6 @@ import pytest
|
||||
# against each other (and against any other file that also touches
|
||||
# ``app.state``) — the marker name is shared across all dashboard-auth test
|
||||
# files that gate the app.
|
||||
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
|
||||
from fastapi import FastAPI
|
||||
from fastapi.responses import Response
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
@@ -10,7 +10,6 @@ import pytest
|
||||
# against each other (and against any other file that also touches
|
||||
# ``app.state``) — the marker name is shared across all dashboard-auth test
|
||||
# files that gate the app.
|
||||
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from hermes_cli import web_server
|
||||
|
||||
@@ -16,12 +16,6 @@ from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
# Phase 5 / Phase 6: these tests mutate ``web_server.app.state.auth_required``
|
||||
# at module level. Run them in the same xdist worker so they don't race
|
||||
# against each other (and against any other file that also touches
|
||||
# ``app.state``) — the marker name is shared across all dashboard-auth test
|
||||
# files that gate the app.
|
||||
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from hermes_cli import web_server
|
||||
|
||||
@@ -16,11 +16,6 @@ import time
|
||||
|
||||
import pytest
|
||||
|
||||
# These tests mutate ``web_server.app.state.auth_required`` at module level,
|
||||
# so they share the dashboard-auth app-state xdist group to avoid racing
|
||||
# other gate tests.
|
||||
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from hermes_cli import web_server
|
||||
|
||||
@@ -32,10 +32,6 @@ from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
# Same xdist group as the other dashboard-auth tests — they all mutate
|
||||
# web_server.app.state.auth_required at module level.
|
||||
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from hermes_cli import web_server
|
||||
|
||||
@@ -20,10 +20,6 @@ from hermes_cli import web_server
|
||||
from hermes_cli.dashboard_auth import clear_providers, register_provider
|
||||
from tests.hermes_cli.conftest_dashboard_auth import StubAuthProvider
|
||||
|
||||
# These tests mutate ``web_server.app.state.auth_required`` so they share
|
||||
# the same xdist group as the other dashboard-auth gated_app tests.
|
||||
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def gated_client():
|
||||
|
||||
@@ -17,12 +17,6 @@ from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
# Phase 5 / Phase 6: these tests mutate ``web_server.app.state.auth_required``
|
||||
# at module level. Run them in the same xdist worker so they don't race
|
||||
# against each other (and against any other file that also touches
|
||||
# ``app.state``) — the marker name is shared across all dashboard-auth test
|
||||
# files that gate the app.
|
||||
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from hermes_cli import web_server
|
||||
|
||||
@@ -1,91 +0,0 @@
|
||||
"""Regression tests for Docker HOME overrides under s6/with-contenv."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
DASHBOARD_RUN = REPO_ROOT / "docker" / "s6-rc.d" / "dashboard" / "run"
|
||||
MAIN_WRAPPER = REPO_ROOT / "docker" / "main-wrapper.sh"
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
def test_main_wrapper_preserves_docker_workdir() -> None:
    """main-wrapper.sh saves the original cwd and restores it before exec.

    Static text check on the wrapper script (regression test for #35472):
    the container must start in the Docker ``-w`` directory, not /opt/data.
    The restoring ``cd`` has to sit after venv activation and before the
    ``if [ $# -eq 0 ]`` routing block.
    """
    script = MAIN_WRAPPER.read_text(encoding="utf-8")

    # The original cwd must be captured before the script moves to /opt/data.
    assert "_hermes_orig_cwd" in script, (
        "main-wrapper.sh must save the original cwd before cd /opt/data"
    )
    assert 'HERMES_ORIG_CWD:-$PWD' in script, (
        "main-wrapper.sh must capture PWD as the fallback original cwd"
    )

    # Existing behaviour: init still happens from /opt/data.
    assert "cd /opt/data" in script

    # Ordering check on the first occurrence of each marker; a missing
    # marker surfaces as ValueError from str.index.
    venv_pos = script.index("/opt/hermes/.venv/bin/activate")
    restore_pos = script.index('cd "$_hermes_orig_cwd"')
    routing_pos = script.index("if [ $# -eq 0 ]")
    assert venv_pos < restore_pos < routing_pos, (
        "cd $_hermes_orig_cwd must appear after venv activation and "
        "before the exec routing block"
    )
|
||||
|
||||
|
||||
def test_dashboard_run_resets_home_before_dropping_privileges() -> None:
    """The s6 dashboard run script must use the with-contenv shebang,
    export ``HOME=/opt/data``, and exec the dashboard through
    ``s6-setuidgid hermes``.

    Only the presence of each snippet is checked, not their relative order.
    """
    text = DASHBOARD_RUN.read_text(encoding="utf-8")

    assert "#!/command/with-contenv sh" in text
    assert "export HOME=/opt/data" in text
    assert "exec s6-setuidgid hermes hermes dashboard" in text
|
||||
|
||||
|
||||
def test_dashboard_run_does_not_derive_insecure_from_bind_host() -> None:
    """The s6 dashboard run script MUST NOT auto-add ``--insecure`` based on
    ``HERMES_DASHBOARD_HOST``. Doing so disables the OAuth auth gate on
    every non-loopback bind even when an auth provider is registered —
    the exact regression that exposed every wildcard-subdomain agent
    dashboard publicly until early 2026.

    The opt-in is now explicit: ``HERMES_DASHBOARD_INSECURE=1`` (truthy).
    The auth gate is the authority on whether non-loopback binds are safe.
    """
    text = DASHBOARD_RUN.read_text(encoding="utf-8")

    # No legacy host-derived flip.
    assert '127.0.0.1|localhost' not in text, (
        "Run script still derives --insecure from the bind host. The gate "
        "is the authority now — opt in via HERMES_DASHBOARD_INSECURE instead."
    )
    assert 'case "$dash_host" in' not in text, (
        "Legacy host-derived --insecure case-statement is back."
    )

    # New opt-in env var present.
    assert "HERMES_DASHBOARD_INSECURE" in text, (
        "Explicit HERMES_DASHBOARD_INSECURE opt-in is missing."
    )
    # Truthy values aligned with the rest of the s6 scripts
    # (e.g. HERMES_DASHBOARD).
    # NOTE(review): these are plain substring checks against the whole
    # script, so short values such as "1" match anywhere in the file.
    for truthy in ("1", "true", "TRUE", "True", "yes", "YES", "Yes"):
        assert truthy in text, (
            f"HERMES_DASHBOARD_INSECURE should accept truthy value {truthy!r}"
        )
|
||||
|
||||
|
||||
def test_stage2_hook_repairs_profiles_and_cron_ownership_on_every_boot() -> None:
    """profiles/ and cron/ must both be reclaimed after root-context writes.

    Checks that stage2-hook.sh contains, for each directory, the existence
    guard and the best-effort recursive chown to ``hermes:hermes``.
    """
    text = STAGE2_HOOK.read_text(encoding="utf-8")

    assert 'if [ -d "$HERMES_HOME/profiles" ]; then' in text
    assert 'chown -R hermes:hermes "$HERMES_HOME/profiles" 2>/dev/null || true' in text

    assert 'if [ -d "$HERMES_HOME/cron" ]; then' in text
    assert 'chown -R hermes:hermes "$HERMES_HOME/cron" 2>/dev/null || true' in text
|
||||
@@ -1,19 +0,0 @@
|
||||
"""Regression tests for Docker stage2 browser executable discovery."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def test_stage2_discovers_playwright_arm64_headless_shell() -> None:
    """Playwright's --only-shell layout may use a headless_shell basename."""
    # NOTE(review): the path is relative to the current working directory,
    # so this test only works when pytest is launched from the repo root.
    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    script = Path("docker/stage2-hook.sh").read_text(encoding="utf-8")

    assert "-name 'headless_shell'" in script
|
||||
|
||||
|
||||
def test_stage2_discovery_stays_filename_matched() -> None:
    """Avoid broad path grep that can pick executable shared libraries.

    Slices the ``browser_bin=$(...)`` command substitution out of
    stage2-hook.sh and checks that it uses ``find ... -type f -executable``
    and contains no ``grep``.
    """
    # NOTE(review): cwd-relative path — assumes pytest runs from the repo root.
    script = Path("docker/stage2-hook.sh").read_text(encoding="utf-8")

    # Fail with a readable message instead of an IndexError on the split below.
    assert "browser_bin=$(" in script, (
        "stage2-hook.sh must assign browser_bin via a command substitution"
    )
    # NOTE(review): the whitespace inside the ")\n if" terminator may have
    # been collapsed when this file was extracted — confirm against the
    # script's real indentation. If the terminator is absent the slice runs
    # to the end of the script.
    discovery_block = script.split("browser_bin=$(", 1)[1].split(")\n if", 1)[0]
    assert "find \"$PLAYWRIGHT_BROWSERS_PATH\" -type f -executable" in discovery_block
    assert "grep" not in discovery_block
|
||||
@@ -1,49 +0,0 @@
|
||||
"""Regression test for #34192 — Dockerfile must keep the tini compat shim
|
||||
for orchestration templates that still reference /usr/bin/tini.
|
||||
|
||||
This is a documentation-as-test guard: removing the shim is a real
|
||||
choice, but it should be done deliberately (e.g. once Hostinger's
|
||||
'Hermes WebUI' catalog updates to /init) and not by accident.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _dockerfile_text() -> str:
    """Return the UTF-8 text of the ``Dockerfile`` located two directory
    levels above this test file."""
    return (Path(__file__).parent.parent / "Dockerfile").read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def test_tini_compat_symlink_present():
    """The /usr/bin/tini -> /init symlink line must exist for #34192."""
    df = _dockerfile_text()
    assert "ln -sf /init /usr/bin/tini" in df, (
        "Dockerfile must keep the tini compat symlink (#34192). "
        "Removing it breaks orchestration templates that still pin "
        "/usr/bin/tini as the entrypoint (Hostinger 'Hermes WebUI' "
        "catalog as of v0.14.x)."
    )
|
||||
|
||||
|
||||
def test_tini_compat_comment_explains_why():
    """The symlink line is comment-anchored to #34192 so a future reader
    knows why it exists. Removing the comment makes it look like dead
    code worth deleting.

    The check is a substring search for ``#34192`` anywhere in the
    Dockerfile; it does not verify the anchor sits next to the symlink.
    """
    df = _dockerfile_text()
    assert "#34192" in df, (
        "The Dockerfile tini compat shim must keep its #34192 anchor "
        "comment so future maintainers know why the symlink is there."
    )
|
||||
|
||||
|
||||
def test_entrypoint_still_init_not_tini():
    """Sanity check: the actual ENTRYPOINT is still /init (s6-overlay).
    The shim is for legacy external wrappers, not for the image's own
    runtime — that path must continue to use the canonical /init."""
    df = _dockerfile_text()
    # Matches the exec-form prefix only, so extra ENTRYPOINT arguments
    # after "/init" are allowed.
    assert 'ENTRYPOINT [ "/init"' in df, (
        "Dockerfile ENTRYPOINT must remain /init (s6-overlay). The "
        "tini shim is only for external wrappers that haven't been "
        "updated yet."
    )
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Guards for the multi-container Hermes WebUI install surface."""
|
||||
|
||||
"""Test that setup.py uses temporary output directories when the source
|
||||
tree is read-only (as it is inside the Docker WebUI install surface).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
@@ -20,18 +21,6 @@ def _is_under(path: str, root: Path) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def test_docker_context_includes_license_file() -> None:
    """PEP 639 license-files metadata must resolve inside the Docker image.

    Reads ``.dockerignore``, drops blank and ``#`` comment lines, and
    asserts that no remaining line is exactly ``LICENSE``.
    """
    dockerignore = (REPO_ROOT / ".dockerignore").read_text(encoding="utf-8")
    active_lines = [
        line.strip()
        for line in dockerignore.splitlines()
        if line.strip() and not line.lstrip().startswith("#")
    ]

    # Exact-match check: patterns such as "LICENSE*" or "/LICENSE" are not
    # detected by this assertion.
    assert "LICENSE" not in active_lines
|
||||
|
||||
|
||||
def test_setup_uses_temporary_outputs_when_source_tree_is_read_only(
|
||||
monkeypatch,
|
||||
) -> None:
|
||||
@@ -12,22 +12,16 @@ def _dockerfile_text() -> str:
|
||||
return DOCKERFILE.read_text()
|
||||
|
||||
|
||||
def test_dockerfile_makes_opt_hermes_root_owned_and_non_writable() -> None:
|
||||
def test_dockerfile_makes_opt_hermes_readonly_for_hermes_user() -> None:
|
||||
text = _dockerfile_text()
|
||||
|
||||
assert "COPY --chown=hermes:hermes . ." not in text
|
||||
assert "COPY . ." in text
|
||||
assert "chown -R root:root /opt/hermes" in text
|
||||
assert "chmod -R a+rX /opt/hermes" in text
|
||||
assert "chmod -R a-w /opt/hermes" in text
|
||||
|
||||
immutable_block = re.search(
|
||||
r"RUN mkdir -p /opt/hermes/bin && \\\n"
|
||||
r"(?:.*\\\n)+?"
|
||||
r"\s+chmod -R a-w /opt/hermes",
|
||||
text,
|
||||
)
|
||||
assert immutable_block, "Dockerfile must lock /opt/hermes after installing code/deps"
|
||||
# --chmod on the source COPY bakes read-only perms at copy time instead
|
||||
# of a separate chmod -R pass (which walked ~30k files — #49113).
|
||||
assert "COPY --link --chmod=a+rX,go-w . ." in text
|
||||
# The old tree-walking passes must not be present.
|
||||
assert "chown -R root:root /opt/hermes" not in text
|
||||
assert "chmod -R a+rX /opt/hermes" not in text
|
||||
assert "chmod -R a-w /opt/hermes" not in text
|
||||
|
||||
|
||||
def test_dockerfile_keeps_mutable_state_under_opt_data() -> None:
|
||||
@@ -68,22 +62,20 @@ def test_dockerfile_bakes_code_scoped_install_method_stamp() -> None:
|
||||
(/opt/hermes/.install_method) first; baking it at build time keeps the
|
||||
published image self-identifying as 'docker' WITHOUT writing into the
|
||||
shared $HERMES_HOME data volume (which a host install may also use).
|
||||
It must live inside the immutable block so the runtime user can't alter it.
|
||||
The stamp is created by root in the shim-wiring RUN block; the hermes
|
||||
user can't modify it (go-w from the --chmod on the source COPY).
|
||||
"""
|
||||
text = _dockerfile_text()
|
||||
assert "printf 'docker\\n' > /opt/hermes/.install_method" in text
|
||||
|
||||
immutable_block = re.search(
|
||||
# The stamp must be in the RUN block that wires the exec shim.
|
||||
shim_block = re.search(
|
||||
r"RUN mkdir -p /opt/hermes/bin && \\\n"
|
||||
r"(?:.*\\\n)+?"
|
||||
r"\s+chmod -R a-w /opt/hermes",
|
||||
r"\s+printf 'docker\\n' > /opt/hermes/\.install_method",
|
||||
text,
|
||||
)
|
||||
assert immutable_block, "immutable block must exist"
|
||||
assert ".install_method" in immutable_block.group(0), (
|
||||
"the code-scoped install-method stamp must be baked inside the "
|
||||
"immutable /opt/hermes block"
|
||||
)
|
||||
assert shim_block, "install-method stamp must be in the shim-wiring RUN block"
|
||||
|
||||
|
||||
def test_dockerfile_redirects_lazy_installs_to_durable_target() -> None:
|
||||
|
||||
@@ -1,152 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook seeds gateway_state.json from
|
||||
HERMES_GATEWAY_BOOTSTRAP_STATE on first boot, so a freshly-provisioned
|
||||
container can come up with the gateway already running.
|
||||
|
||||
Background. On a blank volume there is no gateway_state.json, so the boot
|
||||
reconciler (cont-init.d/02-reconcile-profiles ->
|
||||
container_boot.reconcile_profile_gateways) registers the gateway-default s6
|
||||
slot but leaves it DOWN — it only auto-starts when the last recorded state was
|
||||
"running". A container provisioned on a fresh volume therefore comes up with
|
||||
the gateway down until something starts it.
|
||||
|
||||
An orchestrator that wants the gateway running from first boot sets
|
||||
HERMES_GATEWAY_BOOTSTRAP_STATE=running; stage2-hook.sh (installed as
|
||||
/etc/cont-init.d/01-hermes-setup, which runs lexicographically BEFORE
|
||||
02-reconcile-profiles) seeds the state file so the reconciler sees
|
||||
prior_state=running and brings the slot up on the very first boot.
|
||||
|
||||
This mirrors the existing HERMES_AUTH_JSON_BOOTSTRAP env-seed pattern: it seeds
|
||||
the SAME gateway_state.json the reconciler already consults, guarded by
|
||||
``[ ! -f ]`` so persisted runtime state always wins on subsequent boots (a
|
||||
deliberately-stopped gateway must stay stopped across restarts).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Module-scoped fixture returning the text of ``docker/stage2-hook.sh``.

    Skips the requesting tests when the script is absent from the checkout.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    # Decode explicitly so the result does not depend on the runner's locale.
    return STAGE2_HOOK.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def _seed_block(text: str) -> str:
|
||||
"""Extract the ``if [ ! -f "$HERMES_HOME/gateway_state.json" ] && … fi``
|
||||
block that seeds the gateway state file from the bootstrap env var."""
|
||||
m = re.search(
|
||||
r'(if \[ ! -f "\$HERMES_HOME/gateway_state\.json" \] && \\\n'
|
||||
r"(?:.*\n)*?fi)",
|
||||
text,
|
||||
)
|
||||
assert m, (
|
||||
"stage2-hook.sh must contain the gateway_state.json bootstrap-seed block "
|
||||
"guarded on HERMES_GATEWAY_BOOTSTRAP_STATE"
|
||||
)
|
||||
return m.group(1)
|
||||
|
||||
|
||||
def test_seed_block_present_and_guarded(stage2_text: str) -> None:
    """The bootstrap-seed block must exist, carry the ``[ ! -f ]`` guard,
    and reference both the env var and the ``gateway_state`` key."""
    block = _seed_block(stage2_text)
    # Must be a first-boot-only seed (the [ ! -f ] guard) keyed on the env var.
    assert '[ ! -f "$HERMES_HOME/gateway_state.json" ]' in block, (
        "seed must be guarded by [ ! -f ] so persisted state wins on restart"
    )
    assert "HERMES_GATEWAY_BOOTSTRAP_STATE" in block
    assert "gateway_state" in block
|
||||
|
||||
|
||||
def _run_seed(
    text: str, *, env_value: str | None, preexisting: str | None
) -> str | None:
    """Run the extracted seed block in a sandbox $HERMES_HOME.

    ``env_value`` is the HERMES_GATEWAY_BOOTSTRAP_STATE value (None = unset).
    ``preexisting`` is the contents of a gateway_state.json placed before the
    block runs (None = no file). Returns the file's contents afterwards, or
    None if it doesn't exist. ``chown``/``chmod`` are stubbed so the block
    runs without real root.

    Skips the calling test when ``bash`` is not on PATH, and fails it when
    the harness script exits non-zero.
    """
    bash = shutil.which("bash")
    if bash is None:
        pytest.skip("bash not available")
    block = _seed_block(text)

    with tempfile.TemporaryDirectory() as d:
        dpath = Path(d)
        home = dpath / "home"
        home.mkdir()
        state_file = home / "gateway_state.json"
        if preexisting is not None:
            state_file.write_text(preexisting, encoding="utf-8")

        # NOTE(review): env_value is interpolated into a double-quoted shell
        # string unescaped; fine for the fixed literals the tests pass in.
        env_line = (
            f'export HERMES_GATEWAY_BOOTSTRAP_STATE="{env_value}"\n'
            if env_value is not None
            else "unset HERMES_GATEWAY_BOOTSTRAP_STATE\n"
        )
        script = (
            "set -e\n"
            f'HERMES_HOME="{home}"\n'
            # Stub privilege ops — the sandbox isn't root.
            "chown() { :; }\n"
            "chmod() { :; }\n"
            + env_line
            + block
        )
        script_path = dpath / "harness.sh"
        script_path.write_text(script, encoding="utf-8")

        proc = subprocess.run(
            [bash, str(script_path)], capture_output=True, text=True
        )
        assert proc.returncode == 0, proc.stderr

        # Read the result before the temporary directory is cleaned up.
        if not state_file.exists():
            return None
        return state_file.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def test_seeds_running_state_on_blank_volume(stage2_text: str) -> None:
    """env=running + no pre-existing file -> writes a valid running state."""
    out = _run_seed(stage2_text, env_value="running", preexisting=None)
    assert out is not None, "seed must create gateway_state.json"
    # The seeded file must be valid JSON with gateway_state == "running".
    assert json.loads(out).get("gateway_state") == "running"
|
||||
|
||||
|
||||
def test_does_not_clobber_existing_state(stage2_text: str) -> None:
    """The [ ! -f ] guard: an existing state file is never overwritten, even
    when the bootstrap env var says running. A deliberately-stopped gateway
    must stay stopped across restarts."""
    existing = json.dumps({"gateway_state": "stopped", "pid": 123})
    out = _run_seed(stage2_text, env_value="running", preexisting=existing)
    # Byte-for-byte equality: the file must be left completely untouched.
    assert out == existing, "seed must not clobber a persisted state file"
|
||||
|
||||
|
||||
def test_no_seed_when_env_unset(stage2_text: str) -> None:
    """No env var -> no file written (preserves the default down-on-first-boot
    behaviour for orchestrators that don't opt in)."""
    out = _run_seed(stage2_text, env_value=None, preexisting=None)
    assert out is None, "seed must not run when HERMES_GATEWAY_BOOTSTRAP_STATE is unset"
|
||||
|
||||
|
||||
def test_non_running_value_ignored(stage2_text: str) -> None:
    """Only a literal "running" is honoured; any other value is ignored so a
    typo can't write a bogus state. (The reconciler's _AUTOSTART_STATES is
    exactly {"running"}.)"""
    # Includes a case variant ("Running") to pin the match as case-sensitive.
    for bogus in ("stopped", "Running", "1", "true", "starting"):
        out = _run_seed(stage2_text, env_value=bogus, preexisting=None)
        assert out is None, (
            f"only 'running' should seed a state file, not {bogus!r}"
        )
|
||||
@@ -1,48 +0,0 @@
|
||||
"""Contract tests for the Docker stage2 immutable install-tree policy.
|
||||
|
||||
Hosted/container Hermes keeps user-writable state under HERMES_HOME
|
||||
(/opt/data). The installed source, venv, TUI bundle, and node_modules under
|
||||
/opt/hermes must remain root-owned/non-writable by the runtime hermes user so
|
||||
an agent session cannot self-modify the installation and brick the gateway.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Module-scoped fixture returning the text of ``docker/stage2-hook.sh``.

    Skips the requesting tests when the script is absent from the checkout.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    # Decode explicitly so the result does not depend on the runner's locale.
    return STAGE2_HOOK.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def test_stage2_does_not_chown_install_tree_to_hermes(stage2_text: str) -> None:
|
||||
assert "Fixing ownership of build trees under $INSTALL_DIR" not in stage2_text
|
||||
assert 'chown -R hermes:hermes \\\n "$INSTALL_DIR/.venv"' not in stage2_text
|
||||
|
||||
assert "venv_owner=$(stat -c %u \"$INSTALL_DIR/.venv\"" not in stage2_text
|
||||
assert "chown of build trees failed" not in stage2_text
|
||||
for install_tree in (
|
||||
'"$INSTALL_DIR/.venv" \\',
|
||||
'"$INSTALL_DIR/ui-tui" \\',
|
||||
'"$INSTALL_DIR/gateway" \\',
|
||||
'"$INSTALL_DIR/node_modules" \\',
|
||||
):
|
||||
assert install_tree not in stage2_text, (
|
||||
f"stage2 must not chown {install_tree} back to hermes; "
|
||||
"the Dockerfile keeps /opt/hermes immutable and writable state "
|
||||
"belongs under HERMES_HOME"
|
||||
)
|
||||
|
||||
|
||||
def test_stage2_documents_immutable_install_contract(stage2_text: str) -> None:
|
||||
assert "Immutable install tree" in stage2_text
|
||||
assert "PYTHONDONTWRITEBYTECODE" in stage2_text
|
||||
assert "HERMES_DISABLE_LAZY_INSTALLS=1" in stage2_text
|
||||
assert "/opt/hermes" in stage2_text
|
||||
@@ -1,61 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook must NOT stamp the install method
|
||||
into the shared $HERMES_HOME, and must heal a stale 'docker' stamp left there
|
||||
by older images.
|
||||
|
||||
Background (shared-$HERMES_HOME bug)
|
||||
------------------------------------
|
||||
$HERMES_HOME (/opt/data) is a DATA volume that users commonly bind-mount from
|
||||
the host (``~/.hermes:/opt/data``) and sometimes share with a host-side
|
||||
Desktop/CLI install. Older images wrote ``printf 'docker' > $HERMES_HOME/.install_method``
|
||||
at boot, which clobbered the host install's own marker — so the host's in-app
|
||||
updater read 'docker' and refused to run ``hermes update`` ("doesn't apply
|
||||
inside the Docker container").
|
||||
|
||||
The fix scopes the stamp to the install tree (baked at
|
||||
``/opt/hermes/.install_method`` in the Dockerfile, read first by
|
||||
``detect_install_method``). stage2 must therefore:
|
||||
|
||||
* NOT write the 'docker' stamp into $HERMES_HOME any more, and
|
||||
* proactively remove a stale 'docker' stamp from $HERMES_HOME so homes
|
||||
already poisoned by an older image self-heal on the next boot.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Module-scoped fixture returning the text of ``docker/stage2-hook.sh``.

    Skips the requesting tests when the script is absent from the checkout.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    # Decode explicitly so the result does not depend on the runner's locale.
    return STAGE2_HOOK.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def test_stage2_does_not_write_install_method_into_home(stage2_text: str) -> None:
|
||||
# No write/tee of the home-scoped install-method stamp anywhere.
|
||||
assert not re.search(
|
||||
r"(tee|>)\s*\"?\$HERMES_HOME/\.install_method", stage2_text
|
||||
), (
|
||||
"stage2 must not stamp $HERMES_HOME/.install_method — that data dir "
|
||||
"may be shared with a host install whose marker would be clobbered"
|
||||
)
|
||||
|
||||
|
||||
def test_stage2_heals_stale_docker_home_stamp(stage2_text: str) -> None:
|
||||
# It must remove a stale 'docker' stamp from $HERMES_HOME so already
|
||||
# poisoned shared homes recover.
|
||||
assert 'rm -f "$HERMES_HOME/.install_method"' in stage2_text, (
|
||||
"stage2 must remove a stale 'docker' stamp from $HERMES_HOME to heal "
|
||||
"homes poisoned by older images"
|
||||
)
|
||||
# The removal must be guarded on the value being 'docker' so we never
|
||||
# delete a legitimately-different stamp a user/host install put there.
|
||||
assert re.search(r'\[\s*"\$stamped"\s*=\s*"docker"\s*\]', stage2_text), (
|
||||
"the stale-stamp removal must be guarded on the value == 'docker'"
|
||||
)
|
||||
@@ -1,60 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook seeds $HERMES_HOME/logs/gateways
|
||||
as the hermes user.
|
||||
|
||||
Regression guard for #45258: the per-profile gateway log service
|
||||
(`gateway-<profile>/log/run`) creates `logs/gateways/` via `mkdir -p` but only
|
||||
chowns the leaf `logs/gateways/<profile>`. If the first log service to boot
|
||||
runs in root context, the `gateways/` parent is created root-owned and stays
|
||||
that way; every profile registered later runs its log service as the dropped
|
||||
hermes user and s6-log crash-loops on `mkdir: Permission denied`.
|
||||
|
||||
Seeding `logs/gateways` in stage2 (cont-init runs before any service starts)
|
||||
guarantees the parent already exists hermes-owned by the time the first
|
||||
log/run executes its `mkdir -p`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Module-scoped fixture returning the text of ``docker/stage2-hook.sh``.

    Skips the requesting tests when the script is absent from the checkout.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    # Decode explicitly so the result does not depend on the runner's locale.
    return STAGE2_HOOK.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def _seed_mkdir_block(text: str) -> str:
|
||||
"""Extract the `as_hermes mkdir -p \\ ...` seed block."""
|
||||
m = re.search(r"as_hermes mkdir -p \\\n(?:[^\n]*\\\n)*[^\n]*\n", text)
|
||||
assert m, "stage2-hook.sh must contain the as_hermes mkdir -p seed block"
|
||||
return m.group(0)
|
||||
|
||||
|
||||
def test_logs_gateways_is_seeded(stage2_text: str) -> None:
    """The ``as_hermes mkdir -p`` seed block must list both
    ``$HERMES_HOME/logs`` and ``$HERMES_HOME/logs/gateways``."""
    block = _seed_mkdir_block(stage2_text)
    assert '"$HERMES_HOME/logs/gateways"' in block, (
        "logs/gateways must be seeded hermes-owned in stage2 so profiles "
        "added after first boot can create their log dirs (#45258)"
    )
    # The parent must also be seeded so mkdir -p inside the block never
    # creates logs/ implicitly with surprising ownership.
    assert '"$HERMES_HOME/logs"' in block
|
||||
|
||||
|
||||
def test_logs_subtree_is_healed_when_chown_needed(stage2_text: str) -> None:
|
||||
"""The needs_chown repair loop must cover the logs subtree recursively —
|
||||
that is what makes the seed entry above sufficient (no separate
|
||||
logs/gateways loop entry needed)."""
|
||||
m = re.search(r"for sub in ([^;]*); do", stage2_text)
|
||||
assert m, "stage2-hook.sh must contain the needs_chown subdir repair loop"
|
||||
assert "logs" in m.group(1).split(), (
|
||||
"the needs_chown loop must recursively chown logs/ — it covers "
|
||||
"logs/gateways, so the seed list does not need a loop twin"
|
||||
)
|
||||
@@ -1,110 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook accepts PUID/PGID as aliases for
|
||||
HERMES_UID/HERMES_GID.
|
||||
|
||||
Regression guard for #15290. NAS platforms (UGOS, Synology, unRAID) bind-mount
|
||||
/opt/data from a host directory owned by the user's own UID and expect the
|
||||
LinuxServer.io PUID/PGID convention. Without the alias those vars are silently
|
||||
ignored, the s6-setuidgid drop lands on UID 10000, and the runtime cannot read
|
||||
the volume. HERMES_UID/HERMES_GID must still take precedence when both are
|
||||
set.
|
||||
|
||||
The s6-overlay rework moved bootstrap from docker/entrypoint.sh (now a shim)
|
||||
to docker/stage2-hook.sh, which is installed as /etc/cont-init.d/01-hermes-setup
|
||||
by the Dockerfile. This test targets the post-rework location.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Module-scoped fixture returning the text of ``docker/stage2-hook.sh``.

    Skips the requesting tests when the script is absent from the checkout.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    # Decode explicitly so the result does not depend on the runner's locale.
    return STAGE2_HOOK.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def _alias_lines(text: str) -> list[str]:
|
||||
"""The stage2 hook lines that resolve HERMES_UID/HERMES_GID from aliases."""
|
||||
return [
|
||||
line.strip()
|
||||
for line in text.splitlines()
|
||||
if line.strip().startswith(("HERMES_UID=", "HERMES_GID="))
|
||||
]
|
||||
|
||||
|
||||
def test_stage2_hook_resolves_puid_pgid_aliases(stage2_text: str) -> None:
    """At least one ``HERMES_UID=``/``HERMES_GID=`` assignment line in the
    hook must mention ``PUID`` and at least one must mention ``PGID``."""
    alias_lines = _alias_lines(stage2_text)
    assert any("PUID" in line for line in alias_lines), (
        "docker/stage2-hook.sh must resolve HERMES_UID from a PUID alias; see #15290"
    )
    assert any("PGID" in line for line in alias_lines), (
        "docker/stage2-hook.sh must resolve HERMES_GID from a PGID alias; see #15290"
    )
|
||||
|
||||
|
||||
def _resolve(stage2_text: str, env: dict[str, str]) -> str:
    """Run the stage2 hook's alias-resolution lines in isolation and report the
    resolved ``HERMES_UID:HERMES_GID`` pair.

    The lines are executed with ``bash -ec`` in an environment containing
    only ``PATH`` plus the entries of ``env``. Skips the calling test when
    ``bash`` is unavailable and fails it when the script exits non-zero.
    """
    bash = shutil.which("bash")
    if bash is None:
        pytest.skip("bash not available")
    script = "\n".join(_alias_lines(stage2_text))
    # Unset variables expand to empty, so "no ids configured" prints ":".
    script += '\necho "${HERMES_UID:-}:${HERMES_GID:-}"\n'
    proc = subprocess.run(
        [bash, "-ec", script],
        # Minimal environment: inherit only PATH so stray HERMES_*/PUID/PGID
        # values from the developer's shell cannot leak into the result.
        env={"PATH": os.environ.get("PATH", "")} | env,
        capture_output=True,
        text=True,
    )
    assert proc.returncode == 0, proc.stderr
    return proc.stdout.strip()
|
||||
|
||||
|
||||
def test_puid_pgid_populate_hermes_uid_gid(stage2_text: str) -> None:
    """With only PUID/PGID set, they become HERMES_UID/HERMES_GID."""
    assert _resolve(stage2_text, {"PUID": "1000", "PGID": "10"}) == "1000:10"
|
||||
|
||||
|
||||
def test_hermes_uid_gid_take_precedence_over_aliases(stage2_text: str) -> None:
    """When both the HERMES_* variables and the PUID/PGID aliases are set,
    the HERMES_* values win."""
    resolved = _resolve(
        stage2_text,
        {"HERMES_UID": "2000", "HERMES_GID": "2001", "PUID": "1000", "PGID": "10"},
    )
    assert resolved == "2000:2001"
|
||||
|
||||
|
||||
def test_no_uid_vars_leaves_values_empty(stage2_text: str) -> None:
    """With none of the four variables set, both ids resolve to empty."""
    # An empty resolution means the stage2 hook keeps the default hermes user.
    assert _resolve(stage2_text, {}) == ":"
|
||||
|
||||
|
||||
def test_stage2_hook_creates_s6_envdir_before_writing_browser_path(stage2_text: str) -> None:
|
||||
"""Regression guard for browser-path export on runtimes where the
|
||||
s6 container_environment directory is absent when the cont-init hook runs.
|
||||
"""
|
||||
mkdir_line = "mkdir -p /run/s6/container_environment"
|
||||
write_line = (
|
||||
"printf '%s' \"$browser_bin\" > "
|
||||
"/run/s6/container_environment/AGENT_BROWSER_EXECUTABLE_PATH"
|
||||
)
|
||||
|
||||
assert mkdir_line in stage2_text
|
||||
assert write_line in stage2_text
|
||||
assert stage2_text.index(mkdir_line) < stage2_text.index(write_line)
|
||||
|
||||
|
||||
def test_stage2_hook_runs_config_migration_as_hermes(stage2_text: str) -> None:
|
||||
assert "scripts/docker_config_migrate.py" in stage2_text
|
||||
assert 's6-setuidgid hermes "$INSTALL_DIR/.venv/bin/python"' in stage2_text
|
||||
|
||||
|
||||
def test_stage2_hook_documents_config_migration_opt_out(stage2_text: str) -> None:
|
||||
assert "HERMES_SKIP_CONFIG_MIGRATION" in stage2_text
|
||||
@@ -1,138 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook resets ownership of hermes-owned
|
||||
top-level state files in $HERMES_HOME — but only those, never arbitrary
|
||||
host-owned files.
|
||||
|
||||
Regression guard for the gateway restart loop reported in #35098: files such
|
||||
as gateway.lock / state.db / auth.json live directly under $HERMES_HOME (not in
|
||||
a subdir), so the targeted subdir chown misses them. When created or rewritten
|
||||
by `docker exec <container> hermes …` (root unless `-u` is passed) they land
|
||||
root-owned and the unprivileged hermes runtime then hits PermissionError on next
|
||||
startup.
|
||||
|
||||
The fix uses an explicit allowlist rather than a blanket `find -user root`
|
||||
sweep, preserving the targeted-ownership contract from #19788 / PR #19795: a
|
||||
bind-mounted $HERMES_HOME may contain host-owned files Hermes does not manage,
|
||||
and those must never be chowned.
|
||||
|
||||
The s6-overlay rework moved bootstrap from docker/entrypoint.sh (now a shim) to
|
||||
docker/stage2-hook.sh, installed as /etc/cont-init.d/01-hermes-setup. This test
|
||||
targets that location.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Module-scoped fixture returning the text of ``docker/stage2-hook.sh``.

    Skips the requesting tests when the script is absent from the checkout.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    # Decode explicitly so the result does not depend on the runner's locale.
    return STAGE2_HOOK.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def _toplevel_chown_loop(text: str) -> str:
|
||||
"""Extract the `for f in … chown hermes:hermes "$HERMES_HOME/$f" … done`
|
||||
block that repairs top-level state-file ownership."""
|
||||
m = re.search(
|
||||
r"(for f in \\\n(?:.*\\\n)*?.*; do\n(?:.*\n)*?done)",
|
||||
text,
|
||||
)
|
||||
assert m, "stage2-hook.sh must contain the top-level-file chown for-loop (#35098)"
|
||||
block = m.group(1)
|
||||
assert 'chown hermes:hermes "$HERMES_HOME/$f"' in block, (
|
||||
"the top-level-file loop must chown each allowlisted file to hermes"
|
||||
)
|
||||
return block
|
||||
|
||||
|
||||
def test_toplevel_chown_loop_present(stage2_text: str) -> None:
    """The top-level chown loop must exist and its text must mention each
    of the state files reported broken in #35098."""
    block = _toplevel_chown_loop(stage2_text)
    # The reported-broken files must be covered.
    for required in ("auth.json", "state.db", "gateway.lock", "gateway_state.json"):
        assert required in block, (
            f"top-level chown allowlist must include {required!r} (#35098)"
        )
|
||||
|
||||
|
||||
def test_no_blanket_find_user_root_sweep(stage2_text: str) -> None:
|
||||
"""The fix must NOT reintroduce a blanket `find … -user root` chown of
|
||||
$HERMES_HOME contents — that would clobber host-owned files in a bind mount
|
||||
(#19788 / PR #19795)."""
|
||||
assert not re.search(r"find\s+\"?\$\{?HERMES_HOME\}?\"?[^\n]*-user\s+root", stage2_text), (
|
||||
"stage2-hook.sh must not blanket-chown root-owned files under "
|
||||
"$HERMES_HOME via `find -user root`; use the targeted allowlist instead "
|
||||
"so host-owned bind-mounted files are preserved (#19788, #19795)."
|
||||
)
|
||||
|
||||
|
||||
def _run_loop(text: str, present_files: list[str]) -> list[str]:
    """Run the extracted chown loop in a sandbox $HERMES_HOME, with `chown`
    stubbed to record which paths it was asked to touch. Returns the basenames
    the loop attempted to chown.

    ``present_files`` are created empty inside the sandbox home, alongside
    an extra ``host_secret.json`` that stands in for a host-owned file.
    Skips the calling test when ``bash`` is unavailable and fails it when
    the harness script exits non-zero.
    """
    import tempfile

    bash = shutil.which("bash")
    if bash is None:
        pytest.skip("bash not available")
    block = _toplevel_chown_loop(text)

    with tempfile.TemporaryDirectory() as d:
        dpath = Path(d)
        home = dpath / "home"
        home.mkdir()
        for f in present_files:
            (home / f).touch()
        # A non-allowlisted, "host-owned" file that must never be chowned.
        (home / "host_secret.json").touch()

        # Stub chown to record the basename of its last argument (the path),
        # so we observe exactly which files the allowlist loop selected
        # without needing real root privileges.
        script = (
            "set -e\n"
            f'HERMES_HOME="{home}"\n'
            f'chown() {{ for a in "$@"; do :; done; echo "${{a##*/}}" >> "{dpath}/chown.log"; }}\n'
            + block
        )
        script_path = dpath / "harness.sh"
        script_path.write_text(script, encoding="utf-8")

        proc = subprocess.run([bash, str(script_path)], capture_output=True, text=True)
        assert proc.returncode == 0, proc.stderr

        # No log file means the stub was never invoked.
        log = dpath / "chown.log"
        if not log.exists():
            return []
        return [ln for ln in log.read_text(encoding="utf-8").splitlines() if ln]
|
||||
|
||||
|
||||
def test_loop_chowns_present_allowlisted_files(stage2_text: str) -> None:
    """Each of the three files created in the sandbox must be handed to chown."""
    present = ["auth.json", "state.db", "gateway.lock"]
    touched = _run_loop(stage2_text, present)
    for name in ("auth.json", "state.db", "gateway.lock"):
        assert name in touched
|
||||
|
||||
|
||||
def test_loop_skips_nonallowlisted_host_file(stage2_text: str) -> None:
    """The sandbox's extra `host_secret.json` (standing in for a host-owned
    file in a bind mount) must not show up among the chowned paths."""
    chowned = _run_loop(stage2_text, ["auth.json"])
    host_file_touched = "host_secret.json" in chowned
    assert not host_file_touched, (
        "the allowlist loop must not touch non-allowlisted files (#19788)"
    )
|
||||
|
||||
|
||||
def test_loop_skips_absent_files(stage2_text: str) -> None:
    """A file that was never created in the sandbox must not be chowned."""
    chowned = _run_loop(stage2_text, ["auth.json"])
    # Only auth.json exists in the sandbox; state.db was never created.
    state_db_touched = "state.db" in chowned
    assert not state_db_touched
|
||||
@@ -1,86 +0,0 @@
|
||||
"""Regression tests for Docker stage2 UID/GID handling on NAS hosts.
|
||||
|
||||
Unraid commonly runs appdata as nobody:users (99:100). The stage2 hook must
|
||||
accept those non-root numeric IDs and keep legacy/new pairing stores writable
|
||||
after targeted ownership reconciliation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# parents[2] is the grandparent of the directory that contains this module.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Shell hook whose text the tests in this module inspect.
STAGE2_HOOK = REPO_ROOT.joinpath("docker", "stage2-hook.sh")
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Module-scoped fixture returning the text of ``STAGE2_HOOK``.

    Skips the requesting tests when the hook file does not exist.
    """
    if STAGE2_HOOK.exists():
        return STAGE2_HOOK.read_text()
    pytest.skip("docker/stage2-hook.sh not present in this checkout")
|
||||
|
||||
|
||||
def _uid_gid_validator(text: str) -> str:
|
||||
marker = "# --- UID/GID remap ---"
|
||||
before_marker = text.split(marker, 1)[0]
|
||||
start = before_marker.index("validate_uid_gid()")
|
||||
return before_marker[start:]
|
||||
|
||||
|
||||
def _validate_uid_gid(text: str, value: str) -> bool:
    """Run the hook's ``validate_uid_gid`` shell function against ``value``.

    The extracted function source is executed with ``bash -c`` in an
    environment containing only ``PATH`` and ``CANDIDATE`` (set to
    ``value``). Returns True when bash exits with status 0. Skips the calling
    test when bash is unavailable.
    """
    bash = shutil.which("bash")
    if bash is None:
        pytest.skip("bash not available")

    invocation = '\nvalidate_uid_gid "$CANDIDATE"\n'
    script = "".join([_uid_gid_validator(text), invocation])
    # The value travels through the environment rather than being spliced
    # into the script text.
    minimal_env = {
        "PATH": os.environ.get("PATH", ""),
        "CANDIDATE": value,
    }
    completed = subprocess.run(
        [bash, "-c", script],
        env=minimal_env,
        capture_output=True,
        text=True,
    )
    return completed.returncode == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", ["1", "99", "100", "1000", "65534"])
def test_uid_gid_validator_accepts_non_root_nas_ids(stage2_text: str, value: str) -> None:
    """Each parametrized non-root numeric ID must be accepted by the validator."""
    accepted = _validate_uid_gid(stage2_text, value)
    assert accepted, (
        f"stage2 hook must accept NAS UID/GID {value}; Unraid uses 99:100 (#38070)"
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", ["", "0", "abc", "99x", "65535"])
def test_uid_gid_validator_rejects_root_invalid_and_out_of_range(
    stage2_text: str,
    value: str,
) -> None:
    """Empty, root (0), non-numeric and 65535 inputs must all be rejected."""
    accepted = _validate_uid_gid(stage2_text, value)
    assert not accepted
|
||||
|
||||
|
||||
def _targeted_chown_subdirs(text: str) -> list[str]:
|
||||
m = re.search(
|
||||
r"for sub in (?P<items>.*?); do\n\s*if \[ -e \"\$HERMES_HOME/\$sub\" \]",
|
||||
text,
|
||||
re.DOTALL,
|
||||
)
|
||||
assert m, "stage2-hook.sh must contain the targeted subdir chown loop"
|
||||
return m.group("items").split()
|
||||
|
||||
|
||||
def test_targeted_chown_covers_legacy_and_new_pairing_dirs(stage2_text: str) -> None:
    """Both `pairing` and `platforms/pairing` must be in the targeted chown loop."""
    listed = _targeted_chown_subdirs(stage2_text)
    for pairing_dir in ("pairing", "platforms/pairing"):
        assert pairing_dir in listed
|
||||
|
||||
|
||||
def test_seeded_directory_list_covers_legacy_and_new_pairing_dirs(stage2_text: str) -> None:
|
||||
seed_block = stage2_text.split("as_hermes mkdir -p \\", 1)[1].split(
|
||||
"# --- Install-method stamp",
|
||||
1,
|
||||
)[0]
|
||||
assert '"$HERMES_HOME/pairing"' in seed_block
|
||||
assert '"$HERMES_HOME/platforms/pairing"' in seed_block
|
||||
@@ -1,119 +0,0 @@
|
||||
"""Contract test: the s6-overlay stage2 hook and main-wrapper reject an
|
||||
unsupported `docker run --user <arbitrary-uid>:<gid>` start with actionable
|
||||
guidance, while still allowing:
|
||||
|
||||
- root start (id -u == 0)
|
||||
- `--user <hermes-uid>` (the supported non-root start, #34648 / #34837)
|
||||
|
||||
Background: in the tini era `docker run --user $(id -u):$(id -g)` was used to
|
||||
make container-written files match the host user. Under s6-overlay this can't
|
||||
work — the bootstrap (UID remap, volume/build-tree chown, config seeding) needs
|
||||
root, and the baked image dirs are owned by the hermes build UID, so an
|
||||
arbitrary pinned UID can't write them (EACCES on a bind mount, hard crash on a
|
||||
named volume). The supported path is root start + HERMES_UID/HERMES_GID (or the
|
||||
PUID/PGID aliases), which remaps the hermes user and chowns the volume.
|
||||
|
||||
The guard fires only when the current UID is neither root NOR the hermes UID,
|
||||
so the #34648 `--user 10000:10000` case (pinning to the hermes UID itself) is
|
||||
unaffected.
|
||||
|
||||
Extraction + stubbed-shell-run mirrors
|
||||
tests/tools/test_stage2_hook_toplevel_chown.py.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# parents[2] is the grandparent of the directory that contains this module.
REPO_ROOT = Path(__file__).resolve().parents[2]
# The two shell scripts whose guard block the tests in this module inspect.
STAGE2_HOOK = REPO_ROOT.joinpath("docker", "stage2-hook.sh")
MAIN_WRAPPER = REPO_ROOT.joinpath("docker", "main-wrapper.sh")
|
||||
|
||||
|
||||
def _read(p: Path) -> str:
|
||||
if not p.exists():
|
||||
pytest.skip(f"{p} not present in this checkout")
|
||||
return p.read_text()
|
||||
|
||||
|
||||
def _guard_block(text: str) -> str:
|
||||
"""Extract the `cur_uid=...; if [ ... ]; then ... exit 1; fi` guard."""
|
||||
m = re.search(
|
||||
r"(cur_uid=\"\$\(id -u\)\"\nif \[ \"\$cur_uid\" != 0 \](?:.*\n)*?fi)",
|
||||
text,
|
||||
)
|
||||
assert m, "expected the --user guard block (cur_uid + non-root/non-hermes check)"
|
||||
return m.group(1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("path", [STAGE2_HOOK, MAIN_WRAPPER])
def test_guard_present_and_mentions_remediation(path: Path) -> None:
    """The guard in ``path`` must test both UIDs, exit 1, and name the
    supported environment variables."""
    guard = _guard_block(_read(path))
    # Must check non-root AND non-hermes-uid (so --user 10000:10000 is allowed).
    required_fragments = (
        '"$cur_uid" != 0',
        '"$cur_uid" != "$(id -u hermes)"',
        "exit 1",
    )
    for fragment in required_fragments:
        assert fragment in guard
    # Must point users at the supported env vars.
    for env_var in ("HERMES_UID", "HERMES_GID", "PUID", "PGID"):
        assert env_var in guard
|
||||
|
||||
|
||||
def _run_guard(text: str, *, cur_uid: int, hermes_uid: int = 10000) -> subprocess.CompletedProcess:
    """Execute the extracted guard under bash with ``id`` replaced by a stub.

    The stub prints ``hermes_uid`` when its second argument is ``hermes`` and
    ``cur_uid`` otherwise. ``GUARD_PASSED`` is echoed after the guard, so it
    reaches stdout only when the guard does not exit. Returns the completed
    process; skips the calling test when bash is unavailable.
    """
    bash = shutil.which("bash")
    if bash is None:
        pytest.skip("bash not available")
    guard = _guard_block(text)

    # Stub `id`: `id -u` -> cur_uid; `id -u hermes` -> hermes_uid.
    id_stub = f'id() {{ if [ "$2" = hermes ]; then echo {hermes_uid}; else echo {cur_uid}; fi; }}\n'
    harness_parts = [
        "set -e\n",
        id_stub,
        guard,
        "\necho GUARD_PASSED\n",  # only reached when the guard allows through
    ]

    with tempfile.TemporaryDirectory() as tmp:
        harness = Path(tmp) / "h.sh"
        harness.write_text("".join(harness_parts))
        return subprocess.run([bash, str(harness)], capture_output=True, text=True)
|
||||
|
||||
|
||||
def test_arbitrary_user_uid_is_rejected() -> None:
    """UID 1000 is neither root nor the hermes UID, so both scripts must
    exit 1 with a "not supported" message on stderr."""
    scripts = (_read(STAGE2_HOOK), _read(MAIN_WRAPPER))
    for script in scripts:
        result = _run_guard(script, cur_uid=1000, hermes_uid=10000)
        assert result.returncode == 1, f"expected rejection, got rc={result.returncode}"
        assert "not supported" in result.stderr
        assert "GUARD_PASSED" not in result.stdout
|
||||
|
||||
|
||||
def test_root_start_passes() -> None:
    """With the current UID stubbed to 0, the guard in both scripts lets
    execution continue."""
    scripts = (_read(STAGE2_HOOK), _read(MAIN_WRAPPER))
    for script in scripts:
        result = _run_guard(script, cur_uid=0, hermes_uid=10000)
        assert result.returncode == 0, result.stderr
        assert "GUARD_PASSED" in result.stdout
|
||||
|
||||
|
||||
def test_user_pinned_to_hermes_uid_passes() -> None:
    """A current UID equal to the hermes UID (`--user 10000:10000`, the
    supported non-root start from #34648 / #34837) must not be blocked."""
    scripts = (_read(STAGE2_HOOK), _read(MAIN_WRAPPER))
    for script in scripts:
        result = _run_guard(script, cur_uid=10000, hermes_uid=10000)
        assert result.returncode == 0, result.stderr
        assert "GUARD_PASSED" in result.stdout
|
||||
|
||||
|
||||
def test_user_pinned_to_remapped_hermes_uid_passes() -> None:
    """When the hermes UID is something other than the default (here 4242),
    a current UID equal to it must still pass the guard."""
    scripts = (_read(STAGE2_HOOK), _read(MAIN_WRAPPER))
    for script in scripts:
        result = _run_guard(script, cur_uid=4242, hermes_uid=4242)
        assert result.returncode == 0, result.stderr
        assert "GUARD_PASSED" in result.stdout
|
||||
Reference in New Issue
Block a user