Compare commits

...

18 Commits

Author SHA1 Message Date
ethernet
ffe043998f change(ci): remove lint PR comment
it's already in the job summary.
having it as a comment just makes people ignore it. don't waste space.
2026-06-25 19:51:48 -04:00
ethernet
5cca2b1c2d feat(ci): add CI timing report 2026-06-25 19:51:48 -04:00
ethernet
725ca2ab20 fix(ci): rip out some xdist legacy stuff... how did these ever work?? 2026-06-25 19:47:43 -04:00
ethernet
a7e32ca9c2 change(ci): upload-artifact from v4 -> v7 2026-06-25 19:15:00 -04:00
ethernet
901f107976 try pytest alone.. 2026-06-25 19:12:49 -04:00
ethernet
c73adbd91b wip ignore 2026-06-25 19:12:49 -04:00
ethernet
01a7dfc339 change(ci): update all UV installs 2026-06-25 18:46:28 -04:00
ethernet
db03c207aa change(ci): migrate docker smoketests to real tests 2026-06-25 18:46:28 -04:00
ethernet
e74f230462 change(ci): pretty names 2026-06-25 18:46:28 -04:00
ethernet
eb114af7f1 change(tests): don't pass pytest args when counting tests 2026-06-25 18:46:28 -04:00
ethernet
2118bc5ab3 change(nix): simpler dev setup 2026-06-25 18:46:28 -04:00
ethernet
9f51ec0280 change(nix): ship fat hermes agent by default 2026-06-25 18:46:28 -04:00
ethernet
03046b9b9a change(ci): docker-publish.yml -> docker.yml 2026-06-25 18:46:28 -04:00
ethernet
5e50b121ab change(ci): docker runs again on PRs 2026-06-25 18:46:28 -04:00
ethernet
5a20177fc3 refactor(ci): more test slices 2026-06-25 18:46:28 -04:00
ethernet
3668c2c482 refactor(ci): run tests thru run_tests.sh 2026-06-25 18:23:09 -04:00
ethernet
2fcc3ad9cb refactor(ci): rewrite docker tests to check built container 2026-06-25 16:30:11 -04:00
ethernet
a6d54c9bbe refactor(ci): faster docker builds via --link and chmod removal 2026-06-25 12:25:07 -04:00
63 changed files with 2602 additions and 1703 deletions

2
.envrc
View File

@@ -1,5 +1,5 @@
watch_file pyproject.toml uv.lock
watch_file package-lock.json package.json web/package.json ui-tui/package.json website/package.json apps/shared/package.json apps/desktop/package.json ui-tui/packages/hermes-ink/package.json
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix nix/hermes-agent.nix nix/desktop.nix
use flake

View File

@@ -1,50 +0,0 @@
name: Hermes smoke test
description: >
Run the image's built-in entrypoint against `--help` and `dashboard --help`
to catch basic runtime regressions before publishing. Requires the image
to already be loaded into the local Docker daemon under `image`.
Works identically on amd64 and arm64 runners.
inputs:
image:
description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test)
required: true
runs:
using: composite
steps:
- name: Ensure /tmp/hermes-test is hermes-writable
shell: bash
run: |
# The image runs as the hermes user (UID 10000). GitHub Actions
# creates /tmp/hermes-test root-owned by default, which hermes
# can't write to — chown it to match the in-container UID before
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
# with their own UID hit the same issue and have their own
# remediations (HERMES_UID env var, or chown locally).
mkdir -p /tmp/hermes-test
sudo chown -R 10000:10000 /tmp/hermes-test
- name: hermes --help
shell: bash
run: |
# Use the image's real ENTRYPOINT (/init + main-wrapper.sh) so
# this exercises the actual production startup path. PR #30136
# review caught that an --entrypoint override here had been
# silently neutered by the s6-overlay migration — stage2-hook
# ignores its CMD args, so the smoke test was a no-op.
docker run --rm \
-v /tmp/hermes-test:/opt/data \
"${{ inputs.image }}" --help
- name: hermes dashboard --help
shell: bash
run: |
# Regression guard for #9153: dashboard was present in source but
# missing from the published image. If this fails, something in
# the Dockerfile is excluding the dashboard subcommand from the
# installed package.
docker run --rm \
-v /tmp/hermes-test:/opt/data \
"${{ inputs.image }}" dashboard --help

View File

@@ -20,6 +20,7 @@ permissions:
pull-requests: write # needed by supply-chain (PR comment)
actions: read # needed by osv-scanner (SARIF upload)
security-events: write # needed by osv-scanner (SARIF upload)
packages: write # needed by docker build
concurrency:
group: ci-${{ github.ref }}
@@ -32,6 +33,7 @@ jobs:
# (all lanes true) so post-merge validation is never weakened.
# ─────────────────────────────────────────────────────────────────────
detect:
name: Detect affected areas
runs-on: ubuntu-latest
outputs:
python: ${{ steps.classify.outputs.python }}
@@ -53,11 +55,15 @@ jobs:
# Skipped workflows (if condition is false) don't spin up runners.
# ─────────────────────────────────────────────────────────────────────
tests:
name: Python tests
needs: detect
if: needs.detect.outputs.python == 'true'
uses: ./.github/workflows/tests.yml
with:
slice_count: 8
lint:
name: Python lints
needs: detect
if: needs.detect.outputs.python == 'true'
uses: ./.github/workflows/lint.yml
@@ -65,35 +71,48 @@ jobs:
event_name: ${{ needs.detect.outputs.event_name }}
typecheck:
name: TypeScript
needs: detect
if: needs.detect.outputs.frontend == 'true'
uses: ./.github/workflows/typecheck.yml
docs-site:
name: Docs Site
needs: detect
if: needs.detect.outputs.site == 'true'
uses: ./.github/workflows/docs-site-checks.yml
history-check:
name: Deny unrelated histories
needs: detect
if: needs.detect.outputs.event_name == 'pull_request'
uses: ./.github/workflows/history-check.yml
contributor-check:
name: Check contributors
needs: detect
if: needs.detect.outputs.python == 'true'
uses: ./.github/workflows/contributor-check.yml
uv-lockfile:
name: Check uv.lock
needs: detect
uses: ./.github/workflows/uv-lockfile-check.yml
docker-lint:
name: Lint Docker scripts
needs: detect
if: needs.detect.outputs.docker_meta == 'true'
uses: ./.github/workflows/docker-lint.yml
docker:
name: Build&Test Docker image
needs: detect
if: needs.detect.outputs.python == 'true' || needs.detect.outputs.frontend == 'true' || needs.detect.outputs.docker_meta == 'true'
uses: ./.github/workflows/docker.yml
supply-chain:
name: Supply-chain scan
needs: detect
if: needs.detect.outputs.event_name == 'pull_request' && (needs.detect.outputs.scan == 'true' || needs.detect.outputs.deps == 'true' || needs.detect.outputs.mcp_catalog == 'true')
uses: ./.github/workflows/supply-chain-audit.yml
@@ -104,7 +123,7 @@ jobs:
mcp_catalog: ${{ needs.detect.outputs.mcp_catalog == 'true' }}
osv-scanner:
needs: detect
name: OSV scan
uses: ./.github/workflows/osv-scanner.yml
# ─────────────────────────────────────────────────────────────────────
@@ -127,6 +146,7 @@ jobs:
- docker-lint
- supply-chain
- osv-scanner
- docker
if: always()
runs-on: ubuntu-latest
steps:
@@ -143,3 +163,67 @@ jobs:
sys.exit(1)
print('All checks passed (or were skipped)')
"
# ─────────────────────────────────────────────────────────────────────
# CI timing report: collect per-job/step durations from the GitHub API,
# cache them on main (as a baseline), and on PRs generate an HTML diff
# report with a gantt chart + per-step breakdown. The report is uploaded
# as an artifact and a markdown summary is written to $GITHUB_STEP_SUMMARY.
# ─────────────────────────────────────────────────────────────────────
ci-timings:
# Runs after the aggregate gate job so every other job has finished and its
# timings are available; `always()` keeps the report even when checks failed.
name: CI timing report
needs: all-checks-pass
if: always()
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Restore baseline cache (PR only)
if: github.event_name == 'pull_request'
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ci-timings-baseline.json
# Prefix-match: exact key will never hit (run_id differs), so
# restore-keys finds the most recent baseline from main.
key: ci-timings-baseline-never-exact
restore-keys: |
ci-timings-baseline-
- name: Collect timings and generate report
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
python3 scripts/ci/timings_report.py \
--baseline ci-timings-baseline.json \
--output ci-timings-report.html \
--json-out ci-timings.json \
--summary-out ci-timings-summary.md
- name: Upload HTML report
# `archive: false` (upload the single HTML file as-is instead of a zip)
# is only accepted by upload-artifact v7+, so use the same v7 pin as the
# other workflows rather than the v4 commit.
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
id: ci-timings-artifact
with:
name: ci-timings-report
path: ci-timings-report.html
retention-days: 14
archive: false
- name: Output summary
env:
REPORT_URL: ${{ steps.ci-timings-artifact.outputs.artifact-url }}
run: |
echo "# CI Timing report" >> "$GITHUB_STEP_SUMMARY"
echo "[View the full interactive report]($REPORT_URL)" >> "$GITHUB_STEP_SUMMARY"
cat ci-timings-summary.md >> "$GITHUB_STEP_SUMMARY"
# The two steps below publish this run's timings as the new baseline that
# later PR runs restore via the `ci-timings-baseline-` prefix.
- name: Save baseline cache (main only)
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
run: cp ci-timings.json ci-timings-baseline.json
- name: Upload baseline to cache (main only)
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ci-timings-baseline.json
key: ci-timings-baseline-${{ github.run_id }}

View File

@@ -2,7 +2,7 @@ name: Docker / shell lint
# Lints the container build inputs: Dockerfile (via hadolint) and any shell
# scripts under docker/ (via shellcheck). These catch the class of regression
# the behavioral docker-publish smoke test can't — unquoted variable
# the behavioral docker smoke test can't — unquoted variable
# expansions, silently-failing RUN commands, etc.
#
# Rules and ignores are documented in .hadolint.yaml at the repo root.

View File

@@ -1,24 +1,9 @@
name: Docker Build and Publish
name: Docker Build, Test, and Publish
on:
push:
branches: [main]
paths:
- '**/*.py'
- 'pyproject.toml'
- 'uv.lock'
- 'Dockerfile'
- 'docker/**'
- '.github/workflows/docker-publish.yml'
- '.github/actions/hermes-smoke-test/**'
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
release:
types: [published]
workflow_call:
permissions:
contents: read
@@ -39,11 +24,7 @@ env:
IMAGE_NAME: nousresearch/hermes-agent
jobs:
# ---------------------------------------------------------------------------
# Build amd64 natively. This job also runs the smoke tests (basic --help
# and the dashboard subcommand regression guard from #9153), because amd64
# is the only arch we can `load` into the local daemon on an amd64 runner.
# ---------------------------------------------------------------------------
# Build, test, and optionally push the amd64 image.
build-amd64:
# Only run on the upstream repository, not on forks
if: github.repository == 'NousResearch/hermes-agent'
@@ -53,24 +34,19 @@ jobs:
digest: ${{ steps.push.outputs.digest }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
# The image build + smoke test + integration tests run ONLY on
# push-to-main and release — never on PRs. They are the heaviest jobs
# in CI (~15-45 min) and a broken build surfaces on the main push (and
# is gated pre-merge by docker-lint + uv-lockfile-check). Every step
# below is skipped on PRs, so the job still reports green and the
# required check never hangs.
# The image build + integration tests run on every event
# (PRs, push-to-main, release). Publish steps below are gated to
# push-to-main / release only.
- name: Set up Docker Buildx
if: github.event_name != 'pull_request'
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
# Build once, load into the local daemon for smoke testing. Cached
# Build once, load into the local daemon for testing. Cached
# to gha with a per-arch scope; the push step below reuses every
# layer from this build.
- name: Build image (amd64, smoke test)
if: github.event_name != 'pull_request'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
- name: Build image (amd64)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -82,25 +58,12 @@ jobs:
cache-from: type=gha,scope=docker-amd64
cache-to: type=gha,mode=max,scope=docker-amd64
- name: Smoke test image
if: github.event_name != 'pull_request'
uses: ./.github/actions/hermes-smoke-test
with:
image: ${{ env.IMAGE_NAME }}:test
# ---------------------------------------------------------------------
# Run the docker-integration test suite against the freshly-built
# image already loaded into the local daemon (`:test`). These tests
# are excluded from the sharded `tests.yml :: test` matrix on purpose
# (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
# shard would otherwise reach the session-scoped ``built_image``
# fixture in ``tests/docker/conftest.py`` and start a 3-7min
# ``docker build`` — guaranteed to
# die in fixture setup.
# image already loaded into the local daemon (`:test`).
#
# Piggybacking here avoids a second image build: the smoke test
# already proved the image loads + runs, so the daemon has it under
# `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
# Piggybacking here avoids a second image build: the build step
# already loaded the image into the daemon under
# `${IMAGE_NAME}:test`, so we just point ``HERMES_TEST_IMAGE`` at
# that. The fixture's ``HERMES_TEST_IMAGE`` branch (see
# tests/docker/conftest.py:62-63) short-circuits the rebuild.
#
@@ -110,26 +73,20 @@ jobs:
# cheapest path to coverage on every PR that touches docker code.
# ---------------------------------------------------------------------
- name: Install uv (for docker tests)
if: github.event_name != 'pull_request'
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
- name: Set up Python 3.11 (for docker tests)
if: github.event_name != 'pull_request'
run: uv python install 3.11
- name: Install Python dependencies (for docker tests)
if: github.event_name != 'pull_request'
run: |
uv venv .venv --python 3.11
source .venv/bin/activate
# ``dev`` extra pulls in pytest, pytest-asyncio —
# everything tests/docker/ needs. We deliberately avoid ``all``
# here because the docker tests only drive the container via
# subprocess and don't import hermes_agent's optional deps.
uv pip install -e ".[dev]"
uv sync --locked --python 3.11 --extra dev
- name: Run docker integration tests
if: github.event_name != 'pull_request'
env:
# Skip rebuild; use the image already loaded by the build step.
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
@@ -140,11 +97,11 @@ jobs:
NOUS_API_KEY: ""
run: |
source .venv/bin/activate
python -m pytest tests/docker/ -v --tb=short
python -m pytest tests/docker/
- name: Log in to Docker Hub
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -155,7 +112,7 @@ jobs:
- name: Push amd64 by digest
id: push
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -179,7 +136,7 @@ jobs:
- name: Upload digest artifact
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
with:
name: digest-amd64
path: /tmp/digests/*
@@ -187,10 +144,7 @@ jobs:
retention-days: 1
# ---------------------------------------------------------------------------
# Build arm64 natively on GitHub's free arm64 runner. This replaces the
# previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
# a cache scope with amd64. Matches the amd64 job's shape: build+load,
# smoke test, then on push/release push by digest.
# Build, test, and optionally push the arm64 image.
# ---------------------------------------------------------------------------
build-arm64:
if: github.repository == 'NousResearch/hermes-agent'
@@ -200,29 +154,26 @@ jobs:
digest: ${{ steps.push.outputs.digest }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
# arm64 build + tests run on every event; publish steps are gated to push-to-main / release (see build-amd64).
- name: Set up Docker Buildx
if: github.event_name != 'pull_request'
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
# Log in to ghcr.io so the registry-backed build cache below can be
# read (cache-from) on every event and written (cache-to) on
# push/release. Uses the workflow's GITHUB_TOKEN, which is valid for
# the whole job — unlike the gha cache backend's short-lived Azure SAS
# token, which expired mid-build on slow cold-cache arm64 runs and
# crashed the build before the smoke test (the reason the gha cache
# crashed the build before the tests ran (the reason the gha cache
# was removed from arm64 PRs in the first place).
- name: Log in to ghcr.io (build cache)
if: github.event_name != 'pull_request'
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# Build once, load into the local daemon for smoke testing, then push
# Build once, load into the local daemon for testing, then push
# by digest below. Reads AND writes the registry-backed cache so the
# push reuses layers from this build and the next build starts warm.
#
@@ -230,9 +181,8 @@ jobs:
# cache that previously broke here: its credential is the job-lifetime
# GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives-
# token failure mode cannot recur.
- name: Build image (arm64, smoke test, cached publish)
if: github.event_name != 'pull_request'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
- name: Build image (arm64, cached publish)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -244,15 +194,30 @@ jobs:
cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max
- name: Smoke test image
if: github.event_name != 'pull_request'
uses: ./.github/actions/hermes-smoke-test
with:
image: ${{ env.IMAGE_NAME }}:test
- name: Install uv for docker tests
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
- name: Set up Python 3.11 for docker tests
run: uv python install 3.11
- name: Install Python dependencies for docker tests
run: |
uv sync --locked --python 3.11 --extra dev
- name: Run docker tests
env:
# Skip rebuild; use the image already loaded by the build step.
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
# Blank out provider keys for the test run.
OPENROUTER_API_KEY: ""
OPENAI_API_KEY: ""
NOUS_API_KEY: ""
run: |
source .venv/bin/activate
# Pass the test directory as a positional path. `-m` is pytest's
# marker-expression option, so `-m tests/docker/` would filter by a
# marker of that name instead of running the files in that directory.
python -m pytest tests/docker/
- name: Log in to Docker Hub
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -260,7 +225,7 @@ jobs:
- name: Push arm64 by digest
id: push
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -282,7 +247,7 @@ jobs:
- name: Upload digest artifact
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
with:
name: digest-arm64
path: /tmp/digests/*
@@ -304,17 +269,17 @@ jobs:
timeout-minutes: 10
steps:
- name: Download digests
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
path: /tmp/digests
pattern: digest-*
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
- name: Log in to Docker Hub
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

View File

@@ -37,7 +37,7 @@ jobs:
fetch-depth: 0 # need full history for merge-base + worktree
- name: Install uv
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
- name: Install ruff + ty
uses: ./.github/actions/retry
@@ -109,46 +109,6 @@ jobs:
--output .lint-reports/summary.md
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
- name: Upload reports as artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: lint-reports
path: .lint-reports/
retention-days: 14
- name: Post / update PR comment
if: inputs.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
continue-on-error: true
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
with:
script: |
const fs = require('fs');
const body = fs.readFileSync('.lint-reports/summary.md', 'utf8');
const marker = '<!-- lint-diff-summary -->';
const fullBody = marker + '\n' + body;
const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
});
const existing = comments.find(c => c.body && c.body.includes(marker));
if (existing) {
await github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: existing.id,
body: fullBody,
});
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: fullBody,
});
}
ruff-blocking:
# Enforce the rules in pyproject.toml [tool.ruff.lint.select]. Currently
# PLW1514 (unspecified-encoding) — catches bare ``open()`` /
@@ -164,7 +124,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install uv
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
- name: Install ruff
uses: ./.github/actions/retry

View File

@@ -3,17 +3,17 @@ name: Build Skills Index
on:
schedule:
# Run twice daily: 6 AM and 6 PM UTC
- cron: '0 6,18 * * *'
workflow_dispatch: # Manual trigger
- cron: "0 6,18 * * *"
workflow_dispatch: # Manual trigger
push:
branches: [main]
paths:
- 'scripts/build_skills_index.py'
- '.github/workflows/skills-index.yml'
- "scripts/build_skills_index.py"
- ".github/workflows/skills-index.yml"
permissions:
contents: read
actions: write # to trigger deploy-site.yml on schedule
actions: write # to trigger deploy-site.yml on schedule
jobs:
build-index:
@@ -21,11 +21,11 @@ jobs:
if: github.repository == 'NousResearch/hermes-agent'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.11'
python-version: "3.11"
- name: Install dependencies
run: pip install httpx==0.28.1 pyyaml==6.0.2
@@ -36,7 +36,7 @@ jobs:
run: python scripts/build_skills_index.py
- name: Upload index artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
with:
name: skills-index
path: website/static/api/skills-index.json

View File

@@ -2,6 +2,11 @@ name: Tests
on:
workflow_call:
inputs:
slice_count:
description: Number of parallel test slices
type: number
default: 8
permissions:
contents: read
@@ -12,13 +17,29 @@ concurrency:
cancel-in-progress: true
jobs:
# Builds the dynamic matrix for the `test` job from the `slice_count` input.
generate:
name: "Generate slices"
runs-on: ubuntu-latest
outputs:
# JSON object of the form {"slice": [1, ..., N]}, consumed via fromJSON()
# as the test job's matrix.
slices: ${{ steps.matrix.outputs.slices }}
# N itself, echoed back so the test job can build its "I/N" slice argument.
slice_count: ${{ steps.matrix.outputs.slice_count }}
steps:
- name: Generate test slices
id: matrix
run: |
COUNT="${{ inputs.slice_count }}"
# Slices are 1-based: range(1, COUNT + 1) yields 1..COUNT inclusive.
SLICES=$(python3 -c "import json; print(json.dumps({'slice': list(range(1, $COUNT + 1))}))")
echo "slices=$SLICES" >> "$GITHUB_OUTPUT"
echo "slice_count=$COUNT" >> "$GITHUB_OUTPUT"
test:
name: Run tests slice
needs: generate
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
slice: [1, 2, 3, 4, 5, 6]
matrix: ${{ fromJSON(needs.generate.outputs.slices) }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -49,7 +70,7 @@ jobs:
rg --version
- name: Install uv
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
with:
# Persist uv's download/wheel cache (~/.cache/uv) across runs.
# Keyed on the dependency manifests, so the cache is reused until
@@ -78,8 +99,8 @@ jobs:
# re-download, keeping the persisted cache small and fast to restore.
run: uv cache prune --ci
- name: Run tests (slice ${{ matrix.slice }}/6)
# Per-file isolation via scripts/run_tests_parallel.py: discovers
- name: Run tests (slice ${{ matrix.slice }}/${{ needs.generate.outputs.slice_count }})
# Per-file isolation via scripts/run_tests.sh: discovers
# every test_*.py file under tests/ (excluding integration/ + e2e/),
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
# with bounded parallelism. No xdist, no shared workers, no
@@ -97,14 +118,14 @@ jobs:
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
# the job with cleaner semantics.
#
# Matrix slicing (--slice I/N): files are distributed across 6
# Matrix slicing (--slice I/N): files are distributed across N
# jobs by cached duration (LPT algorithm) so each job gets
# roughly equal wall time. Without a cache, files default to 2s
# estimate and get split roughly evenly by count — still correct,
# just not perfectly balanced.
run: |
source .venv/bin/activate
python scripts/run_tests_parallel.py --slice ${{ matrix.slice }}/6
scripts/run_tests.sh --slice ${{ matrix.slice }}/${{ needs.generate.outputs.slice_count }}
env:
# Ensure tests don't accidentally call real APIs
OPENROUTER_API_KEY: ""
@@ -173,7 +194,7 @@ jobs:
rg --version
- name: Install uv
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
with:
# Persist uv's download/wheel cache (~/.cache/uv) across runs.
# Keyed on the dependency manifests, so the cache is reused until

View File

@@ -6,6 +6,7 @@ on:
jobs:
typecheck:
name: Check TypeScript
runs-on: ubuntu-latest
strategy:
matrix:
@@ -22,8 +23,7 @@ jobs:
# native builds. Skipping install scripts drops node-pty's node-gyp
# header fetch — the transient flake that killed this job pre-`tsc` — and
# is faster. retry covers the remaining registry blips.
-
uses: ./.github/actions/retry
- uses: ./.github/actions/retry
with:
command: npm ci --ignore-scripts
- run: npm run --prefix ${{ matrix.package }} typecheck
@@ -35,6 +35,7 @@ jobs:
# users build apps/desktop from source on install/update. Run the real
# `vite build` here so that class of break fails in CI instead.
desktop-build:
name: Build desktop app
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -44,8 +45,7 @@ jobs:
cache: npm
# Keep install scripts here: the production build may need node-pty's
# native binary. retry handles the transient install-time fetch flakes.
-
uses: ./.github/actions/retry
- uses: ./.github/actions/retry
with:
command: npm ci
- run: npm run --prefix apps/desktop build

View File

@@ -5,11 +5,11 @@ name: Publish to PyPI
on:
push:
tags:
- 'v20*' # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
- "v20*" # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
workflow_dispatch:
inputs:
confirm_tag:
description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
description: "Tag to publish (e.g. v2026.5.15). Must already exist."
required: true
type: string
@@ -27,7 +27,7 @@ jobs:
name: Build distribution 📦
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
# On workflow_dispatch, check out the confirmed tag.
@@ -43,17 +43,17 @@ jobs:
fi
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.13'
python-version: "3.13"
- name: Install uv
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
- name: Set up Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: '22'
node-version: "22"
- name: Build web dashboard
run: cd web && npm ci && npm run build
@@ -81,7 +81,7 @@ jobs:
run: uv build --sdist --wheel
- name: Upload distribution artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
with:
name: python-package-distributions
path: dist/
@@ -94,17 +94,17 @@ jobs:
name: pypi
url: https://pypi.org/p/hermes-agent
permissions:
id-token: write # OIDC trusted publishing
id-token: write # OIDC trusted publishing
steps:
- name: Download distribution artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: python-package-distributions
path: dist/
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
with:
skip-existing: true
@@ -116,12 +116,12 @@ jobs:
needs: publish
runs-on: ubuntu-latest
permissions:
contents: write # attach assets to the existing release
id-token: write # sigstore signing
contents: write # attach assets to the existing release
id-token: write # sigstore signing
steps:
- name: Download distribution artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: python-package-distributions
path: dist/
@@ -145,7 +145,7 @@ jobs:
- name: Sign with Sigstore
if: env.skip_sign != 'true'
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
with:
inputs: >-
./dist/*.tar.gz

View File

@@ -4,7 +4,7 @@ name: uv.lock check
# that modify pyproject.toml without regenerating uv.lock (or vice versa)
# must not merge, because the Docker build's `uv sync --frozen` step will
# fail on a stale lockfile and we'd rather catch it here than in the
# docker-publish workflow on main.
# docker workflow on main.
#
# ─────────────────────────────────────────────────────────────────────────
# IMPORTANT: this check runs against the MERGED state, not just your branch
@@ -63,7 +63,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install uv
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
# `uv lock --check` re-resolves the project from pyproject.toml and
# compares the result to uv.lock, exiting non-zero if they disagree.
@@ -100,7 +100,7 @@ jobs:
This check is blocking because the Docker image build uses
`uv sync --frozen --extra all`, which rejects stale lockfiles
— catching it here avoids a ~15 min failed docker-publish run
— catching it here avoids a ~15 min failed docker run
on `main` post-merge.
EOF
echo "::error title=uv.lock out of sync::Run \`uv lock\` locally and commit the result. If on a PR, sync with main first."

View File

@@ -189,7 +189,13 @@ RUN cd web && npm run build && \
# ---------- Source code ----------
# .dockerignore excludes node_modules, so the installs above survive.
COPY . .
# --link decouples this layer from parents for cache purposes; --chmod bakes
# the final read-only permissions at copy time so we skip the separate
# `chmod -R` pass that previously walked ~30k files across the venv +
# node_modules + source (21s amd64 / 222s arm64 — #49113). `a+rX,go-w`
# gives the non-root hermes user read + traverse but no write; root retains
# write so the build steps below don't need chmod u+w dances.
COPY --link --chmod=a+rX,go-w . .
# ---------- Permissions ----------
# Link hermes-agent itself (editable). Deps are already installed in the
@@ -197,19 +203,15 @@ COPY . .
# resolution or downloads.
RUN uv pip install --no-cache-dir --no-deps -e "."
# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
# instances must not be able to self-edit the installed source or venv; user
# data, skills, plugins, config, logs, and dashboard uploads live under
# /opt/data instead. Root can still repair the image during build/boot, but
# supervised Hermes processes drop to the non-root hermes user.
# Wire the exec shim and install-method stamp. Files under /opt/hermes are
# already root-owned (COPY, uv sync, npm install all run as root) and
# read-only for the hermes user (go-w from the --chmod above).
USER root
RUN mkdir -p /opt/hermes/bin && \
cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
chmod 0755 /opt/hermes/bin/hermes && \
printf 'docker\n' > /opt/hermes/.install_method && \
chown -R root:root /opt/hermes && \
chmod -R a+rX /opt/hermes && \
chmod -R a-w /opt/hermes
printf 'docker\n' > /opt/hermes/.install_method
# The ``.install_method`` stamp is baked next to the running code (the install
# tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
# volume that is commonly bind-mounted from the host and even shared with a
@@ -236,13 +238,11 @@ RUN mkdir -p /opt/hermes/bin && \
#
# The arg is optional — local `docker build` without --build-arg simply
# omits the file, and the runtime falls back to live-git lookup. CI
# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so
# (.github/workflows/docker.yml) passes ${{ github.sha }} so
# every published image has it.
ARG HERMES_GIT_SHA=
RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
chmod u+w /opt/hermes && \
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha; \
fi
# ---------- s6-overlay service wiring ----------

View File

@@ -25,10 +25,12 @@
in
{
devShells.default = pkgs.mkShell {
inputsFrom = packages;
packages = with pkgs; [
uv
];
packages =
with pkgs;
[
uv
]
++ self'.packages.default.passthru.devDeps;
shellHook = ''
echo "Hermes Agent dev shell"
${combinedNonNpm}

View File

@@ -37,10 +37,14 @@
}:
let
nodejs = nodejs_22;
hermesVenv = callPackage ./python.nix {
inherit uv2nix pyproject-nix pyproject-build-systems;
dependency-groups = [ "all" ] ++ extraDependencyGroups;
};
mkHermesVenv =
extraDependencyGroups:
callPackage ./python.nix {
inherit uv2nix pyproject-nix pyproject-build-systems;
dependency-groups = [ "all" ] ++ extraDependencyGroups;
};
hermesVenv = mkHermesVenv extraDependencyGroups;
hermesNpmLib = callPackage ./lib.nix {
inherit npm-lockfile-fix nodejs;
@@ -106,12 +110,6 @@ let
pythonPath = lib.makeSearchPath sitePackagesPath allExtraPythonPackages;
pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
uvLockHash =
if builtins.pathExists ../uv.lock then
builtins.hashString "sha256" (builtins.readFile ../uv.lock)
else
"none";
checkPackageCollisions = ''
import pathlib, sys, re
@@ -223,21 +221,10 @@ stdenv.mkDerivation (finalAttrs: {
};
devShellHook = ''
STAMP=".nix-stamps/hermes-agent"
STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
echo "hermes-agent: installing Python dependencies..."
uv venv .venv --python ${python312}/bin/python3 2>/dev/null || true
source .venv/bin/activate
uv pip install -e ".[all]"
[ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
mkdir -p .nix-stamps
echo "$STAMP_VALUE" > "$STAMP"
else
source .venv/bin/activate
export HERMES_PYTHON=${hermesVenv}/bin/python3
fi
export HERMES_PYTHON=${hermesVenv}/bin/python3
'';
devDeps = runtimeDeps ++ [ (mkHermesVenv (extraDependencyGroups ++ [ "dev" ])) ];
};
meta = with lib; {

View File

@@ -2,54 +2,62 @@
{ inputs, ... }:
{
perSystem =
{ pkgs, lib, inputs', ... }:
{
pkgs,
lib,
inputs',
...
}:
let
hermesAgent = pkgs.callPackage ./hermes-agent.nix {
minimal = pkgs.callPackage ./hermes-agent.nix {
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
# Only embed clean revs — dirtyRev doesn't represent any upstream
# commit, so comparing it would always claim "update available".
rev = inputs.self.rev or null;
};
# All platform-portable optional integrations pre-built.
full = minimal.override {
extraDependencyGroups = [
"anthropic"
"azure-identity"
"bedrock"
"daytona"
"dingtalk"
"edge-tts"
"exa"
"fal"
"feishu"
"firecrawl"
"hindsight"
"honcho"
"messaging"
"modal"
"parallel-web"
"tts-premium"
"voice"
]
# matrix is Linux-only (oqs/liboqs lacks aarch64-darwin wheels).
++ lib.optionals pkgs.stdenv.isLinux [ "matrix" ];
};
in
{
packages = {
default = hermesAgent;
default = full;
inherit minimal;
# Ships discord.py + python-telegram-bot + slack-sdk so a plain
# `nix profile install .#messaging` connects to Discord/Telegram/Slack
# on first run — lazy-install can't write to the read-only /nix/store.
messaging = hermesAgent.override {
messaging = minimal.override {
extraDependencyGroups = [ "messaging" ];
};
# All platform-portable optional integrations pre-built.
# matrix is Linux-only (oqs/liboqs lacks aarch64-darwin wheels).
full = hermesAgent.override {
extraDependencyGroups = [
"anthropic"
"azure-identity"
"bedrock"
"daytona"
"dingtalk"
"edge-tts"
"exa"
"fal"
"feishu"
"firecrawl"
"hindsight"
"honcho"
"messaging"
"modal"
"parallel-web"
"tts-premium"
"voice"
] ++ lib.optionals pkgs.stdenv.isLinux [ "matrix" ];
};
tui = hermesAgent.hermesTui;
web = hermesAgent.hermesWeb;
desktop = hermesAgent.hermesDesktop;
tui = full.hermesTui;
web = full.hermesWeb;
desktop = full.hermesDesktop;
};
};
}

View File

@@ -0,0 +1,782 @@
#!/usr/bin/env python3
"""Collect CI job/step timings from the GitHub API and generate an HTML diff report.
In CI, the script reads GITHUB_TOKEN, GITHUB_REPOSITORY, GITHUB_RUN_ID, and
GITHUB_SHA from the environment to collect timings via the REST API.
If a baseline JSON file (ci-timings-baseline.json by default) exists, the
report includes a diff with per-job and per-step deltas, plus a gantt chart
overlaying current vs baseline bars.
Usage:
# Collect from API (CI mode):
python scripts/ci/timings_report.py
# Regenerate HTML from saved JSON (testing):
python scripts/ci/timings_report.py --from-json ci-timings.json
"""
from __future__ import annotations
import argparse
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime
from html import escape
# Root URL of the GitHub REST API; api_get() appends the request path to it.
API_BASE = "https://api.github.com"
# ---------------------------------------------------------------------------
# GitHub API helpers
# ---------------------------------------------------------------------------
def api_get(path: str, token: str, params: dict | None = None,
            list_key: str | None = None, timeout: float = 30.0) -> list | dict:
    """Authenticated GitHub API GET with automatic pagination.

    For list endpoints, pass list_key to extract items from the paginated
    wrapper response (e.g. list_key='jobs' for {'total_count': N, 'jobs': [...]}).
    When list_key is omitted, a non-list response is returned as-is (single object).

    Args:
        path: API path appended to API_BASE (e.g. "/repos/o/r/actions/runs").
        token: Bearer token sent in the Authorization header.
        params: Optional query parameters for the first request.
        list_key: Key holding the item list inside a wrapper object.
        timeout: Per-request socket timeout in seconds, so a stalled
            connection fails the step instead of hanging the CI job.

    Returns:
        The accumulated list of items across all pages, or the decoded
        JSON object for a single-object response.

    Raises:
        urllib.error.URLError: On HTTP/network failure or timeout.
    """
    url = f"{API_BASE}{path}"
    if params:
        url += "?" + urllib.parse.urlencode(params)
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "User-Agent": "ci-timings-report",
    }
    results: list = []
    while url:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read())
            link_header = resp.headers.get("Link", "")
        if list_key:
            results.extend(data.get(list_key, []))
        elif isinstance(data, list):
            results.extend(data)
        else:
            return data
        # Follow the rel="next" entry of the Link header, if any.
        next_url = None
        for part in link_header.split(","):
            part = part.strip()
            if 'rel="next"' in part:
                next_url = part[part.find("<") + 1:part.find(">")]
                break
        url = next_url
    return results
def parse_ts(ts: str | None) -> datetime | None:
    """Parse an ISO-8601 timestamp, accepting a trailing "Z" as UTC.

    Returns None when *ts* is None or empty.
    """
    if ts:
        iso_text = ts.replace("Z", "+00:00")
        return datetime.fromisoformat(iso_text)
    return None
def dur_s(started: str | None, completed: str | None) -> float | None:
    """Return the seconds elapsed between two timestamps.

    Returns None when either timestamp is missing.
    """
    begin = parse_ts(started)
    finish = parse_ts(completed)
    if begin and finish:
        return (finish - begin).total_seconds()
    return None
# ---------------------------------------------------------------------------
# Timings collection
# ---------------------------------------------------------------------------
def _normalize_job(raw: dict) -> dict:
    """Reduce a raw API job object to the fields the report uses.

    Each step and the job itself get a derived ``duration_s`` computed
    from their ``started_at`` / ``completed_at`` timestamps.
    """
    normalized_steps = [
        {
            "name": item.get("name", ""),
            "number": item.get("number", 0),
            "status": item.get("status", ""),
            "conclusion": item.get("conclusion", ""),
            "started_at": item.get("started_at"),
            "completed_at": item.get("completed_at"),
            "duration_s": dur_s(item.get("started_at"), item.get("completed_at")),
        }
        for item in (raw.get("steps") or [])
    ]
    job_started = raw.get("started_at")
    job_completed = raw.get("completed_at")
    return {
        "name": raw.get("name", "unknown"),
        "workflow_name": raw.get("_workflow_name", ""),
        "job_id": raw.get("id"),
        "status": raw.get("status", ""),
        "conclusion": raw.get("conclusion", ""),
        "started_at": job_started,
        "completed_at": job_completed,
        "duration_s": dur_s(job_started, job_completed),
        "html_url": raw.get("html_url", ""),
        "steps": normalized_steps,
    }
def collect_timings(token: str, repo: str, run_id: str, head_sha: str) -> dict:
    """Collect job/step timings for one CI run from the GitHub API.

    Steps performed:
      1. Fetch the orchestrator run and its direct jobs, dropping
         workflow-call placeholder jobs (a step name starting with
         "Run ./.github/") and jobs still queued or in progress.
      2. List runs for *head_sha* with event=workflow_call that were
         created no earlier than the orchestrator run.
      3. Fetch the jobs of each such run, tagging them with the run name.

    Returns a dict with ``run_id``, ``head_sha``, ``created_at`` and the
    normalized ``jobs`` sorted by start time.
    """
    unfinished = ("in_progress", "queued")
    owner, repo_name = repo.split("/")
    runs_path = f"/repos/{owner}/{repo_name}/actions/runs"

    # Orchestrator run info
    run_info = api_get(f"{runs_path}/{run_id}", token)
    created_at = run_info.get("created_at", "")

    # Orchestrator direct jobs
    def _is_placeholder(job: dict) -> bool:
        return any(
            step.get("name", "").startswith("Run ./.github/")
            for step in (job.get("steps") or [])
        )

    orchestrator_jobs = api_get(f"{runs_path}/{run_id}/jobs", token, list_key="jobs")
    direct = [
        job for job in orchestrator_jobs
        if not _is_placeholder(job) and job.get("status") not in unfinished
    ]

    # Sub-workflow runs
    query = {
        "head_sha": head_sha,
        "event": "workflow_call",
        "per_page": 100,
    }
    candidate_runs = api_get(runs_path, token, params=query, list_key="workflow_runs")
    sub_jobs_raw = []
    for sub_run in candidate_runs:
        if sub_run.get("created_at", "") < created_at:
            continue  # predates the orchestrator run
        sub_id = sub_run["id"]
        sub_name = sub_run.get("name", "")
        for job in api_get(f"{runs_path}/{sub_id}/jobs", token, list_key="jobs"):
            job["_workflow_name"] = sub_name
            job["_workflow_run_id"] = sub_id
            sub_jobs_raw.append(job)

    # Normalize + sort
    all_jobs = [
        normalized
        for normalized in map(_normalize_job, direct + sub_jobs_raw)
        if normalized["status"] not in unfinished
    ]
    all_jobs.sort(key=lambda job: job.get("started_at") or "")

    return {
        "run_id": run_id,
        "head_sha": head_sha,
        "created_at": created_at,
        "jobs": all_jobs,
    }
# ---------------------------------------------------------------------------
# Formatting helpers
# ---------------------------------------------------------------------------
def fmt_dur(seconds: float | None) -> str:
    """Format a duration in seconds as "12.3s", "4m" or "4m5s".

    Returns an empty string for None. Values that display as less than
    60 seconds keep one decimal place; longer values are rounded to whole
    seconds *before* being split into minutes and seconds, so the seconds
    part can never be rendered as "60" (e.g. 119.6 -> "2m", not "1m60s").
    """
    if seconds is None:
        return ""
    if round(seconds, 1) < 60:
        return f"{seconds:.1f}s"
    minutes, secs = divmod(round(seconds), 60)
    if secs == 0:
        return f"{minutes}m"
    return f"{minutes}m{secs}s"
def fmt_delta(current: float | None, baseline: float | None) -> tuple[str, str]:
    """Describe the change from *baseline* to *current*.

    Returns ``(text, css_class)`` where text looks like "+5.0s (+100.0%)"
    and css_class is "slower", "faster" or "neutral" (changes under one
    second are neutral). Missing inputs yield ``("", "neutral")``.
    """
    if current is None or baseline is None:
        return ("", "neutral")

    diff = current - baseline

    # Relative change; a zero baseline has no meaningful percentage.
    if baseline != 0:
        percent_text = f"{(diff / baseline) * 100:+.1f}%"
    elif diff > 0:
        percent_text = "new"
    else:
        percent_text = "0%"

    if abs(diff) < 1.0:
        css_class = "neutral"
    else:
        css_class = "slower" if diff > 0 else "faster"

    prefix = "" if diff < 0 else "+"
    return (f"{prefix}{diff:.1f}s ({percent_text})", css_class)
def nice_ticks(max_seconds: float, num_ticks: int = 8) -> list[int]:
    """Return evenly spaced tick values (in seconds) starting at 0.

    The spacing is the smallest "round" interval that is at least
    ``max_seconds / num_ticks``; beyond the largest preset, the raw
    interval (at least one hour) is used.
    """
    if max_seconds <= 0:
        return [0]
    presets = (5, 10, 15, 30, 60, 120, 180, 300, 600, 900, 1800, 3600, 7200)
    target = max_seconds / num_ticks
    spacing = next((value for value in presets if value >= target), None)
    if spacing is None:
        spacing = max(int(target), 3600)
    upper = int(max_seconds) + spacing + 1
    return list(range(0, upper, spacing))
def fmt_tick(seconds: int) -> str:
    """Format a whole-second axis tick as "45s", "3m" or "3m20s"."""
    minutes, remainder = divmod(seconds, 60)
    if seconds < 60:
        return f"{seconds}s"
    return f"{minutes}m{remainder}s" if remainder else f"{minutes}m"
# ---------------------------------------------------------------------------
# Stats computation
# ---------------------------------------------------------------------------
def compute_stats(timings: dict, baseline: dict | None = None) -> dict:
    """Compute run-level statistics, optionally compared with a baseline.

    Returns a dict with wall-clock and summed job time for the current
    run (``wall``, ``compute``), the same for the baseline (``bl_wall``,
    ``bl_compute``; None when unavailable), counts of jobs that got
    ``faster`` / ``slower`` / stayed ``unchanged`` (within one second) /
    have ``no_baseline``, and ``total_jobs``.
    """
    def _wall_seconds(job_list: list) -> float | None:
        # Earliest start to latest completion; None without both ends.
        begun = [t for t in (parse_ts(job.get("started_at")) for job in job_list) if t is not None]
        done = [t for t in (parse_ts(job.get("completed_at")) for job in job_list) if t is not None]
        if begun and done:
            return (max(done) - min(begun)).total_seconds()
        return None

    def _compute_seconds(job_list: list):
        return sum(job.get("duration_s") or 0 for job in job_list)

    current_jobs = timings.get("jobs", [])
    baseline_by_name = {job["name"]: job for job in (baseline or {}).get("jobs", [])}

    # Wall time / total compute for the current run
    wall = _wall_seconds(current_jobs)
    if wall is None:
        wall = 0
    compute = _compute_seconds(current_jobs)

    # Baseline wall/compute
    bl_wall = None
    bl_compute = None
    if baseline:
        previous_jobs = baseline.get("jobs", [])
        bl_wall = _wall_seconds(previous_jobs)
        bl_compute = _compute_seconds(previous_jobs)

    # Per-job deltas
    tally = {"faster": 0, "slower": 0, "unchanged": 0, "no_baseline": 0}
    for job in current_jobs:
        previous = baseline_by_name.get(job["name"])
        if not previous:
            tally["no_baseline"] += 1
            continue
        now_s = job.get("duration_s") or 0
        then_s = previous.get("duration_s") or 0
        if abs(now_s - then_s) < 1.0:
            tally["unchanged"] += 1
        elif now_s > then_s:
            tally["slower"] += 1
        else:
            tally["faster"] += 1

    return {
        "wall": wall,
        "compute": compute,
        "bl_wall": bl_wall,
        "bl_compute": bl_compute,
        "faster": tally["faster"],
        "slower": tally["slower"],
        "unchanged": tally["unchanged"],
        "no_baseline": tally["no_baseline"],
        "total_jobs": len(current_jobs),
    }
# ---------------------------------------------------------------------------
# HTML generation
# ---------------------------------------------------------------------------
# Stylesheet for the HTML report; generate_html() inlines it in a <style> element.
CSS = """
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
background: #0d1117; color: #e6edf3; line-height: 1.5; padding: 24px;
}
h1 { font-size: 24px; border-bottom: 1px solid #30363d; padding-bottom: 12px; margin-bottom: 8px; }
.meta { color: #8b949e; font-size: 13px; margin-bottom: 24px; }
h2 { font-size: 18px; margin: 32px 0 12px; }
/* Stats cards */
.stats { display: flex; gap: 12px; flex-wrap: wrap; margin-bottom: 24px; }
.stat-card {
background: #161b22; border: 1px solid #30363d; border-radius: 8px;
padding: 14px 18px; min-width: 140px;
}
.stat-label { font-size: 12px; color: #8b949e; text-transform: uppercase; letter-spacing: 0.5px; }
.stat-value { font-size: 22px; font-weight: 600; margin: 4px 0; }
.stat-delta { font-size: 13px; }
.faster { color: #3fb950; }
.slower { color: #f85149; }
.neutral { color: #8b949e; }
/* Gantt */
.gantt-wrap { overflow-x: auto; }
.gantt { min-width: 700px; }
.gantt-row { display: flex; align-items: center; height: 28px; }
.gantt-label {
width: 220px; padding-right: 12px; text-align: right;
font-size: 12px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;
}
.gantt-track { flex: 1; position: relative; height: 100%; border-left: 1px solid #21262d; }
.gantt-bar {
position: absolute; height: 18px; border-radius: 3px;
display: flex; align-items: center; justify-content: center;
font-size: 10px; color: transparent; overflow: hidden;
transition: color 0.15s;
}
.gantt-bar:hover { color: #fff; z-index: 10; }
.gantt-bar.current { background: #1f6feb; top: 5px; z-index: 2; }
.gantt-bar.baseline {
background: transparent; border: 1px dashed #8b949e; top: 2px; height: 24px; z-index: 1;
}
.gantt-axis { display: flex; height: 20px; position: relative; border-top: 1px solid #30363d; margin-top: 4px; }
.gantt-tick { position: absolute; font-size: 10px; color: #8b949e; transform: translateX(-50%); top: 4px; }
.gantt-tick::before { content: ''; position: absolute; top: -4px; left: 50%; width: 1px; height: 4px; background: #30363d; }
.legend { display: flex; gap: 16px; margin-top: 8px; font-size: 12px; color: #8b949e; }
.legend-swatch { display: inline-block; width: 16px; height: 10px; border-radius: 2px; margin-right: 4px; vertical-align: middle; }
/* Tables */
table { border-collapse: collapse; width: 100%; font-size: 13px; margin-bottom: 16px; }
th, td { border: 1px solid #30363d; padding: 6px 10px; text-align: left; }
th { background: #161b22; font-weight: 600; position: sticky; top: 0; }
tr:hover td { background: #161b22; }
.num { text-align: right; font-variant-numeric: tabular-nums; }
.job-name { font-weight: 500; }
/* Step details */
details { margin-bottom: 8px; background: #161b22; border: 1px solid #30363d; border-radius: 6px; }
summary { padding: 8px 12px; cursor: pointer; font-weight: 500; font-size: 14px; user-select: none; }
summary:hover { background: #21262d; }
details[open] summary { border-bottom: 1px solid #30363d; }
details table { border: none; margin: 0; }
details td, details th { font-size: 12px; }
/* Worst regressions */
.regressions { margin-bottom: 24px; }
.regressions table { font-size: 13px; }
.tag {
display: inline-block; padding: 1px 6px; border-radius: 3px; font-size: 11px; font-weight: 500;
}
.tag.slow { background: rgba(248,81,73,0.15); color: #f85149; }
.tag.fast { background: rgba(63,185,80,0.15); color: #3fb950; }
"""
def _gantt_bars(timings: dict, baseline: dict | None) -> str:
"""Render the gantt chart HTML.
Both current and baseline timelines are normalized to start at t=0
(relative to each run's earliest job start). The axis scale spans
0..max_end across both runs so bars are directly comparable.
"""
jobs = [j for j in timings.get("jobs", []) if j.get("started_at") and j.get("completed_at")]
bl_map = {j["name"]: j for j in (baseline or {}).get("jobs", [])}
# Current run: relative offsets from earliest start
cur_starts = [s for s in (parse_ts(j.get("started_at")) for j in jobs) if s is not None]
cur_ends = [e for e in (parse_ts(j.get("completed_at")) for j in jobs) if e is not None]
if not cur_starts or not cur_ends:
return '<p style="color:#8b949e">No timing data available.</p>'
cur_t0 = min(cur_starts)
cur_max = (max(cur_ends) - cur_t0).total_seconds()
# Baseline run: relative offsets from its earliest start
bl_t0 = None
bl_max = 0.0
bl_jobs_timed = []
for bl_j in bl_map.values():
s = parse_ts(bl_j.get("started_at"))
e = parse_ts(bl_j.get("completed_at"))
if s is not None and e is not None:
bl_jobs_timed.append((bl_j, s, e))
if bl_t0 is None or s < bl_t0:
bl_t0 = s
rel_end = (e - s).total_seconds() + (s - (bl_t0 or s)).total_seconds()
if bl_t0 is not None:
bl_max = max((e - bl_t0).total_seconds() for _, _, e in bl_jobs_timed) if bl_jobs_timed else 0
total_s = max(cur_max, bl_max)
if total_s <= 0:
total_s = 1
rows = []
for j in jobs:
s = parse_ts(j.get("started_at"))
e = parse_ts(j.get("completed_at"))
if s is None or e is None:
continue
left = (s - cur_t0).total_seconds() / total_s * 100
width = max((e - s).total_seconds() / total_s * 100, 0.5) # min 0.5% for visibility
dur = j.get("duration_s") or 0
bl = bl_map.get(j["name"])
bl_bar = ""
if bl and bl_t0 is not None:
bl_s = parse_ts(bl.get("started_at"))
bl_e = parse_ts(bl.get("completed_at"))
if bl_s is not None and bl_e is not None:
bl_left = (bl_s - bl_t0).total_seconds() / total_s * 100
bl_width = max((bl_e - bl_s).total_seconds() / total_s * 100, 0.5)
bl_dur = bl.get("duration_s") or 0
bl_bar = (
f'<div class="gantt-bar baseline" '
f'style="left:{bl_left:.2f}%;width:{bl_width:.2f}%" '
f'title="baseline: {fmt_dur(bl_dur)}"></div>'
)
name_display = escape(j["name"])
if j.get("workflow_name"):
name_display = f'{escape(j["workflow_name"])} / {escape(j["name"])}'
delta_info = ""
if bl and bl.get("duration_s") is not None:
d_text, d_cls = fmt_delta(dur, bl.get("duration_s"))
delta_info = f'{d_text}'
rows.append(
f'<div class="gantt-row">'
f'<div class="gantt-label" title="{escape(j["name"])}">{name_display}</div>'
f'<div class="gantt-track">'
f'{bl_bar}'
f'<div class="gantt-bar current" '
f'style="left:{left:.2f}%;width:{width:.2f}%" '
f'title="{escape(j["name"])}: {fmt_dur(dur)}{delta_info}"></div>'
f'</div></div>'
)
# Axis
ticks = nice_ticks(total_s)
tick_html = "".join(
f'<span class="gantt-tick" style="left:{(t / total_s * 100):.1f}%">{fmt_tick(t)}</span>'
for t in ticks
)
axis = f'<div class="gantt-axis"><div class="gantt-track">{tick_html}</div></div>'
legend = (
'<div class="legend">'
'<span><span class="legend-swatch" style="background:#1f6feb"></span>Current</span>'
)
if baseline:
legend += '<span><span class="legend-swatch" style="border:1px dashed #8b949e"></span>Baseline (main)</span>'
legend += '</div>'
return f'<div class="gantt-wrap"><div class="gantt">{"".join(rows)}{axis}</div></div>{legend}'
def _stats_cards(stats: dict) -> str:
    """Render the row of summary cards from a compute_stats() result.

    Wall time and total compute show a delta line when a baseline value
    is present; the remaining cards show the per-job counters.
    """
    def _delta_span(current_key: str, baseline_key: str) -> str:
        if stats[baseline_key] is None:
            return ""
        text, css = fmt_delta(stats[current_key], stats[baseline_key])
        return f'<span class="stat-delta {css}">{text}</span>'

    def _card(label: str, value, value_cls: str = "", trailer: str = "") -> str:
        classes = f"stat-value {value_cls}" if value_cls else "stat-value"
        return (
            f'<div class="stat-card"><span class="stat-label">{label}</span>'
            f'<div class="{classes}">{value}</div>{trailer}</div>'
        )

    wall_text = fmt_dur(stats["wall"])
    wall_delta = _delta_span("wall", "bl_wall")
    compute_text = fmt_dur(stats["compute"])
    compute_delta = _delta_span("compute", "bl_compute")

    cards = (
        _card("Wall Time", wall_text, trailer=wall_delta),
        _card("Total Compute", compute_text, trailer=compute_delta),
        _card("Jobs Faster", stats["faster"], "faster"),
        _card("Jobs Slower", stats["slower"], "slower"),
        _card("Unchanged", stats["unchanged"], "neutral"),
        _card("No Baseline", stats["no_baseline"], "neutral"),
    )
    return f'<div class="stats">{"".join(cards)}</div>'
def _job_table(timings: dict, baseline: dict | None) -> str:
bl_map = {j["name"]: j for j in (baseline or {}).get("jobs", [])}
rows = []
for j in timings.get("jobs", []):
dur = j.get("duration_s")
bl = bl_map.get(j["name"])
bl_dur = bl.get("duration_s") if bl else None
delta_text, delta_cls = fmt_delta(dur, bl_dur)
name = escape(j["name"])
if j.get("workflow_name"):
name = f'{escape(j["workflow_name"])} / {escape(j["name"])}'
concl = j.get("conclusion", "")
concl_icon = {"success": "", "failure": "", "skipped": ""}.get(concl, "?")
concl_cls = {"success": "faster", "failure": "slower", "skipped": "neutral"}.get(concl, "neutral")
rows.append(
f'<tr>'
f'<td class="job-name">{name}</td>'
f'<td class="num">{fmt_dur(dur)}</td>'
f'<td class="num">{fmt_dur(bl_dur)}</td>'
f'<td class="num {delta_cls}">{delta_text}</td>'
f'<td class="{concl_cls}" style="text-align:center">{concl_icon}</td>'
f'</tr>'
)
return (
'<table><thead><tr>'
'<th>Job</th><th class="num">Current</th><th class="num">Baseline</th>'
'<th class="num">Delta</th><th>Status</th>'
'</tr></thead><tbody>' + "".join(rows) + '</tbody></table>'
)
def _step_details(timings: dict, baseline: dict | None) -> str:
bl_map = {j["name"]: j for j in (baseline or {}).get("jobs", [])}
blocks = []
for j in timings.get("jobs", []):
if not j.get("steps"):
continue
bl = bl_map.get(j["name"], {})
bl_steps = {s["name"]: s for s in bl.get("steps", [])}
dur = j.get("duration_s") or 0
bl_dur = bl.get("duration_s") if bl else None
delta_text, delta_cls = fmt_delta(dur, bl_dur)
summary_text = f'{escape(j["name"])}{fmt_dur(dur)}'
if bl_dur is not None:
summary_text += f' <span class="{delta_cls}">({delta_text})</span>'
step_rows = []
for s in j["steps"]:
s_dur = s.get("duration_s")
bl_s = bl_steps.get(s["name"])
bl_s_dur = bl_s.get("duration_s") if bl_s else None
s_delta, s_cls = fmt_delta(s_dur, bl_s_dur)
step_rows.append(
f'<tr>'
f'<td>{escape(s["name"])}</td>'
f'<td class="num">{fmt_dur(s_dur)}</td>'
f'<td class="num">{fmt_dur(bl_s_dur)}</td>'
f'<td class="num {s_cls}">{s_delta}</td>'
f'</tr>'
)
blocks.append(
f'<details><summary>{summary_text}</summary>'
f'<table><thead><tr>'
'<th>Step</th><th class="num">Current</th><th class="num">Baseline</th>'
'<th class="num">Delta</th>'
f'</tr></thead><tbody>{"".join(step_rows)}</tbody></table>'
f'</details>'
)
return "".join(blocks) if blocks else '<p style="color:#8b949e">No step data available.</p>'
def _regressions(timings: dict, baseline: dict | None) -> str:
"""Show top 10 biggest absolute regressions/improvements across all steps."""
if not baseline:
return ""
bl_map = {j["name"]: j for j in baseline.get("jobs", [])}
deltas = [] # (abs_delta, job_name, step_name, current, baseline, is_slower)
for j in timings.get("jobs", []):
bl = bl_map.get(j["name"])
if not bl:
continue
bl_steps = {s["name"]: s for s in bl.get("steps", [])}
for s in j.get("steps", []):
bl_s = bl_steps.get(s["name"])
if not bl_s:
continue
cur = s.get("duration_s") or 0
bl_d = bl_s.get("duration_s") or 0
diff = cur - bl_d
if abs(diff) < 1.0:
continue
deltas.append((abs(diff), diff, j["name"], s["name"], cur, bl_d))
deltas.sort(key=lambda x: x[0], reverse=True)
top = deltas[:10]
if not top:
return ""
rows = []
for _, diff, job, step, cur, bl_d in top:
cls = "slower" if diff > 0 else "faster"
tag = f'<span class="tag {"slow" if diff > 0 else "fast"}">{"+" if diff > 0 else ""}{diff:.1f}s</span>'
rows.append(
f'<tr>'
f'<td class="job-name">{escape(job)}</td>'
f'<td>{escape(step)}</td>'
f'<td class="num">{fmt_dur(cur)}</td>'
f'<td class="num">{fmt_dur(bl_d)}</td>'
f'<td>{tag}</td>'
f'</tr>'
)
return (
'<div class="regressions">'
'<table><thead><tr>'
'<th>Job</th><th>Step</th><th class="num">Current</th><th class="num">Baseline</th>'
'<th>Delta</th>'
'</tr></thead><tbody>' + "".join(rows) + '</tbody></table>'
'</div>'
)
def generate_html(timings: dict, baseline: dict | None = None) -> str:
    """Build the complete standalone HTML report.

    Sections, in order: global stats, top regressions/improvements (only
    when a baseline is given and at least one step changed by a second
    or more), gantt chart, per-job table, per-step details.

    Header values come from JSON files, so they are coerced to strings
    and HTML-escaped before being interpolated.
    """
    stats = compute_stats(timings, baseline)
    sha_short = escape(str(timings.get("head_sha") or "")[:7])
    run_id = escape(str(timings.get("run_id", "?")))
    created = escape(str(timings.get("created_at") or ""))

    bl_info = ""
    if baseline:
        bl_sha = escape(str(baseline.get("head_sha") or "")[:7])
        bl_info = f' | Baseline: <code>{bl_sha}</code> (main)'

    html = (
        f'<!DOCTYPE html>\n<html lang="en">\n<head>\n'
        f'<meta charset="utf-8">\n'
        f'<meta name="viewport" content="width=device-width, initial-scale=1">\n'
        f'<title>CI Timing Report — {sha_short}</title>\n'
        f'<style>{CSS}</style>\n'
        f'</head>\n<body>\n'
        f'<h1>CI Timing Report</h1>\n'
        f'<div class="meta">Run <code>{run_id}</code> | SHA <code>{sha_short}</code>'
        f' | Generated {created}{bl_info}</div>\n'
    )

    html += '<h2>Global Stats</h2>\n'
    html += _stats_cards(stats)

    # Emit the heading only when the section has content, so the report
    # never shows a title followed by nothing.
    regressions_html = _regressions(timings, baseline) if baseline else ""
    if regressions_html:
        html += '<h2>Top Regressions &amp; Improvements</h2>\n'
        html += regressions_html

    html += '<h2>Gantt Chart</h2>\n'
    html += _gantt_bars(timings, baseline)

    html += '<h2>Per-Job Comparison</h2>\n'
    html += _job_table(timings, baseline)

    html += '<h2>Step Details</h2>\n'
    html += _step_details(timings, baseline)

    html += '</body>\n</html>\n'
    return html
# ---------------------------------------------------------------------------
# Markdown summary for $GITHUB_STEP_SUMMARY
# ---------------------------------------------------------------------------
def generate_summary(timings: dict, baseline: dict | None = None) -> str:
    """Build the Markdown summary table of global stats.

    Wall time and total compute rows include baseline and delta columns
    (left blank when no baseline value exists); the job-count rows show
    only the current value. The result ends with a newline.
    """
    stats = compute_stats(timings, baseline)

    def _metric_row(label: str, current_key: str, baseline_key: str) -> str:
        delta = ""
        if stats[baseline_key] is not None:
            delta, _ = fmt_delta(stats[current_key], stats[baseline_key])
        return (
            f"| {label} | {fmt_dur(stats[current_key])} "
            f"| {fmt_dur(stats[baseline_key])} | {delta} |"
        )

    lines = [
        "## CI Timing Summary\n",
        # Global stats table
        "| Metric | Current | Baseline | Delta |",
        "|--------|---------|----------|-------|",
        _metric_row("Wall time", "wall", "bl_wall"),
        _metric_row("Total compute", "compute", "bl_compute"),
        f"| Jobs faster | {stats['faster']} | — | — |",
        f"| Jobs slower | {stats['slower']} | — | — |",
        f"| Jobs unchanged | {stats['unchanged']} | — | — |",
        f"| Jobs without baseline | {stats['no_baseline']} | — | — |",
        "",
    ]
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def expect_env(var: str) -> str:
    """Return the value of environment variable *var*.

    Raises:
        ValueError: If the variable is unset or empty.
    """
    value = os.environ.get(var)
    if value:
        return value
    raise ValueError(f"missing environment variable {var}")
def main():
"""Command-line entry point: gather timings and write the report files.

Timings are read from the file given by ``--from-json`` when that flag is
set; otherwise they are fetched with ``collect_timings`` using the
``GITHUB_TOKEN``, ``GITHUB_REPOSITORY``, ``GITHUB_RUN_ID`` and
``GITHUB_SHA`` environment variables (``expect_env`` raises ``ValueError``
when one of them is missing or empty).

The baseline file is optional: when the path does not exist, ``baseline``
stays ``None`` and the HTML report and summary are generated without it.
"""
parser = argparse.ArgumentParser(description="Collect CI timings and generate HTML report")
parser.add_argument("--from-json", help="Read timings from JSON instead of API")
parser.add_argument("--baseline", default="ci-timings-baseline.json",
help="Baseline JSON path (default: ci-timings-baseline.json)")
parser.add_argument("--output", default="ci-timings-report.html",
help="HTML output path (default: ci-timings-report.html)")
parser.add_argument("--json-out", default="ci-timings.json",
help="JSON output path (default: ci-timings.json)")
parser.add_argument("--summary-out", default="ci-timings-summary.md",
help="Markdown summary output path (default: ci-timings-summary.md)")
args = parser.parse_args()
# Collect or load timings
if args.from_json:
with open(args.from_json, encoding="utf-8") as f:
timings = json.load(f)
else:
# All four variables are required for the API path; a missing one aborts here.
token = expect_env("GITHUB_TOKEN")
repo = expect_env("GITHUB_REPOSITORY")
run_id = expect_env("GITHUB_RUN_ID")
head_sha = expect_env("GITHUB_SHA")
timings = collect_timings(token, repo, run_id, head_sha)
# Save JSON
# NOTE(review): with indentation flattened in this view it is unclear whether
# this step runs only in the API branch or for both sources — confirm.
with open(args.json_out, "w", encoding="utf-8") as f:
json.dump(timings, f, indent=2)
print(f"Saved timings to {args.json_out} ({len(timings.get('jobs', []))} jobs)")
# Load baseline
baseline = None
if os.path.exists(args.baseline):
with open(args.baseline, encoding="utf-8") as f:
baseline = json.load(f)
print(f"Loaded baseline from {args.baseline}")
else:
print(f"No baseline file at {args.baseline} — generating current-only report")
# Generate HTML
html = generate_html(timings, baseline)
with open(args.output, "w", encoding="utf-8") as f:
f.write(html)
print(f"Generated HTML report: {args.output}")
# Write summary
summary = generate_summary(timings, baseline)
# Opened in append mode: content already in the summary file is kept and the
# new summary is added after it.
with open(args.summary_out, "a", encoding="utf-8") as f:
f.write(summary)
print(f"Wrote summary to {args.summary_out}")
# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
main()

View File

@@ -58,7 +58,7 @@ _DEFAULT_ROOTS = ["tests"]
#
# tests/e2e/ — .github/workflows/tests.yml :: e2e job
# tests/integration/ — historical; legacy --ignore flags
# tests/docker/ — .github/workflows/docker-publish.yml ::
# tests/docker/ — .github/workflows/docker.yml ::
# build-amd64 job (runs against the freshly-loaded
# nousresearch/hermes-agent:test image, via
# ``HERMES_TEST_IMAGE`` so the fixture skips
@@ -81,7 +81,7 @@ _DURATIONS_FILE = "test_durations.json"
def _count_tests(
files: List[Path], repo_root: Path, pytest_passthrough: List[str]
files: List[Path], repo_root: Path
) -> dict[Path, int]:
"""Run ``pytest --co -q`` once to count individual tests per file.
@@ -113,7 +113,6 @@ def _count_tests(
"--co", "-q",
*ignore_args,
*[str(f) for f in files],
*pytest_passthrough,
]
try:
result = subprocess.run(
@@ -697,7 +696,7 @@ def main() -> int:
return 1
# Count individual tests per file via a single pytest --co pass.
test_counts = _count_tests(files, repo_root, pytest_passthrough)
test_counts = _count_tests(files, repo_root)
total_tests = sum(test_counts.values())
# Apply slicing if requested — distribute files across CI jobs by

View File

@@ -410,8 +410,8 @@ class TestSendUpdate:
assert created["coro"] is not None
assert created["coro"].cr_frame is None
# Only count warnings about THIS test's coroutine; other tests in the
# same xdist worker (or stdlib mock internals) may emit unrelated
# Only count warnings about THIS test's coroutine; other tests
# may emit unrelated
# "coroutine was never awaited" warnings that bleed through.
runtime_warnings = [
w for w in caught

View File

@@ -20,8 +20,7 @@ def _no_unawaited_warnings(caught, *, coro_name: str = "") -> bool:
"""Return True if no "X was never awaited" warning slipped through.
When *coro_name* is provided, only warnings naming that coroutine are
counted — xdist workers may emit unrelated unawaited-coroutine warnings
(e.g. ``AsyncMockMixin._execute_mock_call``) from concurrent tests.
counted
"""
bad = [
w for w in caught

View File

@@ -39,10 +39,9 @@ def _write_skill(skills_dir: Path, name: str, description: str = "") -> Path:
def hermes_home(monkeypatch):
"""Isolate HERMES_HOME for ``reload_skills`` tests.
Rather than popping cache-bearing modules from ``sys.modules`` (which
races against pytest-xdist's parallel workers), we monkeypatch the
module-level ``HERMES_HOME`` / ``SKILLS_DIR`` constants in place so the
isolation is local to this fixture's scope.
Rather than popping cache-bearing modules from ``sys.modules``,
we monkeypatch the module-level ``HERMES_HOME`` / ``SKILLS_DIR``
constants in place so the isolation is local to this fixture's scope.
"""
td = tempfile.mkdtemp(prefix="hermes-reload-skills-")
monkeypatch.setenv("HERMES_HOME", td)

View File

@@ -13,7 +13,7 @@ from hermes_cli import main as hermes_main
# ---------------------------------------------------------------------------
# Module isolation: _import_cli() wipes tools.* / cli / run_agent from
# sys.modules so it can re-import cli fresh. Without cleanup the wiped
# modules leak into subsequent tests on the same xdist worker, breaking
# modules leak into subsequent tests, breaking
# mock patches that target "tools.file_tools._get_file_ops" etc.
# ---------------------------------------------------------------------------

View File

@@ -184,8 +184,7 @@ class TestGatewayQuickCommands:
from gateway.run import GatewayRunner
# Ensure redaction is active regardless of host HERMES_REDACT_SECRETS state
# or test ordering (the module snapshots env at import time, so other
# tests in the same xdist worker can flip the flag).
# or test ordering
monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)
runner = GatewayRunner.__new__(GatewayRunner)

View File

@@ -8,15 +8,13 @@ Override the image with ``HERMES_TEST_IMAGE`` env var to point at a pre-built
image (faster local iteration); otherwise the ``built_image`` fixture builds
the repo's Dockerfile once per session.
Docker tests need longer timeouts than the suite default (30s), so every
test under this directory is granted a 180s default via
``pytest.mark.timeout`` applied at collection time.
"""
from __future__ import annotations
import os
import shutil
import subprocess
import time
from collections.abc import Iterator
import pytest
@@ -43,11 +41,9 @@ def pytest_collection_modifyitems(config, items): # noqa: D401 - pytest hook
skip_docker = pytest.mark.skip(
reason="Docker not available or daemon not running",
)
extend_timeout = pytest.mark.timeout(180)
for item in items:
if "tests/docker/" not in str(item.fspath).replace(os.sep, "/"):
continue
item.add_marker(extend_timeout)
if not docker_ok:
item.add_marker(skip_docker)
@@ -137,3 +133,181 @@ def docker_exec_sh(
return docker_exec(
container, "sh", "-c", command, user=user, timeout=timeout,
)
def wait_for_container_ready(
    container: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> None:
    """Block until the container reports that s6 cont-init has finished.

    Readiness is detected by reading ``/opt/data/logs/container-boot.log``
    inside the container and looking for ``profile=default``. That entry is
    written by the 02-reconcile-profiles cont-init script on every boot,
    after stage2-hook.sh has completed, so seeing it means the whole
    cont-init chain has run.

    Polling replaces a fixed ``time.sleep()``: fast machines return early and
    slow ones get the full ``deadline_s`` budget.

    Args:
        container: Name of the container to probe.
        deadline_s: Total time budget in seconds.
        interval_s: Pause between two probes in seconds.

    Raises:
        TimeoutError: If the marker has not appeared once the deadline passes.
    """
    read_boot_log = "cat /opt/data/logs/container-boot.log 2>/dev/null"
    give_up_at = time.monotonic() + deadline_s
    while time.monotonic() < give_up_at:
        result = docker_exec(container, "sh", "-c", read_boot_log, timeout=5)
        booted = result.returncode == 0 and "profile=default" in result.stdout
        if booted:
            return
        time.sleep(interval_s)
    raise TimeoutError(
        f"container {container} did not finish cont-init within {deadline_s}s"
    )
def start_container(
    image: str,
    name: str,
    *env: str,
    cmd: str = "sleep infinity",
    timeout: int = 60,
) -> str:
    """Start a detached container and wait for cont-init to finish.

    Args:
        image: Docker image to run.
        name: Container name (cleanup is the caller's responsibility —
            typically handled by the ``container_name`` fixture).
        env: Env vars as ``KEY=VALUE`` strings, each passed via ``-e``.
        cmd: Container CMD (default ``sleep infinity``). Split with shell
            quoting rules, so ``sh -c 'echo hi'`` yields three arguments.
        timeout: ``docker run`` subprocess timeout in seconds.

    Returns:
        The container name.

    Raises:
        subprocess.CalledProcessError: If ``docker run`` exits non-zero.
        subprocess.TimeoutExpired: If ``docker run`` exceeds ``timeout``.
        TimeoutError: If the container never finishes cont-init within the
            default budget of ``wait_for_container_ready``.
    """
    # Local import keeps this helper self-contained; shlex is stdlib.
    import shlex

    args = ["docker", "run", "-d", "--name", name]
    for assignment in env:
        args.extend(["-e", assignment])
    # shlex.split keeps quoted arguments together; a plain str.split() would
    # break a command such as "sh -c 'echo hi'" into four pieces.
    args.extend([image, *shlex.split(cmd)])
    subprocess.run(args, check=True, capture_output=True, timeout=timeout)
    wait_for_container_ready(name)
    return name
def restart_container(container: str, timeout: int = 60) -> None:
    """Run ``docker restart`` on *container* and wait until it is ready again.

    The boot log used as the readiness signal
    (``/opt/data/logs/container-boot.log``) is append-only and survives a
    restart. It is therefore emptied first; otherwise
    :func:`wait_for_container_ready` would match the ``profile=default`` line
    left by the previous boot and return before cont-init has run again.

    Args:
        container: Name of the container to restart.
        timeout: ``docker restart`` subprocess timeout in seconds.
    """
    # Best effort: the shell command swallows its own errors with "|| true".
    clear_boot_log = (
        "truncate -s 0 /opt/data/logs/container-boot.log 2>/dev/null || true"
    )
    docker_exec(container, "sh", "-c", clear_boot_log, user="root", timeout=5)

    restart_argv = ["docker", "restart", container]
    subprocess.run(
        restart_argv, check=True, capture_output=True, timeout=timeout,
    )
    wait_for_container_ready(container)
def poll_container(
    container: str,
    probe: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.5,
    user: str = "hermes",
) -> tuple[bool, str]:
    """Run the shell command ``probe`` in the container until it succeeds.

    The probe is retried every ``interval_s`` seconds until it exits 0 or
    ``deadline_s`` seconds have passed. Handy for waiting on a process to
    appear, a port to open, a file to contain a string, and so on.

    Returns:
        ``(True, stdout)`` of the first successful probe, or
        ``(False, stdout)`` of the last failed probe once the deadline has
        passed (an empty string when no probe ran at all).
    """
    stop_at = time.monotonic() + deadline_s
    stdout = ""
    while time.monotonic() < stop_at:
        outcome = docker_exec_sh(container, probe, user=user, timeout=10)
        stdout = outcome.stdout
        if outcome.returncode != 0:
            time.sleep(interval_s)
            continue
        return True, stdout
    return False, stdout
def wait_for_path(
    container: str,
    path: str,
    *,
    kind: str = "f",
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> bool:
    """Wait until ``test -<kind> <path>`` succeeds inside the container.

    Args:
        container: Name of the container to probe.
        path: Path to check; it is inserted into the shell command as-is.
        kind: Flag for ``test``: ``'f'`` file, ``'d'`` directory, ``'e'``
            any existing entry.
        deadline_s: Total time budget in seconds.
        interval_s: Pause between two probes in seconds.

    Returns:
        ``True`` as soon as the check passes, ``False`` on timeout.
    """
    check = f"test -{kind} {path}"
    outcome = poll_container(
        container,
        check,
        deadline_s=deadline_s,
        interval_s=interval_s,
    )
    # poll_container returns (success, stdout); only the flag matters here.
    return outcome[0]
def wait_for_log(
    container: str,
    log_path: str,
    needle: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> str:
    """Poll until a log file inside the container contains ``needle``.

    Args:
        container: Name of the container to probe.
        log_path: Path of the file to read with ``cat``; inserted into the
            shell command as-is.
        needle: Substring that must appear in the file.
        deadline_s: Total time budget in seconds.
        interval_s: Pause between two reads in seconds.

    Returns:
        The full log content of the read in which ``needle`` was found.

    Raises:
        AssertionError: If ``needle`` was not seen before the deadline. The
            message includes the last content that was read successfully so
            the failure can be diagnosed from the test report alone.
    """
    end = time.monotonic() + deadline_s
    last = ""
    while time.monotonic() < end:
        r = docker_exec_sh(
            container, f"cat {log_path} 2>/dev/null", timeout=5,
        )
        # A failed read (file not there yet) keeps the previous content.
        if r.returncode == 0:
            last = r.stdout
            if needle in last:
                return last
        time.sleep(interval_s)
    raise AssertionError(
        f"Didn't see `{needle}` in {log_path} within {deadline_s} in container {container}"
        f"; last observed content: {last!r}"
    )
def wait_for_docker_logs(
    container: str, needle: str, *, deadline_s: float = 30.0, interval_s: float = 0.5,
) -> str:
    """Poll ``docker logs`` until ``needle`` appears or the deadline expires.

    Args:
        container: Name of the container whose logs are read.
        needle: Substring that must appear in the combined stdout + stderr.
        deadline_s: Total time budget in seconds.
        interval_s: Pause between two reads in seconds.

    Returns:
        The full docker logs (stdout followed by stderr) on success.

    Raises:
        AssertionError: If ``needle`` was not seen before the deadline. The
            message includes the tail of the last logs read, so the failure
            can be diagnosed from the test report alone.
    """
    tail_chars = 2000  # bound the size of the failure message
    end = time.monotonic() + deadline_s
    last = ""
    while time.monotonic() < end:
        r = subprocess.run(
            ["docker", "logs", container],
            capture_output=True, text=True, timeout=10,
        )
        # Search both streams, concatenated.
        last = r.stdout + r.stderr
        if needle in last:
            return last
        time.sleep(interval_s)
    raise AssertionError(
        f"Didn't see `{needle}` in docker logs within {deadline_s} in container {container}"
        f"; last output (tail): {last[-tail_chars:]!r}"
    )

View File

@@ -0,0 +1,69 @@
"""Runtime smoke test for Docker config-schema migration on boot.
Build the real image and verify: a config.yaml present in $HERMES_HOME
is migrated by docker_config_migrate.py on boot, running as the hermes
user.
"""
from __future__ import annotations
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container
def test_config_migration_runs_on_boot(
    built_image: str, container_name: str,
) -> None:
    """Boot the image and check the config-migration preconditions.

    Three things are asserted after a default boot: ``config.yaml`` exists in
    ``/opt/data``, the migration script is shipped in the image, and
    ``config.yaml`` is owned by ``hermes`` (taken as evidence the boot-time
    steps ran as the hermes user rather than root).
    """
    start_container(built_image, container_name)

    # config.yaml should have been seeded by stage2 when it was not present.
    seeded = docker_exec_sh(
        container_name,
        "test -f /opt/data/config.yaml && echo EXISTS || echo MISSING",
        timeout=10,
    )
    assert "EXISTS" in seeded.stdout, (
        f"config.yaml not found in $HERMES_HOME: {seeded.stdout}"
    )

    # The migration script itself must be part of the image.
    script = docker_exec_sh(
        container_name,
        "test -f /opt/hermes/scripts/docker_config_migrate.py && "
        "echo SCRIPT_EXISTS || echo SCRIPT_MISSING",
        timeout=10,
    )
    assert "SCRIPT_EXISTS" in script.stdout, (
        f"docker_config_migrate.py not found in image: {script.stdout}"
    )

    # Ownership by hermes is the signal that nothing rewrote the file as root.
    stat = docker_exec_sh(
        container_name,
        'stat -c "%U" /opt/data/config.yaml',
        timeout=10,
    )
    owner = stat.stdout.strip()
    assert owner == "hermes", (
        f"config.yaml not owned by hermes (migration may have run as root): "
        f"{owner}"
    )
def test_config_migration_opt_out_env_var_respected(
    built_image: str, container_name: str,
) -> None:
    """Boot with ``HERMES_SKIP_CONFIG_MIGRATION=1`` and check seeding still works.

    Seeding of ``config.yaml`` is separate from migration, so the file must
    exist even when migration is opted out.
    """
    opt_out = "HERMES_SKIP_CONFIG_MIGRATION=1"
    start_container(built_image, container_name, opt_out)

    seeded = docker_exec_sh(
        container_name,
        "test -f /opt/data/config.yaml && echo EXISTS || echo MISSING",
        timeout=10,
    )
    assert "EXISTS" in seeded.stdout, (
        f"config.yaml should be seeded even with migration skipped: {seeded.stdout}"
    )

View File

@@ -21,7 +21,7 @@ import time
import pytest
from tests.docker.conftest import docker_exec, docker_exec_sh
from tests.docker.conftest import docker_exec, docker_exec_sh, wait_for_path, wait_for_log, wait_for_docker_logs, poll_container
def _docker(*args: str, **kw) -> subprocess.CompletedProcess[str]:
@@ -32,41 +32,8 @@ def _docker(*args: str, **kw) -> subprocess.CompletedProcess[str]:
)
def _exec(container: str, *args: str, timeout: int = 30) -> subprocess.CompletedProcess[str]:
return docker_exec(container, *args, timeout=timeout)
def _sh(container: str, cmd: str, timeout: int = 30) -> subprocess.CompletedProcess[str]:
return docker_exec_sh(container, cmd, timeout=timeout)
def _wait_for_path(
container: str,
path: str,
*,
kind: str = "f",
deadline_s: float = 30.0,
interval_s: float = 0.25,
) -> bool:
"""Poll `test -<kind> <path>` inside container until success or timeout.
`kind` is the `test` flag: 'f' for file, 'd' for directory, 'e' for
existence. Returns True on success, False on timeout. Strictly
better than a fixed `time.sleep()` because:
* we don't wait the full budget when the path appears early, and
* the test fails with a precise "waited N seconds" assertion
instead of a confusing one-line failure mid-test when the
sleep was too short.
"""
end = time.monotonic() + deadline_s
while time.monotonic() < end:
r = _sh(container, f"test -{kind} {path}", timeout=5)
if r.returncode == 0:
return True
time.sleep(interval_s)
return False
def _wait_for_reconcile_log_mention(
container: str,
@@ -76,23 +43,8 @@ def _wait_for_reconcile_log_mention(
interval_s: float = 0.25,
) -> str:
"""Poll until /opt/data/logs/container-boot.log mentions `profile`.
Returns the matching log content on success. On timeout, returns
the last observed contents so the assertion can render a
meaningful diagnostic. The container-boot.log is the explicit
signal that the reconciler has finished — much more reliable
than a fixed sleep that hopes 8 seconds is enough.
"""
end = time.monotonic() + deadline_s
last = ""
while time.monotonic() < end:
r = _sh(container, "cat /opt/data/logs/container-boot.log", timeout=5)
if r.returncode == 0:
last = r.stdout
if f"profile={profile}" in last:
return last
time.sleep(interval_s)
return last
return wait_for_log(container, "/opt/data/logs/container-boot.log", f"profile={profile}")
@pytest.fixture
@@ -117,23 +69,7 @@ def restart_container(request, built_image: str):
# it starts issuing commands. The reconciler always writes one
# 'default' line on every boot (PR #30136 item I1) — that's our
# readiness signal.
deadline = time.monotonic() + 30.0
while time.monotonic() < deadline:
r = _docker(
"exec", "-u", "hermes", name, "sh", "-c",
"cat /opt/data/logs/container-boot.log 2>/dev/null",
timeout=5,
)
if r.returncode == 0 and "profile=default" in r.stdout:
break
time.sleep(0.25)
else:
# Defensive: surface a timeout from the fixture itself so the
# test failure points at "container never finished cont-init"
# rather than mid-test where the symptom would be obscure.
raise RuntimeError(
f"container {name} did not finish cont-init within 30s"
)
wait_for_log(name, "/opt/data/logs/container-boot.log", "profile=default")
yield name
_docker("rm", "-f", name)
_docker("volume", "rm", "-f", volume)
@@ -145,20 +81,14 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
# Create the profile + start its gateway. The Phase 4 hooks
# register the s6 service slot during create and the dispatch
# path brings it up via s6-svc -u.
r = _exec(container, "hermes", "profile", "create", "coder")
r = docker_exec(container, "hermes", "profile", "create", "coder")
assert r.returncode == 0, f"profile create failed: {r.stderr}"
r = _exec(container, "hermes", "-p", "coder", "gateway", "start", timeout=60)
r = docker_exec(container, "hermes", "-p", "coder", "gateway", "start", timeout=60)
assert r.returncode == 0, f"gateway start failed: {r.stderr}"
# Give the service time to actually come up under supervision.
deadline = time.monotonic() + 15.0
while time.monotonic() < deadline:
r = _sh(container, "/command/s6-svstat /run/service/gateway-coder")
if r.returncode == 0 and "up " in r.stdout:
break
time.sleep(0.5)
assert "up " in r.stdout, f"gateway never came up pre-restart: {r.stdout!r}"
poll_container(container, "/command/s6-svstat /run/service/gateway-coder | grep -q 'up '")
# Persist state so the reconciler will treat the slot as 'running'
# post-restart. The gateway process itself writes gateway_state.json
@@ -170,7 +100,7 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
"p = pathlib.Path('/opt/data/profiles/coder/gateway_state.json'); "
"p.write_text(json.dumps({'gateway_state': 'running', 'timestamp': 1}))"
)
_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
docker_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
# Restart. After this, /run/service/ is empty until cont-init.d
# runs the reconciler. We need to wait long enough for the
@@ -179,25 +109,22 @@ def test_running_gateway_survives_container_restart(restart_container: str) -> N
# restored slot. Polling the boot log gives us the first signal.
_docker("restart", container, timeout=60).check_returncode()
log = _wait_for_reconcile_log_mention(container, "coder", deadline_s=30.0)
assert "profile=coder" in log, (
f"reconciler never logged coder after restart: {log!r}"
)
assert "action=started" in log
# Service slot exists.
assert _wait_for_path(
assert wait_for_path(
container, "/run/service/gateway-coder", kind="d", deadline_s=10.0,
), "slot not recreated after restart"
# No `down` marker — we asked for auto-start.
r = _sh(container, "test -f /run/service/gateway-coder/down")
r = docker_exec_sh(container, "test -f /run/service/gateway-coder/down")
assert r.returncode != 0, "down marker present despite prior_state=running"
def test_stopped_gateway_stays_stopped_after_restart(restart_container: str) -> None:
container = restart_container
_exec(container, "hermes", "profile", "create", "writer").check_returncode()
docker_exec(container, "hermes", "profile", "create", "writer").check_returncode()
# Write 'stopped' directly so we don't have to race against the
# gateway's own state writes.
@@ -206,19 +133,18 @@ def test_stopped_gateway_stays_stopped_after_restart(restart_container: str) ->
"p = pathlib.Path('/opt/data/profiles/writer/gateway_state.json'); "
"p.write_text(json.dumps({'gateway_state': 'stopped', 'timestamp': 1}))"
)
_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
docker_exec(container, "python3", "-c", write_state, timeout=10).check_returncode()
_docker("restart", container, timeout=60).check_returncode()
log = _wait_for_reconcile_log_mention(container, "writer", deadline_s=30.0)
assert "profile=writer" in log
_wait_for_reconcile_log_mention(container, "writer", deadline_s=30.0)
# Slot exists.
assert _wait_for_path(
assert wait_for_path(
container, "/run/service/gateway-writer", kind="d", deadline_s=10.0,
)
# Down marker present.
r = _sh(container, "test -f /run/service/gateway-writer/down")
r = docker_exec_sh(container, "test -f /run/service/gateway-writer/down")
assert r.returncode == 0, "down marker missing despite prior_state=stopped"
@@ -229,7 +155,7 @@ def test_stale_gateway_pid_cleaned_up_on_restart(restart_container: str) -> None
process-mismatch checks."""
container = restart_container
_exec(container, "hermes", "profile", "create", "ghost").check_returncode()
docker_exec(container, "hermes", "profile", "create", "ghost").check_returncode()
# Stamp stale runtime files alongside a 'running' state so the
# reconciler walks this profile.
@@ -240,15 +166,15 @@ def test_stale_gateway_pid_cleaned_up_on_restart(restart_container: str) -> None
"(p / 'gateway.pid').write_text(json.dumps({'pid': 99999, 'host': 'old'})); "
"(p / 'processes.json').write_text('[]')"
)
_exec(container, "python3", "-c", stamp, timeout=10).check_returncode()
docker_exec(container, "python3", "-c", stamp, timeout=10).check_returncode()
_docker("restart", container, timeout=60).check_returncode()
_wait_for_reconcile_log_mention(container, "ghost", deadline_s=30.0)
# Stale runtime files swept.
r = _sh(container, "test -f /opt/data/profiles/ghost/gateway.pid")
r = docker_exec_sh(container, "test -f /opt/data/profiles/ghost/gateway.pid")
assert r.returncode != 0, "stale gateway.pid survived restart"
r = _sh(container, "test -f /opt/data/profiles/ghost/processes.json")
r = docker_exec_sh(container, "test -f /opt/data/profiles/ghost/processes.json")
assert r.returncode != 0, "stale processes.json survived restart"
@@ -271,37 +197,20 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
"""
container = restart_container
_exec(container, "hermes", "profile", "create", "live").check_returncode()
r = _exec(container, "hermes", "-p", "live", "gateway", "start", timeout=60)
docker_exec(container, "hermes", "profile", "create", "live").check_returncode()
r = docker_exec(container, "hermes", "-p", "live", "gateway", "start", timeout=60)
assert r.returncode == 0, f"gateway start failed: {r.stderr}"
# Wait for the gateway to actually come up under supervision AND write
# its own gateway_state=running (we do NOT stamp it ourselves).
deadline = time.monotonic() + 20.0
while time.monotonic() < deadline:
r = _sh(container, "/command/s6-svstat /run/service/gateway-live")
if r.returncode == 0 and "up " in r.stdout:
break
time.sleep(0.5)
assert "up " in r.stdout, f"gateway never came up pre-restart: {r.stdout!r}"
poll_container(container, "/command/s6-svstat /run/service/gateway-live | grep -q 'up '")
# Confirm the gateway persisted its own 'running' state (sanity: we're
# testing the real write path, not a stamped fixture).
deadline = time.monotonic() + 15.0
state = ""
while time.monotonic() < deadline:
r = _sh(
container,
"cat /opt/data/profiles/live/gateway_state.json 2>/dev/null",
)
if r.returncode == 0 and '"gateway_state"' in r.stdout:
state = r.stdout
if '"running"' in state:
break
time.sleep(0.5)
assert '"running"' in state, (
f"gateway never persisted running state pre-restart: {state!r}"
)
# Confirm the gateway persisted its own 'running' state. The gateway has
# to boot Python, discover ~50 plugins, construct GatewayRunner, and
# reach write_runtime_status("running") at run.py start() — on a loaded
# CI runner with parallel docker test containers competing for CPU, this
# can take a while.
wait_for_log(container, "/opt/data/profiles/live/gateway_state.json", '"running"', deadline_s=45, interval_s=1)
# Real restart — Docker sends SIGTERM to PID 1; s6 propagates it to the
# supervised gateway. No planned-stop marker is written (this is not an
@@ -309,9 +218,6 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
_docker("restart", container, timeout=60).check_returncode()
log = _wait_for_reconcile_log_mention(container, "live", deadline_s=30.0)
assert "profile=live" in log, (
f"reconciler never logged live after restart: {log!r}"
)
# The crux: the reconciler must AUTO-START it, not register it down.
assert "action=started" in log, (
f"gateway did NOT auto-start after a real restart (issue #42675 "
@@ -319,10 +225,10 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
)
# Slot recreated, and NO down marker (we expect auto-start).
assert _wait_for_path(
assert wait_for_path(
container, "/run/service/gateway-live", kind="d", deadline_s=10.0,
), "slot not recreated after restart"
r = _sh(container, "test -f /run/service/gateway-live/down")
r = docker_exec_sh(container, "test -f /run/service/gateway-live/down")
assert r.returncode != 0, (
"down marker present despite a live gateway being restarted — "
"the signal-initiated shutdown wrongly persisted 'stopped' (#42675)"

View File

@@ -13,39 +13,16 @@ the realistic runtime context. See the conftest module docstring.
from __future__ import annotations
import json
import subprocess
import time
from tests.docker.conftest import docker_exec, docker_exec_sh
def _poll(container: str, probe: str, *, deadline_s: float = 30.0,
interval_s: float = 0.5) -> tuple[bool, str]:
"""Repeatedly run ``probe`` inside the container until it exits 0 or
``deadline_s`` elapses. Returns (success, last stdout)."""
end = time.monotonic() + deadline_s
last = ""
while time.monotonic() < end:
r = docker_exec_sh(container, probe, timeout=10)
last = r.stdout
if r.returncode == 0:
return True, last
time.sleep(interval_s)
return False, last
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container, poll_container
def test_dashboard_not_running_by_default(
built_image: str, container_name: str,
) -> None:
"""Without HERMES_DASHBOARD, no dashboard process should be running."""
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"sleep", "60"],
check=True, capture_output=True, timeout=30,
)
# Give the entrypoint enough time to finish bootstrap; if a dashboard
# were going to start it'd be visible by now.
time.sleep(5)
start_container(built_image, container_name, cmd="sleep 60")
r = docker_exec(container_name, "pgrep", "-f", "hermes dashboard")
# pgrep exits non-zero when no match found
assert r.returncode != 0, (
@@ -64,12 +41,7 @@ def test_dashboard_slot_reports_down_when_disabled(
writes a `down` marker file in the live service-dir when
HERMES_DASHBOARD is unset, so the slot reflects reality.
"""
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"sleep", "60"],
check=True, capture_output=True, timeout=30,
)
time.sleep(5)
start_container(built_image, container_name, cmd="sleep 60")
# /command/ isn't on PATH for docker-exec sessions, so call by
# absolute path.
r = docker_exec(
@@ -86,56 +58,42 @@ def test_dashboard_slot_reports_up_when_enabled(
built_image: str, container_name: str,
) -> None:
"""Symmetry: with HERMES_DASHBOARD=1, s6-svstat reports the slot as up."""
subprocess.run(
["docker", "run", "-d", "--name", container_name,
"-e", "HERMES_DASHBOARD=1",
# The default dashboard host is 0.0.0.0, which now engages the
# OAuth auth gate. Without a provider registered (no
# HERMES_DASHBOARD_OAUTH_CLIENT_ID in this test env), start_server
# would fail closed and the slot would never come up. Pin the
# explicit insecure opt-in to keep this test focused on the s6
# supervision contract, not the auth gate.
"-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
"-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
built_image, "sleep", "120"],
check=True, capture_output=True, timeout=30,
# The default dashboard host is 0.0.0.0, which now engages the
# OAuth auth gate. Without a provider registered (no
# HERMES_DASHBOARD_OAUTH_CLIENT_ID in this test env), start_server
# would fail closed and the slot would never come up. Pin the
# explicit insecure opt-in to keep this test focused on the s6
# supervision contract, not the auth gate.
start_container(
built_image, container_name,
"HERMES_DASHBOARD=1",
"HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
"HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
cmd="sleep 120",
)
# uvicorn takes a moment to bind; poll svstat.
deadline = time.monotonic() + 30.0
last = ""
while time.monotonic() < deadline:
r = docker_exec(
container_name, "/command/s6-svstat", "/run/service/dashboard",
)
last = r.stdout
if r.returncode == 0 and "up " in r.stdout:
return # success
time.sleep(0.5)
raise AssertionError(
f"Dashboard slot never reached up state; last svstat: {last!r}"
)
poll_container(container_name, "/command/s6-svstat /run/service/dashboard | grep -q 'up '")
def test_dashboard_opt_in_starts(
built_image: str, container_name: str,
) -> None:
"""With HERMES_DASHBOARD=1, a dashboard process should be visible."""
subprocess.run(
["docker", "run", "-d", "--name", container_name,
"-e", "HERMES_DASHBOARD=1",
# Default bind is 0.0.0.0, which engages the auth gate. Register the
# bundled basic password provider so the gate has a provider and the
# dashboard binds (vs fail-closed). Keeps the test focused on s6
# supervision, not auth.
"-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
"-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
built_image, "sleep", "120"],
check=True, capture_output=True, timeout=30,
# Default bind is 0.0.0.0, which engages the auth gate. Register the
# bundled basic password provider so the gate has a provider and the
# dashboard binds (vs fail-closed). Keeps the test focused on s6
# supervision, not auth.
start_container(
built_image, container_name,
"HERMES_DASHBOARD=1",
"HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
"HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
cmd="sleep 120",
)
# Poll for the dashboard subprocess to appear — the entrypoint
# backgrounds it and bootstrap (skills sync etc.) can take a few
# seconds before the python process actually launches.
ok, _ = _poll(
ok, _ = poll_container(
container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
)
assert ok, "Dashboard should be running with HERMES_DASHBOARD=1"
@@ -145,22 +103,22 @@ def test_dashboard_port_override(
built_image: str, container_name: str,
) -> None:
"""HERMES_DASHBOARD_PORT changes the dashboard's listen port."""
subprocess.run(
["docker", "run", "-d", "--name", container_name,
"-e", "HERMES_DASHBOARD=1", "-e", "HERMES_DASHBOARD_PORT=9120",
# Default bind is 0.0.0.0; register the basic password provider so
# the auth gate has a provider and the dashboard binds. See
# test_dashboard_slot_reports_up_when_enabled for the full rationale.
"-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
"-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
built_image, "sleep", "120"],
check=True, capture_output=True, timeout=30,
# Default bind is 0.0.0.0; register the basic password provider so
# the auth gate has a provider and the dashboard binds. See
# test_dashboard_slot_reports_up_when_enabled for the full rationale.
start_container(
built_image, container_name,
"HERMES_DASHBOARD=1",
"HERMES_DASHBOARD_PORT=9120",
"HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
"HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
cmd="sleep 120",
)
# The dashboard process appearing in pgrep doesn't mean it's bound
# to the port yet — uvicorn takes another second or two to come up.
# The image doesn't ship ss/netstat, so probe /proc/net/tcp directly:
# port 9120 = 0x23A0, state 0A = LISTEN.
ok, stdout = _poll(
ok, stdout = poll_container(
container_name,
"grep -E ' 0+:23A0 .* 0A ' /proc/net/tcp /proc/net/tcp6 "
"2>/dev/null",
@@ -180,20 +138,19 @@ def test_dashboard_restarts_after_crash(
dashboard runs as a longrun s6-rc service and s6-supervise restarts
it after a ~1s backoff (the default).
"""
subprocess.run(
["docker", "run", "-d", "--name", container_name,
"-e", "HERMES_DASHBOARD=1",
# Default bind is 0.0.0.0; register the basic password provider so
# the auth gate has a provider and the supervised dashboard binds.
# See test_dashboard_slot_reports_up_when_enabled for the full
# rationale.
"-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
"-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
built_image, "sleep", "120"],
check=True, capture_output=True, timeout=30,
# Default bind is 0.0.0.0; register the basic password provider so
# the auth gate has a provider and the supervised dashboard binds.
# See test_dashboard_slot_reports_up_when_enabled for the full
# rationale.
start_container(
built_image, container_name,
"HERMES_DASHBOARD=1",
"HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin",
"HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw",
cmd="sleep 120",
)
# Wait for the first dashboard to come up.
ok, _ = _poll(
ok, _ = poll_container(
container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
)
assert ok, "Dashboard never started initially"
@@ -338,13 +295,12 @@ def test_dashboard_oauth_gate_engages_on_non_loopback_bind(
responds 200 without a cookie under both gates, so it cannot
distinguish "gate on" from "gate off".
"""
subprocess.run(
["docker", "run", "-d", "--name", container_name,
"-e", "HERMES_DASHBOARD=1",
"-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
"-e", "HERMES_DASHBOARD_OAUTH_CLIENT_ID=agent:test-instance",
built_image, "sleep", "120"],
check=True, capture_output=True, timeout=30,
start_container(
built_image, container_name,
"HERMES_DASHBOARD=1",
"HERMES_DASHBOARD_HOST=0.0.0.0",
"HERMES_DASHBOARD_OAUTH_CLIENT_ID=agent:test-instance",
cmd="sleep 120",
)
# (1) Provider registry visible via the public bootstrap endpoint.
@@ -398,18 +354,17 @@ def test_dashboard_insecure_env_var_no_longer_bypasses_gate(
public-dashboard escape hatch is gone: there is no env that serves the
dashboard on a public bind without an auth provider.
"""
subprocess.run(
["docker", "run", "-d", "--name", container_name,
"-e", "HERMES_DASHBOARD=1",
"-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
"-e", "HERMES_DASHBOARD_INSECURE=1",
built_image, "sleep", "120"],
check=True, capture_output=True, timeout=30,
start_container(
built_image, container_name,
"HERMES_DASHBOARD=1",
"HERMES_DASHBOARD_HOST=0.0.0.0",
"HERMES_DASHBOARD_INSECURE=1",
cmd="sleep 120",
)
# Fail-closed: the dashboard process must NOT successfully serve. Probe
# for a few seconds; /api/status should never become reachable because
# start_server raised SystemExit before binding.
ok, _ = _poll(
ok, _ = poll_container(
container_name,
"curl -fsS -m 2 http://127.0.0.1:9119/api/status >/dev/null 2>&1",
deadline_s=12.0,

View File

@@ -22,6 +22,7 @@ These tests verify:
"""
from __future__ import annotations
from tests.docker.conftest import docker_exec
import subprocess
import time
@@ -36,8 +37,8 @@ _RUN_READY_TIMEOUT_S = 20
def _wait_for_init(container: str) -> None:
"""Block until /init is up enough that `docker exec` is responsive."""
deadline = time.time() + _RUN_READY_TIMEOUT_S
while time.time() < deadline:
deadline = time.monotonic() + _RUN_READY_TIMEOUT_S
while time.monotonic() < deadline:
r = subprocess.run(
["docker", "exec", container, "true"],
capture_output=True, timeout=5,
@@ -287,4 +288,4 @@ def test_e2e_login_then_supervised_gateway_can_read_auth(
"Files written by `docker exec` are unreadable to the hermes user "
f"(supervised gateway UID): {unreadable}. The shim failed to drop "
"privileges before the write."
)
)

View File

@@ -6,7 +6,7 @@ fails inside the published image and ``hermes dump`` used to report
``$HERMES_GIT_SHA`` build-arg to ``/opt/hermes/.hermes_build_sha`` and
``hermes_cli/build_info.py`` reads it as a fallback.
CI (``.github/workflows/docker-publish.yml``) always sets the build-arg
CI (``.github/workflows/docker.yml``) always sets the build-arg
to ``${{ github.sha }}``. Local ``docker build`` (the ``built_image``
fixture in ``tests/docker/conftest.py``) does NOT — so locally the file
is absent and ``hermes dump`` correctly falls back to ``(unknown)``.

View File

@@ -0,0 +1,157 @@
"""Runtime smoke tests for Docker gateway_state.json bootstrap seeding.
Build the real image and verify the actual runtime behavior:
1. HERMES_GATEWAY_BOOTSTRAP_STATE=running on a fresh volume seeds
gateway_state.json with running state
2. An existing gateway_state.json is never clobbered (first-boot-only)
3. No env var = no seed (default down-on-first-boot preserved)
4. Only literal "running" is honored; other values are ignored
"""
from __future__ import annotations
import json
import subprocess
from tests.docker.conftest import docker_exec, docker_exec_sh, wait_for_container_ready
def _start_container(built_image: str, name: str, *env: str) -> str:
    """Launch ``built_image`` detached as ``name`` and block until it is ready.

    Every entry of ``env`` (a ``KEY=VALUE`` string) is forwarded to
    ``docker run`` as its own ``-e`` option. The container's command is
    ``sleep infinity``. Returns ``name`` once ``wait_for_container_ready``
    has returned.
    """
    # Flatten ("A=1", "B=2") into ["-e", "A=1", "-e", "B=2"].
    env_flags = [token for assignment in env for token in ("-e", assignment)]
    command = [
        "docker", "run", "-d", "--name", name,
        *env_flags,
        built_image, "sleep", "infinity",
    ]
    subprocess.run(command, check=True, capture_output=True, timeout=60)
    wait_for_container_ready(name)
    return name
def test_seeds_running_state_on_blank_volume(
    built_image: str, container_name: str,
) -> None:
    """A fresh volume booted with HERMES_GATEWAY_BOOTSTRAP_STATE=running
    must end up with a gateway_state.json whose state is ``running``."""
    _start_container(
        built_image, container_name,
        "HERMES_GATEWAY_BOOTSTRAP_STATE=running",
    )

    # ``echo NONE`` is the sentinel for "file missing or unreadable".
    result = docker_exec_sh(
        container_name,
        "cat /opt/data/gateway_state.json 2>/dev/null || echo NONE",
        timeout=10,
    )
    payload = result.stdout.strip()
    assert payload != "NONE", (
        f"gateway_state.json not seeded on fresh volume: {result.stdout}"
    )

    seeded = json.loads(payload)
    assert seeded.get("gateway_state") == "running", (
        f"expected gateway_state=running, got: {seeded}"
    )
def test_does_not_clobber_existing_state(
    built_image: str, container_name: str,
) -> None:
    """An existing gateway_state.json must never be overwritten by the
    seed, even when the bootstrap env var says running.

    We use a named volume so we can pre-create the state file before
    the container boots. The [ ! -f ] guard in stage2 must skip seeding
    because the file already exists. We check the file immediately after
    boot — before the gateway service has a chance to write its own
    state — by reading it as fast as possible after container start.

    The container and the named volume are removed in a ``finally`` block
    so a failing assertion (or a failing ``docker run``) does not leak the
    volume onto the host.
    """
    volume = f"{container_name}-vol"
    subprocess.run(
        ["docker", "volume", "create", volume],
        check=True, capture_output=True, timeout=10,
    )
    try:
        # Pre-create the state file via a throwaway container
        existing = json.dumps({"gateway_state": "stopped", "pid": 123})
        subprocess.run(
            ["docker", "run", "--rm", "-v", f"{volume}:/opt/data",
             "--entrypoint", "sh", built_image,
             "-c", f"printf '{existing}\\n' > /opt/data/gateway_state.json"],
            check=True, capture_output=True, timeout=30,
        )

        # Boot with the env var set — stage2 must NOT clobber the existing file
        subprocess.run(
            ["docker", "run", "-d", "--name", container_name,
             "-v", f"{volume}:/opt/data",
             "-e", "HERMES_GATEWAY_BOOTSTRAP_STATE=running",
             built_image, "sleep", "infinity"],
            check=True, capture_output=True, timeout=60,
        )

        # Read the file as quickly as possible — the gateway service may
        # start and write its own state, but the stage2 [ ! -f ] guard runs
        # during cont-init (before any service starts), so the file must
        # still be our "stopped" state at this point.
        wait_for_container_ready(container_name)
        r = docker_exec_sh(
            container_name, "cat /opt/data/gateway_state.json", timeout=10,
        )
        state = json.loads(r.stdout.strip())
        assert state.get("gateway_state") == "stopped", (
            f"existing state was clobbered by bootstrap seed: {state}"
        )
    finally:
        # Best-effort cleanup (no check=True): the container must go first,
        # otherwise the volume is still in use and cannot be removed.
        subprocess.run(
            ["docker", "rm", "-f", container_name],
            capture_output=True, timeout=10,
        )
        subprocess.run(
            ["docker", "volume", "rm", "-f", volume],
            capture_output=True, timeout=10,
        )
def test_no_seed_when_env_unset(
    built_image: str, container_name: str,
) -> None:
    """Booting without HERMES_GATEWAY_BOOTSTRAP_STATE must leave
    gateway_state.json absent."""
    _start_container(built_image, container_name)

    # Prints EXISTS or ABSENT depending on whether the state file is there.
    existence_probe = (
        "test -f /opt/data/gateway_state.json && "
        "echo EXISTS || echo ABSENT"
    )
    result = docker_exec_sh(container_name, existence_probe, timeout=10)

    assert "ABSENT" in result.stdout, (
        f"gateway_state.json was seeded without the env var: {result.stdout}"
    )
def test_non_running_value_ignored(
    built_image: str, container_name: str,
) -> None:
    """Only literal 'running' is honored; any other value is ignored.

    Each bogus value gets its own container named
    ``{container_name}-{value}``. Those names are created here rather than
    passed in as ``container_name``, so each one is force-removed in a
    ``finally`` block — otherwise a failed start or assertion would leave
    the container behind.
    """
    for bogus in ("stopped", "Running", "1", "true", "starting"):
        # Need a fresh container per iteration
        name = f"{container_name}-{bogus}"
        try:
            _start_container(
                built_image, name,
                f"HERMES_GATEWAY_BOOTSTRAP_STATE={bogus}",
            )
            r = docker_exec_sh(
                name,
                "test -f /opt/data/gateway_state.json && "
                "echo EXISTS || echo ABSENT",
                timeout=10,
            )
            assert "ABSENT" in r.stdout, (
                f"bogus value {bogus!r} should not seed a state file: {r.stdout}"
            )
        finally:
            # Best-effort: no check=True, so a container that never got
            # created does not mask the original failure.
            subprocess.run(
                ["docker", "rm", "-f", name],
                capture_output=True, timeout=10,
            )

View File

@@ -23,15 +23,15 @@ from __future__ import annotations
import subprocess
import time
from tests.docker.conftest import docker_exec_sh
def _sh(container: str, command: str, timeout: int = 30):
return docker_exec_sh(container, command, timeout=timeout)
from tests.docker.conftest import (
docker_exec_sh,
start_container,
wait_for_docker_logs,
)
def _svstat(container: str, slot: str = "gateway-default") -> str:
r = _sh(container, f"/command/s6-svstat /run/service/{slot}")
r = docker_exec_sh(container, f"/command/s6-svstat /run/service/{slot}")
return r.stdout if r.returncode == 0 else ""
@@ -46,6 +46,43 @@ def _svstat_wants_up(container: str, slot: str = "gateway-default") -> bool:
return "want up" in state
def _wait_for_gateway_or_exit(
container: str,
*,
deadline_s: float = 60.0,
) -> str:
"""Poll until the container is either running a foreground gateway
process or has exited. Returns the final container status.
Used by the ``--no-supervise`` tests where the gateway runs as the
CMD process (not supervised by s6). Under CI load the gateway can
take well over 6s to finish Python imports and reach the gateway
entrypoint — a fixed ``time.sleep(6)`` races. Polling for
``pgrep -f 'hermes.*gateway'`` (the gateway is running) or
``docker inspect`` returning ``exited`` is both faster on quick
machines and flake-free on slow ones.
"""
end = time.monotonic() + deadline_s
while time.monotonic() < end:
r = subprocess.run(
["docker", "inspect", "-f", "{{.State.Status}}", container],
capture_output=True, text=True, timeout=10,
)
status = r.stdout.strip()
if status == "exited":
return "exited"
if status == "running":
# Check if the gateway process is actually running in the
# foreground (the no-supervise path). If it is, we're done.
pgrep = docker_exec_sh(
container, "pgrep -f 'hermes.*gateway' >/dev/null 2>&1",
)
if pgrep.returncode == 0:
return "running"
time.sleep(0.5)
return status
def test_gateway_run_redirects_to_supervised(
built_image: str, container_name: str,
) -> None:
@@ -64,15 +101,27 @@ def test_gateway_run_redirects_to_supervised(
# exit immediately (which is what would happen pre-this-PR on the
# s6 image — the foreground gateway would crash without config,
# the CMD would exit, /init would shut down).
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"gateway", "run"],
check=True, capture_output=True, timeout=30,
)
start_container(built_image, container_name, cmd="gateway run")
# Give /init time to run cont-init.d, the wrapper time to dispatch
# the redirect, and s6-supervise time to spin up the slot.
time.sleep(5)
# Wait for the redirect breadcrumb to appear in docker logs.
# Under heavy parallel load (32-way docker test fan-out), the CMD
# process (main-wrapper.sh → python → hermes gateway run) can take
# well over 5s to reach the redirect logic. The breadcrumb is the
# definitive signal that the redirect fired — polling for it is
# both faster on quick machines and flake-free on slow ones.
# Under heavy parallel docker load (32-way fan-out), the CMD process
# (main-wrapper.sh → python → hermes gateway run) can take well over
# 30s to import the codebase, load config, and reach the redirect
# logic. 60s matches the deadline other boot-readiness polls use.
logs = wait_for_docker_logs(
container_name, "s6 supervision", deadline_s=60.0,
)
assert "s6 supervision" in logs, (
f"expected loud breadcrumb in docker logs; got:\n{logs}"
)
assert "--no-supervise" in logs, (
f"breadcrumb missing opt-out hint; got:\n{logs}"
)
# Container should still be running. If the redirect didn't fire,
# the foreground gateway would have crashed and the container
@@ -83,7 +132,7 @@ def test_gateway_run_redirects_to_supervised(
)
assert r.returncode == 0 and r.stdout.strip() == "running", (
f"container exited prematurely: {r.stdout!r}; "
f"docker logs:\n{subprocess.run(['docker', 'logs', container_name], capture_output=True, text=True).stdout}"
f"docker logs:\n{logs}"
)
# s6's intent for the default-profile gateway slot should be up.
@@ -96,26 +145,24 @@ def test_gateway_run_redirects_to_supervised(
)
# The CMD process (PID under /init that the wrapper exec'd into)
# should be sleeping, not the gateway. We grep `ps` for the
# `sleep infinity` heartbeat.
r = _sh(container_name, "ps -eo pid,cmd | grep -v grep | grep 'sleep infinity'")
assert r.returncode == 0 and "sleep infinity" in r.stdout, (
f"expected `sleep infinity` heartbeat process; got ps:\n{r.stdout}\n"
f"stderr: {r.stderr}"
# should be sleeping, not the gateway. We count `sleep infinity`
# processes parented to the CMD wrapper (main-wrapper.sh / rc.init
# top), NOT the static main-hermes service's sleep — a bare grep
# for `sleep infinity` would false-positive on the main-hermes
# sleep and pass even before the redirect fires.
r = docker_exec_sh(
container_name,
"ps -eo pid,ppid,cmd | grep -v grep | awk "
"'/main-wrapper.sh|rc.init top/ { wrapper_pid=$1 } "
"$3==\"sleep\" && $4==\"infinity\" && $2==wrapper_pid { c++ } "
"END { print c+0 }'",
)
# And the loud breadcrumb should be in `docker logs` so users see
# the upgrade explanation.
r = subprocess.run(
["docker", "logs", container_name],
capture_output=True, text=True, timeout=10,
)
logs = r.stdout + r.stderr
assert "s6 supervision" in logs, (
f"expected loud breadcrumb in docker logs; got:\n{logs}"
)
assert "--no-supervise" in logs, (
f"breadcrumb missing opt-out hint; got:\n{logs}"
assert r.returncode == 0
redirected_sleeps = int(r.stdout.strip() or 0)
assert redirected_sleeps == 1, (
f"expected one `sleep infinity` heartbeat parented to the CMD "
f"wrapper (the redirect); found {redirected_sleeps}. "
f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
)
@@ -139,25 +186,13 @@ def test_gateway_run_no_supervise_flag_preserves_legacy_behavior(
* The ``gateway-default`` s6 service slot is NOT created.
* No supervision-redirect breadcrumb appears in docker logs.
"""
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"gateway", "run", "--no-supervise"],
check=True, capture_output=True, timeout=30,
)
# Give startup time. The unconfigured-profile case used to fail
# fast; with a config bind-mounted profile (and a real volume on
# most realistic deployments) the gateway just runs.
time.sleep(6)
start_container(built_image, container_name, cmd="gateway run --no-supervise")
# Container should still be running OR have exited cleanly with
# the gateway's status code. Either is correct for pre-s6
# semantics — what's NOT correct is the supervised behavior
# (sleep infinity heartbeat + supervised gateway slot).
inspect = subprocess.run(
["docker", "inspect", "-f", "{{.State.Status}}", container_name],
capture_output=True, text=True, timeout=10,
)
status = inspect.stdout.strip()
# Wait for the gateway to start in the foreground or the container
# to exit (no-config crash is also valid pre-s6 semantics).
# A fixed time.sleep(6) races under CI parallel docker load —
# the gateway can take well over 6s to finish Python imports.
status = _wait_for_gateway_or_exit(container_name, deadline_s=60.0)
# No redirect breadcrumb anywhere.
logs = subprocess.run(
@@ -175,7 +210,7 @@ def test_gateway_run_no_supervise_flag_preserves_legacy_behavior(
if status == "running":
# Gateway running in foreground — the CMD process should be
# the gateway itself, NOT a sleep-infinity heartbeat.
r = _sh(
r = docker_exec_sh(
container_name,
"ps -eo pid,ppid,cmd | grep -v grep | awk '/main-wrapper.sh|rc.init top/ { wrapper_pid=$1 } "
"$3==\"sleep\" && $4==\"infinity\" && $2==wrapper_pid { c++ } END { print c+0 }'",
@@ -186,7 +221,7 @@ def test_gateway_run_no_supervise_flag_preserves_legacy_behavior(
f"--no-supervise: expected NO `sleep infinity` parented to "
f"the CMD wrapper (foreground gateway should be the CMD), "
f"found {redirected_sleeps}. "
f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
)
# The gateway-default s6 slot exists (the cont-init.d
@@ -211,13 +246,15 @@ def test_gateway_run_no_supervise_env_var(
Useful when users can't easily change their `docker run` args
(orchestration templates, K8s manifests) but can set env vars.
"""
subprocess.run(
["docker", "run", "-d", "--name", container_name,
"-e", "HERMES_GATEWAY_NO_SUPERVISE=1",
built_image, "gateway", "run"],
check=True, capture_output=True, timeout=30,
start_container(
built_image, container_name,
"HERMES_GATEWAY_NO_SUPERVISE=1",
cmd="gateway run",
)
time.sleep(6)
# Same as the CLI-flag test: wait for the gateway to start or
# the container to exit, instead of a blind time.sleep(6).
status = _wait_for_gateway_or_exit(container_name, deadline_s=60.0)
logs = subprocess.run(
["docker", "logs", container_name],
@@ -231,11 +268,7 @@ def test_gateway_run_no_supervise_env_var(
# Same as the CLI-flag test: the slot exists (reconciler creates
# it) but should not have want-state up.
inspect = subprocess.run(
["docker", "inspect", "-f", "{{.State.Status}}", container_name],
capture_output=True, text=True, timeout=10,
)
if inspect.stdout.strip() == "running":
if status == "running":
assert not _svstat_wants_up(container_name, "gateway-default"), (
"HERMES_GATEWAY_NO_SUPERVISE=1: gateway-default has "
"want-state up, implying the redirect dispatched `start` "
@@ -260,25 +293,33 @@ def test_supervised_gateway_does_not_recurse(
supervised gateway). Two or more would imply recursive spawning
via the redirect → start → run → redirect → ... loop.
"""
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"gateway", "run"],
check=True, capture_output=True, timeout=30,
)
time.sleep(6)
start_container(built_image, container_name, cmd="gateway run")
# Count python processes running `hermes gateway run`. If the
# recursion guard fails, s6 would respawn fresh `gateway run`
# processes on every cycle, leaving multiple Python-process
# descendants under the gateway-default supervise tree.
r = _sh(container_name, "ps -eo pid,cmd | grep -v grep | grep -E 'python.*hermes.*gateway run' | wc -l")
# Wait for the redirect to fire by polling for the breadcrumb.
# Under CI parallel docker test fan-out, the CMD process
# (main-wrapper.sh → python → hermes gateway run) can take well
# over 6s to reach the redirect logic. A fixed sleep would race:
# if we check too early, the CMD process hasn't exec'd into
# `sleep infinity` yet and the s6-supervised gateway hasn't
# started either — so we'd see the CMD's `hermes gateway run`
# AND the supervised one (2 processes) and falsely conclude
# recursion. Polling the breadcrumb is the definitive signal
# that the redirect fired and the CMD process is now `sleep`.
wait_for_docker_logs(container_name, "s6 supervision")
# Now that the redirect fired, count python processes running
# `hermes gateway run`. If the recursion guard fails, s6 would
# respawn fresh `gateway run` processes on every cycle, leaving
# multiple Python-process descendants under the gateway-default
# supervise tree.
r = docker_exec_sh(container_name, "ps -eo pid,cmd | grep -v grep | grep -E 'python.*hermes.*gateway run' | wc -l")
assert r.returncode == 0
n = int(r.stdout.strip() or 0)
assert n <= 1, (
f"expected at most one supervised python `hermes gateway run` "
f"process (the legitimately-supervised gateway); found {n}. "
f"Recursion guard may have failed. "
f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
)
# Stronger positive assertion: there should be exactly one
@@ -286,7 +327,7 @@ def test_supervised_gateway_does_not_recurse(
# CMD process (PID 17 typically). The static `main-hermes`
# service has its own `sleep infinity` child; THAT one is fine
# and unrelated to our redirect.
r = _sh(
r = docker_exec_sh(
container_name,
# Find PID of the CMD process (main-wrapper.sh or its sh
# parent), then count `sleep infinity` children.
@@ -298,7 +339,7 @@ def test_supervised_gateway_does_not_recurse(
assert redirected == 1, (
f"expected exactly one `sleep infinity` parented to the CMD "
f"wrapper (the redirect heartbeat); found {redirected}. "
f"ps:\n{_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
f"ps:\n{docker_exec_sh(container_name, 'ps -eo pid,ppid,cmd').stdout}"
)
@@ -312,20 +353,47 @@ def test_dashboard_supervised_when_env_set(
redirect: one container = supervised gateway + supervised
dashboard, with zero extra user effort.
"""
subprocess.run(
["docker", "run", "-d", "--name", container_name,
"-e", "HERMES_DASHBOARD=1",
built_image, "gateway", "run"],
check=True, capture_output=True, timeout=30,
start_container(
built_image, container_name,
"HERMES_DASHBOARD=1",
cmd="gateway run",
)
time.sleep(5)
# Both slots should report want-up.
assert _svstat_wants_up(container_name, "gateway-default"), (
f"gateway-default slot not up: {_svstat(container_name)!r}"
# Wait for the redirect to fire (the breadcrumb appears in docker
# logs when the CMD process reaches the redirect logic). This is
# the same signal the other gateway-run tests use.
# A fixed time.sleep(5) was racing: start_container returns when
# cont-init finishes, but the redirect (which creates the
# gateway-default s6 slot) happens later in the CMD process.
wait_for_docker_logs(
container_name, "s6 supervision", deadline_s=60.0,
)
assert _svstat_wants_up(container_name, "dashboard"), (
f"dashboard slot not up: {_svstat(container_name, 'dashboard')!r}"
# Poll for both slots to report want-up, using the same
# _svstat_wants_up helper the other tests use. A simple
# `grep 'want up'` is wrong: when the service is already up,
# s6-svstat output is "up (pid ...) Ns" with no literal "want up"
# — the want-up intent is implied by the absence of "want down".
ok_gateway = False
end = time.monotonic() + 30.0
while time.monotonic() < end:
if _svstat_wants_up(container_name, "gateway-default"):
ok_gateway = True
break
time.sleep(0.5)
assert ok_gateway, (
f"gateway-default slot not want-up: {_svstat(container_name)!r}"
)
ok_dash = False
end = time.monotonic() + 30.0
while time.monotonic() < end:
if _svstat_wants_up(container_name, "dashboard"):
ok_dash = True
break
time.sleep(0.5)
assert ok_dash, (
f"dashboard slot not want-up: {_svstat(container_name, 'dashboard')!r}"
)
@@ -354,14 +422,17 @@ def test_supervised_gateway_stdout_reaches_docker_logs(
Python-logging output, so its presence in ``docker logs`` proves
the stdout-tee is working.
"""
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"gateway", "run"],
check=True, capture_output=True, timeout=30,
)
# Banner is printed during gateway startup — give it time to
# initialize past the imports + config-load phase.
time.sleep(8)
start_container(built_image, container_name, cmd="gateway run")
# Poll docker logs for the banner glyph (⚕) or "Hermes Gateway
# Starting" — the gateway's rich-console startup banner. A fixed
# sleep(8) races under CI parallel docker test fan-out: the
# supervised gateway can take well over 8s to finish imports +
# config-load + banner print under load, and the assertion would
# fail not because the stdout-tee is broken but because we checked
# too early. Polling with a generous deadline is both faster on
# quick machines and flake-free on slow ones.
wait_for_docker_logs(container_name, "", deadline_s=60.0)
logs = subprocess.run(
["docker", "logs", container_name],
@@ -377,14 +448,14 @@ def test_supervised_gateway_stdout_reaches_docker_logs(
"This means the `1` action directive in _render_log_run isn't "
"forwarding stdout to /init. "
f"docker logs (last 2000 chars):\n{combined[-2000:]}\n"
f"file contents:\n{_sh(container_name, 'cat /opt/data/logs/gateways/default/current').stdout}"
f"file contents:\n{docker_exec_sh(container_name, 'cat /opt/data/logs/gateways/default/current').stdout}"
)
# Cross-check: the same banner must also be in the rotated log
# file (we kept the file destination, just added stdout). The
# file version has s6-log's ISO 8601 timestamp prefix; the
# docker logs version is raw.
file_contents = _sh(
file_contents = docker_exec_sh(
container_name, "cat /opt/data/logs/gateways/default/current",
).stdout
assert "" in file_contents or "Hermes Gateway Starting" in file_contents, (
@@ -392,4 +463,3 @@ def test_supervised_gateway_stdout_reaches_docker_logs(
"destination may have been dropped by the new s6-log script. "
f"File contents:\n{file_contents}"
)

View File

@@ -0,0 +1,169 @@
"""Runtime smoke tests for Docker HOME overrides and script behavior.
Build the real image and verify the actual runtime behavior:
1. main-wrapper preserves the Docker ``-w`` working directory
2. dashboard service resets HOME to /opt/data before privilege drop
3. dashboard does not auto-add ``--insecure`` from a non-loopback bind host
4. stage2 hook repairs profiles/ and cron/ ownership on every boot
"""
from __future__ import annotations
import subprocess
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container, restart_container
def test_main_wrapper_preserves_docker_workdir(
    built_image: str, container_name: str,
) -> None:
    """The main-wrapper MUST save and restore the original working directory
    so the container starts in the Docker ``-w`` directory, not /opt/data.

    Regression test for #35472. We pass ``-w /tmp`` and a command that
    prints its cwd; the output must be ``/tmp``, proving the wrapper
    restored the cwd after its internal ``cd /opt/data``.

    The container is run with ``--rm`` and no ``--name``; ``container_name``
    is accepted but not used in the body.
    """
    r = subprocess.run(
        ["docker", "run", "--rm", "-w", "/tmp",
         built_image, "sh", "-c", "pwd"],
        capture_output=True, text=True, timeout=60,
    )
    assert r.returncode == 0, f"container failed: {r.stderr[-1000:]}"

    # The stage2 hook emits boot logs (config migration, skills sync)
    # to stdout before the CMD runs. The actual pwd output is the LAST
    # line of stdout.
    lines = r.stdout.strip().splitlines()
    last_line = lines[-1].strip() if lines else ""
    assert last_line == "/tmp", (
        # The "; " separator keeps the two sentences from running together.
        f"expected cwd /tmp, got {last_line!r}; "
        "main-wrapper did not preserve the Docker -w directory"
    )
def test_dashboard_service_resets_home(
    built_image: str, container_name: str,
) -> None:
    """The dashboard service must run with HOME=/opt/data.

    The dashboard run script is expected to export HOME=/opt/data before
    dropping privileges, so HOME-anchored state (discord lockfile, XDG
    dirs) is not written under /root, the /init context's HOME.

    The container is started with HERMES_DASHBOARD=1 bound to 127.0.0.1
    (loopback, where the auth gate doesn't engage). The probe then takes
    one of two paths:

    * dashboard process found: read HOME from its ``/proc/<pid>/environ``;
    * no dashboard process: fall back to checking that the run script
      source contains ``export HOME=/opt/data``.
    """
    start_container(
        built_image, container_name,
        "HERMES_DASHBOARD=1", "HERMES_DASHBOARD_HOST=127.0.0.1",
    )

    # Shell probe implementing the two paths described in the docstring.
    home_probe = (
        'pid=$(pgrep -f "hermes dashboard" | head -1); '
        'if [ -n "$pid" ]; then '
        ' tr "\\0" "\\n" < /proc/$pid/environ | grep "^HOME="; '
        'else '
        ' grep -q "export HOME=/opt/data" '
        ' /opt/hermes/docker/s6-rc.d/dashboard/run && '
        ' echo "HOME=/opt/data"; '
        'fi'
    )
    outcome = docker_exec_sh(container_name, home_probe, timeout=15)

    assert "HOME=/opt/data" in outcome.stdout, (
        f"dashboard process or run script does not set HOME=/opt/data: "
        f"stdout={outcome.stdout!r} stderr={outcome.stderr!r}"
    )
def test_dashboard_does_not_auto_insecure_from_host(
    built_image: str, container_name: str,
) -> None:
    """``--insecure`` must never be derived from HERMES_DASHBOARD_HOST.

    The auth gate is the authority on whether a non-loopback bind is
    safe. The container is started with a non-loopback bind host, and if
    a dashboard process exists its command line must not contain
    ``--insecure``. A dashboard that is not running at all (for example
    because the auth gate refused the unauthenticated bind) is also an
    acceptable outcome — the only thing forbidden is auto-insecure.
    """
    start_container(
        built_image, container_name,
        "HERMES_DASHBOARD=1", "HERMES_DASHBOARD_HOST=0.0.0.0",
    )

    # Print the dashboard's argv (NUL separators turned into spaces), or
    # nothing when no dashboard process is found.
    cmdline_probe = (
        'pid=$(pgrep -f "hermes dashboard" | head -1); '
        'if [ -n "$pid" ]; then '
        ' tr "\\0" " " < /proc/$pid/cmdline; '
        'fi'
    )
    outcome = docker_exec_sh(container_name, cmdline_probe, timeout=10)
    cmdline = outcome.stdout.strip()

    if not cmdline:
        # No dashboard process: nothing to inspect, and that is allowed.
        return
    assert "--insecure" not in cmdline, (
        f"dashboard process has --insecure in cmdline (auto-derived "
        f"from host): {cmdline!r}"
    )
def test_stage2_repairs_profiles_and_cron_ownership(
    built_image: str, container_name: str,
) -> None:
    """profiles/ and cron/ must both be reclaimed after root-context writes.

    The stage2 hook chowns these dirs to hermes:hermes on every boot.
    We simulate a root-owned file in each, then restart the container
    and verify ownership is repaired.

    Both files are checked individually. A substring test on the combined
    ``stat`` output would pass when only one of the two directories was
    repaired, which is exactly the regression this test exists to catch.
    """
    start_container(built_image, container_name)

    # Create root-owned files in profiles/ and cron/ to simulate
    # docker exec (root) writes.
    docker_exec(
        container_name, "mkdir", "-p", "/opt/data/profiles/testprof",
        user="root", timeout=5,
    )
    docker_exec(
        container_name, "touch", "/opt/data/profiles/testprof/marker",
        user="root", timeout=5,
    )
    docker_exec(
        container_name, "touch", "/opt/data/cron/root_owned.json",
        user="root", timeout=5,
    )

    # `stat -c "%U"` prints one owner name per file, in argument order.
    stat_owners = (
        'stat -c "%U" /opt/data/profiles/testprof/marker '
        '/opt/data/cron/root_owned.json'
    )

    # Verify they're root-owned before restart.
    r = docker_exec_sh(container_name, stat_owners, timeout=5)
    owners = r.stdout.split()
    assert owners == ["root", "root"], (
        f"expected root-owned files before restart, got: {r.stdout!r} "
        f"stderr={r.stderr!r}"
    )

    # Restart — stage2 hook runs again and repairs ownership.
    restart_container(container_name)

    # Verify files are now owned by hermes.
    r = docker_exec_sh(container_name, stat_owners, timeout=5)
    owners = r.stdout.split()
    assert owners == ["hermes", "hermes"], (
        f"expected hermes-owned files after restart, got: {r.stdout!r} "
        f"stderr={r.stderr!r}; "
        "stage2 hook did not repair profiles/ and cron/ ownership"
    )

View File

@@ -0,0 +1,140 @@
"""Runtime smoke tests for Docker immutable install tree and install-method stamp.
Build the real image and verify at runtime:
1. /opt/hermes is not writable by the hermes user (immutable install tree)
2. PYTHONDONTWRITEBYTECODE and HERMES_DISABLE_LAZY_INSTALLS are set
3. /opt/hermes/.install_method contains "docker" (code-scoped stamp)
4. $HERMES_HOME/.install_method is NOT stamped as "docker" by stage2
5. A stale "docker" stamp in $HERMES_HOME is healed (removed) on boot
"""
from __future__ import annotations
from tests.docker.conftest import (
docker_exec,
docker_exec_sh,
restart_container,
start_container,
)
def test_install_tree_not_writable_by_hermes(
    built_image: str, container_name: str,
) -> None:
    """Check that creating files under /opt/hermes is refused.

    The install tree (source, venv, TUI bundle, node_modules) is meant to
    stay root-owned and non-writable so an agent session cannot self-modify
    the installation and brick the gateway.
    """
    start_container(built_image, container_name)

    # Shared shell tail: emit a marker for either outcome so the assertion
    # keys off stdout instead of the exit status of the compound command.
    outcome = " 2>&1 && echo WRITE_SUCCEEDED || echo WRITE_FAILED"

    # Probe 1: try to create a file at the tree root as the hermes user.
    root_probe = docker_exec_sh(
        container_name,
        "touch /opt/hermes/test_write" + outcome,
        timeout=10,
    )
    assert "WRITE_FAILED" in root_probe.stdout, (
        f"hermes user can write to /opt/hermes (install tree not immutable): "
        f"{root_probe.stdout}"
    )

    # Probe 2: same attempt inside a key subdirectory (the venv).
    venv_probe = docker_exec_sh(
        container_name,
        "touch /opt/hermes/.venv/test_write" + outcome,
        timeout=10,
    )
    assert "WRITE_FAILED" in venv_probe.stdout, (
        f"hermes user can write to /opt/hermes/.venv: {venv_probe.stdout}"
    )
def test_hermes_disable_lazy_installs_and_dont_write_bytecode(
    built_image: str, container_name: str,
) -> None:
    """Check both install-tree protection env vars are set to "1".

    PYTHONDONTWRITEBYTECODE keeps .pyc files out of the immutable install
    tree; HERMES_DISABLE_LAZY_INSTALLS stops lazy installs from trying to
    modify it.
    """
    start_container(built_image, container_name)

    required_vars = ("PYTHONDONTWRITEBYTECODE", "HERMES_DISABLE_LAZY_INSTALLS")
    # One `test "$VAR" = "1"` clause per variable, chained with &&, followed
    # by a marker for either outcome.
    clauses = " && ".join(f'test "${name}" = "1"' for name in required_vars)
    env_probe = docker_exec_sh(
        container_name,
        clauses + " && echo ENV_OK || echo ENV_MISSING",
        timeout=10,
    )
    assert "ENV_OK" in env_probe.stdout, (
        f"expected PYTHONDONTWRITEBYTECODE=1 and "
        f"HERMES_DISABLE_LAZY_INSTALLS=1, got: {env_probe.stdout} "
        f"stderr={env_probe.stderr}"
    )
def test_install_method_stamp_is_code_scoped(
    built_image: str, container_name: str,
) -> None:
    """Check the 'docker' stamp lives in /opt/hermes, not in $HERMES_HOME.

    The stamp must be baked at /opt/hermes/.install_method (code-scoped);
    /opt/data/.install_method must not read "docker".
    """
    start_container(built_image, container_name)

    # The code-scoped stamp has to exist and contain exactly "docker".
    code_stamp = docker_exec_sh(
        container_name, "cat /opt/hermes/.install_method", timeout=10,
    )
    assert code_stamp.returncode == 0, (
        f"/opt/hermes/.install_method not found: {code_stamp.stderr}"
    )
    stamped_value = code_stamp.stdout.strip()
    assert stamped_value == "docker", (
        f"expected 'docker' stamp, got: {stamped_value!r}"
    )

    # The data volume may have no stamp (NONE) or a different one, but it
    # must never carry the 'docker' stamp.
    home_stamp = docker_exec_sh(
        container_name,
        "cat /opt/data/.install_method 2>/dev/null || echo NONE",
        timeout=10,
    )
    assert home_stamp.stdout.strip() != "docker", (
        "$HERMES_HOME/.install_method is stamped 'docker' - stage2 must "
        "not stamp the data volume (shared with host installs)"
    )
def test_stale_docker_stamp_in_home_is_healed_on_boot(
    built_image: str, container_name: str,
) -> None:
    """Check a stale 'docker' stamp in $HERMES_HOME is removed on boot.

    An older image may have left /opt/data/.install_method containing
    "docker"; the test plants such a stamp, restarts the container, and
    expects the stamp to no longer read "docker" so shared homes self-heal.
    """
    start_container(built_image, container_name)

    # Plant the stale stamp as root.
    docker_exec(
        container_name, "sh", "-c",
        "printf 'docker\\n' > /opt/data/.install_method",
        user="root", timeout=5,
    )

    # Precondition: the stamp really is in place before the restart,
    # otherwise the post-restart check proves nothing.
    r = docker_exec_sh(container_name, "cat /opt/data/.install_method", timeout=5)
    assert r.stdout.strip() == "docker", (
        f"could not plant stale stamp before restart: "
        f"stdout={r.stdout!r} stderr={r.stderr!r}"
    )

    # Restart - stage2 should heal it.
    restart_container(container_name)

    r = docker_exec_sh(
        container_name,
        "test -f /opt/data/.install_method && "
        "cat /opt/data/.install_method || echo HEALED",
        timeout=10,
    )
    # The probe always exits 0 when the shell actually ran (either cat or
    # echo succeeds). A non-zero status means the exec itself failed, and
    # the resulting empty stdout must not be mistaken for "healed".
    assert r.returncode == 0, (
        f"post-restart stamp probe did not run: "
        f"stdout={r.stdout!r} stderr={r.stderr!r}"
    )
    # "HEALED" (file gone) and any non-docker content both satisfy this.
    assert r.stdout.strip() != "docker", (
        f"stale 'docker' stamp in $HERMES_HOME was not healed on boot: "
        f"{r.stdout}"
    )

View File

@@ -0,0 +1,26 @@
"""Runtime smoke test for Docker image license-file presence.
Build the real image and verify the LICENSE file is present inside the
container (PEP 639 license-files metadata must resolve inside the
Docker image).
"""
from __future__ import annotations
import subprocess
def test_docker_image_contains_license_file(built_image: str) -> None:
    """Check /opt/hermes/LICENSE exists inside the built image.

    PEP 639 license-files metadata references LICENSE, so the Docker build
    context must not exclude it.
    """
    # Override the entrypoint with `test` so the container's exit status is
    # the result of `test -f <path>`.
    license_check = ["docker", "run", "--rm", "--entrypoint", "test"]
    license_check += [built_image, "-f", "/opt/hermes/LICENSE"]
    proc = subprocess.run(
        license_check, capture_output=True, text=True, timeout=60,
    )
    assert proc.returncode == 0, (
        f"LICENSE file not found at /opt/hermes/LICENSE inside the Docker "
        f"image: {proc.stderr[-500:]}"
    )

View File

@@ -0,0 +1,47 @@
"""Runtime smoke test for Docker $HERMES_HOME/logs/gateways seeding.
Build the real image and verify logs/ and logs/gateways/ exist and are
owned by the hermes user after container boot.
Regression guard for #45258: if the first gateway log service runs in
root context, logs/gateways/ is created root-owned; every profile
registered later runs its log service as the dropped hermes user and
s6-log crash-loops on mkdir: Permission denied.
"""
from __future__ import annotations
from tests.docker.conftest import docker_exec_sh, start_container
def test_logs_gateways_seeded_and_hermes_owned(
    built_image: str, container_name: str,
) -> None:
    """Check logs/ and logs/gateways/ exist and are hermes-owned after boot."""
    start_container(built_image, container_name)

    # Step 1: both directories have to be present.
    presence = docker_exec_sh(
        container_name,
        "test -d /opt/data/logs && "
        "test -d /opt/data/logs/gateways && "
        "echo DIRS_OK || echo DIRS_MISSING",
        timeout=10,
    )
    assert "DIRS_OK" in presence.stdout, (
        f"logs/ or logs/gateways/ not seeded: {presence.stdout}"
    )

    # Step 2: report both owners on one line as "logs=<u> gateways=<u>".
    ownership = docker_exec_sh(
        container_name,
        'logs_owner=$(stat -c "%U" /opt/data/logs); '
        'gateways_owner=$(stat -c "%U" /opt/data/logs/gateways); '
        'echo "logs=$logs_owner gateways=$gateways_owner"',
        timeout=10,
    )
    report = ownership.stdout
    for expected_token, label in (
        ("logs=hermes", "logs/"),
        ("gateways=hermes", "logs/gateways/"),
    ):
        assert expected_token in report, (
            f"{label} not owned by hermes: {report}"
        )

View File

@@ -26,7 +26,7 @@ from __future__ import annotations
import subprocess
import time
from tests.docker.conftest import docker_exec_sh
from tests.docker.conftest import docker_exec_sh, start_container
PROFILE = "test-harness-profile"
@@ -69,12 +69,7 @@ def _svstat_wants_up(container: str) -> bool:
def test_profile_create_then_gateway_start(
built_image: str, container_name: str,
) -> None:
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"sleep", "120"],
check=True, capture_output=True, timeout=30,
)
time.sleep(3)
start_container(built_image, container_name, cmd="sleep 120")
r = _sh(container_name, f"hermes profile create {PROFILE}")
assert r.returncode == 0, f"profile create failed: {r.stderr}"
@@ -114,12 +109,7 @@ def test_profile_delete_stops_gateway(
) -> None:
"""Deleting a profile should stop its gateway and remove the s6
service slot."""
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"sleep", "120"],
check=True, capture_output=True, timeout=30,
)
time.sleep(3)
start_container(built_image, container_name, cmd="sleep 120")
_sh(container_name, f"hermes profile create {PROFILE}")
_sh(container_name, f"hermes -p {PROFILE} gateway start", timeout=60)
@@ -135,4 +125,4 @@ def test_profile_delete_stops_gateway(
time.sleep(2)
# Service slot should be gone.
r = _sh(container_name, f"test -d /run/service/gateway-{PROFILE}")
assert r.returncode != 0, "s6 service slot still present after profile delete"
assert r.returncode != 0, "s6 service slot still present after profile delete"

View File

@@ -0,0 +1,88 @@
"""Runtime smoke tests for Docker PUID/PGID and UID/GID remap.
Build the real image and verify the actual runtime behavior:
1. PUID/PGID env vars remap the hermes user UID/GID at boot
2. HERMES_UID/HERMES_GID take precedence over PUID/PGID aliases
3. NAS-style low UIDs (99:100) are accepted and remapped
4. Invalid UIDs are rejected
5. The remapped user can write to the data volume
"""
from __future__ import annotations
from tests.docker.conftest import docker_exec_sh, start_container
def test_puid_pgid_remaps_hermes_user(
    built_image: str, container_name: str,
) -> None:
    """Check PUID=1000 / PGID=1000 remap the hermes user to 1000:1000."""
    start_container(built_image, container_name, "PUID=1000", "PGID=1000")

    # (id flag, label used in the message, env var that drove the remap);
    # UID is checked before GID.
    for id_flag, kind, env_name in (("-u", "UID", "PUID"), ("-g", "GID", "PGID")):
        lookup = docker_exec_sh(
            container_name, f"id {id_flag} hermes", timeout=10,
        )
        actual = lookup.stdout.strip()
        assert actual == "1000", (
            f"expected hermes {kind} 1000 after {env_name} remap, got: {actual}"
        )
def test_hermes_uid_gid_take_precedence_over_aliases(
    built_image: str, container_name: str,
) -> None:
    """Check HERMES_UID/HERMES_GID win over PUID/PGID when all four are set."""
    env_settings = (
        "HERMES_UID=2000",
        "HERMES_GID=2001",
        "PUID=1000",
        "PGID=1000",
    )
    start_container(built_image, container_name, *env_settings)

    uid_lookup = docker_exec_sh(container_name, "id -u hermes", timeout=10)
    uid = uid_lookup.stdout.strip()
    assert uid == "2000", (
        f"expected hermes UID 2000 (HERMES_UID wins), got: {uid}"
    )

    gid_lookup = docker_exec_sh(container_name, "id -g hermes", timeout=10)
    gid = gid_lookup.stdout.strip()
    assert gid == "2001", (
        f"expected hermes GID 2001 (HERMES_GID wins), got: {gid}"
    )
def test_nas_low_uid_accepted(
    built_image: str, container_name: str,
) -> None:
    """Check NAS-style low IDs (99:100, common on Unraid) are applied."""
    start_container(built_image, container_name, "PUID=99", "PGID=100")

    # (id flag, label, expected value); UID is checked before GID.
    expectations = (("-u", "UID", "99"), ("-g", "GID", "100"))
    for id_flag, kind, wanted in expectations:
        lookup = docker_exec_sh(
            container_name, f"id {id_flag} hermes", timeout=10,
        )
        actual = lookup.stdout.strip()
        assert actual == wanted, (
            f"expected hermes {kind} {wanted}, got: {actual}"
        )
def test_remap_enables_data_volume_writes(
    built_image: str, container_name: str,
) -> None:
    """Check /opt/data is still writable after a PUID/PGID remap."""
    start_container(built_image, container_name, "PUID=1000", "PGID=1000")

    # The shell prints a marker for either outcome; only WRITE_OK passes.
    write_cmd = "touch /opt/data/test_write && echo WRITE_OK || echo WRITE_FAIL"
    write_probe = docker_exec_sh(container_name, write_cmd, timeout=10)
    assert "WRITE_OK" in write_probe.stdout, (
        f"hermes user cannot write to /opt/data after remap: "
        f"{write_probe.stdout}"
    )

View File

@@ -19,10 +19,7 @@ operations work correctly under UID 10000.
"""
from __future__ import annotations
import subprocess
import time
from tests.docker.conftest import docker_exec
from tests.docker.conftest import docker_exec, start_container
_REGISTER_SCRIPT = """
@@ -45,49 +42,39 @@ print("UNREGISTERED")
"""
def _exec(container: str, *args: str, timeout: int = 30) -> subprocess.CompletedProcess:
return docker_exec(container, *args, timeout=timeout)
def test_s6_register_creates_service_dir_in_live_container(
built_image: str, container_name: str,
) -> None:
"""S6ServiceManager.register_profile_gateway must create
``/run/service/gateway-<profile>/`` and trigger s6-svscan rescan
against the real s6 supervision tree."""
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"sleep", "120"],
check=True, capture_output=True, timeout=30,
)
# Give the supervision tree a moment to come up.
time.sleep(3)
start_container(built_image, container_name, cmd="sleep 120")
r = _exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
r = docker_exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
assert "REGISTERED" in r.stdout, (
f"register failed: stderr={r.stderr!r} stdout={r.stdout!r}"
)
# Service directory exists with the expected structure.
r = _exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
r = docker_exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
assert r.returncode == 0, "service directory not created"
r = _exec(container_name, "test", "-f", "/run/service/gateway-phase3test/run")
r = docker_exec(container_name, "test", "-f", "/run/service/gateway-phase3test/run")
assert r.returncode == 0, "run script not created"
r = _exec(container_name, "test", "-f",
r = docker_exec(container_name, "test", "-f",
"/run/service/gateway-phase3test/log/run")
assert r.returncode == 0, "log/run script not created"
# s6-svscan picked it up — s6-svstat works against the dir.
# `docker exec` doesn't put /command/ on PATH (only the supervision
# tree does), so call s6-svstat by absolute path.
r = _exec(container_name, "/command/s6-svstat",
r = docker_exec(container_name, "/command/s6-svstat",
"/run/service/gateway-phase3test")
assert r.returncode == 0, f"s6-svstat failed: {r.stderr or r.stdout}"
# list_profile_gateways picks it up.
r = _exec(container_name, "python3", "-c", (
r = docker_exec(container_name, "python3", "-c", (
"from hermes_cli.service_manager import S6ServiceManager;"
"print(S6ServiceManager().list_profile_gateways())"
))
@@ -100,29 +87,24 @@ def test_s6_unregister_removes_service_dir_in_live_container(
"""unregister_profile_gateway must stop the service, remove the
directory, and trigger s6-svscan rescan so the supervise process
is dropped."""
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"sleep", "120"],
check=True, capture_output=True, timeout=30,
)
time.sleep(3)
start_container(built_image, container_name, cmd="sleep 120")
# First register so we have something to unregister.
r = _exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
r = docker_exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
assert "REGISTERED" in r.stdout
# Then unregister.
r = _exec(container_name, "python3", "-c", _UNREGISTER_SCRIPT, timeout=30)
r = docker_exec(container_name, "python3", "-c", _UNREGISTER_SCRIPT, timeout=30)
assert "UNREGISTERED" in r.stdout, (
f"unregister failed: stderr={r.stderr!r} stdout={r.stdout!r}"
)
# Directory is gone.
r = _exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
r = docker_exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
assert r.returncode != 0, "service directory still exists after unregister"
# list_profile_gateways no longer includes it.
r = _exec(container_name, "python3", "-c", (
r = docker_exec(container_name, "python3", "-c", (
"from hermes_cli.service_manager import S6ServiceManager;"
"print(S6ServiceManager().list_profile_gateways())"
))

View File

@@ -0,0 +1,60 @@
"""Runtime smoke tests for the Docker image entrypoint and subcommands.
Converted from the former ``.github/actions/hermes-smoke-test`` composite
action. These tests exercise the image's real ENTRYPOINT (``/init`` +
``main-wrapper.sh``) via ``docker run --rm <image> --help`` and
``docker run --rm <image> dashboard --help`` to catch basic runtime
regressions before publishing.
The harness expects the ``built_image`` fixture from
``tests/docker/conftest.py``. When Docker isn't available every test
here is skipped at collection time.
"""
from __future__ import annotations
import subprocess
def test_hermes_help(built_image: str) -> None:
    """Check ``docker run --rm <image> --help`` exits 0 without a traceback.

    No ``--entrypoint`` override is used, so the image's real ENTRYPOINT
    (``/init`` + ``main-wrapper.sh``) handles the command and the actual
    production startup path is exercised. PR #30136 review found that an
    ``--entrypoint`` override in the old composite action had been silently
    neutered by the s6-overlay migration (``stage2-hook`` ignores CMD args
    passed after an overridden entrypoint), making that smoke test a no-op.
    """
    help_run = subprocess.run(
        ["docker", "run", "--rm", built_image, "--help"],
        capture_output=True,
        text=True,
        timeout=60,
    )
    # Only the tail of each stream is reported to keep failures readable.
    out_tail = help_run.stdout[-2000:]
    err_tail = help_run.stderr[-2000:]
    assert help_run.returncode == 0, (
        f"hermes --help failed (exit {help_run.returncode}): "
        f"stdout={out_tail!r} stderr={err_tail!r}"
    )
    assert "Traceback" not in help_run.stderr, (
        f"hermes --help produced a traceback: {err_tail!r}"
    )
def test_dashboard_subcommand_present(built_image: str) -> None:
    """Check ``docker run --rm <image> dashboard --help`` exits 0.

    Regression guard for #9153: the ``dashboard`` subcommand was present in
    source but missing from the published image. A failure here means
    something in the Dockerfile is excluding the subcommand from the
    installed package.
    """
    dash_run = subprocess.run(
        ["docker", "run", "--rm", built_image, "dashboard", "--help"],
        capture_output=True,
        text=True,
        timeout=60,
    )
    assert dash_run.returncode == 0, (
        f"hermes dashboard --help failed (exit {dash_run.returncode}): "
        f"stdout={dash_run.stdout[-2000:]!r} stderr={dash_run.stderr[-2000:]!r}"
    )

    # Help text may land on either stream; accept either keyword,
    # case-insensitively.
    combined = (dash_run.stdout + dash_run.stderr).lower()
    assert any(word in combined for word in ("dashboard", "usage")), (
        f"dashboard --help output unexpected: {combined[-2000:]!r}"
    )

View File

@@ -0,0 +1,82 @@
"""Runtime smoke tests for Docker stage2 browser executable discovery.
Build the real image and verify the chromium binary is actually
discovered at boot: ``AGENT_BROWSER_EXECUTABLE_PATH`` is set, points to
a real executable, and is a browser binary (not a shared library picked
up by a broad ``find | grep``).
"""
from __future__ import annotations
from tests.docker.conftest import docker_exec_sh, start_container
def test_stage2_discovers_chromium_binary(
    built_image: str, container_name: str,
) -> None:
    """Check stage2 exports a usable AGENT_BROWSER_EXECUTABLE_PATH.

    Discovery is meant to match on filename rather than a broad
    ``find | grep``: shared libraries (libGLESv2.so etc.) inherit the
    executable bit from Playwright's tarball but must not be selected. The
    test therefore checks that the exported path is executable and that its
    basename is a known browser name, not a .so.
    """
    start_container(built_image, container_name)

    # The variable is published through the s6 container_environment dir.
    env_read = docker_exec_sh(
        container_name,
        "cat /run/s6/container_environment/AGENT_BROWSER_EXECUTABLE_PATH",
        timeout=10,
    )
    assert env_read.returncode == 0, (
        f"AGENT_BROWSER_EXECUTABLE_PATH not set by stage2 hook: {env_read.stderr}"
    )
    browser_path = env_read.stdout.strip()
    assert browser_path, "AGENT_BROWSER_EXECUTABLE_PATH is empty"

    # The path has to point at an executable file.
    exec_check = docker_exec_sh(
        container_name, f'test -x "{browser_path}"', timeout=5,
    )
    assert exec_check.returncode == 0, (
        f"discovered browser path is not executable: {browser_path}"
    )

    # The basename has to be one of the recognised browser binary names.
    accepted_names = (
        "chrome", "chromium", "chrome-headless-shell",
        "headless_shell", "chromium-browser",
    )
    name_lookup = docker_exec_sh(
        container_name, f'basename "{browser_path}"', timeout=5,
    )
    basename = name_lookup.stdout.strip()
    assert basename in accepted_names, (
        f"discovered binary basename {basename!r} is not a recognized "
        f"browser name (accepted: {accepted_names}) — the discovery may "
        f"have picked up a shared library (.so) instead of the real browser"
    )
def test_stage2_browser_path_accessible_to_hermes_user(
    built_image: str, container_name: str,
) -> None:
    """Check the discovered browser binary is readable and executable.

    The unprivileged hermes user (UID 10000) runs agent-browser
    subprocesses, so that user needs both permissions on the binary.
    """
    start_container(built_image, container_name)

    # Resolve the path inside the container, then test -r and -x on it; the
    # exit status of the chain is the verdict.
    access_cmd = (
        'path="$(cat /run/s6/container_environment/AGENT_BROWSER_EXECUTABLE_PATH)" '
        '&& test -r "$path" && test -x "$path"'
    )
    access_check = docker_exec_sh(container_name, access_cmd, timeout=10)
    assert access_check.returncode == 0, (
        f"browser binary not readable+executable by hermes user: "
        f"{access_check.stderr}"
    )

View File

@@ -0,0 +1,54 @@
"""Runtime smoke test for the Docker tini compatibility shim (#34192).
Build the real image and verify:
1. /usr/bin/tini exists and is a symlink to /init (the compat shim
for orchestration templates that still reference /usr/bin/tini)
2. The actual ENTRYPOINT is /init (s6-overlay), not /usr/bin/tini
"""
from __future__ import annotations
import subprocess
def test_tini_compat_symlink_exists(built_image: str) -> None:
    """Check /usr/bin/tini is a symlink that resolves to /init.

    Regression for #34192: orchestration templates (e.g. Hostinger's
    'Hermes WebUI' catalog) still pin /usr/bin/tini as the entrypoint. The
    shim symlinks it to /init so legacy wrappers exec the right PID-1
    reaper without behavior change.
    """
    # Run a plain `sh -c` in place of the entrypoint; the exit status of the
    # two chained checks is the verdict.
    symlink_check = (
        'test -L /usr/bin/tini && '
        'test "$(readlink -f /usr/bin/tini)" = "/init"'
    )
    proc = subprocess.run(
        ["docker", "run", "--rm", "--entrypoint", "sh",
         built_image, "-c", symlink_check],
        capture_output=True,
        text=True,
        timeout=60,
    )
    assert proc.returncode == 0, (
        f"/usr/bin/tini is not a symlink to /init: {proc.stderr[-500:]}"
    )
def test_entrypoint_is_init_not_tini(built_image: str) -> None:
    """Check the image's configured ENTRYPOINT is /init (s6-overlay).

    The tini shim exists only for legacy external wrappers; the image's own
    runtime must keep using the canonical /init.
    """
    inspect_cmd = [
        "docker", "inspect", built_image,
        "--format", "{{json .Config.Entrypoint}}",
    ]
    inspected = subprocess.run(
        inspect_cmd, capture_output=True, text=True, timeout=30,
    )
    assert inspected.returncode == 0, (
        f"docker inspect failed: {inspected.stderr}"
    )

    # Raw JSON text of the entrypoint array; checked by substring.
    entrypoint = inspected.stdout.strip()
    assert "/init" in entrypoint, (
        f"ENTRYPOINT is not /init: {entrypoint!r}"
    )
    # Expected array: ["/init", "/opt/hermes/docker/main-wrapper.sh"], so
    # no element may mention tini.
    assert "tini" not in entrypoint.lower(), (
        f"ENTRYPOINT references tini instead of /init: {entrypoint!r}"
    )

View File

@@ -0,0 +1,93 @@
"""Runtime smoke tests for Docker top-level state-file ownership repair.
Build the real image and verify the actual runtime behavior:
1. Root-owned top-level state files (auth.json, state.db, gateway.lock,
gateway_state.json) are chowned to hermes on boot
2. Non-allowlisted host-owned files are NOT touched (targeted, not
blanket find -user root sweep)
"""
from __future__ import annotations
from tests.docker.conftest import (
docker_exec,
docker_exec_sh,
restart_container,
start_container,
)
# The files the stage2 hook should repair (mirrors the allowlist in
# stage2-hook.sh). We test a representative subset.
ALLOWLISTED_FILES = ("auth.json", "state.db", "gateway.lock", "gateway_state.json")
def test_root_owned_state_files_repaired_on_boot(
    built_image: str, container_name: str,
) -> None:
    """Check root-owned allowlisted state files are chowned to hermes on boot.

    Each file in ALLOWLISTED_FILES is created as root, confirmed to be
    root-owned, and then expected to be hermes-owned after a restart.
    """
    start_container(built_image, container_name)

    # Create root-owned state files to simulate docker exec (root) writes.
    for f in ALLOWLISTED_FILES:
        docker_exec(
            container_name, "touch", f"/opt/data/{f}",
            user="root", timeout=5,
        )

    # A single stat call with every path as an operand prints one owner per
    # line, in operand order. Space-joining complete `stat ...` commands
    # instead would pass the literal word "stat" as a file operand.
    stat_cmd = "stat -c %U " + " ".join(
        f"/opt/data/{f}" for f in ALLOWLISTED_FILES
    )

    # Compare the whole owner list rather than looping over the output, so
    # empty or truncated output fails instead of passing vacuously.
    r = docker_exec_sh(container_name, stat_cmd, timeout=5)
    owners = r.stdout.split()
    assert owners == ["root"] * len(ALLOWLISTED_FILES), (
        f"expected root-owned files before restart, got: {owners} "
        f"stderr={r.stderr!r}"
    )

    # Restart - stage2 should repair ownership.
    restart_container(container_name)

    r = docker_exec_sh(container_name, stat_cmd, timeout=5)
    owners = r.stdout.split()
    assert owners == ["hermes"] * len(ALLOWLISTED_FILES), (
        f"expected hermes-owned after restart, got: {owners} "
        f"stderr={r.stderr!r}"
    )
def test_non_allowlisted_host_file_not_touched(
    built_image: str, container_name: str,
) -> None:
    """Check a root-owned file outside the allowlist keeps its owner.

    Regression guard for #19788 / #19795: a bind-mounted $HERMES_HOME may
    contain host-owned files Hermes does not manage, so the boot-time
    repair must leave them alone.
    """
    start_container(built_image, container_name)

    host_file = "/opt/data/host_secret.json"

    # Create the file as root, then chown it explicitly so the starting
    # ownership does not rely on touch's default.
    docker_exec(container_name, "touch", host_file, user="root", timeout=5)
    docker_exec(
        container_name, "chown", "root:root", host_file,
        user="root", timeout=5,
    )

    restart_container(container_name)

    # Ownership must be unchanged after the restart.
    owner_lookup = docker_exec_sh(
        container_name, "stat -c %U " + host_file, timeout=5,
    )
    owner = owner_lookup.stdout.strip()
    assert owner == "root", (
        f"non-allowlisted host file was chowned by stage2 (should be "
        f"preserved): {owner}"
    )

View File

@@ -0,0 +1,66 @@
"""Runtime smoke tests for Docker --user flag guard.
Build the real image and verify the actual runtime behavior:
1. docker run --user <arbitrary-uid> is rejected with actionable guidance
2. Root start (default) works fine
3. --user <hermes-uid> (10000) is allowed (supported non-root start)
"""
from __future__ import annotations
import subprocess
def test_arbitrary_user_uid_rejected(
    built_image: str,
) -> None:
    """Check ``docker run --user 1000:1000`` fails with actionable guidance."""
    run_cmd = ["docker", "run", "--rm", "--user", "1000:1000"]
    run_cmd += [built_image, "echo", "should_not_reach"]
    rejected = subprocess.run(
        run_cmd, capture_output=True, text=True, timeout=60,
    )

    # The container must fail and the echoed payload must never appear.
    assert rejected.returncode != 0, (
        f"container started with arbitrary --user UID unexpectedly: "
        f"{rejected.stdout}"
    )
    assert "should_not_reach" not in rejected.stdout, (
        f"container ran despite --user rejection: {rejected.stdout}"
    )

    # The message may be on either stream; it has to say the mode is not
    # supported and name at least one of the remediation env vars.
    combined = rejected.stdout + rejected.stderr
    tail = combined[-500:]
    assert "not supported" in combined.lower(), (
        f"rejection message missing 'not supported': {tail}"
    )
    assert any(var in combined for var in ("HERMES_UID", "PUID")), (
        f"rejection message missing remediation guidance: {tail}"
    )
def test_root_start_works(
    built_image: str,
) -> None:
    """Check the default start (no --user flag) runs a trivial command."""
    run_cmd = ["docker", "run", "--rm", built_image, "sh", "-c", "echo OK"]
    started = subprocess.run(
        run_cmd, capture_output=True, text=True, timeout=60,
    )
    assert started.returncode == 0, (
        f"root start failed: {started.stderr[-500:]}"
    )
    assert "OK" in started.stdout
def test_user_pinned_to_hermes_uid_works(
    built_image: str,
) -> None:
    """Check ``docker run --user 10000:10000`` (the hermes UID) is accepted.

    This is the supported non-root start from #34648 / #34837.
    """
    run_cmd = ["docker", "run", "--rm", "--user", "10000:10000"]
    run_cmd += [built_image, "sh", "-c", "echo OK"]
    pinned = subprocess.run(
        run_cmd, capture_output=True, text=True, timeout=60,
    )
    assert pinned.returncode == 0, (
        f"--user 10000:10000 (hermes UID) was rejected: {pinned.stderr[-500:]}"
    )
    assert "OK" in pinned.stdout

View File

@@ -12,22 +12,16 @@ docstring.
"""
from __future__ import annotations
import subprocess
import time
from tests.docker.conftest import docker_exec, docker_exec_sh
from tests.docker.conftest import docker_exec, docker_exec_sh, start_container, start_container
def test_orphan_zombies_reaped(
built_image: str, container_name: str,
) -> None:
"""Spawn an orphan child that exits immediately. PID 1 must reap it."""
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"sleep", "60"],
check=True, capture_output=True, timeout=30,
)
time.sleep(2)
start_container(built_image, container_name, cmd="sleep 60")
# `( ( sleep 0.1 & ) & ); sleep 1` creates a grandchild detached from
# the original docker exec session — it becomes an orphan reparented
@@ -42,4 +36,4 @@ def test_orphan_zombies_reaped(
line for line in r.stdout.split("\n")
if line.strip().startswith("Z")
]
assert not zombies, f"Zombies not reaped by PID 1: {zombies}"
assert not zombies, f"Zombies not reaped by PID 1: {zombies}"

View File

@@ -27,7 +27,6 @@ import pytest
# against each other (and against any other file that also touches
# ``app.state``) — the marker name is shared across all dashboard-auth test
# files that gate the app.
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
from fastapi import FastAPI
from fastapi.responses import Response
from fastapi.testclient import TestClient

View File

@@ -10,7 +10,6 @@ import pytest
# against each other (and against any other file that also touches
# ``app.state``) — the marker name is shared across all dashboard-auth test
# files that gate the app.
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
from fastapi.testclient import TestClient
from hermes_cli import web_server

View File

@@ -16,12 +16,6 @@ from __future__ import annotations
import pytest
# Phase 5 / Phase 6: these tests mutate ``web_server.app.state.auth_required``
# at module level. Run them in the same xdist worker so they don't race
# against each other (and against any other file that also touches
# ``app.state``) — the marker name is shared across all dashboard-auth test
# files that gate the app.
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
from fastapi.testclient import TestClient
from hermes_cli import web_server

View File

@@ -16,11 +16,6 @@ import time
import pytest
# These tests mutate ``web_server.app.state.auth_required`` at module level,
# so they share the dashboard-auth app-state xdist group to avoid racing
# other gate tests.
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
from fastapi.testclient import TestClient
from hermes_cli import web_server

View File

@@ -32,10 +32,6 @@ from __future__ import annotations
import pytest
# Same xdist group as the other dashboard-auth tests — they all mutate
# web_server.app.state.auth_required at module level.
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
from fastapi.testclient import TestClient
from hermes_cli import web_server

View File

@@ -20,10 +20,6 @@ from hermes_cli import web_server
from hermes_cli.dashboard_auth import clear_providers, register_provider
from tests.hermes_cli.conftest_dashboard_auth import StubAuthProvider
# These tests mutate ``web_server.app.state.auth_required`` so they share
# the same xdist group as the other dashboard-auth gated_app tests.
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
@pytest.fixture
def gated_client():

View File

@@ -17,12 +17,6 @@ from types import SimpleNamespace
import pytest
# Phase 5 / Phase 6: these tests mutate ``web_server.app.state.auth_required``
# at module level. Run them in the same xdist worker so they don't race
# against each other (and against any other file that also touches
# ``app.state``) — the marker name is shared across all dashboard-auth test
# files that gate the app.
pytestmark = pytest.mark.xdist_group("dashboard_auth_app_state")
from fastapi.testclient import TestClient
from hermes_cli import web_server

View File

@@ -1,91 +0,0 @@
"""Regression tests for Docker HOME overrides under s6/with-contenv."""
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parent.parent
DASHBOARD_RUN = REPO_ROOT / "docker" / "s6-rc.d" / "dashboard" / "run"
MAIN_WRAPPER = REPO_ROOT / "docker" / "main-wrapper.sh"
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
def test_main_wrapper_preserves_docker_workdir() -> None:
    """Check main-wrapper.sh saves and later restores the original cwd.

    The container should start in the Docker ``-w`` directory rather than
    /opt/data. Regression test for #35472.
    """
    script = MAIN_WRAPPER.read_text(encoding="utf-8")

    # The original cwd has to be captured before the cd into /opt/data.
    assert "_hermes_orig_cwd" in script, (
        "main-wrapper.sh must save the original cwd before cd /opt/data"
    )
    assert 'HERMES_ORIG_CWD:-$PWD' in script, (
        "main-wrapper.sh must capture PWD as the fallback original cwd"
    )

    # Init still runs from /opt/data (existing behaviour preserved).
    assert "cd /opt/data" in script

    # Ordering: venv activation, then the restoring cd, then the exec
    # routing block. str.index raises ValueError if a marker is absent.
    activation_pos = script.index("/opt/hermes/.venv/bin/activate")
    restore_pos = script.index('cd "$_hermes_orig_cwd"')
    routing_pos = script.index("if [ $# -eq 0 ]")
    assert activation_pos < restore_pos < routing_pos, (
        "cd $_hermes_orig_cwd must appear after venv activation and "
        "before the exec routing block"
    )
def test_dashboard_run_resets_home_before_dropping_privileges() -> None:
    """Check the dashboard run script contains the expected key lines."""
    script = DASHBOARD_RUN.read_text(encoding="utf-8")
    # Shebang, HOME override, and the privilege-dropping exec, checked in
    # that order.
    required_snippets = (
        "#!/command/with-contenv sh",
        "export HOME=/opt/data",
        "exec s6-setuidgid hermes hermes dashboard",
    )
    for snippet in required_snippets:
        assert snippet in script
def test_dashboard_run_does_not_derive_insecure_from_bind_host() -> None:
    """Pin the explicit ``--insecure`` opt-in of the s6 dashboard run script.

    The script MUST NOT auto-add ``--insecure`` based on
    ``HERMES_DASHBOARD_HOST``; the opt-in is the explicit
    ``HERMES_DASHBOARD_INSECURE`` variable. The test therefore rejects the
    fingerprints of the legacy host-matching logic and requires the new
    variable together with every accepted truthy spelling.
    """
    run_script = DASHBOARD_RUN.read_text(encoding="utf-8")

    # Fingerprints of the removed host-derived flip, each with its message.
    legacy_markers = {
        '127.0.0.1|localhost': (
            "Run script still derives --insecure from the bind host. The gate "
            "is the authority now — opt in via HERMES_DASHBOARD_INSECURE instead."
        ),
        'case "$dash_host" in': (
            "Legacy host-derived --insecure case-statement is back."
        ),
    }
    for marker, complaint in legacy_markers.items():
        assert marker not in run_script, complaint

    # The explicit opt-in variable has to be referenced.
    assert "HERMES_DASHBOARD_INSECURE" in run_script, (
        "Explicit HERMES_DASHBOARD_INSECURE opt-in is missing."
    )

    # Truthy spellings aligned with the rest of the s6 scripts
    # (e.g. HERMES_DASHBOARD).
    # NOTE(review): these are plain substring checks against the whole script,
    # so short tokens such as "1" can be satisfied by unrelated text.
    accepted_spellings = ("1", "true", "TRUE", "True", "yes", "YES", "Yes")
    for truthy in accepted_spellings:
        assert truthy in run_script, (
            f"HERMES_DASHBOARD_INSECURE should accept truthy value {truthy!r}"
        )
def test_stage2_hook_repairs_profiles_and_cron_ownership_on_every_boot() -> None:
    """profiles/ and cron/ must both be reclaimed after root-context writes.

    For each directory the hook needs the existence guard and the recursive,
    failure-tolerant ``chown`` to the hermes user.
    """
    hook_src = STAGE2_HOOK.read_text(encoding="utf-8")
    expected_lines = (
        'if [ -d "$HERMES_HOME/profiles" ]; then',
        'chown -R hermes:hermes "$HERMES_HOME/profiles" 2>/dev/null || true',
        'if [ -d "$HERMES_HOME/cron" ]; then',
        'chown -R hermes:hermes "$HERMES_HOME/cron" 2>/dev/null || true',
    )
    for expected in expected_lines:
        assert expected in hook_src

View File

@@ -1,19 +0,0 @@
"""Regression tests for Docker stage2 browser executable discovery."""
from pathlib import Path
def test_stage2_discovers_playwright_arm64_headless_shell() -> None:
    """Playwright's --only-shell layout may use a headless_shell basename.

    The hook is looked up relative to the current directory first (the
    original behaviour) and then under each ancestor of this test file, so
    the test no longer depends on pytest being launched from the repo root.
    It is decoded as UTF-8 rather than with the locale default.
    """
    rel_hook = Path("docker") / "stage2-hook.sh"
    search_roots = (Path.cwd(), *Path(__file__).resolve().parents)
    # Fall back to the bare relative path so a missing hook still surfaces as
    # the same FileNotFoundError as before.
    hook = next(
        (root / rel_hook for root in search_roots if (root / rel_hook).is_file()),
        rel_hook,
    )
    script = hook.read_text(encoding="utf-8")
    assert "-name 'headless_shell'" in script
def test_stage2_discovery_stays_filename_matched() -> None:
    """Avoid broad path grep that can pick executable shared libraries.

    Only the text between ``browser_bin=$(`` and the closing ``)`` that
    precedes the following ``if`` is inspected: it must use an
    executable-file ``find`` and must not mention ``grep``.

    The hook is looked up relative to the current directory first (the
    original behaviour) and then under each ancestor of this test file, and
    is decoded as UTF-8 rather than with the locale default.
    """
    rel_hook = Path("docker") / "stage2-hook.sh"
    search_roots = (Path.cwd(), *Path(__file__).resolve().parents)
    # Fall back to the bare relative path so a missing hook still surfaces as
    # the same FileNotFoundError as before.
    hook = next(
        (root / rel_hook for root in search_roots if (root / rel_hook).is_file()),
        rel_hook,
    )
    script = hook.read_text(encoding="utf-8")
    discovery_block = script.split("browser_bin=$(", 1)[1].split(")\n if", 1)[0]
    assert "find \"$PLAYWRIGHT_BROWSERS_PATH\" -type f -executable" in discovery_block
    assert "grep" not in discovery_block

View File

@@ -1,49 +0,0 @@
"""Regression test for #34192 — Dockerfile must keep the tini compat shim
for orchestration templates that still reference /usr/bin/tini.
This is a documentation-as-test guard: removing the shim is a real
choice, but it should be done deliberately (e.g. once Hostinger's
'Hermes WebUI' catalog updates to /init) and not by accident.
"""
from __future__ import annotations
from pathlib import Path
def _dockerfile_text() -> str:
    """Return the UTF-8 text of the ``Dockerfile`` one level above this file's directory."""
    dockerfile = Path(__file__).parents[1].joinpath("Dockerfile")
    return dockerfile.read_text(encoding="utf-8")
def test_tini_compat_symlink_present():
    """The ``/usr/bin/tini -> /init`` symlink command must stay in the Dockerfile (#34192)."""
    symlink_cmd = "ln -sf /init /usr/bin/tini"
    why = (
        "Dockerfile must keep the tini compat symlink (#34192). "
        "Removing it breaks orchestration templates that still pin "
        "/usr/bin/tini as the entrypoint (Hostinger 'Hermes WebUI' "
        "catalog as of v0.14.x)."
    )
    assert symlink_cmd in _dockerfile_text(), why
def test_tini_compat_comment_explains_why():
    """The Dockerfile must still mention ``#34192``.

    That anchor is what tells a future reader why the tini symlink exists;
    without it the line looks like dead code worth deleting.
    """
    issue_anchor = "#34192"
    why = (
        "The Dockerfile tini compat shim must keep its #34192 anchor "
        "comment so future maintainers know why the symlink is there."
    )
    assert issue_anchor in _dockerfile_text(), why
def test_entrypoint_still_init_not_tini():
    """Sanity check: the image's own ENTRYPOINT is still ``/init``.

    The tini shim exists for legacy external wrappers only; the image's
    runtime path must keep using the canonical s6-overlay entrypoint.
    """
    entrypoint_prefix = 'ENTRYPOINT [ "/init"'
    why = (
        "Dockerfile ENTRYPOINT must remain /init (s6-overlay). The "
        "tini shim is only for external wrappers that haven't been "
        "updated yet."
    )
    assert entrypoint_prefix in _dockerfile_text(), why

View File

@@ -1,5 +1,6 @@
"""Guards for the multi-container Hermes WebUI install surface."""
"""Test that setup.py uses temporary output directories when the source
tree is read-only (as it is inside the Docker WebUI install surface).
"""
from __future__ import annotations
from pathlib import Path
@@ -20,18 +21,6 @@ def _is_under(path: str, root: Path) -> bool:
return True
def test_docker_context_includes_license_file() -> None:
"""PEP 639 license-files metadata must resolve inside the Docker image."""
dockerignore = (REPO_ROOT / ".dockerignore").read_text(encoding="utf-8")
active_lines = [
line.strip()
for line in dockerignore.splitlines()
if line.strip() and not line.lstrip().startswith("#")
]
assert "LICENSE" not in active_lines
def test_setup_uses_temporary_outputs_when_source_tree_is_read_only(
monkeypatch,
) -> None:

View File

@@ -12,22 +12,16 @@ def _dockerfile_text() -> str:
return DOCKERFILE.read_text()
def test_dockerfile_makes_opt_hermes_root_owned_and_non_writable() -> None:
def test_dockerfile_makes_opt_hermes_readonly_for_hermes_user() -> None:
text = _dockerfile_text()
assert "COPY --chown=hermes:hermes . ." not in text
assert "COPY . ." in text
assert "chown -R root:root /opt/hermes" in text
assert "chmod -R a+rX /opt/hermes" in text
assert "chmod -R a-w /opt/hermes" in text
immutable_block = re.search(
r"RUN mkdir -p /opt/hermes/bin && \\\n"
r"(?:.*\\\n)+?"
r"\s+chmod -R a-w /opt/hermes",
text,
)
assert immutable_block, "Dockerfile must lock /opt/hermes after installing code/deps"
# --chmod on the source COPY bakes read-only perms at copy time instead
# of a separate chmod -R pass (which walked ~30k files — #49113).
assert "COPY --link --chmod=a+rX,go-w . ." in text
# The old tree-walking passes must not be present.
assert "chown -R root:root /opt/hermes" not in text
assert "chmod -R a+rX /opt/hermes" not in text
assert "chmod -R a-w /opt/hermes" not in text
def test_dockerfile_keeps_mutable_state_under_opt_data() -> None:
@@ -68,22 +62,20 @@ def test_dockerfile_bakes_code_scoped_install_method_stamp() -> None:
(/opt/hermes/.install_method) first; baking it at build time keeps the
published image self-identifying as 'docker' WITHOUT writing into the
shared $HERMES_HOME data volume (which a host install may also use).
It must live inside the immutable block so the runtime user can't alter it.
The stamp is created by root in the shim-wiring RUN block; the hermes
user can't modify it (go-w from the --chmod on the source COPY).
"""
text = _dockerfile_text()
assert "printf 'docker\\n' > /opt/hermes/.install_method" in text
immutable_block = re.search(
# The stamp must be in the RUN block that wires the exec shim.
shim_block = re.search(
r"RUN mkdir -p /opt/hermes/bin && \\\n"
r"(?:.*\\\n)+?"
r"\s+chmod -R a-w /opt/hermes",
r"\s+printf 'docker\\n' > /opt/hermes/\.install_method",
text,
)
assert immutable_block, "immutable block must exist"
assert ".install_method" in immutable_block.group(0), (
"the code-scoped install-method stamp must be baked inside the "
"immutable /opt/hermes block"
)
assert shim_block, "install-method stamp must be in the shim-wiring RUN block"
def test_dockerfile_redirects_lazy_installs_to_durable_target() -> None:

View File

@@ -1,152 +0,0 @@
"""Contract test: the s6-overlay stage2 hook seeds gateway_state.json from
HERMES_GATEWAY_BOOTSTRAP_STATE on first boot, so a freshly-provisioned
container can come up with the gateway already running.
Background. On a blank volume there is no gateway_state.json, so the boot
reconciler (cont-init.d/02-reconcile-profiles ->
container_boot.reconcile_profile_gateways) registers the gateway-default s6
slot but leaves it DOWN — it only auto-starts when the last recorded state was
"running". A container provisioned on a fresh volume therefore comes up with
the gateway down until something starts it.
An orchestrator that wants the gateway running from first boot sets
HERMES_GATEWAY_BOOTSTRAP_STATE=running; stage2-hook.sh (installed as
/etc/cont-init.d/01-hermes-setup, which runs lexicographically BEFORE
02-reconcile-profiles) seeds the state file so the reconciler sees
prior_state=running and brings the slot up on the very first boot.
This mirrors the existing HERMES_AUTH_JSON_BOOTSTRAP env-seed pattern: it seeds
the SAME gateway_state.json the reconciler already consults, guarded by
``[ ! -f ]`` so persisted runtime state always wins on subsequent boots (a
deliberately-stopped gateway must stay stopped across restarts).
"""
from __future__ import annotations
import json
import re
import shutil
import subprocess
import tempfile
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[2]
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Return the contents of ``docker/stage2-hook.sh``, read once per module.

    Requesting tests are skipped when the hook is missing from the checkout.
    The file is decoded explicitly as UTF-8 so the result does not depend on
    the locale of the machine running the tests.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    return STAGE2_HOOK.read_text(encoding="utf-8")
def _seed_block(text: str) -> str:
    """Return the gateway-state bootstrap-seed ``if ... fi`` block from *text*.

    The block opens with the ``[ ! -f "$HERMES_HOME/gateway_state.json" ]``
    test continued onto the next line, and runs up to the first following
    line that starts with ``fi``. An ``AssertionError`` is raised when the
    script contains no such block.
    """
    seed_pattern = (
        r'(if \[ ! -f "\$HERMES_HOME/gateway_state\.json" \] && \\\n'
        r"(?:.*\n)*?fi)"
    )
    found = re.search(seed_pattern, text)
    assert found, (
        "stage2-hook.sh must contain the gateway_state.json bootstrap-seed block "
        "guarded on HERMES_GATEWAY_BOOTSTRAP_STATE"
    )
    return found.group(1)
def test_seed_block_present_and_guarded(stage2_text: str) -> None:
    """The seed block is first-boot-only and keyed on the bootstrap env var."""
    seed = _seed_block(stage2_text)

    # The [ ! -f ] guard is what lets persisted state win on later boots.
    first_boot_guard = '[ ! -f "$HERMES_HOME/gateway_state.json" ]'
    assert first_boot_guard in seed, (
        "seed must be guarded by [ ! -f ] so persisted state wins on restart"
    )
    for token in ("HERMES_GATEWAY_BOOTSTRAP_STATE", "gateway_state"):
        assert token in seed
def _run_seed(
    text: str, *, env_value: str | None, preexisting: str | None
) -> str | None:
    """Execute the extracted seed block against a throwaway ``$HERMES_HOME``.

    Args:
        text: Full stage2 hook source; the seed block is cut out of it.
        env_value: Value exported as ``HERMES_GATEWAY_BOOTSTRAP_STATE``;
            ``None`` leaves the variable unset.
        preexisting: Content written to ``gateway_state.json`` before the
            block runs; ``None`` means the file does not exist beforehand.

    Returns:
        The content of ``gateway_state.json`` after the run, or ``None`` if
        the file is absent.

    The test is skipped when ``bash`` is unavailable. ``chown`` and ``chmod``
    are replaced by no-op shell functions so no root privileges are needed.
    """
    shell = shutil.which("bash")
    if shell is None:
        pytest.skip("bash not available")
    seed_snippet = _seed_block(text)

    if env_value is None:
        env_setup = "unset HERMES_GATEWAY_BOOTSTRAP_STATE\n"
    else:
        env_setup = f'export HERMES_GATEWAY_BOOTSTRAP_STATE="{env_value}"\n'

    with tempfile.TemporaryDirectory() as sandbox:
        sandbox_dir = Path(sandbox)
        hermes_home = sandbox_dir / "home"
        hermes_home.mkdir()
        state_path = hermes_home / "gateway_state.json"
        if preexisting is not None:
            state_path.write_text(preexisting)

        harness = "".join(
            (
                "set -e\n",
                f'HERMES_HOME="{hermes_home}"\n',
                # Stub privilege ops — the sandbox isn't root.
                "chown() { :; }\n",
                "chmod() { :; }\n",
                env_setup,
                seed_snippet,
            )
        )
        harness_path = sandbox_dir / "harness.sh"
        harness_path.write_text(harness)

        result = subprocess.run(
            [shell, str(harness_path)], capture_output=True, text=True
        )
        assert result.returncode == 0, result.stderr
        return state_path.read_text() if state_path.exists() else None
def test_seeds_running_state_on_blank_volume(stage2_text: str) -> None:
    """With env=running and no prior file, a valid "running" state is written."""
    seeded = _run_seed(stage2_text, env_value="running", preexisting=None)
    assert seeded is not None, "seed must create gateway_state.json"
    state = json.loads(seeded)
    assert state.get("gateway_state") == "running"
def test_does_not_clobber_existing_state(stage2_text: str) -> None:
    """An existing state file survives untouched even when env says running.

    This is the ``[ ! -f ]`` guard at work: a deliberately-stopped gateway
    must stay stopped across restarts.
    """
    persisted = json.dumps({"gateway_state": "stopped", "pid": 123})
    after = _run_seed(stage2_text, env_value="running", preexisting=persisted)
    assert after == persisted, "seed must not clobber a persisted state file"
def test_no_seed_when_env_unset(stage2_text: str) -> None:
    """Without the env var nothing is written.

    This keeps the default down-on-first-boot behaviour for orchestrators
    that don't opt in.
    """
    after = _run_seed(stage2_text, env_value=None, preexisting=None)
    assert after is None, "seed must not run when HERMES_GATEWAY_BOOTSTRAP_STATE is unset"
def test_non_running_value_ignored(stage2_text: str) -> None:
    """Only the literal "running" seeds a state file.

    Any other value, including case variants and other truthy-looking
    strings, must leave the volume without a state file, so a typo cannot
    write a bogus state.
    """
    rejected_values = ("stopped", "Running", "1", "true", "starting")
    for bogus in rejected_values:
        after = _run_seed(stage2_text, env_value=bogus, preexisting=None)
        assert after is None, (
            f"only 'running' should seed a state file, not {bogus!r}"
        )

View File

@@ -1,48 +0,0 @@
"""Contract tests for the Docker stage2 immutable install-tree policy.
Hosted/container Hermes keeps user-writable state under HERMES_HOME
(/opt/data). The installed source, venv, TUI bundle, and node_modules under
/opt/hermes must remain root-owned/non-writable by the runtime hermes user so
an agent session cannot self-modify the installation and brick the gateway.
"""
from __future__ import annotations
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[2]
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Return the contents of ``docker/stage2-hook.sh``, read once per module.

    Requesting tests are skipped when the hook is missing from the checkout.
    The file is decoded explicitly as UTF-8 so the result does not depend on
    the locale of the machine running the tests.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    return STAGE2_HOOK.read_text(encoding="utf-8")
def test_stage2_does_not_chown_install_tree_to_hermes(stage2_text: str) -> None:
    """stage2 must carry no trace of the old install-tree ownership fix-up."""
    # Log lines and command fragments of the removed chown pass.
    removed_snippets = (
        "Fixing ownership of build trees under $INSTALL_DIR",
        'chown -R hermes:hermes \\\n "$INSTALL_DIR/.venv"',
        "venv_owner=$(stat -c %u \"$INSTALL_DIR/.venv\"",
        "chown of build trees failed",
    )
    for snippet in removed_snippets:
        assert snippet not in stage2_text

    # Continuation-line arguments that only the removed chown used.
    chown_arguments = (
        '"$INSTALL_DIR/.venv" \\',
        '"$INSTALL_DIR/ui-tui" \\',
        '"$INSTALL_DIR/gateway" \\',
        '"$INSTALL_DIR/node_modules" \\',
    )
    for install_tree in chown_arguments:
        assert install_tree not in stage2_text, (
            f"stage2 must not chown {install_tree} back to hermes; "
            "the Dockerfile keeps /opt/hermes immutable and writable state "
            "belongs under HERMES_HOME"
        )
def test_stage2_documents_immutable_install_contract(stage2_text: str) -> None:
    """stage2 must mention each keyword of the immutable-install contract."""
    contract_keywords = (
        "Immutable install tree",
        "PYTHONDONTWRITEBYTECODE",
        "HERMES_DISABLE_LAZY_INSTALLS=1",
        "/opt/hermes",
    )
    for keyword in contract_keywords:
        assert keyword in stage2_text

View File

@@ -1,61 +0,0 @@
"""Contract test: the s6-overlay stage2 hook must NOT stamp the install method
into the shared $HERMES_HOME, and must heal a stale 'docker' stamp left there
by older images.
Background (shared-$HERMES_HOME bug)
------------------------------------
$HERMES_HOME (/opt/data) is a DATA volume that users commonly bind-mount from
the host (``~/.hermes:/opt/data``) and sometimes share with a host-side
Desktop/CLI install. Older images wrote ``printf 'docker' > $HERMES_HOME/.install_method``
at boot, which clobbered the host install's own marker — so the host's in-app
updater read 'docker' and refused to run ``hermes update`` ("doesn't apply
inside the Docker container").
The fix scopes the stamp to the install tree (baked at
``/opt/hermes/.install_method`` in the Dockerfile, read first by
``detect_install_method``). stage2 must therefore:
* NOT write the 'docker' stamp into $HERMES_HOME any more, and
* proactively remove a stale 'docker' stamp from $HERMES_HOME so homes
already poisoned by an older image self-heal on the next boot.
"""
from __future__ import annotations
import re
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[2]
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Return the contents of ``docker/stage2-hook.sh``, read once per module.

    Requesting tests are skipped when the hook is missing from the checkout.
    The file is decoded explicitly as UTF-8 so the result does not depend on
    the locale of the machine running the tests.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    return STAGE2_HOOK.read_text(encoding="utf-8")
def test_stage2_does_not_write_install_method_into_home(stage2_text: str) -> None:
    """No redirect or ``tee`` may target ``$HERMES_HOME/.install_method``."""
    # Matches `tee` or `>` followed by the (optionally quoted) home-scoped stamp.
    home_stamp_write = r"(tee|>)\s*\"?\$HERMES_HOME/\.install_method"
    offending = re.search(home_stamp_write, stage2_text)
    assert not offending, (
        "stage2 must not stamp $HERMES_HOME/.install_method — that data dir "
        "may be shared with a host install whose marker would be clobbered"
    )
def test_stage2_heals_stale_docker_home_stamp(stage2_text: str) -> None:
    """stage2 removes a stale home-scoped stamp, but only when it says 'docker'."""
    # The removal itself, so already-poisoned shared homes recover.
    removal_cmd = 'rm -f "$HERMES_HOME/.install_method"'
    assert removal_cmd in stage2_text, (
        "stage2 must remove a stale 'docker' stamp from $HERMES_HOME to heal "
        "homes poisoned by older images"
    )

    # The guard, so a legitimately different stamp put there by a user or
    # host install is never deleted.
    docker_value_guard = r'\[\s*"\$stamped"\s*=\s*"docker"\s*\]'
    assert re.search(docker_value_guard, stage2_text), (
        "the stale-stamp removal must be guarded on the value == 'docker'"
    )

View File

@@ -1,60 +0,0 @@
"""Contract test: the s6-overlay stage2 hook seeds $HERMES_HOME/logs/gateways
as the hermes user.
Regression guard for #45258: the per-profile gateway log service
(`gateway-<profile>/log/run`) creates `logs/gateways/` via `mkdir -p` but only
chowns the leaf `logs/gateways/<profile>`. If the first log service to boot
runs in root context, the `gateways/` parent is created root-owned and stays
that way; every profile registered later runs its log service as the dropped
hermes user and s6-log crash-loops on `mkdir: Permission denied`.
Seeding `logs/gateways` in stage2 (cont-init runs before any service starts)
guarantees the parent already exists hermes-owned by the time the first
log/run executes its `mkdir -p`.
"""
from __future__ import annotations
import re
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[2]
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Return the contents of ``docker/stage2-hook.sh``, read once per module.

    Requesting tests are skipped when the hook is missing from the checkout.
    The file is decoded explicitly as UTF-8 so the result does not depend on
    the locale of the machine running the tests.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    return STAGE2_HOOK.read_text(encoding="utf-8")
def _seed_mkdir_block(text: str) -> str:
    """Return the ``as_hermes mkdir -p`` command with all its continuation lines.

    The match starts at the ``as_hermes mkdir -p`` line ending in a backslash,
    takes every following backslash-continued line and the first line that is
    not continued, including its trailing newline. An ``AssertionError`` is
    raised when the command is absent.
    """
    mkdir_pattern = r"as_hermes mkdir -p \\\n(?:[^\n]*\\\n)*[^\n]*\n"
    found = re.search(mkdir_pattern, text)
    assert found, "stage2-hook.sh must contain the as_hermes mkdir -p seed block"
    return found.group(0)
def test_logs_gateways_is_seeded(stage2_text: str) -> None:
    """The stage2 seed list names both ``logs/gateways`` and its parent ``logs``."""
    seed_cmd = _seed_mkdir_block(stage2_text)

    gateways_dir = '"$HERMES_HOME/logs/gateways"'
    assert gateways_dir in seed_cmd, (
        "logs/gateways must be seeded hermes-owned in stage2 so profiles "
        "added after first boot can create their log dirs (#45258)"
    )

    # The parent must also be seeded so mkdir -p inside the block never
    # creates logs/ implicitly with surprising ownership.
    logs_dir = '"$HERMES_HOME/logs"'
    assert logs_dir in seed_cmd
def test_logs_subtree_is_healed_when_chown_needed(stage2_text: str) -> None:
    """The ``for sub in ...`` repair loop must list ``logs`` among its entries.

    Covering ``logs`` in that loop is what makes the seed entry sufficient:
    no separate ``logs/gateways`` loop entry is needed.
    """
    loop_header = re.search(r"for sub in ([^;]*); do", stage2_text)
    assert loop_header, "stage2-hook.sh must contain the needs_chown subdir repair loop"

    repaired_subdirs = loop_header.group(1).split()
    assert "logs" in repaired_subdirs, (
        "the needs_chown loop must recursively chown logs/ — it covers "
        "logs/gateways, so the seed list does not need a loop twin"
    )

View File

@@ -1,110 +0,0 @@
"""Contract test: the s6-overlay stage2 hook accepts PUID/PGID as aliases for
HERMES_UID/HERMES_GID.
Regression guard for #15290. NAS platforms (UGOS, Synology, unRAID) bind-mount
/opt/data from a host directory owned by the user's own UID and expect the
LinuxServer.io PUID/PGID convention. Without the alias those vars are silently
ignored, the s6-setuidgid drop lands on UID 10000, and the runtime cannot read
the volume. HERMES_UID/HERMES_GID must still take precedence when both are
set.
The s6-overlay rework moved bootstrap from docker/entrypoint.sh (now a shim)
to docker/stage2-hook.sh, which is installed as /etc/cont-init.d/01-hermes-setup
by the Dockerfile. This test targets the post-rework location.
"""
from __future__ import annotations
import os
import shutil
import subprocess
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[2]
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Return the contents of ``docker/stage2-hook.sh``, read once per module.

    Requesting tests are skipped when the hook is missing from the checkout.
    The file is decoded explicitly as UTF-8 so the result does not depend on
    the locale of the machine running the tests.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    return STAGE2_HOOK.read_text(encoding="utf-8")
def _alias_lines(text: str) -> list[str]:
    """Return the stripped lines of *text* that assign HERMES_UID or HERMES_GID.

    A line qualifies when, after stripping surrounding whitespace, it starts
    with ``HERMES_UID=`` or ``HERMES_GID=``. Source order is preserved.
    """
    assignment_prefixes = ("HERMES_UID=", "HERMES_GID=")
    stripped_lines = (raw.strip() for raw in text.splitlines())
    return [line for line in stripped_lines if line.startswith(assignment_prefixes)]
def test_stage2_hook_resolves_puid_pgid_aliases(stage2_text: str) -> None:
    """At least one UID/GID assignment line mentions PUID, and one mentions PGID (#15290)."""
    assignments = _alias_lines(stage2_text)
    for alias, target in (("PUID", "HERMES_UID"), ("PGID", "HERMES_GID")):
        mentioned = any(alias in line for line in assignments)
        assert mentioned, (
            f"docker/stage2-hook.sh must resolve {target} from a {alias} alias; see #15290"
        )
def _resolve(stage2_text: str, env: dict[str, str]) -> str:
    """Run only the hook's UID/GID assignment lines and report the result.

    The lines are executed by ``bash -ec`` with an environment made of the
    caller's ``PATH`` plus *env* (entries in *env* win). The return value is
    the stripped stdout of a trailing echo, i.e. ``"<HERMES_UID>:<HERMES_GID>"``
    with empty strings for unset variables. The test is skipped when ``bash``
    is unavailable.
    """
    shell = shutil.which("bash")
    if shell is None:
        pytest.skip("bash not available")

    assignments = "\n".join(_alias_lines(stage2_text))
    report = '\necho "${HERMES_UID:-}:${HERMES_GID:-}"\n'
    child_env = {"PATH": os.environ.get("PATH", ""), **env}

    result = subprocess.run(
        [shell, "-ec", assignments + report],
        env=child_env,
        capture_output=True,
        text=True,
    )
    assert result.returncode == 0, result.stderr
    return result.stdout.strip()
def test_puid_pgid_populate_hermes_uid_gid(stage2_text: str) -> None:
    """PUID/PGID alone end up as the resolved HERMES_UID/HERMES_GID pair."""
    resolved = _resolve(stage2_text, {"PUID": "1000", "PGID": "10"})
    assert resolved == "1000:10"
def test_hermes_uid_gid_take_precedence_over_aliases(stage2_text: str) -> None:
    """When both spellings are set, HERMES_UID/HERMES_GID win over PUID/PGID."""
    both_spellings = {
        "HERMES_UID": "2000",
        "HERMES_GID": "2001",
        "PUID": "1000",
        "PGID": "10",
    }
    assert _resolve(stage2_text, both_spellings) == "2000:2001"
def test_no_uid_vars_leaves_values_empty(stage2_text: str) -> None:
    """With no UID/GID variables set, both resolved values are empty."""
    # An empty resolution means the stage2 hook keeps the default hermes user.
    resolved = _resolve(stage2_text, {})
    assert resolved == ":"
def test_stage2_hook_creates_s6_envdir_before_writing_browser_path(stage2_text: str) -> None:
    """The s6 environment directory is created before the browser path is written.

    Regression guard for runtimes where ``/run/s6/container_environment`` is
    absent when the cont-init hook runs.
    """
    envdir = "/run/s6/container_environment"
    mkdir_line = "mkdir -p " + envdir
    write_line = "printf '%s' \"$browser_bin\" > " + envdir + "/AGENT_BROWSER_EXECUTABLE_PATH"

    for required in (mkdir_line, write_line):
        assert required in stage2_text

    mkdir_pos = stage2_text.index(mkdir_line)
    write_pos = stage2_text.index(write_line)
    assert mkdir_pos < write_pos
def test_stage2_hook_runs_config_migration_as_hermes(stage2_text: str) -> None:
    """The hook references the migration script and a hermes-user venv Python call."""
    required_fragments = (
        "scripts/docker_config_migrate.py",
        's6-setuidgid hermes "$INSTALL_DIR/.venv/bin/python"',
    )
    for fragment in required_fragments:
        assert fragment in stage2_text
def test_stage2_hook_documents_config_migration_opt_out(stage2_text: str) -> None:
    """The hook must mention the ``HERMES_SKIP_CONFIG_MIGRATION`` opt-out."""
    opt_out_var = "HERMES_SKIP_CONFIG_MIGRATION"
    assert opt_out_var in stage2_text

View File

@@ -1,138 +0,0 @@
"""Contract test: the s6-overlay stage2 hook resets ownership of hermes-owned
top-level state files in $HERMES_HOME — but only those, never arbitrary
host-owned files.
Regression guard for the gateway restart loop reported in #35098: files such
as gateway.lock / state.db / auth.json live directly under $HERMES_HOME (not in
a subdir), so the targeted subdir chown misses them. When created or rewritten
by `docker exec <container> hermes …` (root unless `-u` is passed) they land
root-owned and the unprivileged hermes runtime then hits PermissionError on next
startup.
The fix uses an explicit allowlist rather than a blanket `find -user root`
sweep, preserving the targeted-ownership contract from #19788 / PR #19795: a
bind-mounted $HERMES_HOME may contain host-owned files Hermes does not manage,
and those must never be chowned.
The s6-overlay rework moved bootstrap from docker/entrypoint.sh (now a shim) to
docker/stage2-hook.sh, installed as /etc/cont-init.d/01-hermes-setup. This test
targets that location.
"""
from __future__ import annotations
import os
import re
import shutil
import subprocess
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[2]
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Return the contents of ``docker/stage2-hook.sh``, read once per module.

    Requesting tests are skipped when the hook is missing from the checkout.
    The file is decoded explicitly as UTF-8 so the result does not depend on
    the locale of the machine running the tests.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    return STAGE2_HOOK.read_text(encoding="utf-8")
def _toplevel_chown_loop(text: str) -> str:
    """Return the ``for f in ... done`` loop that chowns top-level state files.

    The loop header is expected to list its items on backslash-continued
    lines ending in ``; do``; the block runs up to the first following line
    that starts with ``done``. An ``AssertionError`` is raised when no such
    loop exists or when it lacks the per-file
    ``chown hermes:hermes "$HERMES_HOME/$f"`` call.
    """
    loop_pattern = r"(for f in \\\n(?:.*\\\n)*?.*; do\n(?:.*\n)*?done)"
    found = re.search(loop_pattern, text)
    assert found, "stage2-hook.sh must contain the top-level-file chown for-loop (#35098)"

    loop_src = found.group(1)
    per_file_chown = 'chown hermes:hermes "$HERMES_HOME/$f"'
    assert per_file_chown in loop_src, (
        "the top-level-file loop must chown each allowlisted file to hermes"
    )
    return loop_src
def test_toplevel_chown_loop_present(stage2_text: str) -> None:
    """The chown allowlist covers the files reported broken in #35098."""
    loop_src = _toplevel_chown_loop(stage2_text)
    reported_files = ("auth.json", "state.db", "gateway.lock", "gateway_state.json")
    for required in reported_files:
        assert required in loop_src, (
            f"top-level chown allowlist must include {required!r} (#35098)"
        )
def test_no_blanket_find_user_root_sweep(stage2_text: str) -> None:
    """No ``find $HERMES_HOME ... -user root`` sweep may appear in the hook.

    Such a sweep would chown host-owned files in a bind mount
    (#19788 / PR #19795); the targeted allowlist is the supported mechanism.
    """
    # `find`, the (optionally quoted/braced) HERMES_HOME, then `-user root`
    # later on the same line.
    blanket_sweep = r"find\s+\"?\$\{?HERMES_HOME\}?\"?[^\n]*-user\s+root"
    offending = re.search(blanket_sweep, stage2_text)
    assert not offending, (
        "stage2-hook.sh must not blanket-chown root-owned files under "
        "$HERMES_HOME via `find -user root`; use the targeted allowlist instead "
        "so host-owned bind-mounted files are preserved (#19788, #19795)."
    )
def _run_loop(text: str, present_files: list[str]) -> list[str]:
    """Execute the extracted chown loop in a sandbox and report what it touched.

    Args:
        text: Full stage2 hook source; the chown loop is cut out of it.
        present_files: Names created (empty) inside the sandbox
            ``$HERMES_HOME`` before the loop runs.

    Returns:
        The basenames passed to the stubbed ``chown``, in call order; an
        empty list when ``chown`` was never called.

    A ``host_secret.json`` file is always created as well, standing in for a
    non-allowlisted host-owned file. The test is skipped when ``bash`` is
    unavailable.
    """
    shell = shutil.which("bash")
    if shell is None:
        pytest.skip("bash not available")
    loop_src = _toplevel_chown_loop(text)

    import tempfile

    with tempfile.TemporaryDirectory() as sandbox:
        sandbox_dir = Path(sandbox)
        hermes_home = sandbox_dir / "home"
        hermes_home.mkdir()
        for name in present_files:
            (hermes_home / name).touch()
        # A non-allowlisted, "host-owned" file that must never be chowned.
        (hermes_home / "host_secret.json").touch()

        # The chown stub logs the basename of its last argument (the path),
        # which shows exactly which files the loop selected without needing
        # real root privileges.
        chown_stub = (
            f'chown() {{ for a in "$@"; do :; done; echo "${{a##*/}}" >> "{sandbox_dir}/chown.log"; }}\n'
        )
        harness = "".join(
            ("set -e\n", f'HERMES_HOME="{hermes_home}"\n', chown_stub, loop_src)
        )
        harness_path = sandbox_dir / "harness.sh"
        harness_path.write_text(harness)

        result = subprocess.run([shell, str(harness_path)], capture_output=True, text=True)
        assert result.returncode == 0, result.stderr

        chown_log = sandbox_dir / "chown.log"
        if not chown_log.exists():
            return []
        return list(filter(None, chown_log.read_text().splitlines()))
def test_loop_chowns_present_allowlisted_files(stage2_text: str) -> None:
    """Every allowlisted file that exists is handed to ``chown``."""
    present = ["auth.json", "state.db", "gateway.lock"]
    touched = _run_loop(stage2_text, present)
    for name in present:
        assert name in touched
def test_loop_skips_nonallowlisted_host_file(stage2_text: str) -> None:
    """A present file that is not on the allowlist is never chowned.

    ``host_secret.json`` stands in for a host-owned file in a bind mount.
    """
    touched = _run_loop(stage2_text, ["auth.json"])
    host_file = "host_secret.json"
    assert host_file not in touched, (
        "the allowlist loop must not touch non-allowlisted files (#19788)"
    )
def test_loop_skips_absent_files(stage2_text: str) -> None:
    """Allowlisted files that do not exist produce no ``chown`` call."""
    touched = _run_loop(stage2_text, ["auth.json"])
    # state.db was never created in the sandbox, so it must not show up.
    absent_file = "state.db"
    assert absent_file not in touched

View File

@@ -1,86 +0,0 @@
"""Regression tests for Docker stage2 UID/GID handling on NAS hosts.
Unraid commonly runs appdata as nobody:users (99:100). The stage2 hook must
accept those non-root numeric IDs and keep legacy/new pairing stores writable
after targeted ownership reconciliation.
"""
from __future__ import annotations
import os
import re
import shutil
import subprocess
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[2]
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Return the contents of ``docker/stage2-hook.sh``, read once per module.

    Requesting tests are skipped when the hook is missing from the checkout.
    The file is decoded explicitly as UTF-8 so the result does not depend on
    the locale of the machine running the tests.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    return STAGE2_HOOK.read_text(encoding="utf-8")
def _uid_gid_validator(text: str) -> str:
    """Return the hook source from ``validate_uid_gid()`` up to the remap marker.

    Everything from the first ``validate_uid_gid()`` occurrence to just
    before the ``# --- UID/GID remap ---`` comment is returned. A
    ``ValueError`` is raised when the function name does not occur before
    the marker.
    """
    preamble, *_ = text.split("# --- UID/GID remap ---", 1)
    definition_start = preamble.index("validate_uid_gid()")
    return preamble[definition_start:]
def _validate_uid_gid(text: str, value: str) -> bool:
    """Report whether the hook's ``validate_uid_gid`` accepts *value*.

    The validator source is extracted from *text* and run under ``bash -c``
    with *value* passed through the ``CANDIDATE`` environment variable.
    Returns ``True`` when the shell exits with status 0. The test is skipped
    when ``bash`` is unavailable.
    """
    shell = shutil.which("bash")
    if shell is None:
        pytest.skip("bash not available")

    harness = _uid_gid_validator(text) + '\nvalidate_uid_gid "$CANDIDATE"\n'
    child_env = {"PATH": os.environ.get("PATH", ""), "CANDIDATE": value}
    result = subprocess.run(
        [shell, "-c", harness],
        env=child_env,
        capture_output=True,
        text=True,
    )
    return result.returncode == 0
@pytest.mark.parametrize("value", ["1", "99", "100", "1000", "65534"])
def test_uid_gid_validator_accepts_non_root_nas_ids(stage2_text: str, value: str) -> None:
    """Non-root numeric IDs, including Unraid's 99:100, pass validation (#38070)."""
    accepted = _validate_uid_gid(stage2_text, value)
    assert accepted, (
        f"stage2 hook must accept NAS UID/GID {value}; Unraid uses 99:100 (#38070)"
    )
@pytest.mark.parametrize("value", ["", "0", "abc", "99x", "65535"])
def test_uid_gid_validator_rejects_root_invalid_and_out_of_range(
    stage2_text: str,
    value: str,
) -> None:
    """Empty, root (0), non-numeric and out-of-range values are rejected."""
    accepted = _validate_uid_gid(stage2_text, value)
    assert not accepted
def _targeted_chown_subdirs(text: str) -> list[str]:
    """Return the whitespace-separated items of the targeted subdir chown loop.

    The loop is recognised by a ``for sub in ...; do`` header directly
    followed by an ``if [ -e "$HERMES_HOME/$sub" ]`` test; the item list may
    span several lines. An ``AssertionError`` is raised when no such loop is
    found.
    """
    loop_header = re.compile(
        r"for sub in (?P<items>.*?); do\n\s*if \[ -e \"\$HERMES_HOME/\$sub\" \]",
        re.DOTALL,
    )
    found = loop_header.search(text)
    assert found, "stage2-hook.sh must contain the targeted subdir chown loop"
    return found.group("items").split()
def test_targeted_chown_covers_legacy_and_new_pairing_dirs(stage2_text: str) -> None:
    """Both the legacy and the new pairing directory are in the chown loop."""
    chowned = _targeted_chown_subdirs(stage2_text)
    for pairing_dir in ("pairing", "platforms/pairing"):
        assert pairing_dir in chowned
def test_seeded_directory_list_covers_legacy_and_new_pairing_dirs(stage2_text: str) -> None:
    """Both pairing directories appear in the ``as_hermes mkdir -p`` seed list.

    The seed list is the text between the ``as_hermes mkdir -p \\`` line and
    the ``# --- Install-method stamp`` section marker.
    """
    after_mkdir = stage2_text.split("as_hermes mkdir -p \\", 1)[1]
    seed_list = after_mkdir.split("# --- Install-method stamp", 1)[0]
    for seeded_dir in ('"$HERMES_HOME/pairing"', '"$HERMES_HOME/platforms/pairing"'):
        assert seeded_dir in seed_list

View File

@@ -1,119 +0,0 @@
"""Contract test: the s6-overlay stage2 hook and main-wrapper reject an
unsupported `docker run --user <arbitrary-uid>:<gid>` start with actionable
guidance, while still allowing:
- root start (id -u == 0)
- `--user <hermes-uid>` (the supported non-root start, #34648 / #34837)
Background: in the tini era `docker run --user $(id -u):$(id -g)` was used to
make container-written files match the host user. Under s6-overlay this can't
work — the bootstrap (UID remap, volume/build-tree chown, config seeding) needs
root, and the baked image dirs are owned by the hermes build UID, so an
arbitrary pinned UID can't write them (EACCES on a bind mount, hard crash on a
named volume). The supported path is root start + HERMES_UID/HERMES_GID (or the
PUID/PGID aliases), which remaps the hermes user and chowns the volume.
The guard fires only when the current UID is neither root NOR the hermes UID,
so the #34648 `--user 10000:10000` case (pinning to the hermes UID itself) is
unaffected.
Extraction + stubbed-shell-run mirrors
tests/tools/test_stage2_hook_toplevel_chown.py.
"""
from __future__ import annotations
import re
import shutil
import subprocess
import tempfile
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[2]
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
MAIN_WRAPPER = REPO_ROOT / "docker" / "main-wrapper.sh"
def _read(p: Path) -> str:
    """Return the text of *p*, skipping the calling test when it is missing.

    Args:
        p: Path of the script to load.

    Returns:
        The file contents decoded as UTF-8.

    The encoding is passed explicitly so the result does not depend on the
    locale of the machine running the tests (e.g. a ``C``/ASCII locale in CI
    would otherwise fail on any non-ASCII character in the script).
    """
    if not p.exists():
        # pytest.skip raises, so the read below is never attempted.
        pytest.skip(f"{p} not present in this checkout")
    return p.read_text(encoding="utf-8")
def _guard_block(text: str) -> str:
    """Return the ``--user`` guard snippet found in *text*.

    The snippet begins at the ``cur_uid="$(id -u)"`` assignment, continues
    with the ``if [ "$cur_uid" != 0 ]`` test on the next line, and runs
    through the first subsequent line that starts with ``fi``.

    Fails with ``AssertionError`` when no such snippet is present.
    """
    guard_re = re.compile(
        r"(cur_uid=\"\$\(id -u\)\"\nif \[ \"\$cur_uid\" != 0 \](?:.*\n)*?fi)"
    )
    found = guard_re.search(text)
    assert found, "expected the --user guard block (cur_uid + non-root/non-hermes check)"
    return found.group(1)
@pytest.mark.parametrize("path", [STAGE2_HOOK, MAIN_WRAPPER])
def test_guard_present_and_mentions_remediation(path: Path) -> None:
    """Each script carries the guard, and the guard names the supported fix."""
    guard = _guard_block(_read(path))
    # The guard has to test "not root" AND "not the hermes UID"; otherwise a
    # container pinned to the hermes UID (--user 10000:10000) would be refused.
    for condition in ('"$cur_uid" != 0', '"$cur_uid" != "$(id -u hermes)"'):
        assert condition in guard
    assert "exit 1" in guard
    # The guard text has to mention the supported environment variables.
    for env_var in ("HERMES_UID", "HERMES_GID", "PUID", "PGID"):
        assert env_var in guard
def _run_guard(text: str, *, cur_uid: int, hermes_uid: int = 10000) -> subprocess.CompletedProcess:
    """Execute the guard extracted from *text* in bash with ``id`` stubbed out.

    Args:
        text: Full script text containing the guard.
        cur_uid: Value the stub prints for a plain ``id -u``.
        hermes_uid: Value the stub prints for ``id -u hermes``.

    Returns:
        The finished bash process with captured text output. ``GUARD_PASSED``
        is echoed to stdout only if execution continues past the guard.

    The calling test is skipped when no ``bash`` executable is on PATH.
    """
    bash_path = shutil.which("bash")
    if bash_path is None:
        pytest.skip("bash not available")
    guard = _guard_block(text)
    # Shell function shadowing `id`: a second argument of "hermes" selects the
    # hermes UID, any other invocation reports the current UID.
    id_stub = f'id() {{ if [ "$2" = hermes ]; then echo {hermes_uid}; else echo {cur_uid}; fi; }}\n'
    # The trailing echo is the marker showing the guard let execution continue.
    harness = "".join(("set -e\n", id_stub, guard, "\necho GUARD_PASSED\n"))
    with tempfile.TemporaryDirectory() as workdir:
        script_file = Path(workdir) / "h.sh"
        script_file.write_text(harness)
        return subprocess.run([bash_path, str(script_file)], capture_output=True, text=True)
def test_arbitrary_user_uid_is_rejected() -> None:
    """UID 1000 is neither root nor the hermes UID, so both guards refuse it."""
    # Load both scripts up front so a missing file skips before any guard runs.
    hook_texts = [_read(script) for script in (STAGE2_HOOK, MAIN_WRAPPER)]
    for hook_text in hook_texts:
        result = _run_guard(hook_text, cur_uid=1000, hermes_uid=10000)
        assert result.returncode == 1, f"expected rejection, got rc={result.returncode}"
        assert "not supported" in result.stderr
        assert "GUARD_PASSED" not in result.stdout
def test_root_start_passes() -> None:
    """A root start (UID 0) must get through the guard in both scripts."""
    # Load both scripts up front so a missing file skips before any guard runs.
    hook_texts = [_read(script) for script in (STAGE2_HOOK, MAIN_WRAPPER)]
    for hook_text in hook_texts:
        result = _run_guard(hook_text, cur_uid=0, hermes_uid=10000)
        assert result.returncode == 0, result.stderr
        assert "GUARD_PASSED" in result.stdout
def test_user_pinned_to_hermes_uid_passes() -> None:
    """Pinning the container to the hermes UID itself must not be blocked.

    ``--user 10000:10000`` is the supported non-root start from
    #34648 / #34837.
    """
    # Load both scripts up front so a missing file skips before any guard runs.
    hook_texts = [_read(script) for script in (STAGE2_HOOK, MAIN_WRAPPER)]
    for hook_text in hook_texts:
        result = _run_guard(hook_text, cur_uid=10000, hermes_uid=10000)
        assert result.returncode == 0, result.stderr
        assert "GUARD_PASSED" in result.stdout
def test_user_pinned_to_remapped_hermes_uid_passes() -> None:
    """A container pinned to a remapped hermes UID must still pass.

    After a HERMES_UID remap the hermes UID is e.g. 4242; the guard compares
    against that value, so ``cur_uid == hermes_uid`` is allowed through.
    """
    # Load both scripts up front so a missing file skips before any guard runs.
    hook_texts = [_read(script) for script in (STAGE2_HOOK, MAIN_WRAPPER)]
    for hook_text in hook_texts:
        result = _run_guard(hook_text, cur_uid=4242, hermes_uid=4242)
        assert result.returncode == 0, result.stderr
        assert "GUARD_PASSED" in result.stdout