mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-15 22:51:14 +08:00
Compare commits
16 Commits
ironclaw-p
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8a99325dbd | ||
|
|
aca11c227e | ||
|
|
6cb88a0874 | ||
|
|
8fce54499f | ||
|
|
b0c99c12dd | ||
|
|
ddf7c7af81 | ||
|
|
d6a8d9dcab | ||
|
|
95715dcb03 | ||
|
|
80f8ffc74c | ||
|
|
c2b7669ad3 | ||
|
|
b770967263 | ||
|
|
61ee2dbfdb | ||
|
|
f795513782 | ||
|
|
8fe334b056 | ||
|
|
40d7c264f0 | ||
|
|
4eb0ff639b |
@@ -43,7 +43,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
|
||||
2
.github/workflows/contributor-check.yml
vendored
2
.github/workflows/contributor-check.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
check-attribution:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
fetch-depth: 0 # Full history needed for git log
|
||||
|
||||
|
||||
2
.github/workflows/deploy-site.yml
vendored
2
.github/workflows/deploy-site.yml
vendored
@@ -52,7 +52,7 @@ jobs:
|
||||
name: github-pages
|
||||
url: ${{ steps.deploy.outputs.page_url }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
|
||||
6
.github/workflows/docker-lint.yml
vendored
6
.github/workflows/docker-lint.yml
vendored
@@ -40,10 +40,10 @@ jobs:
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- name: hadolint
|
||||
uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0
|
||||
uses: hadolint/hadolint-action@2332a7b74a6de0dda2e2221d575162eba76ba5e5 # v3.3.0
|
||||
with:
|
||||
dockerfile: Dockerfile
|
||||
config: .hadolint.yaml
|
||||
@@ -55,7 +55,7 @@ jobs:
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- name: shellcheck
|
||||
uses: ludeeus/action-shellcheck@00cae500b08a931fb5698e11e79bfbd38e612a38 # v2.0.0
|
||||
|
||||
22
.github/workflows/docker-publish.yml
vendored
22
.github/workflows/docker-publish.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
@@ -66,7 +66,7 @@ jobs:
|
||||
# to gha with a per-arch scope; the push step below reuses every
|
||||
# layer from this build.
|
||||
- name: Build image (amd64, smoke test)
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -135,7 +135,7 @@ jobs:
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
- name: Push amd64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
@@ -204,7 +204,7 @@ jobs:
|
||||
# crashed the build before the smoke test (the reason the gha cache
|
||||
# was removed from arm64 PRs in the first place).
|
||||
- name: Log in to ghcr.io (build cache)
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
@@ -225,7 +225,7 @@ jobs:
|
||||
# token failure mode cannot recur.
|
||||
- name: Build image (arm64, smoke test, cache read-only PR)
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -241,7 +241,7 @@ jobs:
|
||||
# PR/main build starts warm.
|
||||
- name: Build image (arm64, smoke test, cached publish)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -260,7 +260,7 @@ jobs:
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
@@ -268,7 +268,7 @@ jobs:
|
||||
- name: Push arm64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -322,7 +322,7 @@ jobs:
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
2
.github/workflows/docs-site-checks.yml
vendored
2
.github/workflows/docs-site-checks.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
docs-site-checks:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
|
||||
2
.github/workflows/history-check.yml
vendored
2
.github/workflows/history-check.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
check-common-ancestor:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
fetch-depth: 0 # full history both sides for merge-base
|
||||
|
||||
|
||||
6
.github/workflows/lint.yml
vendored
6
.github/workflows/lint.yml
vendored
@@ -37,7 +37,7 @@ jobs:
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
fetch-depth: 0 # need full history for merge-base + worktree
|
||||
|
||||
@@ -167,7 +167,7 @@ jobs:
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v5
|
||||
|
||||
4
.github/workflows/nix-lockfile-fix.yml
vendored
4
.github/workflows/nix-lockfile-fix.yml
vendored
@@ -56,7 +56,7 @@ jobs:
|
||||
app-id: ${{ secrets.APP_ID }}
|
||||
private-key: ${{ secrets.APP_PRIVATE_KEY }}
|
||||
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
ref: main
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
@@ -195,7 +195,7 @@ jobs:
|
||||
|
||||
Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
|
||||
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
|
||||
ref: ${{ steps.resolve.outputs.ref }}
|
||||
|
||||
2
.github/workflows/nix.yml
vendored
2
.github/workflows/nix.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
2
.github/workflows/skills-index.yml
vendored
2
.github/workflows/skills-index.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
|
||||
8
.github/workflows/supply-chain-audit.yml
vendored
8
.github/workflows/supply-chain-audit.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
# True when the curated MCP catalog / bundled MCP manifests changed.
|
||||
mcp_catalog: ${{ steps.filter.outputs.mcp_catalog }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Check for relevant file changes
|
||||
@@ -72,7 +72,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -207,7 +207,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -286,7 +286,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
|
||||
4
.github/workflows/tests.yml
vendored
4
.github/workflows/tests.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
||||
slice: [1, 2, 3, 4, 5, 6]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- name: Restore duration cache
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
@@ -163,7 +163,7 @@ jobs:
|
||||
timeout-minutes: 15
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
|
||||
2
.github/workflows/typecheck.yml
vendored
2
.github/workflows/typecheck.yml
vendored
@@ -16,7 +16,7 @@ jobs:
|
||||
[ui-tui, web, apps/bootstrap-installer, apps/desktop, apps/shared]
|
||||
fail-fast: false # report all failures, not just the first one
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
4
.github/workflows/upload_to_pypi.yml
vendored
4
.github/workflows/upload_to_pypi.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
name: Build distribution 📦
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
# On workflow_dispatch, check out the confirmed tag.
|
||||
@@ -145,7 +145,7 @@ jobs:
|
||||
|
||||
- name: Sign with Sigstore
|
||||
if: env.skip_sign != 'true'
|
||||
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
|
||||
uses: sigstore/gh-action-sigstore-python@5b79a39c381910c090341a2c9b0bf022c8b387e1 # v3.4.0
|
||||
with:
|
||||
inputs: >-
|
||||
./dist/*.tar.gz
|
||||
|
||||
2
.github/workflows/uv-lockfile-check.yml
vendored
2
.github/workflows/uv-lockfile-check.yml
vendored
@@ -71,7 +71,7 @@ jobs:
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
@@ -80,9 +80,12 @@ const HOVER_REVEAL_EASE = 'cubic-bezier(0.32,0.72,0,1)'
|
||||
// Offset shadow lifting the revealed panel off the content (same both sides;
|
||||
// the mirror axis is offset-x, which is 0). Same color on light + dark.
|
||||
const HOVER_REVEAL_SHADOW = '0px -18px 18px -5px #00000012'
|
||||
// Edge trigger strip, inset past the OS window-resize grab area.
|
||||
// Edge trigger strip, inset past the OS window-resize grab area AND the
|
||||
// adjacent pane's scrollbar (0.5rem, .scrollbar-dt) — the strip overlays the
|
||||
// neighboring scroller's edge, so any overlap makes the scrollbar reveal the
|
||||
// pane on hover and swallow its clicks (#44140).
|
||||
const HOVER_REVEAL_TRIGGER_WIDTH = 14
|
||||
const HOVER_REVEAL_EDGE_GUTTER = 6
|
||||
const HOVER_REVEAL_EDGE_GUTTER = 'calc(0.5rem + 2px)'
|
||||
|
||||
// Fired (window CustomEvent<{ id }>) to toggle a force-collapsed pane's reveal
|
||||
// from the keyboard, since its store-open toggle is a no-op while collapsed.
|
||||
|
||||
@@ -316,6 +316,7 @@ as_hermes mkdir -p \
|
||||
"$HERMES_HOME/cron" \
|
||||
"$HERMES_HOME/sessions" \
|
||||
"$HERMES_HOME/logs" \
|
||||
"$HERMES_HOME/logs/gateways" \
|
||||
"$HERMES_HOME/hooks" \
|
||||
"$HERMES_HOME/memories" \
|
||||
"$HERMES_HOME/skills" \
|
||||
|
||||
@@ -28,11 +28,14 @@ from typing import Literal, Sequence
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Only this prior state triggers automatic restart. Everything else
|
||||
# Only this desired state triggers automatic restart. Everything else
|
||||
# (startup_failed, starting, stopped, missing) registers the slot in
|
||||
# the down state and waits for explicit user action — this avoids the
|
||||
# crash-loop where a broken gateway keeps being restarted across
|
||||
# `docker restart` cycles.
|
||||
# `docker restart` cycles. Older installs only have gateway_state;
|
||||
# newer lifecycle commands persist desired_state separately so a transient
|
||||
# runtime state (draining/startup_failed) does not erase the operator's
|
||||
# durable start/stop intent across pod/container recreation.
|
||||
_AUTOSTART_STATES = frozenset({"running"})
|
||||
|
||||
# Stale runtime files we sweep before recreating service slots. These
|
||||
@@ -104,7 +107,7 @@ def reconcile_profile_gateways(
|
||||
container_argv=container_argv,
|
||||
dry_run=dry_run,
|
||||
)
|
||||
default_prior_state = legacy_default_state or _read_prior_state(hermes_home)
|
||||
default_prior_state = legacy_default_state or _read_desired_state(hermes_home)
|
||||
default_should_start = default_prior_state in _AUTOSTART_STATES
|
||||
if not dry_run:
|
||||
_cleanup_stale_runtime_files(hermes_home)
|
||||
@@ -139,7 +142,7 @@ def reconcile_profile_gateways(
|
||||
)
|
||||
continue
|
||||
|
||||
prior_state = _read_prior_state(entry)
|
||||
prior_state = _read_desired_state(entry)
|
||||
should_start = prior_state in _AUTOSTART_STATES
|
||||
|
||||
if not dry_run:
|
||||
@@ -188,6 +191,7 @@ def _maybe_migrate_legacy_gateway_run_state(
|
||||
import time
|
||||
state_file.write_text(json.dumps({
|
||||
"gateway_state": "running",
|
||||
"desired_state": "running",
|
||||
"timestamp": int(time.time()),
|
||||
"migrated_from": "legacy-container-cmd",
|
||||
}) + "\n")
|
||||
@@ -203,8 +207,15 @@ def _read_container_argv() -> tuple[str, ...]:
|
||||
return tuple(part.decode("utf-8", "replace") for part in raw.split(b"\0") if part)
|
||||
|
||||
|
||||
def _is_legacy_gateway_run_request(argv: Sequence[str]) -> bool:
|
||||
"""Return True for Docker commands equivalent to `gateway run`."""
|
||||
def _strip_container_argv_prefix(argv: Sequence[str]) -> list[str]:
|
||||
"""Strip the s6/wrapper prefix off PID 1 argv, leaving the hermes args.
|
||||
|
||||
The container PID 1 argv looks like
|
||||
``/init /opt/hermes/docker/main-wrapper.sh <subcommand> [args...]`` and
|
||||
the wrapper re-execs ``hermes <subcommand>``. Peel ``init`` →
|
||||
``main-wrapper.sh`` → ``hermes`` so callers can match on the bare
|
||||
subcommand. Shared by the legacy-gateway and dashboard role detectors.
|
||||
"""
|
||||
args = list(argv)
|
||||
if args and Path(args[0]).name == "init":
|
||||
args = args[1:]
|
||||
@@ -212,20 +223,58 @@ def _is_legacy_gateway_run_request(argv: Sequence[str]) -> bool:
|
||||
args = args[1:]
|
||||
if args and Path(args[0]).name == "hermes":
|
||||
args = args[1:]
|
||||
return args
|
||||
|
||||
|
||||
def _is_legacy_gateway_run_request(argv: Sequence[str]) -> bool:
    """Tell whether a container command amounts to ``gateway run``.

    The wrapper prefix is removed from *argv* first, so only the bare
    hermes arguments are inspected. A command that carries
    ``--no-supervise`` anywhere in those arguments is never treated as a
    legacy request.
    """
    hermes_args = _strip_container_argv_prefix(argv)
    opted_out = "--no-supervise" in hermes_args
    if opted_out or len(hermes_args) < 2:
        return False
    subcommand, action = hermes_args[0], hermes_args[1]
    return subcommand == "gateway" and action == "run"
|
||||
|
||||
|
||||
def _read_prior_state(profile_dir: Path) -> str | None:
|
||||
"""Read gateway_state.json's ``gateway_state`` field, or None if
|
||||
missing or unparseable. Unparseable counts as "no prior state" so
|
||||
we don't bork the whole reconciliation on a corrupt file."""
|
||||
def _is_dashboard_container(argv: Sequence[str]) -> bool:
    """Tell whether the container's command is ``hermes dashboard ...``.

    Per-profile gateways are spawned and supervised by the gateway
    container, never by a dashboard-only one. Reconciling the profile
    gateway s6 slots from the dashboard container is worse than wasted
    work: if both containers share a bind-mounted HERMES_HOME they race to
    ``flock()`` the same ``logs/gateways/<profile>/lock`` files, which
    yields "Resource busy" failures and an s6-log restart storm. The
    dashboard container therefore skips reconciliation altogether.

    The role is read from PID 1 argv (``/proc/1/cmdline``) instead of an
    operator flag. It is a property of the container's command rather than
    a tunable, and a flag is easy to omit from a hand-written compose/k8s
    manifest, which would bring the storm back. The argv is unwrapped the
    same way as in :func:`_is_legacy_gateway_run_request`.
    """
    hermes_args = _strip_container_argv_prefix(argv)
    if not hermes_args:
        return False
    return hermes_args[0] == "dashboard"
|
||||
|
||||
|
||||
def _read_desired_state(profile_dir: Path) -> str | None:
|
||||
"""Read the persisted gateway desired state for reconciliation.
|
||||
|
||||
Newer state files carry ``desired_state``: operator intent written by
|
||||
s6 lifecycle commands. Older files only carry ``gateway_state``; keep
|
||||
that as a compatibility fallback so existing running/stopped profiles
|
||||
preserve their behavior until the next explicit start/stop.
|
||||
|
||||
Missing or unparseable files count as "no desired state" so we don't
|
||||
bork the whole reconciliation on a corrupt file.
|
||||
"""
|
||||
state_file = profile_dir / "gateway_state.json"
|
||||
if not state_file.exists():
|
||||
return None
|
||||
try:
|
||||
return json.loads(state_file.read_text()).get("gateway_state")
|
||||
data = json.loads(state_file.read_text())
|
||||
desired_state = data.get("desired_state")
|
||||
if desired_state is not None:
|
||||
return desired_state
|
||||
return data.get("gateway_state")
|
||||
except (OSError, json.JSONDecodeError):
|
||||
log.warning(
|
||||
"could not read %s; treating as no prior state", state_file,
|
||||
@@ -378,6 +427,22 @@ _LOG_ROTATE_BYTES = 256 * 1024
|
||||
|
||||
def main() -> int:
|
||||
"""Entry point invoked from /etc/cont-init.d/02-reconcile-profiles."""
|
||||
# A dashboard-only container never spawns or supervises per-profile
|
||||
# gateways, so reconciling their s6 slots here is pure waste — and
|
||||
# actively harmful: when the gateway and dashboard containers share a
|
||||
# bind-mounted HERMES_HOME, both race to flock() the same s6-log lock
|
||||
# files under logs/gateways/<profile>/lock, producing "Resource busy"
|
||||
# failures and a restart storm. Detect the role from PID 1 argv and
|
||||
# skip reconciliation in the dashboard container. No operator flag:
|
||||
# the role is a fact about the container's command, and a flag can be
|
||||
# forgotten in a hand-written manifest, reintroducing the storm.
|
||||
if _is_dashboard_container(_read_container_argv()):
|
||||
print(
|
||||
"reconcile: skipping (dashboard container — does not need "
|
||||
"per-profile gateways)"
|
||||
)
|
||||
return 0
|
||||
|
||||
hermes_home = Path(os.environ.get("HERMES_HOME", "/opt/data"))
|
||||
scandir = Path(os.environ.get("S6_PROFILE_GATEWAY_SCANDIR", "/run/service"))
|
||||
actions = reconcile_profile_gateways(
|
||||
|
||||
@@ -452,8 +452,19 @@ def _apply_profile_override() -> None:
|
||||
if Path(hermes_home_env).parent.name == "profiles":
|
||||
return
|
||||
|
||||
# 2. If no flag, check active_profile in the hermes root
|
||||
if profile_name is None:
|
||||
# 2. If no flag, check active_profile in the hermes root.
|
||||
#
|
||||
# EXCEPTION: a supervised s6 gateway child (exported by the container
|
||||
# run-script as HERMES_S6_SUPERVISED_CHILD=1) must NOT follow the sticky
|
||||
# active_profile. Each supervised slot has a fixed profile identity: named
|
||||
# slots pass ``-p <name>`` explicitly (handled in step 1 above), and the
|
||||
# reserved ``gateway-default`` slot runs bare ``hermes gateway run`` to mean
|
||||
# "the root HERMES_HOME profile". If the reserved default child read
|
||||
# active_profile here, switching the active profile (e.g. via the dashboard)
|
||||
# would silently redirect the default gateway into that profile — yielding a
|
||||
# duplicate gateway for the active profile and no real default gateway. See
|
||||
# the "Docker & Profiles & Dashboard" report.
|
||||
if profile_name is None and not os.environ.get("HERMES_S6_SUPERVISED_CHILD"):
|
||||
try:
|
||||
from hermes_constants import get_default_hermes_root
|
||||
|
||||
@@ -10753,8 +10764,24 @@ def cmd_dashboard(args):
|
||||
if getattr(args, "skip_build", False):
|
||||
reexec_argv.append("--skip-build")
|
||||
env = os.environ.copy()
|
||||
# Drop the profile HERMES_HOME so the child binds the machine root.
|
||||
env.pop("HERMES_HOME", None)
|
||||
# Pin the child to the machine ROOT, not the launching profile's
|
||||
# HERMES_HOME. We must resolve the root explicitly instead of just
|
||||
# dropping HERMES_HOME: in the Docker layout the machine root is
|
||||
# /opt/data (set via `ENV HERMES_HOME=/opt/data`), so an unset
|
||||
# HERMES_HOME falls back to $HOME/.hermes = /opt/data/.hermes — an
|
||||
# empty, auto-seeded home where the dashboard sees only the default
|
||||
# profile and the install-method stamp is missing (so the Docker
|
||||
# update-button guard also misfires). get_default_hermes_root()
|
||||
# returns the root for both layouts: ~/.hermes for a standard install
|
||||
# and /opt/data for Docker (it strips a trailing profiles/<name>).
|
||||
# See the support report for the double-mount workaround this avoids.
|
||||
try:
|
||||
from hermes_constants import get_default_hermes_root
|
||||
env["HERMES_HOME"] = str(get_default_hermes_root())
|
||||
except Exception:
|
||||
# Best-effort: if root resolution fails, fall back to the prior
|
||||
# behaviour (drop HERMES_HOME) rather than block the reroute.
|
||||
env.pop("HERMES_HOME", None)
|
||||
# On Windows, os.execvpe() does not truly replace the process — it
|
||||
# spawns via CreateProcess then the parent exits. Under Python 3.14+
|
||||
# this can crash with STATUS_ACCESS_VIOLATION (0xC0000005) when
|
||||
|
||||
@@ -1268,7 +1268,7 @@ def _maybe_register_gateway_service(profile_name: str) -> None:
|
||||
if not mgr.supports_runtime_registration():
|
||||
return # host backend; no-op
|
||||
try:
|
||||
mgr.register_profile_gateway(profile_name)
|
||||
mgr.register_profile_gateway(profile_name, start_now=False)
|
||||
except ValueError:
|
||||
# Already registered (e.g. the container-boot reconciler ran
|
||||
# first and brought up a stale slot). That's fine.
|
||||
|
||||
@@ -77,6 +77,7 @@ class ServiceManager(Protocol):
|
||||
profile: str,
|
||||
*,
|
||||
extra_env: dict[str, str] | None = None,
|
||||
start_now: bool = True,
|
||||
) -> None: ...
|
||||
def unregister_profile_gateway(self, profile: str) -> None: ...
|
||||
def list_profile_gateways(self) -> list[str]: ...
|
||||
@@ -86,7 +87,8 @@ def detect_service_manager() -> ServiceManagerKind:
|
||||
"""Detect which service manager is available in this environment.
|
||||
|
||||
Returns:
|
||||
"s6" — inside a container when /init is s6-svscan (Phase 2+)
|
||||
"s6" — s6-svscan is PID 1 (s6-overlay image; Docker, Podman, or a
|
||||
Fly Firecracker microVM)
|
||||
"windows" — native Windows host
|
||||
"launchd" — macOS host
|
||||
"systemd" — Linux host with a working user/system bus
|
||||
@@ -100,14 +102,20 @@ def detect_service_manager() -> ServiceManagerKind:
|
||||
# Imports deferred so importing this module doesn't drag in the
|
||||
# whole gateway dependency graph for callers that only need the
|
||||
# Protocol type or validate_profile_name().
|
||||
from hermes_constants import is_container
|
||||
from hermes_cli.gateway import (
|
||||
is_macos,
|
||||
is_windows,
|
||||
supports_systemd_services,
|
||||
)
|
||||
|
||||
if is_container() and _s6_running():
|
||||
# Gate on _s6_running() alone (PID 1 comm == s6-svscan AND /run/s6/basedir),
|
||||
# NOT is_container(): the latter only detects Docker/Podman/lxc, so it is
|
||||
# False on Fly's Firecracker microVMs even though s6-overlay is PID 1 there.
|
||||
# That false negative made the whole s6 dispatch path inert on Fly, so
|
||||
# `hermes gateway start/stop/restart` fell through to host code that spawns
|
||||
# a foreground gateway competing with the supervised one. _s6_running() is
|
||||
# already an s6-overlay-specific signal, so the container gate was redundant.
|
||||
if _s6_running():
|
||||
return "s6"
|
||||
if is_windows():
|
||||
return "windows"
|
||||
@@ -175,6 +183,7 @@ class _RegistrationUnsupportedMixin:
|
||||
profile: str,
|
||||
*,
|
||||
extra_env: dict[str, str] | None = None,
|
||||
start_now: bool = True,
|
||||
) -> None:
|
||||
raise NotImplementedError(
|
||||
f"{type(self).__name__} does not support runtime profile "
|
||||
@@ -325,6 +334,62 @@ def get_service_manager() -> ServiceManager:
|
||||
S6_DYNAMIC_SCANDIR = Path("/run/service")
|
||||
S6_SERVICE_PREFIX = "gateway-"
|
||||
|
||||
|
||||
def _profile_dir_for_gateway_service(name: str) -> Path:
    """Map a ``gateway-<profile>`` service name to its profile directory.

    Lifecycle commands (``gateway stop --all`` among them) can be run from
    whichever profile is active, so the caller's HERMES_HOME must not be
    used as-is. The shared root is derived from HERMES_HOME instead: when
    HERMES_HOME sits directly under a ``profiles`` directory the root is
    two levels up, otherwise HERMES_HOME is the root. The ``default``
    profile resolves to the root itself; any other profile resolves to
    ``<root>/profiles/<profile>``.

    The profile name (service name minus the ``gateway-`` prefix, if
    present) is passed through ``validate_profile_name`` before use.
    """
    import os

    # Accept both the full service name and a bare profile name.
    profile = name
    if name.startswith(S6_SERVICE_PREFIX):
        profile = name[len(S6_SERVICE_PREFIX):]
    validate_profile_name(profile)

    home = Path(os.environ.get("HERMES_HOME", "/opt/data"))
    launched_from_profile = home.parent.name == "profiles"
    root = home.parent.parent if launched_from_profile else home

    if profile == "default":
        return root
    return root / "profiles" / profile
|
||||
|
||||
|
||||
def _write_gateway_desired_state(name: str, desired_state: str) -> None:
    """Persist durable s6 gateway intent next to runtime status.

    ``gateway_state`` remains the volatile runtime field written by the
    gateway process. ``desired_state`` records the operator's start/stop
    intent so container-boot reconciliation can restore the correct s6
    want-up/want-down state after pod recreation even if the previous
    runtime state was transient (draining, startup_failed, etc.).

    The write is best-effort: a failed persistence attempt must not prevent
    immediate s6 lifecycle control, so filesystem and decoding problems are
    swallowed here rather than raised.

    Args:
        name: Service name (``gateway-<profile>``) or bare profile name.
        desired_state: Intent to record under the ``desired_state`` key.
    """
    import json
    import time

    profile_dir = _profile_dir_for_gateway_service(name)
    state_file = profile_dir / "gateway_state.json"

    try:
        if not profile_dir.exists():
            # Never create a profile directory as a side effect.
            return
    except OSError:
        return

    # Start from the existing document so runtime fields are preserved.
    # ValueError covers both json.JSONDecodeError and UnicodeDecodeError:
    # a file with undecodable bytes is treated like any other corrupt
    # file instead of escaping this best-effort helper.
    data: dict = {}
    try:
        if state_file.exists():
            loaded = json.loads(state_file.read_text())
            if isinstance(loaded, dict):
                data = loaded
    except (OSError, ValueError):
        data = {}

    data["desired_state"] = desired_state
    data["updated_at"] = int(time.time())

    # Write to a sibling temp file and rename over the target so readers
    # never observe a half-written state file.
    tmp = state_file.with_suffix(state_file.suffix + ".tmp")
    try:
        tmp.write_text(json.dumps(data, separators=(",", ":")) + "\n")
        tmp.replace(state_file)
    except OSError:
        # Don't leave a stray temp file behind after a failed write/rename.
        try:
            tmp.unlink()
        except OSError:
            pass
|
||||
|
||||
|
||||
# s6-overlay installs its binaries under /command/ and only adds that
|
||||
# directory to PATH for processes started under the supervision tree
|
||||
# (services started by s6-svscan, cont-init.d scripts, etc.). Code
|
||||
@@ -680,7 +745,17 @@ class S6ServiceManager:
|
||||
f': "${{HERMES_HOME:=/opt/data}}"\n'
|
||||
f'log_dir="$HERMES_HOME/logs/gateways/{prof}"\n'
|
||||
f'mkdir -p "$log_dir"\n'
|
||||
# The gateways/ parent must be chowned too (non-recursively):
|
||||
# `mkdir -p` creates it root-owned on a root-context boot, and a
|
||||
# leaf-only chown leaves it that way — every profile registered
|
||||
# later then runs its log service as hermes and crash-loops on
|
||||
# `mkdir: Permission denied`. The parent chown runs on every
|
||||
# root-context boot, so it also heals volumes already poisoned
|
||||
# by older images. Non-recursive on purpose: sibling profile
|
||||
# dirs are each managed by their own log/run. See #45258.
|
||||
f'chown hermes:hermes "$HERMES_HOME/logs/gateways" 2>/dev/null || true\n'
|
||||
f'chown -R hermes:hermes "$log_dir" 2>/dev/null || true\n'
|
||||
f'rm -f "$log_dir/lock"\n'
|
||||
# Skip the drop when already non-root (CAP_SETGID).
|
||||
f'[ "$(id -u)" = 0 ] || exec s6-log 1 n10 s1000000 T "$log_dir"\n'
|
||||
f'exec s6-setuidgid hermes s6-log 1 n10 s1000000 T "$log_dir"\n'
|
||||
@@ -743,6 +818,7 @@ class S6ServiceManager:
|
||||
(permission denied on the supervise FIFO, timeout, etc.).
|
||||
"""
|
||||
self._run_svc("-u", "start", name)
|
||||
_write_gateway_desired_state(name, "running")
|
||||
|
||||
def _supervised_pid(self, name: str) -> int | None:
|
||||
"""Return the PID of the supervised gateway process, or None.
|
||||
@@ -794,6 +870,7 @@ class S6ServiceManager:
|
||||
except Exception:
|
||||
pass
|
||||
self._run_svc("-d", "stop", name)
|
||||
_write_gateway_desired_state(name, "stopped")
|
||||
|
||||
def restart(self, name: str) -> None:
|
||||
"""Restart a registered service (``s6-svc -t`` = SIGTERM).
|
||||
@@ -803,6 +880,7 @@ class S6ServiceManager:
|
||||
S6CommandError: s6-svc exited non-zero for any other reason.
|
||||
"""
|
||||
self._run_svc("-t", "restart", name)
|
||||
_write_gateway_desired_state(name, "running")
|
||||
|
||||
def is_running(self, name: str) -> bool:
|
||||
"""True iff ``s6-svstat`` reports the service as up."""
|
||||
@@ -823,15 +901,15 @@ class S6ServiceManager:
|
||||
profile: str,
|
||||
*,
|
||||
extra_env: dict[str, str] | None = None,
|
||||
start_now: bool = True,
|
||||
) -> None:
|
||||
"""Create the s6 service directory for a profile gateway.
|
||||
|
||||
Triggers ``s6-svscanctl -a`` so s6-svscan picks the new directory
|
||||
up immediately. The service is created in the *up* state — to
|
||||
register without auto-starting, follow up with ``stop(profile)``
|
||||
(or pass the start flag via the future ``start_now=False`` arg,
|
||||
which the Phase 4 reconciliation path uses via a ``down``
|
||||
marker file written directly).
|
||||
up immediately. When *start_now* is ``True`` (the default) the
|
||||
service starts immediately; when ``False`` a ``down`` marker file
|
||||
is written so s6-supervise leaves the service stopped until the
|
||||
user explicitly runs ``hermes -p <profile> gateway start``.
|
||||
|
||||
Raises:
|
||||
ValueError: if the profile name is invalid or the service
|
||||
@@ -879,6 +957,13 @@ class S6ServiceManager:
|
||||
# rationale.
|
||||
_seed_supervise_skeleton(tmp_dir)
|
||||
|
||||
# When start_now is False, write a `down` marker so
|
||||
# s6-supervise does not auto-start the service on rescan.
|
||||
# Mirrors the same pattern in container_boot.py
|
||||
# _register_gateway_slot when start=False.
|
||||
if not start_now:
|
||||
(tmp_dir / "down").touch()
|
||||
|
||||
tmp_dir.rename(svc_dir)
|
||||
except Exception:
|
||||
shutil.rmtree(tmp_dir, ignore_errors=True)
|
||||
|
||||
@@ -1431,6 +1431,15 @@ function Install-Venv {
|
||||
|
||||
if (Test-Path "venv") {
|
||||
Write-Info "Virtual environment already exists, recreating..."
|
||||
# On Windows, native Python extensions (e.g. _bcrypt.pyd) are loaded as
|
||||
# DLLs by any running hermes process. Windows denies deletion of loaded
|
||||
# DLLs, so kill any hermes.exe tree before removing the venv.
|
||||
if ($env:OS -eq "Windows_NT") {
|
||||
$myPid = $PID
|
||||
Write-Info "Stopping any running hermes processes before recreating venv..."
|
||||
& taskkill /F /T /IM hermes.exe /FI "PID ne $myPid" 2>$null | Out-Null
|
||||
Start-Sleep -Milliseconds 800
|
||||
}
|
||||
Remove-Item -Recurse -Force "venv"
|
||||
}
|
||||
|
||||
|
||||
@@ -84,6 +84,10 @@ AUTHOR_MAP = {
|
||||
"290859878+synapsesx@users.noreply.github.com": "synapsesx",
|
||||
"157689911+itsflownium@users.noreply.github.com": "itsflownium",
|
||||
"dirtyren@users.noreply.github.com": "dirtyren",
|
||||
"895252509@qq.com": "895252509",
|
||||
"35259607+zxcasongs@users.noreply.github.com": "zxcasongs",
|
||||
"alfred@my-cloud.me": "alfred-smith-0",
|
||||
"tangtaizhong792@gmail.com": "tangtaizong666",
|
||||
"github@aldo.pw": "aldoeliacim",
|
||||
"max@c60spaceship.com": "MaxFreedomPollard",
|
||||
"achaljhawar03@gmail.com": "achaljhawar",
|
||||
|
||||
@@ -296,7 +296,8 @@ def test_live_gateway_autostarts_after_real_restart_without_manual_state_stamp(
|
||||
)
|
||||
if r.returncode == 0 and '"gateway_state"' in r.stdout:
|
||||
state = r.stdout
|
||||
break
|
||||
if '"running"' in state:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
assert '"running"' in state, (
|
||||
f"gateway never persisted running state pre-restart: {state!r}"
|
||||
|
||||
@@ -240,3 +240,86 @@ class TestApplyProfileOverrideHermesHomeGuard:
|
||||
assert result is not None
|
||||
assert result.endswith("coder")
|
||||
assert sys.argv == ["hermes", "--continue"]
|
||||
|
||||
|
||||
class TestSupervisedChildIgnoresStickyProfile:
    """Keep the reserved default gateway s6 slot pinned to the root profile.

    In the Docker s6 image the ``gateway-default`` slot launches a bare
    ``hermes gateway run`` (no ``-p``), meaning "the root HERMES_HOME
    profile", and its run-script exports ``HERMES_S6_SUPERVISED_CHILD=1``.
    If ``_apply_profile_override`` honoured the sticky ``active_profile``
    file (set by e.g. the dashboard profile switcher) for that process, the
    default gateway would be redirected into the active profile — yielding a
    duplicate gateway for that profile and no real default gateway.
    """

    def test_supervised_child_does_not_follow_active_profile(
        self, tmp_path, monkeypatch
    ):
        """Sentinel set + ``active_profile=briefer`` must NOT redirect.

        Mirrors the Docker/profile scoping bug: a bare ``hermes gateway run``
        with HERMES_HOME at the container root (its parent is NOT
        ``profiles``) plus a sticky ``active_profile`` naming another
        profile. The reserved default slot has to stay on the root profile.
        """
        root_home = tmp_path / ".hermes"
        root_home.mkdir(parents=True, exist_ok=True)
        (root_home / "active_profile").write_text("briefer")
        (root_home / "profiles" / "briefer").mkdir(parents=True, exist_ok=True)

        monkeypatch.setattr(Path, "home", lambda: tmp_path)
        # Root-style HERMES_HOME: its parent is not "profiles", so the #22502
        # guard does not short-circuit and the active_profile step is reached.
        monkeypatch.setenv("HERMES_HOME", str(root_home))
        monkeypatch.setenv("HERMES_S6_SUPERVISED_CHILD", "1")
        monkeypatch.setattr(sys, "argv", ["hermes", "gateway", "run"])

        from hermes_cli.main import _apply_profile_override

        _apply_profile_override()

        resolved_home = os.environ.get("HERMES_HOME")
        assert resolved_home == str(root_home), (
            "Supervised default gateway must stay on the root profile, not be "
            f"hijacked by active_profile; got {resolved_home!r}"
        )

    def test_non_supervised_run_still_follows_active_profile(
        self, tmp_path, monkeypatch
    ):
        """Absent the sentinel, ``hermes gateway run`` keeps honouring
        active_profile — the guard applies to supervised children only."""
        resolved_home = _run_apply_profile_override(
            tmp_path,
            monkeypatch,
            hermes_home=None,
            active_profile="briefer",
            argv=["hermes", "gateway", "run"],
        )

        assert resolved_home is not None
        assert resolved_home.endswith("briefer")

    def test_supervised_named_profile_flag_still_wins(self, tmp_path, monkeypatch):
        """An explicit ``-p <name>`` on a supervised slot must still resolve;
        the sentinel only disables the sticky active_profile fallback."""
        root_home = tmp_path / ".hermes"
        root_home.mkdir(parents=True, exist_ok=True)
        (root_home / "active_profile").write_text("briefer")
        for profile_name in ("briefer", "coder"):
            (root_home / "profiles" / profile_name).mkdir(
                parents=True, exist_ok=True
            )

        monkeypatch.setattr(Path, "home", lambda: tmp_path)
        monkeypatch.delenv("HERMES_HOME", raising=False)
        monkeypatch.setenv("HERMES_S6_SUPERVISED_CHILD", "1")
        monkeypatch.setattr(
            sys, "argv", ["hermes", "-p", "coder", "gateway", "run"]
        )

        from hermes_cli.main import _apply_profile_override

        _apply_profile_override()

        resolved_home = os.environ.get("HERMES_HOME")
        assert resolved_home is not None
        assert resolved_home.endswith("coder")
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ def _make_profile(
|
||||
name: str,
|
||||
*,
|
||||
state: str | None,
|
||||
desired_state: str | None = None,
|
||||
with_pid: bool = False,
|
||||
config: bool = True,
|
||||
) -> Path:
|
||||
@@ -40,10 +41,13 @@ def _make_profile(
|
||||
# SOUL.md is what the reconciler keys on — it's always seeded by
|
||||
# `hermes profile create`. See container_boot._render_run_script.
|
||||
(p / "SOUL.md").write_text("# fake profile\n")
|
||||
if state is not None:
|
||||
(p / "gateway_state.json").write_text(json.dumps({
|
||||
"gateway_state": state, "timestamp": 1234567890,
|
||||
}))
|
||||
if state is not None or desired_state is not None:
|
||||
payload: dict[str, object] = {"timestamp": 1234567890}
|
||||
if state is not None:
|
||||
payload["gateway_state"] = state
|
||||
if desired_state is not None:
|
||||
payload["desired_state"] = desired_state
|
||||
(p / "gateway_state.json").write_text(json.dumps(payload))
|
||||
if with_pid:
|
||||
(p / "gateway.pid").write_text(json.dumps(
|
||||
{"pid": 99999, "host": "old-container"},
|
||||
@@ -130,6 +134,46 @@ def test_startup_failed_does_not_autostart(tmp_path: Path) -> None:
|
||||
assert (scandir / "gateway-broken" / "down").exists()
|
||||
|
||||
|
||||
def test_desired_state_running_autostarts_even_if_runtime_failed(tmp_path: Path) -> None:
    """A persisted ``desired_state`` of running outranks a failed runtime state."""
    scandir = tmp_path / "run-service"
    scandir.mkdir()
    _make_profile(
        tmp_path, "resilient", state="startup_failed", desired_state="running",
    )

    actions = reconcile_profile_gateways(
        hermes_home=tmp_path, scandir=scandir, dry_run=False,
    )

    expected = ReconcileAction(
        profile="resilient", prior_state="running", action="started",
    )
    assert _named_actions(actions) == [expected]
    # No `down` marker: the slot is expected to come up.
    down_marker = scandir / "gateway-resilient" / "down"
    assert not down_marker.exists()
|
||||
|
||||
|
||||
def test_desired_state_stopped_blocks_legacy_running_runtime(tmp_path: Path) -> None:
    """An explicit stop intent survives a stale runtime state of ``running``."""
    scandir = tmp_path / "run-service"
    scandir.mkdir()
    _make_profile(
        tmp_path, "quiet", state="running", desired_state="stopped",
    )

    actions = reconcile_profile_gateways(
        hermes_home=tmp_path, scandir=scandir, dry_run=False,
    )

    expected = ReconcileAction(
        profile="quiet", prior_state="stopped", action="registered",
    )
    assert _named_actions(actions) == [expected]
    # The slot is registered but held down by the marker file.
    down_marker = scandir / "gateway-quiet" / "down"
    assert down_marker.exists()
|
||||
|
||||
|
||||
def test_starting_state_does_not_autostart(tmp_path: Path) -> None:
|
||||
"""`starting` means the gateway died mid-boot last time; treat as
|
||||
failed, not as a candidate for auto-restart."""
|
||||
@@ -513,6 +557,7 @@ def test_legacy_gateway_run_cmd_seeds_default_running_state(
|
||||
assert not (scandir / "gateway-default" / "down").exists()
|
||||
state = json.loads((tmp_path / "gateway_state.json").read_text())
|
||||
assert state["gateway_state"] == "running"
|
||||
assert state["desired_state"] == "running"
|
||||
assert state["migrated_from"] == "legacy-container-cmd"
|
||||
|
||||
|
||||
@@ -663,3 +708,144 @@ def test_profiles_default_subdir_is_skipped_with_warning(
|
||||
assert any(
|
||||
"profiles/default/" in record.message for record in caplog.records
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dashboard-container role detection (skip reconcile on the dashboard)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "container_argv",
    [
        # Subcommand given directly (docker run ... dashboard ...).
        ("dashboard",),
        ("dashboard", "--host", "127.0.0.1", "--no-open"),
        # Via s6 /init and the main-wrapper that re-execs `hermes`.
        ("/init", "/opt/hermes/docker/main-wrapper.sh", "dashboard"),
        ("/init", "/opt/hermes/docker/main-wrapper.sh", "dashboard", "--host", "127.0.0.1", "--no-open"),
        # Wrapper invocation that retains the explicit `hermes` argv0.
        ("/init", "/opt/hermes/docker/main-wrapper.sh", "hermes", "dashboard"),
    ],
)
def test_is_dashboard_container_true_for_dashboard_argv(
    container_argv: tuple[str, ...],
) -> None:
    """Every wrapper-prefix shape of the dashboard command is recognised."""
    from hermes_cli.container_boot import _is_dashboard_container

    detected = _is_dashboard_container(container_argv)
    assert detected is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "container_argv",
    [
        (),  # empty (/proc/1/cmdline unreadable) — not the dashboard
        ("gateway", "run"),
        ("/init", "/opt/hermes/docker/main-wrapper.sh", "gateway", "run"),
        ("/init", "/opt/hermes/docker/main-wrapper.sh", "hermes", "gateway", "run"),
        ("chat",),
        # A profile that happens to be called "dashboard" must NOT match:
        # detection keys on the SUBCOMMAND, and `gateway run -p dashboard`
        # is still a gateway container.
        ("gateway", "run", "-p", "dashboard"),
    ],
)
def test_is_dashboard_container_false_for_non_dashboard_argv(
    container_argv: tuple[str, ...],
) -> None:
    """Gateway and other commands, plus an empty argv, are not the dashboard."""
    from hermes_cli.container_boot import _is_dashboard_container

    detected = _is_dashboard_container(container_argv)
    assert detected is False
|
||||
|
||||
|
||||
def test_main_skips_reconcile_in_dashboard_container(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """main() must not reconcile when PID 1 argv is the dashboard command.

    A running profile is seeded so that a reconcile pass, had it run, would
    have created the gateway-<profile> slot. Checking that the slot is
    absent shows the skip actually happened rather than only being logged.
    """
    from hermes_cli import container_boot

    scandir = tmp_path / "run-service"
    scandir.mkdir()
    _make_profile(tmp_path, "worker", state="running")

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    monkeypatch.setenv("S6_PROFILE_GATEWAY_SCANDIR", str(scandir))
    dashboard_argv = ("/init", "/opt/hermes/docker/main-wrapper.sh", "dashboard")
    monkeypatch.setattr(
        container_boot, "_read_container_argv", lambda: dashboard_argv,
    )

    rc = container_boot.main()

    assert rc == 0
    for slot in ("gateway-worker", "gateway-default"):
        assert not (scandir / slot).exists()
    captured = capsys.readouterr()
    assert "skipping (dashboard container" in captured.out
|
||||
|
||||
|
||||
def test_main_reconciles_in_gateway_container(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """main() reconciles as usual when PID 1 argv is the gateway command;
    the dashboard skip applies to the dashboard role only."""
    from hermes_cli import container_boot

    scandir = tmp_path / "run-service"
    scandir.mkdir()
    _make_profile(tmp_path, "worker", state="running")

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    monkeypatch.setenv("S6_PROFILE_GATEWAY_SCANDIR", str(scandir))
    gateway_argv = (
        "/init", "/opt/hermes/docker/main-wrapper.sh", "gateway", "run",
    )
    monkeypatch.setattr(
        container_boot, "_read_container_argv", lambda: gateway_argv,
    )

    rc = container_boot.main()

    assert rc == 0
    # The worker slot was registered and started (prior_state running).
    worker_slot = scandir / "gateway-worker"
    assert worker_slot.exists()
    assert not (worker_slot / "down").exists()
|
||||
|
||||
|
||||
def test_main_ignores_removed_skip_reconcile_env_var(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The retired HERMES_SKIP_PROFILE_RECONCILE flag has no effect.

    Setting it on a gateway container must NOT suppress reconciliation: the
    container role comes from PID 1 argv alone, so a leftover flag in an old
    manifest is inert.
    """
    from hermes_cli import container_boot

    scandir = tmp_path / "run-service"
    scandir.mkdir()
    _make_profile(tmp_path, "worker", state="running")

    for env_name, env_value in (
        ("HERMES_HOME", str(tmp_path)),
        ("S6_PROFILE_GATEWAY_SCANDIR", str(scandir)),
        ("HERMES_SKIP_PROFILE_RECONCILE", "1"),
    ):
        monkeypatch.setenv(env_name, env_value)
    gateway_argv = (
        "/init", "/opt/hermes/docker/main-wrapper.sh", "gateway", "run",
    )
    monkeypatch.setattr(
        container_boot, "_read_container_argv", lambda: gateway_argv,
    )

    rc = container_boot.main()

    assert rc == 0
    # Reconcile ran even though the stale env var was set.
    assert (scandir / "gateway-worker").exists()
|
||||
|
||||
@@ -57,6 +57,7 @@ class TestUnifiedDashboardRouting:
|
||||
assert opened == ["http://127.0.0.1:9119/?profile=worker_x"]
|
||||
|
||||
def test_profile_launch_reexecs_machine_dashboard(self, main_mod, monkeypatch):
|
||||
monkeypatch.delenv("HERMES_HOME", raising=False)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.profiles.get_active_profile_name", lambda: "worker_x"
|
||||
)
|
||||
@@ -79,8 +80,46 @@ class TestUnifiedDashboardRouting:
|
||||
assert "-p" in argv and argv[argv.index("-p") + 1] == "default"
|
||||
assert "--open-profile" in argv
|
||||
assert argv[argv.index("--open-profile") + 1] == "worker_x"
|
||||
# Profile HERMES_HOME dropped so the child binds the machine root.
|
||||
assert "HERMES_HOME" not in env
|
||||
# The child is pinned to the machine ROOT, not the launching profile's
|
||||
# HERMES_HOME. For a standard install (HERMES_HOME unset) that root is
|
||||
# the platform-native default (~/.hermes), NOT dropped — see the Docker
|
||||
# test below for why we resolve explicitly instead of popping.
|
||||
from hermes_constants import get_default_hermes_root
|
||||
assert env.get("HERMES_HOME") == str(get_default_hermes_root())
|
||||
|
||||
def test_reexec_pins_docker_machine_root(self, main_mod, monkeypatch):
    """The Docker-layout reroute pins the child to /opt/data.

    With HERMES_HOME=/opt/data and profiles under
    /opt/data/profiles/<name>, the re-exec'd child must get the machine
    root /opt/data as HERMES_HOME rather than having the variable dropped.

    If it were dropped, the child would fall back to $HOME/.hermes
    (= /opt/data/.hermes), an empty auto-seeded home: the dashboard would
    list only the default profile and the .install_method stamp would be
    missing (which also misfires the Docker update-button guard).
    Regression test for the support report.
    """
    monkeypatch.setenv("HERMES_HOME", "/opt/data/profiles/oracle")
    monkeypatch.setattr(
        "hermes_cli.profiles.get_active_profile_name", lambda: "oracle"
    )
    monkeypatch.setattr(main_mod, "_dashboard_listening", lambda host, port: False)

    exec_calls = []

    def fake_exec(exe, argv, env):
        # Record the call, then abort like a real exec would end this process.
        exec_calls.append((exe, argv, env))
        raise SystemExit(0)

    monkeypatch.setattr(main_mod.os, "execvpe", fake_exec)

    with pytest.raises(SystemExit):
        main_mod.cmd_dashboard(_args())

    assert len(exec_calls) == 1
    child_env = exec_calls[0][2]
    # get_default_hermes_root() strips the trailing profiles/<name>, so the
    # child binds /opt/data — where the real default/oracle/saga profiles
    # and the .install_method stamp actually live.
    assert child_env.get("HERMES_HOME") == "/opt/data"
|
||||
|
||||
def test_desktop_profile_backend_skips_machine_dashboard_reroute(self, main_mod, monkeypatch):
|
||||
"""A desktop-spawned named-profile backend (HERMES_DESKTOP=1) must NOT
|
||||
|
||||
@@ -54,10 +54,12 @@ class _S6Manager:
|
||||
def register_profile_gateway(
|
||||
self, profile: str, *,
|
||||
extra_env: dict[str, str] | None = None,
|
||||
start_now: bool = True,
|
||||
) -> None:
|
||||
if self.raise_on_register is not None:
|
||||
raise self.raise_on_register
|
||||
self.registered.append(profile)
|
||||
self.last_start_now = start_now
|
||||
|
||||
def unregister_profile_gateway(self, profile: str) -> None:
|
||||
if self.raise_on_unregister is not None:
|
||||
@@ -107,6 +109,21 @@ def test_register_calls_through_on_s6(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
assert mgr.registered == ["coder"]
|
||||
|
||||
|
||||
def test_register_passes_start_now_false(monkeypatch: pytest.MonkeyPatch) -> None:
    """_maybe_register_gateway_service registers with ``start_now=False``.

    Profile creation must not auto-start a gateway that may conflict with
    the main gateway's bot-token lock.
    """
    _patch_detect_s6(monkeypatch)
    fake_manager = _S6Manager()
    monkeypatch.setattr(
        "hermes_cli.service_manager.get_service_manager",
        lambda: fake_manager,
    )

    _maybe_register_gateway_service("coder")

    assert fake_manager.last_start_now is False, (
        "profile creation must not auto-start the gateway service"
    )
|
||||
|
||||
|
||||
def test_register_swallows_duplicate_value_error(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
|
||||
@@ -69,6 +69,20 @@ def test_detect_service_manager_returns_known_value() -> None:
|
||||
assert result in ("systemd", "launchd", "windows", "s6", "none")
|
||||
|
||||
|
||||
def test_detect_service_manager_s6_keys_off_s6_running_not_is_container(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """s6 detection depends on ``_s6_running`` alone, not on is_container().

    Regression: Fly runs s6-overlay as PID 1 inside a Firecracker microVM,
    which is not a Docker/Podman container. Gating detection on
    is_container() left the dispatch path inert there, so
    `hermes gateway restart` spawned a foreground gateway that fought the
    supervised one.
    """
    monkeypatch.setattr(
        "hermes_cli.service_manager._s6_running", lambda: True,
    )

    detected = detect_service_manager()

    assert detected == "s6"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _s6_running — must work for unprivileged users, not just root
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -572,6 +586,35 @@ def test_s6_register_creates_service_dir_and_triggers_scan(
|
||||
), f"s6-svscanctl -a not invoked; saw: {fake_subprocess_run}"
|
||||
|
||||
|
||||
def test_s6_register_start_now_false_writes_down_marker(
    s6_scandir, fake_subprocess_run,
) -> None:
    """``start_now=False`` leaves a `down` marker in the service directory,
    so s6-supervise does not auto-start the service on rescan."""
    manager = S6ServiceManager(scandir=s6_scandir)
    manager.register_profile_gateway("coder", start_now=False)

    service_dir = s6_scandir / "gateway-coder"
    assert service_dir.is_dir()
    down_marker = service_dir / "down"
    assert down_marker.is_file(), (
        "start_now=False must write a `down` marker file"
    )
|
||||
|
||||
|
||||
def test_s6_register_start_now_true_no_down_marker(
    s6_scandir, fake_subprocess_run,
) -> None:
    """With the default ``start_now=True`` no `down` marker is created."""
    manager = S6ServiceManager(scandir=s6_scandir)
    manager.register_profile_gateway("coder")

    service_dir = s6_scandir / "gateway-coder"
    assert service_dir.is_dir()
    down_marker = service_dir / "down"
    assert not down_marker.exists(), (
        "start_now=True must NOT write a `down` marker file"
    )
|
||||
|
||||
|
||||
def test_s6_register_extra_env_is_quoted(s6_scandir, fake_subprocess_run) -> None:
|
||||
mgr = S6ServiceManager(scandir=s6_scandir)
|
||||
mgr.register_profile_gateway(
|
||||
@@ -683,6 +726,48 @@ def test_s6_lifecycle_dispatches_to_s6_svc(
|
||||
assert flags == ["-u", "-d", "-t"]
|
||||
|
||||
|
||||
def test_s6_lifecycle_persists_named_profile_desired_state(
    s6_scandir,
    fake_subprocess_run,
    tmp_path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """start/stop/restart each record ``desired_state`` in the named
    profile's ``gateway_state.json``."""
    import json

    hermes_home = tmp_path / "hermes-home"
    profile_dir = hermes_home / "profiles" / "coder"
    profile_dir.mkdir(parents=True)
    (s6_scandir / "gateway-coder").mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    manager = S6ServiceManager(scandir=s6_scandir)
    state_file = profile_dir / "gateway_state.json"

    # Run each lifecycle verb in order and check the intent it persisted.
    for lifecycle_call, expected_intent in (
        (manager.start, "running"),
        (manager.stop, "stopped"),
        (manager.restart, "running"),
    ):
        lifecycle_call("gateway-coder")
        persisted = json.loads(state_file.read_text())
        assert persisted["desired_state"] == expected_intent
|
||||
|
||||
|
||||
def test_s6_lifecycle_persists_default_profile_desired_state(
    s6_scandir,
    fake_subprocess_run,
    tmp_path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Starting ``gateway-default`` while HERMES_HOME points at a named
    profile writes ``desired_state`` into the root ``gateway_state.json``."""
    import json

    root_home = tmp_path / "hermes-home"
    root_home.mkdir()
    (s6_scandir / "gateway-default").mkdir()
    # The caller's HERMES_HOME is a named profile under the root.
    caller_home = root_home / "profiles" / "coder"
    monkeypatch.setenv("HERMES_HOME", str(caller_home))

    manager = S6ServiceManager(scandir=s6_scandir)
    manager.start("gateway-default")

    root_state_file = root_home / "gateway_state.json"
    persisted = json.loads(root_state_file.read_text())
    assert persisted["desired_state"] == "running"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lifecycle errors — friendly messages, not raw CalledProcessError
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -907,3 +992,38 @@ def test_s6_stop_tolerates_marker_write_failure(monkeypatch, s6_scandir):
|
||||
mgr.stop("gateway-coder") # must not raise
|
||||
|
||||
assert any(cmd[0] == "s6-svc" and "-d" in cmd for cmd in svc_calls)
|
||||
|
||||
|
||||
def test_s6_log_run_chowns_gateways_parent(s6_scandir, fake_subprocess_run) -> None:
    """log/run chowns the logs/gateways/ parent as well as the leaf dir.

    Regression guard for #45258: on a root-context boot `mkdir -p` creates
    the gateways/ parent root-owned, and chowning only the leaf leaves it
    so. Profiles registered afterwards run their log service as the dropped
    hermes user, and s6-log crash-loops on `mkdir: Permission denied`.
    """
    manager = S6ServiceManager(scandir=s6_scandir)
    manager.register_profile_gateway("coder")

    log_run_path = s6_scandir / "gateway-coder" / "log" / "run"
    log_text = log_run_path.read_text()

    parent_chown = 'chown hermes:hermes "$HERMES_HOME/logs/gateways"'
    assert parent_chown in log_text, (
        "log/run must chown the logs/gateways parent so profiles added "
        f"after a root-context boot can create their leaf dirs. Saw: {log_text!r}"
    )
    # Deliberately non-recursive: each sibling profile's leaf dir is managed
    # by its own log/run, and a recursive parent chown would race them.
    recursive_parent_chown = 'chown -R hermes:hermes "$HERMES_HOME/logs/gateways"'
    assert recursive_parent_chown not in log_text

    # Ordering: mkdir creates the parent, the parent chown repairs its
    # ownership, then the leaf chown runs — all before s6-log execs.
    mkdir_idx, parent_idx, leaf_idx, exec_idx = (
        log_text.index(marker)
        for marker in (
            'mkdir -p "$log_dir"',
            parent_chown,
            'chown -R hermes:hermes "$log_dir"',
            "s6-log 1 ",
        )
    )
    assert mkdir_idx < parent_idx < leaf_idx < exec_idx

    # The parent path has to be a runtime env expansion, never a baked-in
    # absolute path (the same contract as log_dir itself).
    assert '/opt/data/logs/gateways"' not in log_text
|
||||
|
||||
@@ -27,7 +27,7 @@ def _make_env_config(**overrides):
|
||||
|
||||
|
||||
class TestFileToolsContainerConfig:
|
||||
def _run(self, env_config, task_id):
|
||||
def _run(self, env_config, task_id, task_env_overrides=None):
|
||||
captured = {}
|
||||
mock_env = MagicMock()
|
||||
|
||||
@@ -35,31 +35,51 @@ class TestFileToolsContainerConfig:
|
||||
captured.update(kwargs)
|
||||
return mock_env
|
||||
|
||||
with patch("tools.terminal_tool._get_env_config", return_value=env_config), patch("tools.terminal_tool._task_env_overrides", {}), patch("tools.terminal_tool._active_environments", {}), patch("tools.terminal_tool._creation_locks", {}), patch("tools.terminal_tool._creation_locks_lock", __import__("threading").Lock()), patch("tools.terminal_tool._create_environment", side_effect=fake_create_env), patch("tools.terminal_tool._start_cleanup_thread"), patch("tools.terminal_tool._check_disk_usage_warning"), patch("tools.file_tools._file_ops_cache", {}), patch("tools.file_tools._file_ops_lock", __import__("threading").Lock()):
|
||||
with patch("tools.terminal_tool._get_env_config", return_value=env_config), \
|
||||
patch("tools.terminal_tool._task_env_overrides", task_env_overrides or {}), \
|
||||
patch("tools.terminal_tool._active_environments", {}), \
|
||||
patch("tools.terminal_tool._creation_locks", {}), \
|
||||
patch("tools.terminal_tool._creation_locks_lock", __import__("threading").Lock()), \
|
||||
patch("tools.terminal_tool._create_environment", side_effect=fake_create_env), \
|
||||
patch("tools.terminal_tool._start_cleanup_thread"), \
|
||||
patch("tools.terminal_tool._check_disk_usage_warning"), \
|
||||
patch("tools.file_tools._file_ops_cache", {}), \
|
||||
patch("tools.file_tools._file_ops_lock", __import__("threading").Lock()):
|
||||
file_tools._get_file_ops(task_id)
|
||||
|
||||
return captured.get("container_config", {})
|
||||
return captured
|
||||
|
||||
def test_docker_mount_cwd_to_workspace_passed(self):
|
||||
"""docker_mount_cwd_to_workspace is forwarded to container_config."""
|
||||
cc = self._run(_make_env_config(docker_mount_cwd_to_workspace=True), "t1")
|
||||
cc = self._run(_make_env_config(docker_mount_cwd_to_workspace=True), "t1").get("container_config", {})
|
||||
assert cc.get("docker_mount_cwd_to_workspace") is True
|
||||
|
||||
def test_docker_forward_env_passed(self):
|
||||
"""docker_forward_env is forwarded to container_config."""
|
||||
cc = self._run(_make_env_config(docker_forward_env=["MY_SECRET"]), "t2")
|
||||
cc = self._run(_make_env_config(docker_forward_env=["MY_SECRET"]), "t2").get("container_config", {})
|
||||
assert cc.get("docker_forward_env") == ["MY_SECRET"]
|
||||
|
||||
def test_docker_mount_cwd_defaults_to_false(self):
|
||||
"""docker_mount_cwd_to_workspace defaults to False when absent from config."""
|
||||
cfg = _make_env_config()
|
||||
del cfg["docker_mount_cwd_to_workspace"]
|
||||
cc = self._run(cfg, "t3")
|
||||
cc = self._run(cfg, "t3").get("container_config", {})
|
||||
assert cc.get("docker_mount_cwd_to_workspace") is False
|
||||
|
||||
def test_docker_forward_env_defaults_to_empty_list(self):
|
||||
"""docker_forward_env defaults to [] when absent from config."""
|
||||
cfg = _make_env_config()
|
||||
del cfg["docker_forward_env"]
|
||||
cc = self._run(cfg, "t4")
|
||||
cc = self._run(cfg, "t4").get("container_config", {})
|
||||
assert cc.get("docker_forward_env") == []
|
||||
|
||||
def test_cwd_only_raw_task_override_reaches_file_environment(self):
    """A cwd-only override shares the default environment but keeps its cwd.

    The override is registered under the raw session id. The environment
    created for the file tools must be keyed ``"default"`` while still
    receiving the session's cwd instead of the configured one.
    """
    session_id = "desktop-session-cwd"
    env_config = _make_env_config(env_type="local", cwd="/config-cwd")
    session_overrides = {session_id: {"cwd": "/workspace/session"}}

    # ``_run`` hands back the kwargs captured from environment creation.
    create_kwargs = self._run(
        env_config,
        session_id,
        task_env_overrides=session_overrides,
    )

    assert create_kwargs["task_id"] == "default"
    assert create_kwargs["cwd"] == "/workspace/session"
|
||||
|
||||
@@ -21,6 +21,7 @@ from pathlib import Path
|
||||
import pytest
|
||||
|
||||
import tools.file_tools as ft
|
||||
import tools.terminal_tool as terminal_tool
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -218,6 +219,28 @@ def test_absolute_terminal_cwd_anchors_with_empty_registry(_isolated_cwd, monkey
|
||||
assert not str(resolved).startswith(str(decoy))
|
||||
|
||||
|
||||
def test_registered_task_cwd_override_anchors_before_terminal_env_exists(_isolated_cwd, monkeypatch):
    """A raw-keyed cwd override anchors relative paths before any terminal env exists.

    TUI/Desktop sessions register their cwd under the raw session key before
    any tool runs. A cwd-only override collapses to the shared terminal
    environment key, yet the file resolver must still consult the raw
    task/session override before falling back to TERMINAL_CWD or the
    process cwd.
    """
    workspace, decoy = _isolated_cwd
    session_id = "desktop-session-cwd"

    def _no_live_cwd(task_id="default"):
        # Simulate a session in which no terminal command has run yet.
        return None

    monkeypatch.setattr(ft, "_get_live_tracking_cwd", _no_live_cwd)
    monkeypatch.delenv("TERMINAL_CWD", raising=False)
    # Start from an empty override registry so only this test's entry exists.
    monkeypatch.setattr(terminal_tool, "_task_env_overrides", {})

    terminal_tool.register_task_env_overrides(session_id, {"cwd": str(workspace)})

    resolved = ft._resolve_path_for_task("target.py", task_id=session_id)
    expected = workspace / "target.py"

    assert terminal_tool._resolve_container_task_id(session_id) == "default"
    assert resolved == expected
    assert not str(resolved).startswith(str(decoy))
|
||||
|
||||
|
||||
def test_warning_fires_from_terminal_cwd_when_registry_empty(_isolated_cwd, monkeypatch):
|
||||
"""Divergence warning must fire even before any terminal command runs.
|
||||
|
||||
@@ -291,4 +314,3 @@ def test_patch_reports_resolved_absolute_path(_isolated_cwd, monkeypatch):
|
||||
assert "WORKSPACE_PATCHED" in (workspace / "target.py").read_text()
|
||||
# And the decoy copy is untouched.
|
||||
assert (decoy / "target.py").read_text() == "DECOY_ORIGINAL\n"
|
||||
|
||||
|
||||
60
tests/tools/test_stage2_hook_log_dir_seed.py
Normal file
60
tests/tools/test_stage2_hook_log_dir_seed.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""Contract test: the s6-overlay stage2 hook seeds $HERMES_HOME/logs/gateways
|
||||
as the hermes user.
|
||||
|
||||
Regression guard for #45258: the per-profile gateway log service
|
||||
(`gateway-<profile>/log/run`) creates `logs/gateways/` via `mkdir -p` but only
|
||||
chowns the leaf `logs/gateways/<profile>`. If the first log service to boot
|
||||
runs in root context, the `gateways/` parent is created root-owned and stays
|
||||
that way; every profile registered later runs its log service as the dropped
|
||||
hermes user and s6-log crash-loops on `mkdir: Permission denied`.
|
||||
|
||||
Seeding `logs/gateways` in stage2 (cont-init runs before any service starts)
|
||||
guarantees the parent already exists hermes-owned by the time the first
|
||||
log/run executes its `mkdir -p`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def stage2_text() -> str:
    """Return the contents of ``docker/stage2-hook.sh``.

    Skips the requesting tests when the hook script is not part of this
    checkout, so partial checkouts do not fail the contract tests.
    """
    if not STAGE2_HOOK.exists():
        pytest.skip("docker/stage2-hook.sh not present in this checkout")
    # Decode explicitly as UTF-8: the default is the locale encoding, which
    # would make the contract checks depend on the machine running the tests.
    return STAGE2_HOOK.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def _seed_mkdir_block(text: str) -> str:
|
||||
"""Extract the `as_hermes mkdir -p \\ ...` seed block."""
|
||||
m = re.search(r"as_hermes mkdir -p \\\n(?:[^\n]*\\\n)*[^\n]*\n", text)
|
||||
assert m, "stage2-hook.sh must contain the as_hermes mkdir -p seed block"
|
||||
return m.group(0)
|
||||
|
||||
|
||||
def test_logs_gateways_is_seeded(stage2_text: str) -> None:
    """The stage2 seed block must list both ``logs`` and ``logs/gateways``."""
    seed_block = _seed_mkdir_block(stage2_text)

    gateways_entry = '"$HERMES_HOME/logs/gateways"'
    missing_gateways_msg = (
        "logs/gateways must be seeded hermes-owned in stage2 so profiles "
        "added after first boot can create their log dirs (#45258)"
    )
    assert gateways_entry in seed_block, missing_gateways_msg

    # The parent must be listed explicitly too, so that mkdir -p inside the
    # block never creates logs/ implicitly with surprising ownership. The
    # closing quote keeps this from being satisfied by the gateways entry.
    logs_entry = '"$HERMES_HOME/logs"'
    assert logs_entry in seed_block
|
||||
|
||||
|
||||
def test_logs_subtree_is_healed_when_chown_needed(stage2_text: str) -> None:
    """The needs_chown repair loop must include ``logs`` among its subdirs.

    Recursively chowning ``logs`` is what makes the seed-list entry
    sufficient: ``logs/gateways`` is covered by it, so the loop needs no
    separate ``logs/gateways`` entry.
    """
    loop_header = re.search(r"for sub in ([^;]*); do", stage2_text)
    assert loop_header, "stage2-hook.sh must contain the needs_chown subdir repair loop"

    # The captured group is the whitespace-separated list of subdirectories.
    repaired_subdirs = loop_header.group(1).split()
    missing_logs_msg = (
        "the needs_chown loop must recursively chown logs/ — it covers "
        "logs/gateways, so the seed list does not need a loop twin"
    )
    assert "logs" in repaired_subdirs, missing_logs_msg
|
||||
@@ -96,6 +96,23 @@ def _resolve_path(filepath: str, task_id: str = "default") -> Path:
|
||||
_TERMINAL_CWD_SENTINELS = frozenset({"", ".", "./", "auto", "cwd"})
|
||||
|
||||
|
||||
def _sentinel_free_abs_cwd(raw: str | None) -> str | None:
    """Return *raw* as an absolute cwd anchor, or ``None`` when unusable.

    A candidate is usable only when it is non-empty after stripping, is not
    one of ``_TERMINAL_CWD_SENTINELS`` (compared case-insensitively), and is
    absolute once ``~`` has been expanded. A relative anchor is meaningless
    without knowing which cwd it is relative to, which is exactly the
    ambiguity that misroutes worktree edits, so relative, sentinel and empty
    values all yield ``None``.
    """
    candidate = str(raw or "").strip()
    # The empty string is itself a sentinel, so this also rejects None/blank.
    if candidate.lower() in _TERMINAL_CWD_SENTINELS:
        return None
    expanded = os.path.expanduser(candidate)
    return expanded if os.path.isabs(expanded) else None
|
||||
|
||||
|
||||
def _configured_terminal_cwd() -> str | None:
|
||||
"""Return ``$TERMINAL_CWD`` only when it names a real directory anchor.
|
||||
|
||||
@@ -104,13 +121,26 @@ def _configured_terminal_cwd() -> str | None:
|
||||
relative to, which is exactly the ambiguity that misroutes worktree edits.
|
||||
Only an absolute, sentinel-free value is honored.
|
||||
"""
|
||||
raw = (os.environ.get("TERMINAL_CWD") or "").strip()
|
||||
if raw.lower() in _TERMINAL_CWD_SENTINELS:
|
||||
return _sentinel_free_abs_cwd(os.environ.get("TERMINAL_CWD"))
|
||||
|
||||
|
||||
def _registered_task_cwd_override(task_id: str = "default") -> str | None:
|
||||
"""Return a registered cwd override for the raw task id, when available.
|
||||
|
||||
``terminal_tool`` intentionally collapses CWD-only task overrides to the
|
||||
shared ``"default"`` environment so TUI/dashboard/ACP sessions do not spin
|
||||
up isolated sandboxes just because they have different workspaces. The cwd
|
||||
value itself is still keyed by the raw session/task id, so file tools must
|
||||
read that raw override before falling back to the collapsed container key.
|
||||
"""
|
||||
try:
|
||||
from tools.terminal_tool import resolve_task_overrides
|
||||
|
||||
overrides = resolve_task_overrides(task_id)
|
||||
except Exception:
|
||||
return None
|
||||
expanded = os.path.expanduser(raw)
|
||||
if not os.path.isabs(expanded):
|
||||
return None
|
||||
return expanded
|
||||
|
||||
return _sentinel_free_abs_cwd(overrides.get("cwd"))
|
||||
|
||||
|
||||
def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
|
||||
@@ -149,8 +179,10 @@ def _authoritative_workspace_root(task_id: str = "default") -> str | None:
|
||||
|
||||
Prefers the live terminal cwd (the directory the agent is actually working
|
||||
in). When no terminal command has run yet — so the live registry is empty —
|
||||
falls back to a sentinel-free absolute ``$TERMINAL_CWD``. This is what lets
|
||||
a worktree session warn about (and resolve into) the worktree from the very
|
||||
falls back to a registered task/session cwd override (TUI/Desktop/ACP
|
||||
sessions register a raw-keyed cwd before any tool runs), then to a
|
||||
sentinel-free absolute ``$TERMINAL_CWD``. This is what lets a worktree or
|
||||
Desktop session warn about (and resolve into) its workspace from the very
|
||||
first ``write_file``/``patch``, before any ``cd`` has populated the live cwd.
|
||||
|
||||
Returns ``None`` only when there is genuinely no reliable anchor, in which
|
||||
@@ -159,6 +191,9 @@ def _authoritative_workspace_root(task_id: str = "default") -> str | None:
|
||||
live = _get_live_tracking_cwd(task_id)
|
||||
if live:
|
||||
return live
|
||||
registered = _registered_task_cwd_override(task_id)
|
||||
if registered:
|
||||
return registered
|
||||
return _configured_terminal_cwd()
|
||||
|
||||
|
||||
@@ -168,10 +203,12 @@ def _resolve_base_dir(task_id: str = "default") -> Path:
|
||||
Resolution order:
|
||||
1. The task's live terminal cwd (the directory the agent is actually
|
||||
working in — e.g. a git worktree). Authoritative when known.
|
||||
2. A sentinel-free, absolute ``$TERMINAL_CWD`` (the worktree path set by
|
||||
2. A registered task/session cwd override (TUI/Desktop/ACP sessions
|
||||
register a raw-keyed workspace cwd before any terminal command runs).
|
||||
3. A sentinel-free, absolute ``$TERMINAL_CWD`` (the worktree path set by
|
||||
``cli.py``/``main.py`` for ``-w`` sessions). Used even before any
|
||||
terminal command has populated the live cwd registry.
|
||||
3. The process cwd.
|
||||
4. The process cwd.
|
||||
|
||||
The returned base is ALWAYS absolute. This is the core invariant that
|
||||
prevents the worktree-cwd divergence bug: a relative or sentinel
|
||||
@@ -218,9 +255,10 @@ def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "defa
|
||||
target. ``None`` when the path is absolute, the base is unknown, or the
|
||||
resolved path is correctly under the workspace root.
|
||||
|
||||
The workspace root is the live terminal cwd when known, else a sentinel-free
|
||||
absolute ``$TERMINAL_CWD`` — so a worktree session whose terminal registry
|
||||
is still empty (no ``cd`` run yet) is warned on the very first write.
|
||||
The workspace root is the live terminal cwd when known, else a registered
|
||||
task/session cwd override, else a sentinel-free absolute ``$TERMINAL_CWD``
|
||||
— so a worktree or Desktop session whose terminal registry is still empty
|
||||
(no ``cd`` run yet) is warned on the very first write.
|
||||
"""
|
||||
try:
|
||||
if Path(filepath).expanduser().is_absolute():
|
||||
@@ -625,7 +663,8 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
|
||||
)
|
||||
import time
|
||||
|
||||
task_id = _resolve_container_task_id(task_id)
|
||||
raw_task_id = task_id or "default"
|
||||
task_id = _resolve_container_task_id(raw_task_id)
|
||||
|
||||
# Fast path: check cache -- but also verify the underlying environment
|
||||
# is still alive (it may have been killed by the cleanup thread).
|
||||
@@ -658,11 +697,11 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
|
||||
terminal_env = None
|
||||
|
||||
if terminal_env is None:
|
||||
from tools.terminal_tool import _task_env_overrides
|
||||
from tools.terminal_tool import resolve_task_overrides
|
||||
|
||||
config = _get_env_config()
|
||||
env_type = config["env_type"]
|
||||
overrides = _task_env_overrides.get(task_id, {})
|
||||
overrides = resolve_task_overrides(raw_task_id)
|
||||
|
||||
if env_type == "docker":
|
||||
image = overrides.get("docker_image") or config["docker_image"]
|
||||
|
||||
@@ -1034,6 +1034,26 @@ def _resolve_container_task_id(task_id: Optional[str]) -> str:
|
||||
return "default"
|
||||
|
||||
|
||||
def resolve_task_overrides(task_id: Optional[str]) -> Dict[str, Any]:
    """Look up env overrides for *task_id*: raw key first, then collapsed key.

    ``register_task_env_overrides`` stores overrides under the *raw*
    task/session id, whereas a CWD-only override collapses
    (:func:`_resolve_container_task_id`) to the shared ``"default"``
    container so per-session surfaces (ACP/gateway/dashboard) don't each
    spin up their own sandbox. Callers that need the override (terminal
    command setup, file-tool cwd resolution) must therefore consult the raw
    id FIRST and only then the collapsed container id; otherwise the
    originating session's override is silently dropped. Keeping the lookup
    here means the terminal and file layers cannot drift apart.

    A missing or empty *task_id* is treated as ``"default"``. An empty dict
    is returned when neither key has a non-empty entry.
    """
    raw_key = task_id or "default"

    session_overrides = _task_env_overrides.get(raw_key)
    if session_overrides:
        return session_overrides

    # Nothing (or only an empty entry) under the raw id: try the container key.
    container_overrides = _task_env_overrides.get(_resolve_container_task_id(raw_key))
    return container_overrides or {}
|
||||
|
||||
|
||||
# Configuration from environment variables
|
||||
|
||||
def _parse_env_var(name: str, default: str, converter: Any = int, type_label: str = "integer"):
|
||||
@@ -1885,20 +1905,12 @@ def terminal_tool(
|
||||
effective_task_id = _resolve_container_task_id(task_id)
|
||||
|
||||
# Check per-task overrides (set by environments like TerminalBench2Env)
|
||||
# before falling back to global env var config.
|
||||
#
|
||||
# Overrides are keyed by the *raw* task_id (that's the key
|
||||
# ``register_task_env_overrides`` writes under), NOT by the collapsed
|
||||
# container id. A CWD-only override collapses ``effective_task_id`` to
|
||||
# ``"default"`` for container sharing, but its cwd must still be read
|
||||
# back here under the originating task_id, or the override is silently
|
||||
# dropped. Fall back to the collapsed id so isolation-keyed RL/benchmark
|
||||
# overrides (registered under an id that equals their container id) keep
|
||||
# resolving as before.
|
||||
overrides = (
|
||||
(_task_env_overrides.get(task_id) if task_id else None)
|
||||
or _task_env_overrides.get(effective_task_id, {})
|
||||
)
|
||||
# before falling back to global env var config. ``resolve_task_overrides``
|
||||
# reads the raw task id first then the collapsed container id, so a
|
||||
# CWD-only override (which collapses ``effective_task_id`` to
|
||||
# ``"default"``) is still found under its originating session id while
|
||||
# isolation-keyed RL/benchmark overrides keep resolving as before.
|
||||
overrides = resolve_task_overrides(task_id)
|
||||
|
||||
# Select image based on env type, with per-task override support
|
||||
if env_type == "docker":
|
||||
|
||||
Reference in New Issue
Block a user