mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-26 20:03:22 +08:00
Compare commits
372 Commits
salvage/em
...
feat/telem
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1e5e61b4be | ||
|
|
ea53752eff | ||
|
|
7aa6726c06 | ||
|
|
b8fc8c908b | ||
|
|
7cd5eaa646 | ||
|
|
df514654ba | ||
|
|
55af6c447a | ||
|
|
6dfb8326f5 | ||
|
|
6d9ca04574 | ||
|
|
263f6b03eb | ||
|
|
abd6b85200 | ||
|
|
208f0d7c3b | ||
|
|
e4ff494860 | ||
|
|
ffa3d3c811 | ||
|
|
fd2a35b169 | ||
|
|
19ca295a84 | ||
|
|
3e99ec0ff9 | ||
|
|
c7e934a5b4 | ||
|
|
bf0513bca0 | ||
|
|
e7d2f0b93c | ||
|
|
9f3aa1685c | ||
|
|
890e890281 | ||
|
|
a391523bcc | ||
|
|
b8d220f268 | ||
|
|
62af32efe7 | ||
|
|
68680db10d | ||
|
|
7a7f9a5b3d | ||
|
|
488ae376db | ||
|
|
74352a1e61 | ||
|
|
344415892f | ||
|
|
e2b8018729 | ||
|
|
86e748df13 | ||
|
|
cb3f8ec03d | ||
|
|
4ffdedd369 | ||
|
|
4e023f5bc9 | ||
|
|
e7811345c1 | ||
|
|
8a45ce2dd4 | ||
|
|
4cdd1a3230 | ||
|
|
c4ba4770eb | ||
|
|
43f9d24513 | ||
|
|
2e3efce66e | ||
|
|
f7bf740640 | ||
|
|
c6575df927 | ||
|
|
f284d85efa | ||
|
|
42bea9e298 | ||
|
|
d40b5735a4 | ||
|
|
9d225fbf4e | ||
|
|
92b5987ca2 | ||
|
|
0d777453fa | ||
|
|
e4d026aa3b | ||
|
|
b82c83d320 | ||
|
|
751adfa6b9 | ||
|
|
ff8920299c | ||
|
|
ca714f6189 | ||
|
|
0654319644 | ||
|
|
d9bd7ce827 | ||
|
|
2107b86024 | ||
|
|
510bf40705 | ||
|
|
2a1e615565 | ||
|
|
d7021af30f | ||
|
|
4efec63a34 | ||
|
|
2c02583c2b | ||
|
|
525ee58b43 | ||
|
|
150afea942 | ||
|
|
73c8d5a1e7 | ||
|
|
1a38a8ff7d | ||
|
|
edf35918be | ||
|
|
e8561d61e6 | ||
|
|
da73223f4a | ||
|
|
1ca1f9f2c7 | ||
|
|
3bf00e459a | ||
|
|
c4c590e4a1 | ||
|
|
5de8a8fbe8 | ||
|
|
6208d6b3be | ||
|
|
0be10607d9 | ||
|
|
d682f320b3 | ||
|
|
c210e23a02 | ||
|
|
6305ac0e4b | ||
|
|
0aea0c3654 | ||
|
|
a53fc78c02 | ||
|
|
15ee2d6f04 | ||
|
|
d93abd75d1 | ||
|
|
8278d82e17 | ||
|
|
931a5e92cc | ||
|
|
70319626a9 | ||
|
|
2d286a6d00 | ||
|
|
88e01d92e6 | ||
|
|
1d9ed7f48a | ||
|
|
a6a28ce3e2 | ||
|
|
d6269da7fd | ||
|
|
e62afaca62 | ||
|
|
60a2feeebf | ||
|
|
6f2b2a1f34 | ||
|
|
736e981abf | ||
|
|
d6cf383d74 | ||
|
|
d0df264213 | ||
|
|
f3372d3407 | ||
|
|
d0f9c4bcc6 | ||
|
|
818f03cdd8 | ||
|
|
6b3ea2cea6 | ||
|
|
5196575d40 | ||
|
|
4362c1a3af | ||
|
|
f3d6d9bbd3 | ||
|
|
3af22c0ed5 | ||
|
|
a5849917a8 | ||
|
|
25c31cab62 | ||
|
|
7078d9d1e2 | ||
|
|
a8e6a4f00b | ||
|
|
41f302fa73 | ||
|
|
7a65800fed | ||
|
|
72ae163250 | ||
|
|
0c442fa1d3 | ||
|
|
e92b5c6af8 | ||
|
|
380d660cab | ||
|
|
d473e5d07a | ||
|
|
1512bad0bc | ||
|
|
da0320bf40 | ||
|
|
a5a2edd451 | ||
|
|
2f1a47b90e | ||
|
|
7ef0f360d0 | ||
|
|
f0beb6f617 | ||
|
|
fcbdf3c356 | ||
|
|
b177d4ee48 | ||
|
|
b693bee100 | ||
|
|
98f3c19282 | ||
|
|
c06ceb3232 | ||
|
|
1b181724fa | ||
|
|
532b7ed408 | ||
|
|
281b333cc5 | ||
|
|
f2c45e2c81 | ||
|
|
cbe5c5689f | ||
|
|
0c3f197cff | ||
|
|
c15945655f | ||
|
|
411faf08bd | ||
|
|
a4fa1481e2 | ||
|
|
d1cac0e5ef | ||
|
|
96af4bec30 | ||
|
|
4aeaba6922 | ||
|
|
7e2db0a140 | ||
|
|
17beb55e3c | ||
|
|
284be6cc24 | ||
|
|
7157b213f5 | ||
|
|
153ad79524 | ||
|
|
a05a9b0e07 | ||
|
|
2ea94c6c45 | ||
|
|
d635a6d507 | ||
|
|
42e14d1089 | ||
|
|
b649cdee4a | ||
|
|
538c419d2e | ||
|
|
f1617a7ebb | ||
|
|
592c462e3c | ||
|
|
9a4600c5fb | ||
|
|
00779800f6 | ||
|
|
65b13e9dbc | ||
|
|
463bf2be25 | ||
|
|
cb6edbf448 | ||
|
|
a6485bddb8 | ||
|
|
1fe013ee16 | ||
|
|
d335164833 | ||
|
|
a378b1e980 | ||
|
|
4127332f15 | ||
|
|
70650e82a3 | ||
|
|
9a94865552 | ||
|
|
93192059c9 | ||
|
|
2a75c4a8cb | ||
|
|
8d1706ae5c | ||
|
|
41b9b7e719 | ||
|
|
cbd6ba1bdd | ||
|
|
a268dfff0a | ||
|
|
404b06ac4f | ||
|
|
cedbb4cfa2 | ||
|
|
085096fd59 | ||
|
|
7d2c1f3f84 | ||
|
|
710cd48fb1 | ||
|
|
dbf0797335 | ||
|
|
8d1f6debfd | ||
|
|
77d2b50751 | ||
|
|
4d589b1e13 | ||
|
|
489b85ee1e | ||
|
|
e25b56fc64 | ||
|
|
1e4df599ec | ||
|
|
7a79a4447c | ||
|
|
8f0a12ce09 | ||
|
|
9c994377ed | ||
|
|
aacc6bb0a8 | ||
|
|
ed1fdb5b61 | ||
|
|
e0272cfef2 | ||
|
|
59acaa972f | ||
|
|
6800fd6608 | ||
|
|
cae1ee44a7 | ||
|
|
25e2312230 | ||
|
|
b13e2fd694 | ||
|
|
b674f7ba28 | ||
|
|
9214aa7dde | ||
|
|
0225480369 | ||
|
|
743985bf1e | ||
|
|
aab49f6927 | ||
|
|
3faf768cde | ||
|
|
32f837add1 | ||
|
|
de281bcebc | ||
|
|
5b065e32ed | ||
|
|
a130b62678 | ||
|
|
2de7549fe0 | ||
|
|
b41d9b845d | ||
|
|
35e9c63d89 | ||
|
|
6638199c53 | ||
|
|
7e55b934ea | ||
|
|
d8fe1c0b41 | ||
|
|
6da615c77c | ||
|
|
9259d1e5da | ||
|
|
c42d44cb2f | ||
|
|
7fb2027d85 | ||
|
|
f477f892b3 | ||
|
|
fce2af780f | ||
|
|
1a435a6d5d | ||
|
|
b85c460540 | ||
|
|
2187fd884c | ||
|
|
1a174dfb50 | ||
|
|
ae20c3fb90 | ||
|
|
6879d77d74 | ||
|
|
d68a133458 | ||
|
|
7634488074 | ||
|
|
4f521a5382 | ||
|
|
ab9134bf16 | ||
|
|
721cf54fb1 | ||
|
|
f0c5d812b0 | ||
|
|
ac822e4d36 | ||
|
|
a4a74ca9e9 | ||
|
|
d398076c21 | ||
|
|
7243111c57 | ||
|
|
66a0907c95 | ||
|
|
89540d592b | ||
|
|
33926eb315 | ||
|
|
8446c15706 | ||
|
|
c93b9f9057 | ||
|
|
3c75e11571 | ||
|
|
a911bcda18 | ||
|
|
98224ce8b6 | ||
|
|
abc3662bf6 | ||
|
|
73a20a6ad6 | ||
|
|
47fccc0735 | ||
|
|
ba50787180 | ||
|
|
2ee6449fe5 | ||
|
|
be78fbd70e | ||
|
|
4aa793345e | ||
|
|
0ef86febe2 | ||
|
|
7ff48a6291 | ||
|
|
0957d77187 | ||
|
|
81d2dc5d0f | ||
|
|
53f8386587 | ||
|
|
284d06cabf | ||
|
|
3dfbc0ad1d | ||
|
|
d4be583d98 | ||
|
|
dbe14ce35d | ||
|
|
281a439ad4 | ||
|
|
f504aecffe | ||
|
|
050bd01b7b | ||
|
|
901165b5a4 | ||
|
|
0d4cecb352 | ||
|
|
31bced1607 | ||
|
|
fa2f0bf3da | ||
|
|
366c2a3766 | ||
|
|
776f68e1ee | ||
|
|
d93d0aee83 | ||
|
|
78e122ae1a | ||
|
|
c39b2b50ee | ||
|
|
3d56807fbd | ||
|
|
044996e403 | ||
|
|
d539cd9004 | ||
|
|
8e7e104521 | ||
|
|
ccfa079252 | ||
|
|
a39283bf09 | ||
|
|
60d3b8cbce | ||
|
|
7f1c278db8 | ||
|
|
b60260c61a | ||
|
|
0952acbf4d | ||
|
|
06cbc3bae9 | ||
|
|
34bd6a0db5 | ||
|
|
23683c3353 | ||
|
|
935f2bc48d | ||
|
|
4ea3096a85 | ||
|
|
667a9f5139 | ||
|
|
3e508363f7 | ||
|
|
6e88f7b6f7 | ||
|
|
6ef679420e | ||
|
|
6afeea2bea | ||
|
|
e495b33bf1 | ||
|
|
40fddc9e4c | ||
|
|
433db17c0a | ||
|
|
0ba1dfed78 | ||
|
|
807bdc17f6 | ||
|
|
89538d47b8 | ||
|
|
b56aafc2ef | ||
|
|
5511fcf944 | ||
|
|
0c79992db5 | ||
|
|
292a456c06 | ||
|
|
74265c8e84 | ||
|
|
9e924f79a8 | ||
|
|
e32ebc6aa2 | ||
|
|
190b01c553 | ||
|
|
4b7f3826c2 | ||
|
|
aaa2e2cb88 | ||
|
|
e155ca20ea | ||
|
|
02050859f3 | ||
|
|
23c47371d2 | ||
|
|
64131bf975 | ||
|
|
221cd60242 | ||
|
|
72bfc48e63 | ||
|
|
da80ac0042 | ||
|
|
70d28b62fb | ||
|
|
6cc07b6cd0 | ||
|
|
f32be4439c | ||
|
|
97888fed48 | ||
|
|
0089bd820f | ||
|
|
9fd2b2cb9f | ||
|
|
a0471e2464 | ||
|
|
c820eb6a5a | ||
|
|
05c896cf52 | ||
|
|
56b4ef74a6 | ||
|
|
2977e74543 | ||
|
|
45540cfb5e | ||
|
|
351afd353d | ||
|
|
5ecf3bf0e0 | ||
|
|
2196584161 | ||
|
|
45bc4fb37f | ||
|
|
211ba9c7d3 | ||
|
|
af7b7f6322 | ||
|
|
bb7ff7dc30 | ||
|
|
2a10b8384a | ||
|
|
7daa6d83fc | ||
|
|
48a8f84169 | ||
|
|
d0af7fc954 | ||
|
|
cb17a9efb2 | ||
|
|
ba9e3a491b | ||
|
|
672ea1f894 | ||
|
|
833710d33e | ||
|
|
116331dd3f | ||
|
|
760fd9513e | ||
|
|
6780cee679 | ||
|
|
3fffecbdaf | ||
|
|
9bacd7d4bb | ||
|
|
b90f1e4ac0 | ||
|
|
88e136448d | ||
|
|
a6b670d4a2 | ||
|
|
3c1058e2e9 | ||
|
|
2dfcead683 | ||
|
|
807b696295 | ||
|
|
0223ea5f59 | ||
|
|
87c4a5ebb8 | ||
|
|
660e36f097 | ||
|
|
15880da8bb | ||
|
|
c080b2dc3e | ||
|
|
0e69cd4b37 | ||
|
|
3147cbb136 | ||
|
|
100e7be20e | ||
|
|
a4e61ddf04 | ||
|
|
e9b86f352f | ||
|
|
91c465f6e7 | ||
|
|
ae7e857420 | ||
|
|
3972701424 | ||
|
|
0f741cef28 | ||
|
|
5f1d23cfb2 | ||
|
|
f721d2cda9 | ||
|
|
791c992b55 | ||
|
|
30e5d0092d | ||
|
|
5250335863 | ||
|
|
5342eccf12 | ||
|
|
6fd839ac84 | ||
|
|
86b990fe0f | ||
|
|
75b36a138f | ||
|
|
83aa84ae3b | ||
|
|
e7dbfdaad7 |
62
.github/actions/detect-changes/action.yml
vendored
Normal file
62
.github/actions/detect-changes/action.yml
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
name: Detect affected areas
|
||||
description: >-
|
||||
Classify a PR's changed files into CI work lanes (python, frontend, site,
|
||||
scan, deps, mcp_catalog, docker_meta) so the orchestrator can conditionally call only
|
||||
the sub-workflows a PR can affect. Outputs are always "true" on push/dispatch
|
||||
events and fail open (everything "true") when the diff cannot be computed.
|
||||
|
||||
outputs:
|
||||
python:
|
||||
description: Run Python tests / ruff / ty / windows-footguns.
|
||||
value: ${{ steps.classify.outputs.python }}
|
||||
frontend:
|
||||
description: Run the TypeScript typecheck matrix + desktop build.
|
||||
value: ${{ steps.classify.outputs.frontend }}
|
||||
docker_meta:
|
||||
description: Docker setup and meta files have changed.
|
||||
value: ${{ steps.classify.outputs.docker_meta }}
|
||||
site:
|
||||
description: Build the Docusaurus docs site.
|
||||
value: ${{ steps.classify.outputs.site }}
|
||||
scan:
|
||||
description: Run the supply-chain critical-pattern scanner.
|
||||
value: ${{ steps.classify.outputs.scan }}
|
||||
deps:
|
||||
description: Check pyproject.toml dependency upper bounds.
|
||||
value: ${{ steps.classify.outputs.deps }}
|
||||
mcp_catalog:
|
||||
description: Require MCP catalog security review label.
|
||||
value: ${{ steps.classify.outputs.mcp_catalog }}
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Classify changed files
|
||||
id: classify
|
||||
shell: bash
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
REPO: ${{ github.repository }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Only pull_request events are gated. Other events (push, release,
|
||||
# dispatch) leave CHANGED empty, so the classifier fails open and every
|
||||
# lane runs. Post-merge / on-demand validation is never weakened.
|
||||
if [ "$EVENT_NAME" = "pull_request" ]; then
|
||||
# Use the compare endpoint with the pinned base/head SHAs from the
|
||||
# event payload instead of the "current PR files" endpoint. The SHAs
|
||||
# are frozen at trigger time, so the file list is deterministic even
|
||||
# if the PR receives a new push between trigger and detect.
|
||||
CHANGED="$(gh api \
|
||||
--paginate \
|
||||
"repos/${REPO}/compare/${BASE_SHA}...${HEAD_SHA}" \
|
||||
--jq '.files[].filename' || true)"
|
||||
fi
|
||||
|
||||
echo "Changed files:"
|
||||
printf '%s\n' "${CHANGED:-(none)}"
|
||||
printf '%s\n' "${CHANGED:-}" | python3 scripts/ci/classify_changes.py
|
||||
50
.github/actions/retry/action.yml
vendored
Normal file
50
.github/actions/retry/action.yml
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
name: Retry a flaky command
|
||||
description: >-
|
||||
Run a shell command, retrying on non-zero exit. For dependency installs
|
||||
(npm ci, uv sync) whose only failures are transient network/toolchain
|
||||
flakes — a node-gyp header fetch, a registry blip — so CI self-heals
|
||||
instead of needing a manual re-run.
|
||||
|
||||
inputs:
|
||||
command:
|
||||
description: Shell command to run (and retry).
|
||||
required: true
|
||||
attempts:
|
||||
description: Max attempts before giving up.
|
||||
default: "3"
|
||||
delay:
|
||||
description: Seconds to wait between attempts.
|
||||
default: "10"
|
||||
working-directory:
|
||||
description: Directory to run in.
|
||||
default: "."
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
# command goes through env, never interpolated into the script body, so
|
||||
# a command with quotes/specials can't break or inject into the runner.
|
||||
env:
|
||||
_CMD: ${{ inputs.command }}
|
||||
_ATTEMPTS: ${{ inputs.attempts }}
|
||||
_DELAY: ${{ inputs.delay }}
|
||||
run: |
|
||||
set -uo pipefail
|
||||
n=0
|
||||
while :; do
|
||||
n=$((n + 1))
|
||||
echo "::group::attempt $n/$_ATTEMPTS: $_CMD"
|
||||
if bash -c "$_CMD"; then
|
||||
echo "::endgroup::"
|
||||
exit 0
|
||||
fi
|
||||
echo "::endgroup::"
|
||||
if [ "$n" -ge "$_ATTEMPTS" ]; then
|
||||
echo "::error::failed after $n attempts: $_CMD"
|
||||
exit 1
|
||||
fi
|
||||
echo "::warning::attempt $n failed; retrying in ${_DELAY}s: $_CMD"
|
||||
sleep "$_DELAY"
|
||||
done
|
||||
100
.github/workflows/build-windows-installer.yml
vendored
100
.github/workflows/build-windows-installer.yml
vendored
@@ -1,100 +0,0 @@
|
||||
name: Build Windows Installer
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
# Gate: workflow_dispatch is already restricted to users with write access,
|
||||
# but we want ADMIN-only. Explicitly check the triggering actor's repo
|
||||
# permission via the API and fail fast for anyone below admin.
|
||||
authorize:
|
||||
name: Authorize (admins only)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Check actor is a repo admin
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
ACTOR: ${{ github.actor }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
perm=$(gh api \
|
||||
"repos/${{ github.repository }}/collaborators/${ACTOR}/permission" \
|
||||
--jq '.permission')
|
||||
echo "Actor '${ACTOR}' has permission: ${perm}"
|
||||
if [ "${perm}" != "admin" ]; then
|
||||
echo "::error::'${ACTOR}' is not a repo admin (permission=${perm}). Refusing to build/sign."
|
||||
exit 1
|
||||
fi
|
||||
echo "Authorized: '${ACTOR}' is an admin."
|
||||
|
||||
build:
|
||||
name: Hermes-Setup.exe
|
||||
needs: authorize
|
||||
runs-on: windows-latest
|
||||
timeout-minutes: 30
|
||||
permissions:
|
||||
contents: read
|
||||
# Required for OIDC auth to Azure (azure/login federated credentials).
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
|
||||
- name: Install npm dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Setup Rust
|
||||
uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable
|
||||
|
||||
- name: Cache Rust targets
|
||||
uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2
|
||||
with:
|
||||
workspaces: apps/bootstrap-installer/src-tauri
|
||||
|
||||
- name: Build installer
|
||||
run: npm run tauri:build
|
||||
working-directory: apps/bootstrap-installer
|
||||
|
||||
- name: Azure login (OIDC)
|
||||
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2
|
||||
with:
|
||||
client-id: ${{ secrets.AZURE_CLIENT_ID }}
|
||||
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
|
||||
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
|
||||
|
||||
- name: Sign Hermes-Setup.exe with Azure Artifact Signing
|
||||
uses: azure/artifact-signing-action@c7ab2a863ab5f9a846ddb8265964877ef296ee82 # v2
|
||||
with:
|
||||
endpoint: ${{ vars.AZURE_SIGNING_ENDPOINT }}
|
||||
signing-account-name: ${{ vars.AZURE_SIGNING_ACCOUNT_NAME }}
|
||||
certificate-profile-name: ${{ vars.AZURE_SIGNING_CERTIFICATE_PROFILE }}
|
||||
# Sign both the raw exe and the bundled NSIS installer.
|
||||
files-folder: ${{ github.workspace }}\apps\bootstrap-installer\src-tauri\target\release
|
||||
files-folder-filter: exe
|
||||
files-folder-recurse: true
|
||||
file-digest: SHA256
|
||||
timestamp-rfc3161: http://timestamp.acs.microsoft.com
|
||||
timestamp-digest: SHA256
|
||||
|
||||
- name: Upload NSIS installer
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: Hermes-Setup-installer
|
||||
path: apps/bootstrap-installer/src-tauri/target/release/bundle/nsis/*.exe
|
||||
|
||||
- name: Upload raw exe
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: Hermes-Setup-exe
|
||||
path: apps/bootstrap-installer/src-tauri/target/release/Hermes-Setup.exe
|
||||
145
.github/workflows/ci.yml
vendored
Normal file
145
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
name: CI
|
||||
|
||||
# Orchestrator workflow. Runs ``detect-changes`` once, then conditionally
|
||||
# calls the sub-workflows that a PR can actually affect. A final
|
||||
# ``all-checks-pass`` gate job aggregates results so branch protection only
|
||||
# needs to require a single check.
|
||||
#
|
||||
# Sub-workflows are triggered via ``workflow_call`` and keep their own job
|
||||
# definitions, matrices, and concurrency settings. They no longer have
|
||||
# ``push:`` / ``pull_request:`` triggers of their own — everything flows
|
||||
# through this file.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write # needed by lint (PR comment) + supply-chain (PR comment)
|
||||
actions: read # needed by osv-scanner (SARIF upload)
|
||||
security-events: write # needed by osv-scanner (SARIF upload)
|
||||
|
||||
concurrency:
|
||||
group: ci-${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# detect: run the classifier once. Every downstream job reads its outputs
|
||||
# to decide whether to run. On push/dispatch the classifier fails open
|
||||
# (all lanes true) so post-merge validation is never weakened.
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
detect:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
python: ${{ steps.classify.outputs.python }}
|
||||
frontend: ${{ steps.classify.outputs.frontend }}
|
||||
site: ${{ steps.classify.outputs.site }}
|
||||
scan: ${{ steps.classify.outputs.scan }}
|
||||
deps: ${{ steps.classify.outputs.deps }}
|
||||
docker_meta: ${{ steps.classify.outputs.docker_meta }}
|
||||
mcp_catalog: ${{ steps.classify.outputs.mcp_catalog }}
|
||||
event_name: ${{ github.event_name }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Detect affected areas
|
||||
id: classify
|
||||
uses: ./.github/actions/detect-changes
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Lane-gated sub-workflows. Each runs in parallel after detect finishes.
|
||||
# Skipped workflows (if condition is false) don't spin up runners.
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
tests:
|
||||
needs: detect
|
||||
if: needs.detect.outputs.python == 'true'
|
||||
uses: ./.github/workflows/tests.yml
|
||||
|
||||
lint:
|
||||
needs: detect
|
||||
if: needs.detect.outputs.python == 'true'
|
||||
uses: ./.github/workflows/lint.yml
|
||||
with:
|
||||
event_name: ${{ needs.detect.outputs.event_name }}
|
||||
|
||||
typecheck:
|
||||
needs: detect
|
||||
if: needs.detect.outputs.frontend == 'true'
|
||||
uses: ./.github/workflows/typecheck.yml
|
||||
|
||||
docs-site:
|
||||
needs: detect
|
||||
if: needs.detect.outputs.site == 'true'
|
||||
uses: ./.github/workflows/docs-site-checks.yml
|
||||
|
||||
history-check:
|
||||
needs: detect
|
||||
if: needs.detect.outputs.event_name == 'pull_request'
|
||||
uses: ./.github/workflows/history-check.yml
|
||||
|
||||
contributor-check:
|
||||
needs: detect
|
||||
if: needs.detect.outputs.python == 'true'
|
||||
uses: ./.github/workflows/contributor-check.yml
|
||||
|
||||
uv-lockfile:
|
||||
needs: detect
|
||||
uses: ./.github/workflows/uv-lockfile-check.yml
|
||||
|
||||
docker-lint:
|
||||
needs: detect
|
||||
if: needs.detect.outputs.docker_meta == 'true'
|
||||
uses: ./.github/workflows/docker-lint.yml
|
||||
|
||||
supply-chain:
|
||||
needs: detect
|
||||
if: needs.detect.outputs.event_name == 'pull_request' && (needs.detect.outputs.scan == 'true' || needs.detect.outputs.deps == 'true' || needs.detect.outputs.mcp_catalog == 'true')
|
||||
uses: ./.github/workflows/supply-chain-audit.yml
|
||||
with:
|
||||
event_name: ${{ needs.detect.outputs.event_name }}
|
||||
scan: ${{ needs.detect.outputs.scan == 'true' }}
|
||||
deps: ${{ needs.detect.outputs.deps == 'true' }}
|
||||
mcp_catalog: ${{ needs.detect.outputs.mcp_catalog == 'true' }}
|
||||
|
||||
osv-scanner:
|
||||
needs: detect
|
||||
uses: ./.github/workflows/osv-scanner.yml
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Gate: runs after everything. ``if: always()`` ensures it reports a
|
||||
# status even when some deps were skipped. Only actual ``failure``
|
||||
# results cause it to fail; ``skipped`` is treated as success.
|
||||
#
|
||||
# Branch protection should require ONLY this check.
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
all-checks-pass:
|
||||
name: All required checks pass
|
||||
needs:
|
||||
- tests
|
||||
- lint
|
||||
- typecheck
|
||||
- docs-site
|
||||
- history-check
|
||||
- contributor-check
|
||||
- uv-lockfile
|
||||
- docker-lint
|
||||
- supply-chain
|
||||
- osv-scanner
|
||||
if: always()
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Evaluate job results
|
||||
env:
|
||||
RESULTS: ${{ toJSON(needs.*.result) }}
|
||||
run: |
|
||||
echo "$RESULTS" | python3 -c "
|
||||
import json, sys
|
||||
results = json.load(sys.stdin)
|
||||
failed = [r for r in results if r == 'failure']
|
||||
if failed:
|
||||
print(f'::error::{len(failed)} job(s) failed')
|
||||
sys.exit(1)
|
||||
print('All checks passed (or were skipped)')
|
||||
"
|
||||
21
.github/workflows/contributor-check.yml
vendored
21
.github/workflows/contributor-check.yml
vendored
@@ -1,11 +1,8 @@
|
||||
name: Contributor Attribution Check
|
||||
|
||||
on:
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
branches: [main]
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -17,21 +14,7 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0 # Full history needed for git log
|
||||
|
||||
- name: Check if relevant files changed
|
||||
id: filter
|
||||
run: |
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
HEAD="${{ github.event.pull_request.head.sha }}"
|
||||
CHANGED=$(git diff --name-only "$BASE"..."$HEAD" -- '*.py' '**/*.py' '.github/workflows/contributor-check.yml' || true)
|
||||
if [ -n "$CHANGED" ]; then
|
||||
echo "run=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "run=false" >> "$GITHUB_OUTPUT"
|
||||
echo "No Python files changed, skipping attribution check."
|
||||
fi
|
||||
|
||||
- name: Check for unmapped contributor emails
|
||||
if: steps.filter.outputs.run == 'true'
|
||||
run: |
|
||||
# Get the merge base between this PR and main
|
||||
MERGE_BASE=$(git merge-base origin/main HEAD)
|
||||
|
||||
14
.github/workflows/docker-lint.yml
vendored
14
.github/workflows/docker-lint.yml
vendored
@@ -11,19 +11,7 @@ name: Docker / shell lint
|
||||
# activate script doesn't exist at lint time.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- Dockerfile
|
||||
- docker/**
|
||||
- .hadolint.yaml
|
||||
- .github/workflows/docker-lint.yml
|
||||
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
branches: [main]
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
45
.github/workflows/docker-publish.yml
vendored
45
.github/workflows/docker-publish.yml
vendored
@@ -16,7 +16,6 @@ on:
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
release:
|
||||
types: [published]
|
||||
@@ -56,13 +55,21 @@ jobs:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
# The image build + smoke test + integration tests run ONLY on
|
||||
# push-to-main and release — never on PRs. They are the heaviest jobs
|
||||
# in CI (~15-45 min) and a broken build surfaces on the main push (and
|
||||
# is gated pre-merge by docker-lint + uv-lockfile-check). Every step
|
||||
# below is skipped on PRs, so the job still reports green and the
|
||||
# required check never hangs.
|
||||
- name: Set up Docker Buildx
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Build once, load into the local daemon for smoke testing. Cached
|
||||
# to gha with a per-arch scope; the push step below reuses every
|
||||
# layer from this build.
|
||||
- name: Build image (amd64, smoke test)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
@@ -76,6 +83,7 @@ jobs:
|
||||
cache-to: type=gha,mode=max,scope=docker-amd64
|
||||
|
||||
- name: Smoke test image
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
@@ -102,12 +110,15 @@ jobs:
|
||||
# cheapest path to coverage on every PR that touches docker code.
|
||||
# ---------------------------------------------------------------------
|
||||
- name: Install uv (for docker tests)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Set up Python 3.11 (for docker tests)
|
||||
if: github.event_name != 'pull_request'
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install Python dependencies (for docker tests)
|
||||
if: github.event_name != 'pull_request'
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
@@ -118,6 +129,7 @@ jobs:
|
||||
uv pip install -e ".[dev]"
|
||||
|
||||
- name: Run docker integration tests
|
||||
if: github.event_name != 'pull_request'
|
||||
env:
|
||||
# Skip rebuild; use the image already loaded by the build step.
|
||||
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
|
||||
@@ -190,7 +202,9 @@ jobs:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
# arm64 build runs only on push-to-main and release (see build-amd64).
|
||||
- name: Set up Docker Buildx
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Log in to ghcr.io so the registry-backed build cache below can be
|
||||
@@ -201,41 +215,21 @@ jobs:
|
||||
# crashed the build before the smoke test (the reason the gha cache
|
||||
# was removed from arm64 PRs in the first place).
|
||||
- name: Log in to ghcr.io (build cache)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Build once, load into the local daemon for smoke testing.
|
||||
#
|
||||
# PR builds use the registry-backed cache READ-ONLY (cache-from only):
|
||||
# they pull warm layers pushed by the most recent main build but never
|
||||
# write, so rapid PR pushes don't race on cache writes or pollute the
|
||||
# cache ref. This restores warm-cache speed to arm64 PR builds (which
|
||||
# were running fully uncached and were ~45% slower than amd64, making
|
||||
# them the job most often cancelled on supersede).
|
||||
# Build once, load into the local daemon for smoke testing, then push
|
||||
# by digest below. Reads AND writes the registry-backed cache so the
|
||||
# push reuses layers from this build and the next build starts warm.
|
||||
#
|
||||
# Registry cache (type=registry on ghcr.io) is used instead of the gha
|
||||
# cache that previously broke here: its credential is the job-lifetime
|
||||
# GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives-
|
||||
# token failure mode cannot recur.
|
||||
- name: Build image (arm64, smoke test, cache read-only PR)
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.IMAGE_NAME }}:test
|
||||
build-args: |
|
||||
HERMES_GIT_SHA=${{ github.sha }}
|
||||
cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
|
||||
|
||||
# Main/release builds read AND write the registry cache so the digest
|
||||
# push below reuses layers from this smoke-test build, and so the next
|
||||
# PR/main build starts warm.
|
||||
- name: Build image (arm64, smoke test, cached publish)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
@@ -251,6 +245,7 @@ jobs:
|
||||
cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max
|
||||
|
||||
- name: Smoke test image
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
|
||||
18
.github/workflows/docs-site-checks.yml
vendored
18
.github/workflows/docs-site-checks.yml
vendored
@@ -1,13 +1,7 @@
|
||||
name: Docs Site Checks
|
||||
|
||||
on:
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
workflow_dispatch:
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -25,15 +19,19 @@ jobs:
|
||||
cache-dependency-path: website/package-lock.json
|
||||
|
||||
- name: Install website dependencies
|
||||
run: npm ci
|
||||
working-directory: website
|
||||
uses: ./.github/actions/retry
|
||||
with:
|
||||
command: npm ci
|
||||
working-directory: website
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install ascii-guard
|
||||
run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
|
||||
uses: ./.github/actions/retry
|
||||
with:
|
||||
command: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
|
||||
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
6
.github/workflows/history-check.yml
vendored
6
.github/workflows/history-check.yml
vendored
@@ -14,11 +14,7 @@ name: History Check
|
||||
# the PR head and main to be non-empty.
|
||||
|
||||
on:
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
branches: [main]
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
35
.github/workflows/lint.yml
vendored
35
.github/workflows/lint.yml
vendored
@@ -9,18 +9,12 @@ name: Lint (ruff + ty)
|
||||
# enforcement fails.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
- "website/**"
|
||||
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
branches: [main]
|
||||
workflow_call:
|
||||
inputs:
|
||||
event_name:
|
||||
description: The event name from the calling orchestrator (pull_request or push).
|
||||
type: string
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -33,6 +27,7 @@ concurrency:
|
||||
jobs:
|
||||
lint-diff:
|
||||
name: ruff + ty diff
|
||||
if: inputs.event_name == 'pull_request'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
@@ -45,16 +40,16 @@ jobs:
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff + ty
|
||||
run: |
|
||||
uv tool install ruff
|
||||
uv tool install ty
|
||||
uses: ./.github/actions/retry
|
||||
with:
|
||||
command: uv tool install ruff && uv tool install ty
|
||||
|
||||
- name: Determine base ref
|
||||
id: base
|
||||
run: |
|
||||
# For PRs, diff against the merge base with the target branch.
|
||||
# For pushes to main, diff against the previous commit on main.
|
||||
if [ "${{ github.event_name }}" = "pull_request" ]; then
|
||||
if [ "${{ inputs.event_name }}" = "pull_request" ]; then
|
||||
BASE_SHA=$(git merge-base "origin/${{ github.base_ref }}" HEAD)
|
||||
BASE_REF="origin/${{ github.base_ref }}"
|
||||
else
|
||||
@@ -110,7 +105,7 @@ jobs:
|
||||
--base-ty .lint-reports/base/ty.json \
|
||||
--head-ty .lint-reports/head/ty.json \
|
||||
--base-ref "${{ steps.base.outputs.ref }}" \
|
||||
--head-ref "${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}" \
|
||||
--head-ref "${{ inputs.event_name == 'pull_request' && github.head_ref || github.ref_name }}" \
|
||||
--output .lint-reports/summary.md
|
||||
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
@@ -122,7 +117,7 @@ jobs:
|
||||
retention-days: 14
|
||||
|
||||
- name: Post / update PR comment
|
||||
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
|
||||
if: inputs.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
|
||||
continue-on-error: true
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
|
||||
with:
|
||||
@@ -172,7 +167,9 @@ jobs:
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff
|
||||
run: uv tool install ruff
|
||||
uses: ./.github/actions/retry
|
||||
with:
|
||||
command: uv tool install ruff
|
||||
|
||||
- name: ruff check .
|
||||
# No --exit-zero, no || true. Exit code propagates to the job,
|
||||
|
||||
24
.github/workflows/osv-scanner.yml
vendored
24
.github/workflows/osv-scanner.yml
vendored
@@ -1,8 +1,8 @@
|
||||
name: OSV-Scanner
|
||||
|
||||
# Scans lockfiles (uv.lock, package-lock.json) against the OSV vulnerability
|
||||
# database. Runs on every PR that touches a lockfile and on a weekly schedule
|
||||
# against main.
|
||||
# database. Runs on every PR/push (via the ci.yml orchestrator's workflow_call)
|
||||
# and on a weekly schedule against main.
|
||||
#
|
||||
# This is detection-only — OSV-Scanner does NOT open PRs or modify pins.
|
||||
# It reports known CVEs in currently-pinned dependency versions so we can
|
||||
@@ -10,9 +10,9 @@ name: OSV-Scanner
|
||||
# (full SHA / exact version) is preserved; only the notification signal
|
||||
# is added.
|
||||
#
|
||||
# Complements the existing supply-chain-audit.yml workflow (which scans
|
||||
# for malicious code patterns in PR diffs) by covering the orthogonal
|
||||
# "currently-pinned dep became known-vulnerable" case.
|
||||
# Complements the supply-chain-audit.yml workflow (which scans for malicious
|
||||
# code patterns in PR diffs) by covering the orthogonal "currently-pinned
|
||||
# dep became known-vulnerable" case.
|
||||
#
|
||||
# Uses Google's officially-recommended reusable workflow, pinned by SHA.
|
||||
# Findings land in the repo's Security tab (Code Scanning > OSV-Scanner).
|
||||
@@ -20,19 +20,7 @@ name: OSV-Scanner
|
||||
# vulnerabilities in pinned deps that we may need to patch deliberately.
|
||||
|
||||
on:
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
branches: [main]
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- "uv.lock"
|
||||
- "pyproject.toml"
|
||||
- "package.json"
|
||||
- "package-lock.json"
|
||||
- "website/package-lock.json"
|
||||
workflow_call:
|
||||
schedule:
|
||||
# Weekly scan against main — catches CVEs published after merge for
|
||||
# deps that haven't changed since.
|
||||
|
||||
133
.github/workflows/supply-chain-audit.yml
vendored
133
.github/workflows/supply-chain-audit.yml
vendored
@@ -1,16 +1,5 @@
|
||||
name: Supply Chain Audit
|
||||
|
||||
on:
|
||||
# No paths filter — the jobs must always run so required checks
|
||||
# report a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: read
|
||||
|
||||
# Narrow, high-signal scanner. Only fires on critical indicators of supply
|
||||
# chain attacks (e.g. the litellm-style payloads). Low-signal heuristics
|
||||
# (plain base64, plain exec/eval, dependency/Dockerfile/workflow edits,
|
||||
@@ -19,56 +8,40 @@ permissions:
|
||||
# the scanner. Keep this file's checks ruthlessly narrow: if you find
|
||||
# yourself adding WARNING-tier patterns here again, make a separate
|
||||
# advisory-only workflow instead.
|
||||
#
|
||||
# Path-gating is handled centrally by the ``ci.yml`` orchestrator's
|
||||
# ``detect`` job. The orchestrator passes ``scan`` / ``deps`` /
|
||||
# ``mcp_catalog`` booleans as inputs; this workflow's jobs gate on those
|
||||
# inputs instead of re-computing the diff.
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
event_name:
|
||||
description: The event name from the calling orchestrator.
|
||||
type: string
|
||||
required: true
|
||||
scan:
|
||||
description: Whether supply-chain-relevant files changed.
|
||||
type: boolean
|
||||
required: true
|
||||
deps:
|
||||
description: Whether pyproject.toml changed.
|
||||
type: boolean
|
||||
required: true
|
||||
mcp_catalog:
|
||||
description: Whether the MCP catalog / installer changed.
|
||||
type: boolean
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
# ── Path filter (shared by both scan and dep-bounds) ───────────────
|
||||
changes:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
# True when any file the scanner cares about changed in this PR
|
||||
scan: ${{ steps.filter.outputs.scan }}
|
||||
# True when pyproject.toml changed in this PR
|
||||
deps: ${{ steps.filter.outputs.deps }}
|
||||
# True when the curated MCP catalog / bundled MCP manifests changed.
|
||||
mcp_catalog: ${{ steps.filter.outputs.mcp_catalog }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Check for relevant file changes
|
||||
id: filter
|
||||
run: |
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
HEAD="${{ github.event.pull_request.head.sha }}"
|
||||
SCAN_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- \
|
||||
'*.py' '**/*.py' '*.pth' '**/*.pth' \
|
||||
'setup.py' 'setup.cfg' \
|
||||
'sitecustomize.py' 'usercustomize.py' '__init__.pth' \
|
||||
'pyproject.toml' || true)
|
||||
if [ -n "$SCAN_FILES" ]; then
|
||||
echo "scan=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "scan=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
DEPS_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- 'pyproject.toml' || true)
|
||||
if [ -n "$DEPS_FILES" ]; then
|
||||
echo "deps=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "deps=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
MCP_CATALOG_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- \
|
||||
'optional-mcps/**' \
|
||||
'hermes_cli/mcp_catalog.py' || true)
|
||||
if [ -n "$MCP_CATALOG_FILES" ]; then
|
||||
echo "mcp_catalog=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "mcp_catalog=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
scan:
|
||||
name: Scan PR for critical supply chain risks
|
||||
needs: changes
|
||||
if: needs.changes.outputs.scan == 'true'
|
||||
if: inputs.scan
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -111,7 +84,7 @@ jobs:
|
||||
fi
|
||||
|
||||
# --- base64 decode + exec/eval on the same line (the litellm attack pattern) ---
|
||||
B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
|
||||
B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
|
||||
if [ -n "$B64_EXEC_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: base64 decode + exec/eval combo
|
||||
@@ -125,7 +98,7 @@ jobs:
|
||||
fi
|
||||
|
||||
# --- subprocess with encoded/obfuscated command argument ---
|
||||
PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
|
||||
PROC_HITS=$(echo "$DIFF" | grep -n '^+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
|
||||
if [ -n "$PROC_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: subprocess with encoded/obfuscated command
|
||||
@@ -187,23 +160,9 @@ jobs:
|
||||
echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
|
||||
exit 1
|
||||
|
||||
# Gate: reports success when scan was skipped (no relevant files changed).
|
||||
# This ensures the required check always gets a status.
|
||||
scan-gate:
|
||||
name: Scan PR for critical supply chain risks
|
||||
needs: changes
|
||||
# always() so the gate still reports SUCCESS even if `changes` fails/is
|
||||
# skipped — without it, a failed dependency would leave the required
|
||||
# check unreported (i.e. "pending"), the exact failure mode this fixes.
|
||||
if: always() && needs.changes.outputs.scan != 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: echo "No supply-chain-relevant files changed, skipping scan."
|
||||
|
||||
dep-bounds:
|
||||
name: Check PyPI dependency upper bounds
|
||||
needs: changes
|
||||
if: needs.changes.outputs.deps == 'true'
|
||||
if: inputs.deps
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -253,7 +212,7 @@ jobs:
|
||||
$(cat /tmp/unbounded.txt)
|
||||
\`\`\`
|
||||
|
||||
**Fix:** Add an upper bound, e.g. \`\"package>=1.2.0,<2\"\`
|
||||
**Fix:** Add an upper bound, e.g. \`"package>=1.2.0,<2"\`
|
||||
|
||||
---
|
||||
*See PR #2810 and CONTRIBUTING.md for the full policy rationale.*"
|
||||
@@ -266,23 +225,9 @@ jobs:
|
||||
echo "::error::PyPI dependencies without upper bounds detected. Add <next_major ceiling per CONTRIBUTING.md policy."
|
||||
exit 1
|
||||
|
||||
# Gate: reports success when dep-bounds was skipped (no pyproject.toml changed).
|
||||
# This ensures the required check always gets a status.
|
||||
dep-bounds-gate:
|
||||
name: Check PyPI dependency upper bounds
|
||||
needs: changes
|
||||
# always() so the gate still reports SUCCESS even if `changes` fails/is
|
||||
# skipped — without it, a failed dependency would leave the required
|
||||
# check unreported (i.e. "pending"), the exact failure mode this fixes.
|
||||
if: always() && needs.changes.outputs.deps != 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: echo "No pyproject.toml changes, skipping dependency bounds check."
|
||||
|
||||
mcp-catalog-review:
|
||||
name: MCP catalog security review
|
||||
needs: changes
|
||||
if: needs.changes.outputs.mcp_catalog == 'true'
|
||||
if: inputs.mcp_catalog
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -317,11 +262,3 @@ jobs:
|
||||
gh pr comment "$PR" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)"
|
||||
echo "::error::MCP catalog changes require the mcp-catalog-reviewed label."
|
||||
exit 1
|
||||
|
||||
mcp-catalog-review-gate:
|
||||
name: MCP catalog security review
|
||||
needs: changes
|
||||
if: always() && needs.changes.outputs.mcp_catalog != 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: echo "No MCP catalog changes, skipping MCP catalog security review."
|
||||
|
||||
25
.github/workflows/tests.yml
vendored
25
.github/workflows/tests.yml
vendored
@@ -1,21 +1,12 @@
|
||||
name: Tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
branches: [main]
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Cancel in-progress runs for the same PR/branch
|
||||
# Cancel in-progress runs for the same ref
|
||||
concurrency:
|
||||
group: tests-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
@@ -49,7 +40,7 @@ jobs:
|
||||
RG_VERSION=15.1.0
|
||||
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
|
||||
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
|
||||
curl -sSfL -o "$RG_TARBALL" \
|
||||
curl -sSfL --retry 3 --retry-delay 5 -o "$RG_TARBALL" \
|
||||
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
|
||||
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
|
||||
tar -xzf "$RG_TARBALL"
|
||||
@@ -78,7 +69,9 @@ jobs:
|
||||
# fails if the lock is out of sync with pyproject.toml), giving a
|
||||
# reproducible env. It also creates .venv itself, so no separate
|
||||
# `uv venv` step is needed.
|
||||
run: uv sync --locked --python 3.11 --extra all --extra dev
|
||||
uses: ./.github/actions/retry
|
||||
with:
|
||||
command: uv sync --locked --python 3.11 --extra all --extra dev
|
||||
|
||||
- name: Minimize uv cache
|
||||
# Optimized for CI: prunes pre-built wheels that are cheap to
|
||||
@@ -171,7 +164,7 @@ jobs:
|
||||
RG_VERSION=15.1.0
|
||||
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
|
||||
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
|
||||
curl -sSfL -o "$RG_TARBALL" \
|
||||
curl -sSfL --retry 3 --retry-delay 5 -o "$RG_TARBALL" \
|
||||
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
|
||||
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
|
||||
tar -xzf "$RG_TARBALL"
|
||||
@@ -200,7 +193,9 @@ jobs:
|
||||
# fails if the lock is out of sync with pyproject.toml), giving a
|
||||
# reproducible env. It also creates .venv itself, so no separate
|
||||
# `uv venv` step is needed.
|
||||
run: uv sync --locked --python 3.11 --extra all --extra dev
|
||||
uses: ./.github/actions/retry
|
||||
with:
|
||||
command: uv sync --locked --python 3.11 --extra all --extra dev
|
||||
|
||||
- name: Minimize uv cache
|
||||
# Optimized for CI: prunes pre-built wheels that are cheap to
|
||||
|
||||
24
.github/workflows/typecheck.yml
vendored
24
.github/workflows/typecheck.yml
vendored
@@ -2,13 +2,7 @@
|
||||
name: Typecheck
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
branches: [main]
|
||||
workflow_call:
|
||||
|
||||
jobs:
|
||||
typecheck:
|
||||
@@ -24,7 +18,14 @@ jobs:
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
- run: npm ci
|
||||
# --ignore-scripts: typecheck only needs the TS sources + type defs, not
|
||||
# native builds. Skipping install scripts drops node-pty's node-gyp
|
||||
# header fetch — the transient flake that killed this job pre-`tsc` — and
|
||||
# is faster. retry covers the remaining registry blips.
|
||||
-
|
||||
uses: ./.github/actions/retry
|
||||
with:
|
||||
command: npm ci --ignore-scripts
|
||||
- run: npm run --prefix ${{ matrix.package }} typecheck
|
||||
|
||||
# Production build of the desktop renderer. `typecheck` runs `tsc` only,
|
||||
@@ -41,5 +42,10 @@ jobs:
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
- run: npm ci
|
||||
# Keep install scripts here: the production build may need node-pty's
|
||||
# native binary. retry handles the transient install-time fetch flakes.
|
||||
-
|
||||
uses: ./.github/actions/retry
|
||||
with:
|
||||
command: npm ci
|
||||
- run: npm run --prefix apps/desktop build
|
||||
|
||||
15
.github/workflows/uv-lockfile-check.yml
vendored
15
.github/workflows/uv-lockfile-check.yml
vendored
@@ -44,25 +44,14 @@ name: uv.lock check
|
||||
# the same way. Better to catch it here than after merge.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- "pyproject.toml"
|
||||
- "uv.lock"
|
||||
- ".github/workflows/uv-lockfile-check.yml"
|
||||
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
branches: [main]
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: uv-lockfile-check-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check:
|
||||
|
||||
13
Dockerfile
13
Dockerfile
@@ -290,6 +290,19 @@ ENV HERMES_TUI_DIR=/opt/hermes/ui-tui
|
||||
ENV HERMES_HOME=/opt/data
|
||||
ENV HERMES_WRITE_SAFE_ROOT=/opt/data
|
||||
ENV HERMES_DISABLE_LAZY_INSTALLS=1
|
||||
# The published image seals /opt/hermes (root-owned, read-only) so a runtime
|
||||
# lazy install can't mutate the agent's own venv and brick it. But opt-in
|
||||
# backends (Firecrawl web search, Exa, Feishu, …) keep their SDKs in
|
||||
# tools/lazy_deps.py — deliberately NOT baked into [all] (see pyproject.toml
|
||||
# policy 2026-05-12: one quarantined release must not break every install).
|
||||
# Redirect those lazy installs to a writable dir on the durable data volume.
|
||||
# lazy_deps appends this dir to the END of sys.path, so a package installed
|
||||
# here can only ADD modules — it can never shadow or downgrade a core module,
|
||||
# so the sealed-venv guarantee holds even with installs re-enabled. The dir
|
||||
# is seeded + chowned to the hermes user by docker/stage2-hook.sh and lives
|
||||
# on the /opt/data volume, so it persists across container recreates / image
|
||||
# updates (an ABI stamp invalidates it if a rebuild bumps the interpreter).
|
||||
ENV HERMES_LAZY_INSTALL_TARGET=/opt/data/lazy-packages
|
||||
|
||||
# `docker exec` privilege-drop shim. When operators run
|
||||
# `docker exec <c> hermes ...` they default to root, and any file the
|
||||
|
||||
@@ -23,6 +23,11 @@ except ModuleNotFoundError:
|
||||
# new code but ``uv pip install -e .`` didn't finish. Missing bootstrap
|
||||
# means UTF-8 stdio setup is skipped on Windows; POSIX is unaffected.
|
||||
pass
|
||||
else:
|
||||
# Stop a ``utils/``/``proxy/``/``ui/`` package in the launch directory from
|
||||
# shadowing Hermes's own modules — ``hermes acp`` can be started from any
|
||||
# cwd, including a project that has same-named packages on its path.
|
||||
hermes_bootstrap.harden_import_path()
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
|
||||
@@ -74,7 +74,7 @@ _POLISHED_TOOLS = {
|
||||
"kanban_create", "kanban_show", "kanban_comment", "kanban_complete",
|
||||
"kanban_block", "kanban_link", "kanban_heartbeat",
|
||||
"yb_query_group_info", "yb_query_group_members", "yb_search_sticker",
|
||||
"yb_send_dm", "yb_send_sticker", "mixture_of_agents",
|
||||
"yb_send_dm", "yb_send_sticker",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -106,7 +106,12 @@ def _custom_provider_extra_body_for_agent(
|
||||
base_url: str,
|
||||
custom_providers: List[Dict[str, Any]],
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
if (provider or "").strip().lower() != "custom":
|
||||
provider_norm = (provider or "").strip().lower()
|
||||
if provider_norm == "custom":
|
||||
provider_key_filter = ""
|
||||
elif provider_norm.startswith("custom:"):
|
||||
provider_key_filter = provider_norm.split(":", 1)[1].strip()
|
||||
else:
|
||||
return None
|
||||
|
||||
target_url = _normalized_custom_base_url(base_url)
|
||||
@@ -117,6 +122,13 @@ def _custom_provider_extra_body_for_agent(
|
||||
for entry in custom_providers or []:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
if provider_key_filter:
|
||||
entry_keys = {
|
||||
str(entry.get("provider_key", "") or "").strip().lower(),
|
||||
str(entry.get("name", "") or "").strip().lower(),
|
||||
}
|
||||
if provider_key_filter not in entry_keys:
|
||||
continue
|
||||
if _normalized_custom_base_url(entry.get("base_url")) != target_url:
|
||||
continue
|
||||
extra_body = entry.get("extra_body")
|
||||
@@ -707,6 +719,15 @@ def init_agent(
|
||||
print("🔑 Using credentials: Microsoft Entra ID")
|
||||
elif isinstance(effective_key, str) and len(effective_key) > 12:
|
||||
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
|
||||
elif agent.provider == "moa":
|
||||
from agent.moa_loop import MoAClient
|
||||
agent.api_mode = "chat_completions"
|
||||
agent.client = MoAClient(agent.model or "default")
|
||||
agent._client_kwargs = {}
|
||||
agent.api_key = api_key or "moa-virtual-provider"
|
||||
agent.base_url = base_url or "moa://local"
|
||||
if not agent.quiet_mode:
|
||||
print(f"🤖 AI Agent initialized with MoA preset: {agent.model}")
|
||||
elif agent.api_mode == "bedrock_converse":
|
||||
# AWS Bedrock — uses boto3 directly, no OpenAI client needed.
|
||||
# Region is extracted from the base_url or defaults to us-east-1.
|
||||
@@ -1506,6 +1527,7 @@ def init_agent(
|
||||
# 3. Check general plugin system (user-installed plugins)
|
||||
# 4. Fall back to built-in ContextCompressor
|
||||
_selected_engine = None
|
||||
_copy_failed = False
|
||||
_engine_name = "compressor" # default
|
||||
try:
|
||||
_ctx_cfg = _agent_cfg.get("context", {}) if isinstance(_agent_cfg, dict) else {}
|
||||
@@ -1523,15 +1545,35 @@ def init_agent(
|
||||
|
||||
# Try general plugin system as fallback
|
||||
if _selected_engine is None:
|
||||
_candidate = None
|
||||
try:
|
||||
from hermes_cli.plugins import get_plugin_context_engine
|
||||
_candidate = get_plugin_context_engine()
|
||||
if _candidate and _candidate.name == _engine_name:
|
||||
_selected_engine = _candidate
|
||||
except Exception:
|
||||
pass
|
||||
_candidate = None
|
||||
if _candidate is not None and _candidate.name == _engine_name:
|
||||
# Deep-copy the shared plugin singleton so a child agent's
|
||||
# update_model() can't mutate the parent's compressor (#42449).
|
||||
# Copy can fail for engines holding uncopyable state (locks, DB
|
||||
# connections, clients); in that case fall back to the built-in
|
||||
# compressor with an ACCURATE message rather than silently
|
||||
# mislabelling it "not found".
|
||||
import copy
|
||||
try:
|
||||
_selected_engine = copy.deepcopy(_candidate)
|
||||
except Exception as _copy_err:
|
||||
_copy_failed = True
|
||||
_ra().logger.warning(
|
||||
"Context engine '%s' could not be safely copied for this "
|
||||
"agent (%s) — falling back to built-in compressor. Plugin "
|
||||
"engines that hold uncopyable state (locks, DB connections) "
|
||||
"should implement __deepcopy__ to copy only mutable budget "
|
||||
"state.",
|
||||
_engine_name, _copy_err,
|
||||
)
|
||||
_selected_engine = None
|
||||
|
||||
if _selected_engine is None:
|
||||
if _selected_engine is None and not _copy_failed:
|
||||
_ra().logger.warning(
|
||||
"Context engine '%s' not found — falling back to built-in compressor",
|
||||
_engine_name,
|
||||
@@ -1621,16 +1663,27 @@ def init_agent(
|
||||
for t in agent.tools
|
||||
if isinstance(t, dict)
|
||||
}
|
||||
for _schema in agent.context_compressor.get_tool_schemas():
|
||||
_tname = _schema.get("name", "")
|
||||
if _tname and _tname in _existing_tool_names:
|
||||
from agent.memory_manager import normalize_tool_schema as _normalize_tool_schema
|
||||
for _raw_schema in agent.context_compressor.get_tool_schemas():
|
||||
_schema = _normalize_tool_schema(_raw_schema)
|
||||
if _schema is None:
|
||||
# A schema with no resolvable name (e.g. an already-wrapped
|
||||
# entry) would append a nameless tool that strict providers
|
||||
# 400 on, disabling the whole toolset (#47707). Skip it.
|
||||
_ra().logger.warning(
|
||||
"Context engine returned a tool schema with no resolvable "
|
||||
"name; skipping to avoid poisoning the request (%r)",
|
||||
_raw_schema,
|
||||
)
|
||||
continue
|
||||
_tname = _schema["name"]
|
||||
if _tname in _existing_tool_names:
|
||||
continue # already registered via plugin/cache path
|
||||
_wrapped = {"type": "function", "function": _schema}
|
||||
agent.tools.append(_wrapped)
|
||||
if _tname:
|
||||
agent.valid_tool_names.add(_tname)
|
||||
agent._context_engine_tool_names.add(_tname)
|
||||
_existing_tool_names.add(_tname)
|
||||
agent.valid_tool_names.add(_tname)
|
||||
agent._context_engine_tool_names.add(_tname)
|
||||
_existing_tool_names.add(_tname)
|
||||
|
||||
# Notify context engine of session start
|
||||
if hasattr(agent, "context_compressor") and agent.context_compressor:
|
||||
|
||||
@@ -1697,6 +1697,27 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
||||
old_model, old_provider, new_model, new_provider,
|
||||
)
|
||||
|
||||
# ── Persist billing route to session DB ──
|
||||
# The agent's _session_db / session_id may not be set in all contexts
|
||||
# (tests, bare agents without a session DB, etc.). This ensures the
|
||||
# dashboard Model cards show the actual provider after a mid-session
|
||||
# /model switch instead of the stale session-creation provider.
|
||||
# See #48248 for the full bug description.
|
||||
_session_db = getattr(agent, "_session_db", None)
|
||||
_session_id = getattr(agent, "session_id", None)
|
||||
if _session_db is not None and _session_id:
|
||||
try:
|
||||
_session_db.update_session_billing_route(
|
||||
_session_id,
|
||||
provider=agent.provider,
|
||||
base_url=agent.base_url,
|
||||
billing_mode=getattr(agent, "api_mode", None),
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to persist billing route after model switch",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
|
||||
def invoke_tool(agent, function_name: str, function_args: dict, effective_task_id: str,
|
||||
|
||||
@@ -1297,7 +1297,15 @@ def run_oauth_setup_token() -> Optional[str]:
|
||||
# Stores credentials in ~/.hermes/.anthropic_oauth.json (our own file).
|
||||
|
||||
_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
|
||||
_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
|
||||
# Anthropic migrated the OAuth token endpoint to platform.claude.com;
|
||||
# console.anthropic.com now 404s. Callers should iterate _OAUTH_TOKEN_URLS
|
||||
# (new host first, console fallback). _OAUTH_TOKEN_URL is kept as the primary
|
||||
# for backward compatibility with existing imports and now points at the live host.
|
||||
_OAUTH_TOKEN_URLS = [
|
||||
"https://platform.claude.com/v1/oauth/token",
|
||||
"https://console.anthropic.com/v1/oauth/token",
|
||||
]
|
||||
_OAUTH_TOKEN_URL = _OAUTH_TOKEN_URLS[0]
|
||||
_OAUTH_REDIRECT_URI = "https://console.anthropic.com/oauth/code/callback"
|
||||
_OAUTH_SCOPES = "org:create_api_key user:profile user:inference"
|
||||
_HERMES_OAUTH_FILE = get_hermes_home() / ".anthropic_oauth.json"
|
||||
@@ -1395,18 +1403,34 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
|
||||
"code_verifier": verifier,
|
||||
}).encode()
|
||||
|
||||
req = urllib.request.Request(
|
||||
_OAUTH_TOKEN_URL,
|
||||
data=exchange_data,
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
|
||||
},
|
||||
method="POST",
|
||||
)
|
||||
# Anthropic migrated the OAuth token endpoint to platform.claude.com;
|
||||
# console.anthropic.com now 404s. Try the new host first, then fall
|
||||
# back to console for older deployments (mirrors the refresh path).
|
||||
result = None
|
||||
last_error = None
|
||||
for endpoint in _OAUTH_TOKEN_URLS:
|
||||
req = urllib.request.Request(
|
||||
endpoint,
|
||||
data=exchange_data,
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
|
||||
},
|
||||
method="POST",
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
result = json.loads(resp.read().decode())
|
||||
break
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
logger.debug("Anthropic token exchange failed at %s: %s", endpoint, exc)
|
||||
continue
|
||||
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
result = json.loads(resp.read().decode())
|
||||
if result is None:
|
||||
raise last_error if last_error is not None else ValueError(
|
||||
"Anthropic token exchange failed"
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Token exchange failed: {e}")
|
||||
return None
|
||||
|
||||
@@ -101,6 +101,7 @@ class _OpenAIProxy:
|
||||
OpenAI = _OpenAIProxy() # module-level name, resolves lazily on call/isinstance
|
||||
|
||||
from agent.credential_pool import load_pool
|
||||
from agent.model_metadata import MINIMUM_CONTEXT_LENGTH, get_model_context_length
|
||||
from hermes_cli.config import get_hermes_home
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from utils import base_url_host_matches, base_url_hostname, env_float, model_forces_max_completion_tokens, normalize_proxy_env_vars
|
||||
@@ -2470,7 +2471,7 @@ def _is_payment_error(exc: Exception) -> bool:
|
||||
# but sometimes wrap them in 429 or other codes.
|
||||
# Daily quota exhaustion from Bedrock, Vertex AI, and similar providers
|
||||
# uses different language but is semantically identical to credit exhaustion.
|
||||
if status in {402, 404, 429, None}:
|
||||
if status in {402, 403, 404, 429, None}:
|
||||
if any(kw in err_lower for kw in (
|
||||
"credits", "insufficient funds",
|
||||
"can only afford", "billing",
|
||||
@@ -2479,6 +2480,8 @@ def _is_payment_error(exc: Exception) -> bool:
|
||||
"balance_depleted", "no usable credits",
|
||||
"model_not_supported_on_free_tier",
|
||||
"not available on the free tier",
|
||||
"requires a subscription", "upgrade for access",
|
||||
"upgrade for higher limits", "reached your session usage limit",
|
||||
# Daily / monthly / weekly quota exhaustion keywords
|
||||
"quota exceeded", "quota_exceeded",
|
||||
"too many tokens per day", "daily limit",
|
||||
@@ -2697,6 +2700,60 @@ def _is_model_not_found_error(exc: Exception) -> bool:
|
||||
))
|
||||
|
||||
|
||||
def _is_model_incompatible_error(exc: Exception) -> bool:
|
||||
"""Detect "this route cannot serve this model" 400s (capability mismatch).
|
||||
|
||||
Distinct from :func:`_is_model_not_found_error` (the model does not exist
|
||||
anywhere): here the model name is valid but the *current provider/account*
|
||||
is structurally unable to run it. The canonical case is a configured
|
||||
fallback that cannot run the main model — e.g. an ``openai-codex`` /
|
||||
ChatGPT-account fallback asked to compress a ``glm-5.2`` conversation::
|
||||
|
||||
Error code: 400 - {'detail': "The 'glm-5.2' model is not supported
|
||||
when using Codex with a ChatGPT account."}
|
||||
|
||||
The candidate authenticates fine and builds a client, so the auth and
|
||||
payment predicates don't fire and the call would otherwise raise and
|
||||
abort the whole auxiliary task (commonly compression — which then drops
|
||||
middle turns and churns the session, destroying the prompt cache).
|
||||
Treating it as a fallback-worthy capability error lets the chain skip the
|
||||
incapable route and continue to the next candidate, mirroring the
|
||||
context-window feasibility screen (#52392).
|
||||
|
||||
Billing/quota 400s belong to :func:`_is_payment_error`; "model does not
|
||||
exist" 400s belong to :func:`_is_model_not_found_error`. This predicate
|
||||
explicitly excludes both so the three don't overlap.
|
||||
"""
|
||||
status = getattr(exc, "status_code", None)
|
||||
if status not in {400, None}:
|
||||
return False
|
||||
err_lower = str(exc).lower()
|
||||
# Not-found 400s ("invalid model ID", "model does not exist") are owned by
|
||||
# _is_model_not_found_error. Billing/free-tier 400s are owned by the
|
||||
# payment path — key on the billing keywords directly here rather than
|
||||
# calling _is_payment_error(), because that predicate is status-gated
|
||||
# ({402,403,404,429,None}) and would not recognise a 400-coded billing
|
||||
# body, letting it leak into this capability bucket.
|
||||
if _is_model_not_found_error(exc):
|
||||
return False
|
||||
if any(kw in err_lower for kw in (
|
||||
"credits", "insufficient funds", "billing", "out of funds",
|
||||
"balance_depleted", "no usable credits", "payment required",
|
||||
"free tier", "free-tier", "not available on the free tier",
|
||||
"model_not_supported_on_free_tier", "quota",
|
||||
)):
|
||||
return False
|
||||
return any(kw in err_lower for kw in (
|
||||
"is not supported when using", # codex/ChatGPT-account model gating
|
||||
"model is not supported",
|
||||
"not supported with this",
|
||||
"not supported for this account",
|
||||
"model_not_supported",
|
||||
"does not support this model",
|
||||
"unsupported model",
|
||||
))
|
||||
|
||||
|
||||
def _evict_cached_clients(provider: str) -> None:
|
||||
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
@@ -3147,6 +3204,88 @@ def _try_main_agent_model_fallback(
|
||||
return client, resolved_model or main_model, label
|
||||
|
||||
|
||||
# ── Context-window screening for runtime fallback chains (issue #52392) ──
|
||||
#
|
||||
# When the runtime auxiliary fallback chain selects a candidate that is
|
||||
# reachable but has a context window smaller than the compression task
|
||||
# requires, the call errors out instead of continuing to the next, viable
|
||||
# candidate. The startup feasibility check in
|
||||
# ``agent.conversation_compression.check_compression_model_feasibility``
|
||||
# already filters too-small auxiliary models at startup, but the runtime
|
||||
# fallback chain (``_try_configured_fallback_chain`` and
|
||||
# ``_try_main_fallback_chain``) does not apply the same filter, so
|
||||
# compression can stop at the first alive door even if the room behind it
|
||||
# is too small.
|
||||
#
|
||||
# The helpers below screen each candidate by its effective context window
|
||||
# before it is returned. ``None`` results from ``get_model_context_length``
|
||||
# are passed through (we cannot prove a model is too small, so we do not
|
||||
# block it). This preserves the existing fallback surface for
|
||||
# unrecognised/custom models while closing the gap on the well-known ones.
|
||||
|
||||
def _task_minimum_context_length(task: Optional[str]) -> Optional[int]:
|
||||
"""Return the minimum context length required for an auxiliary task.
|
||||
|
||||
Only ``compression`` carries an explicit minimum today (the same
|
||||
``MINIMUM_CONTEXT_LENGTH`` (64K) floor that
|
||||
``check_compression_model_feasibility`` already enforces at startup).
|
||||
Other tasks (``vision``, ``title_generation``, ``web_extract``,
|
||||
``skills_hub``, ``mcp``, ``session_search``) return ``None`` — they
|
||||
have no per-task context floor and the runtime chain must remain
|
||||
permissive for them.
|
||||
|
||||
Returns ``None`` for an empty/``None`` task name so the helper is a
|
||||
safe no-op when called from generic sites.
|
||||
"""
|
||||
if not task:
|
||||
return None
|
||||
if task == "compression":
|
||||
return MINIMUM_CONTEXT_LENGTH
|
||||
return None
|
||||
|
||||
|
||||
def _candidate_context_window(
|
||||
provider: str,
|
||||
model: str,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
) -> Optional[int]:
|
||||
"""Resolve the effective context window for a fallback candidate.
|
||||
|
||||
Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
|
||||
that swallows probe failures (returns ``None``). Callers treat
|
||||
``None`` as "unknown — pass through" so the existing fallback
|
||||
surface is preserved when the context-length resolver chain cannot
|
||||
determine a value (custom endpoints, models not in the registry,
|
||||
offline endpoints).
|
||||
|
||||
Best-effort, never raises — the runtime fallback chain must keep
|
||||
moving even if the resolver hits a probe error.
|
||||
"""
|
||||
if not model:
|
||||
return None
|
||||
try:
|
||||
ctx = get_model_context_length(
|
||||
model,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
provider=provider,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"Auxiliary fallback: could not resolve context window for %s/%s: %s",
|
||||
provider, model, exc,
|
||||
)
|
||||
return None
|
||||
# ``get_model_context_length`` returns an int (with a 256K default
|
||||
# fallback when nothing else matches). We still propagate ``None`` if
|
||||
# a future change returns ``Optional[int]`` — being explicit is
|
||||
# cheap and the test suite covers both shapes.
|
||||
if isinstance(ctx, int) and ctx > 0:
|
||||
return ctx
|
||||
return None
|
||||
|
||||
|
||||
def _try_configured_fallback_chain(
|
||||
task: str,
|
||||
failed_provider: str,
|
||||
@@ -3171,6 +3310,7 @@ def _try_configured_fallback_chain(
|
||||
|
||||
skip = failed_provider.lower().strip()
|
||||
tried = []
|
||||
min_ctx = _task_minimum_context_length(task)
|
||||
|
||||
for i, entry in enumerate(chain):
|
||||
if not isinstance(entry, dict):
|
||||
@@ -3188,6 +3328,20 @@ def _try_configured_fallback_chain(
|
||||
fb_client, resolved_model = None, None
|
||||
|
||||
if fb_client is not None:
|
||||
if min_ctx is not None and resolved_model:
|
||||
fb_ctx = _candidate_context_window(
|
||||
fb_provider,
|
||||
resolved_model,
|
||||
base_url=str(entry.get("base_url") or ""),
|
||||
api_key=_fallback_entry_api_key(entry) or "",
|
||||
)
|
||||
if fb_ctx is not None and fb_ctx < min_ctx:
|
||||
logger.info(
|
||||
"Auxiliary %s: skipping %s (%s context=%d < min=%d), continuing chain",
|
||||
task, label, resolved_model, fb_ctx, min_ctx,
|
||||
)
|
||||
tried.append(f"{label} (context too small: {fb_ctx}<{min_ctx})")
|
||||
continue
|
||||
logger.info(
|
||||
"Auxiliary %s: %s on %s — configured fallback to %s (%s)",
|
||||
task, reason, failed_provider, label, resolved_model or fb_model or "default",
|
||||
@@ -3203,6 +3357,28 @@ def _try_configured_fallback_chain(
|
||||
return None, None, ""
|
||||
|
||||
|
||||
def _try_configured_fallback_for_unavailable_client(
|
||||
task: Optional[str],
|
||||
failed_provider: str,
|
||||
) -> Tuple[Optional[Any], Optional[str], str]:
|
||||
"""Try task fallback_chain when an explicit aux provider cannot build.
|
||||
|
||||
This covers the "no client" case before any request is sent: missing
|
||||
raw env key, unavailable OAuth/pool credentials, or provider resolver
|
||||
returning ``(None, None)``. It deliberately stops at the configured
|
||||
per-task fallback chain; the main-agent model remains the last-resort
|
||||
runtime fallback for request-time capacity errors.
|
||||
"""
|
||||
explicit = (failed_provider or "").strip().lower()
|
||||
if not task or not explicit or explicit in {"auto"}:
|
||||
return None, None, ""
|
||||
return _try_configured_fallback_chain(
|
||||
task,
|
||||
explicit,
|
||||
reason="provider unavailable",
|
||||
)
|
||||
|
||||
|
||||
def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]:
|
||||
"""Resolve inline or env-backed API key from a fallback-chain entry."""
|
||||
explicit = str(entry.get("api_key") or "").strip()
|
||||
@@ -3261,6 +3437,7 @@ def _try_main_fallback_chain(
|
||||
main_norm = (_read_main_provider() or "").strip().lower()
|
||||
skip = {p for p in (failed_norm, main_norm, "auto") if p}
|
||||
tried: List[str] = []
|
||||
min_ctx = _task_minimum_context_length(task)
|
||||
|
||||
for i, entry in enumerate(chain):
|
||||
if not isinstance(entry, dict):
|
||||
@@ -3284,6 +3461,20 @@ def _try_main_fallback_chain(
|
||||
logger.debug("Auxiliary %s: main fallback %s failed to resolve: %s", task or "call", label, exc)
|
||||
fb_client, resolved_model = None, None
|
||||
if fb_client is not None:
|
||||
if min_ctx is not None:
|
||||
fb_ctx = _candidate_context_window(
|
||||
fb_provider,
|
||||
resolved_model or fb_model,
|
||||
base_url=str(entry.get("base_url") or ""),
|
||||
api_key=_fallback_entry_api_key(entry) or "",
|
||||
)
|
||||
if fb_ctx is not None and fb_ctx < min_ctx:
|
||||
logger.info(
|
||||
"Auxiliary %s: skipping %s (context=%d < min=%d), continuing chain",
|
||||
task or "call", label, fb_ctx, min_ctx,
|
||||
)
|
||||
tried.append(f"{label} (context too small: {fb_ctx}<{min_ctx})")
|
||||
continue
|
||||
logger.info(
|
||||
"Auxiliary %s: %s on %s — main fallback chain to %s (%s)",
|
||||
task or "call", reason, failed_provider or "auto", label,
|
||||
@@ -5344,21 +5535,30 @@ def call_llm(
|
||||
)
|
||||
if client is None:
|
||||
# When the user explicitly chose a non-OpenRouter provider but no
|
||||
# credentials were found, fail fast instead of silently routing
|
||||
# through OpenRouter (which causes confusing 404s).
|
||||
# credentials were found, honor the task fallback_chain before
|
||||
# raising. Missing raw env keys are recoverable for auxiliary
|
||||
# tasks because fallback entries may use OAuth / credential-pool
|
||||
# auth (for example openai-codex).
|
||||
_explicit = (resolved_provider or "").strip().lower()
|
||||
if _explicit and _explicit not in {"auto", "openrouter", "custom"}:
|
||||
raise RuntimeError(
|
||||
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
||||
f"was found. Set the {_explicit.upper()}_API_KEY environment "
|
||||
f"variable, or switch to a different provider with `hermes model`."
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client(
|
||||
task, _explicit,
|
||||
)
|
||||
if fb_client is not None:
|
||||
client, final_model = fb_client, fb_model
|
||||
resolved_provider = fb_label or resolved_provider
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
||||
f"was found. Set the {_explicit.upper()}_API_KEY environment "
|
||||
f"variable, or switch to a different provider with `hermes model`."
|
||||
)
|
||||
# For auto/custom with no credentials, try the full auto chain
|
||||
# rather than hardcoding OpenRouter (which may be depleted).
|
||||
# Pass model=None so each provider uses its own default —
|
||||
# resolved_model may be an OpenRouter-format slug that doesn't
|
||||
# work on other providers.
|
||||
if not resolved_base_url:
|
||||
if client is None and not resolved_base_url:
|
||||
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
|
||||
task or "call", resolved_provider)
|
||||
client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task)
|
||||
@@ -5657,6 +5857,7 @@ def call_llm(
|
||||
_is_payment_error(first_err)
|
||||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
)
|
||||
# Respect explicit provider choice for transient errors (auth, request
|
||||
# validation, etc.) but allow fallback when the provider clearly cannot
|
||||
@@ -5667,7 +5868,19 @@ def call_llm(
|
||||
is_auto = resolved_provider in {"auto", "", None}
|
||||
# Capacity errors bypass the explicit-provider gate: the provider
|
||||
# literally cannot serve this request regardless of user intent.
|
||||
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
# Rate limits are included: after retries are exhausted, a 429 means
|
||||
# the provider cannot serve this request — fall back. See #52228.
|
||||
# Model-incompatibility 400s are also a hard capability mismatch (the
|
||||
# route cannot run this model at all — e.g. a codex/ChatGPT-account
|
||||
# fallback asked to compress a glm-5.2 conversation), so they bypass
|
||||
# the explicit-provider gate and continue to the next candidate
|
||||
# instead of aborting the auxiliary task and churning the session.
|
||||
is_capacity_error = (
|
||||
_is_payment_error(first_err)
|
||||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
)
|
||||
if should_fallback and (is_auto or is_capacity_error):
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
@@ -5680,6 +5893,8 @@ def call_llm(
|
||||
)
|
||||
elif _is_rate_limit_error(first_err):
|
||||
reason = "rate limit"
|
||||
elif _is_model_incompatible_error(first_err):
|
||||
reason = "model incompatible with route"
|
||||
else:
|
||||
reason = "connection error"
|
||||
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
|
||||
@@ -5854,12 +6069,21 @@ async def async_call_llm(
|
||||
if client is None:
|
||||
_explicit = (resolved_provider or "").strip().lower()
|
||||
if _explicit and _explicit not in {"auto", "openrouter", "custom"}:
|
||||
raise RuntimeError(
|
||||
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
||||
f"was found. Set the {_explicit.upper()}_API_KEY environment "
|
||||
f"variable, or switch to a different provider with `hermes model`."
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client(
|
||||
task, _explicit,
|
||||
)
|
||||
if not resolved_base_url:
|
||||
if fb_client is not None:
|
||||
client, final_model = _to_async_client(
|
||||
fb_client, fb_model or "", is_vision=(task == "vision")
|
||||
)
|
||||
resolved_provider = fb_label or resolved_provider
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
||||
f"was found. Set the {_explicit.upper()}_API_KEY environment "
|
||||
f"variable, or switch to a different provider with `hermes model`."
|
||||
)
|
||||
if client is None and not resolved_base_url:
|
||||
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
|
||||
task or "call", resolved_provider)
|
||||
client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task)
|
||||
@@ -6109,12 +6333,22 @@ async def async_call_llm(
|
||||
_is_payment_error(first_err)
|
||||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
)
|
||||
# Capacity errors (payment/quota/connection) bypass the explicit-provider
|
||||
# gate — the provider cannot serve the request regardless of user intent.
|
||||
# Capacity errors (payment/quota/connection/rate-limit) bypass the
|
||||
# explicit-provider gate — the provider cannot serve the request
|
||||
# regardless of user intent. Rate limits are included: after retries
|
||||
# are exhausted, a 429 means the provider is at capacity. See #52228.
|
||||
# See #26803: daily token quota must fall back like a 402 credit error.
|
||||
# Model-incompatibility 400s (route cannot run this model at all)
|
||||
# bypass the gate too — see the sync call_llm() path for rationale.
|
||||
is_auto = resolved_provider in {"auto", "", None}
|
||||
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
is_capacity_error = (
|
||||
_is_payment_error(first_err)
|
||||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
)
|
||||
if should_fallback and (is_auto or is_capacity_error):
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
@@ -6123,6 +6357,8 @@ async def async_call_llm(
|
||||
)
|
||||
elif _is_rate_limit_error(first_err):
|
||||
reason = "rate limit"
|
||||
elif _is_model_incompatible_error(first_err):
|
||||
reason = "model incompatible with route"
|
||||
else:
|
||||
reason = "connection error"
|
||||
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
|
||||
|
||||
@@ -27,6 +27,131 @@ from typing import Any, Dict, List, Optional
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Background-review aux-model selector + routed digest.
|
||||
#
|
||||
# The review fork runs on the MAIN model by default ("auto"), replaying the
|
||||
# full conversation — already warm in the prompt cache, so cheap cache reads.
|
||||
# Optimal and unchanged. A user can route the review to a different, cheaper
|
||||
# model via auxiliary.background_review.{provider,model}. A different model
|
||||
# cannot reuse the parent's cache (different key), so the fork is cold
|
||||
# regardless — replaying the full transcript would just cold-write it. So when
|
||||
# (and only when) routed to a different model, we replay a compact DIGEST to
|
||||
# minimise cold-written tokens. Same model -> full replay; different model ->
|
||||
# digest. That's the whole policy.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _resolve_review_runtime(agent: Any) -> Dict[str, Any]:
|
||||
"""Resolve provider/model/credentials for the review fork.
|
||||
|
||||
Default (auto / unset / same as parent): inherit the parent's live runtime
|
||||
(with codex_app_server -> codex_responses downgrade). ``routed`` is False —
|
||||
the fork uses the main model and the warm cache, exactly as before. When
|
||||
``auxiliary.background_review.{provider,model}`` names a concrete model
|
||||
different from the parent's, resolve that runtime and set ``routed=True``.
|
||||
"""
|
||||
parent_runtime = agent._current_main_runtime()
|
||||
parent_api_mode = parent_runtime.get("api_mode") or None
|
||||
if parent_api_mode == "codex_app_server":
|
||||
parent_api_mode = "codex_responses"
|
||||
parent = {
|
||||
"provider": agent.provider,
|
||||
"model": agent.model,
|
||||
"api_key": parent_runtime.get("api_key") or None,
|
||||
"base_url": parent_runtime.get("base_url") or None,
|
||||
"api_mode": parent_api_mode,
|
||||
"routed": False,
|
||||
}
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
except Exception:
|
||||
return parent
|
||||
aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {}
|
||||
task = aux.get("background_review", {}) if isinstance(aux.get("background_review"), dict) else {}
|
||||
task_provider = (str(task.get("provider", "")).strip() or None)
|
||||
task_model = (str(task.get("model", "")).strip() or None)
|
||||
task_base_url = (str(task.get("base_url", "")).strip() or None)
|
||||
task_api_key = (str(task.get("api_key", "")).strip() or None)
|
||||
if not (task_provider and task_provider != "auto" and task_model):
|
||||
return parent
|
||||
if task_provider == (agent.provider or "") and task_model == (agent.model or ""):
|
||||
return parent # same model/provider as parent -> not routed
|
||||
try:
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
rp = resolve_runtime_provider(
|
||||
requested=task_provider,
|
||||
target_model=task_model,
|
||||
explicit_api_key=task_api_key,
|
||||
explicit_base_url=task_base_url,
|
||||
)
|
||||
return {
|
||||
"provider": rp.get("provider") or task_provider,
|
||||
"model": task_model,
|
||||
"api_key": rp.get("api_key"),
|
||||
"base_url": rp.get("base_url"),
|
||||
"api_mode": rp.get("api_mode"),
|
||||
"routed": True,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.debug("background-review aux routing failed (%s); using main model", e)
|
||||
return parent
|
||||
|
||||
|
||||
def _msg_text(m: Dict) -> str:
|
||||
c = m.get("content")
|
||||
if isinstance(c, str):
|
||||
return c.strip()
|
||||
if isinstance(c, list):
|
||||
return " ".join(b.get("text", "") for b in c if isinstance(b, dict)).strip()
|
||||
return ""
|
||||
|
||||
|
||||
def _digest_history(messages_snapshot: List[Dict], tail: int = 24) -> List[Dict]:
|
||||
"""Compact replay for the routed (different-model) path only.
|
||||
|
||||
Keeps the recent ``tail`` messages verbatim, collapses older turns into one
|
||||
synthetic user-role digest, preserving role alternation. Used ONLY when
|
||||
routed to a different model (cache cold regardless, so fewer cold-written
|
||||
tokens is a pure win). Never on the main-model path (full replay stays warm).
|
||||
"""
|
||||
msgs = list(messages_snapshot or [])
|
||||
if len(msgs) <= tail:
|
||||
return msgs
|
||||
keep = msgs[-tail:]
|
||||
while keep and isinstance(keep[0], dict) and keep[0].get("role") == "tool":
|
||||
tail += 1
|
||||
if len(msgs) <= tail:
|
||||
return msgs
|
||||
keep = msgs[-tail:]
|
||||
old = msgs[:-len(keep)]
|
||||
lines: List[str] = []
|
||||
for m in old:
|
||||
if not isinstance(m, dict):
|
||||
continue
|
||||
role = m.get("role")
|
||||
text = _msg_text(m).replace("\n", " ")
|
||||
if role == "user" and text:
|
||||
lines.append(f"USER: {text[:300]}")
|
||||
elif role == "assistant":
|
||||
tcs = m.get("tool_calls") or []
|
||||
if tcs:
|
||||
names = [(tc.get("function") or {}).get("name", "?") for tc in tcs if isinstance(tc, dict)]
|
||||
lines.append(f"ASSISTANT[tools: {', '.join(names)}]")
|
||||
if text:
|
||||
lines.append(f"ASSISTANT: {text[:200]}")
|
||||
digest = {
|
||||
"role": "user",
|
||||
"content": (
|
||||
"[Earlier conversation digest — older turns summarised to bound the "
|
||||
"review's cold-write cost on the routed aux model. Recent turns "
|
||||
"follow verbatim below.]\n" + "\n".join(lines)
|
||||
),
|
||||
}
|
||||
return [digest] + keep
|
||||
|
||||
|
||||
# Review-prompt strings — used by ``spawn_background_review_thread`` to build
|
||||
# the user-message that the forked review agent receives. AIAgent exposes
|
||||
# them as class attributes (``_MEMORY_REVIEW_PROMPT`` etc.) for back-compat;
|
||||
@@ -488,18 +613,13 @@ def _run_review_in_thread(
|
||||
# creds, or credential-pool setups where the resolver can't
|
||||
# reconstruct auth from scratch -- producing the spurious
|
||||
# "No LLM provider configured" warning at end of turn.
|
||||
_parent_runtime = agent._current_main_runtime()
|
||||
_parent_api_mode = _parent_runtime.get("api_mode") or None
|
||||
# The review fork needs to call agent-loop tools (memory,
|
||||
# skill_manage). Those tools require Hermes' own dispatch,
|
||||
# which the codex_app_server runtime bypasses entirely
|
||||
# (it runs the turn inside codex's subprocess). So when
|
||||
# the parent is on codex_app_server, downgrade the review
|
||||
# fork to codex_responses — same auth/credentials, but
|
||||
# talks to the OpenAI Responses API directly so Hermes
|
||||
# owns the loop and the agent-loop tools dispatch.
|
||||
if _parent_api_mode == "codex_app_server":
|
||||
_parent_api_mode = "codex_responses"
|
||||
# _resolve_review_runtime() returns the parent's live runtime by
|
||||
# default (routed=False; main model, warm cache), or — when the user
|
||||
# set auxiliary.background_review.{provider,model} to a different
|
||||
# model — that model's runtime (routed=True). The codex_app_server
|
||||
# -> codex_responses downgrade is applied inside the resolver.
|
||||
_rt = _resolve_review_runtime(agent)
|
||||
_routed = bool(_rt.get("routed"))
|
||||
# skip_memory=True keeps the review fork from
|
||||
# touching external memory plugins (honcho, mem0,
|
||||
# supermemory, etc.). Without it, the fork's
|
||||
@@ -519,14 +639,14 @@ def _run_review_in_thread(
|
||||
# in the request body — Anthropic's cache key includes it.
|
||||
# (The runtime whitelist below still restricts dispatch.)
|
||||
review_agent = AIAgent(
|
||||
model=agent.model,
|
||||
model=_rt.get("model") or agent.model,
|
||||
max_iterations=16,
|
||||
quiet_mode=True,
|
||||
platform=agent.platform,
|
||||
provider=agent.provider,
|
||||
api_mode=_parent_api_mode,
|
||||
base_url=_parent_runtime.get("base_url") or None,
|
||||
api_key=_parent_runtime.get("api_key") or None,
|
||||
provider=_rt.get("provider") or agent.provider,
|
||||
api_mode=_rt.get("api_mode"),
|
||||
base_url=_rt.get("base_url") or None,
|
||||
api_key=_rt.get("api_key") or None,
|
||||
credential_pool=getattr(agent, "_credential_pool", None),
|
||||
parent_session_id=agent.session_id,
|
||||
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
|
||||
@@ -565,15 +685,20 @@ def _run_review_in_thread(
|
||||
# issue #25322 and PR #17276 for the full analysis +
|
||||
# measured impact (~26% end-to-end cost reduction on
|
||||
# Sonnet 4.5).
|
||||
review_agent._cached_system_prompt = agent._cached_system_prompt
|
||||
# Defensive: pin session_start + session_id to the
|
||||
# parent's so any code path that re-renders parts of
|
||||
# the system prompt (compression, plugin hooks) still
|
||||
# produces byte-identical output. The cached-prompt
|
||||
# assignment above already short-circuits the normal
|
||||
# rebuild path, but these pins guarantee parity even
|
||||
# if a future code path bypasses the cache.
|
||||
review_agent.session_start = agent.session_start
|
||||
# Share the parent's warm cached system prompt ONLY when the review
|
||||
# runs on the SAME model (not routed). When routed to a different
|
||||
# model the parent's cached prompt is for the wrong model/cache key
|
||||
# and would miss anyway, so let the routed fork build its own.
|
||||
if not _routed:
|
||||
review_agent._cached_system_prompt = agent._cached_system_prompt
|
||||
# Defensive: pin session_start + session_id to the
|
||||
# parent's so any code path that re-renders parts of
|
||||
# the system prompt (compression, plugin hooks) still
|
||||
# produces byte-identical output. The cached-prompt
|
||||
# assignment above already short-circuits the normal
|
||||
# rebuild path, but these pins guarantee parity even
|
||||
# if a future code path bypasses the cache.
|
||||
review_agent.session_start = agent.session_start
|
||||
review_agent.session_id = agent.session_id
|
||||
# The fork shares the parent's live session_id (pinned above for
|
||||
# prefix-cache parity). It is single-lifecycle and calls close()
|
||||
@@ -615,6 +740,13 @@ def _run_review_in_thread(
|
||||
),
|
||||
)
|
||||
try:
|
||||
# Routed to a different model -> replay a digest (cache is cold
|
||||
# on that model anyway, so minimise cold-written tokens). Same
|
||||
# model -> replay the full snapshot (warm cache reads).
|
||||
_review_history = (
|
||||
_digest_history(messages_snapshot) if _routed
|
||||
else messages_snapshot
|
||||
)
|
||||
review_agent.run_conversation(
|
||||
user_message=(
|
||||
prompt
|
||||
@@ -622,7 +754,7 @@ def _run_review_in_thread(
|
||||
"management tools. Other tools will be denied "
|
||||
"at runtime — do not attempt them."
|
||||
),
|
||||
conversation_history=messages_snapshot,
|
||||
conversation_history=_review_history,
|
||||
)
|
||||
finally:
|
||||
clear_thread_tool_whitelist()
|
||||
|
||||
@@ -83,6 +83,59 @@ _PROJECT_MARKERS = (
|
||||
# Agent-instruction files surfaced separately from manifests in the snapshot.
|
||||
_CONTEXT_FILES = ("AGENTS.md", "CLAUDE.md", ".cursorrules")
|
||||
|
||||
# Source-file extensions that make a git repo a *code* workspace even with no
|
||||
# manifest. Without this, `git init` on a notes/writing/research folder (a huge
|
||||
# non-coding use case) would flip the whole session into the coding posture just
|
||||
# for having a `.git`. A manifest still wins on its own (see `_PROJECT_MARKERS`).
|
||||
_CODE_EXTENSIONS = frozenset({
|
||||
".py", ".pyi", ".ipynb", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs",
|
||||
".go", ".rs", ".java", ".kt", ".kts", ".scala", ".rb", ".php", ".c", ".h",
|
||||
".cc", ".cpp", ".hpp", ".cs", ".swift", ".m", ".mm", ".dart", ".ex", ".exs",
|
||||
".lua", ".sh", ".bash", ".zsh", ".sql", ".vue", ".svelte", ".r", ".jl",
|
||||
".hs", ".clj", ".erl", ".pl",
|
||||
})
|
||||
|
||||
# Dirs never worth scanning for the code check (deps/build/vcs/venv noise).
|
||||
_CODE_SCAN_SKIP_DIRS = frozenset({
|
||||
".git", "node_modules", "venv", ".venv", "__pycache__", "dist", "build",
|
||||
"target", ".next", ".turbo", "vendor",
|
||||
})
|
||||
|
||||
# Bounded sweep: a code workspace reveals itself in the first handful of entries.
|
||||
_CODE_SCAN_MAX_ENTRIES = 500
|
||||
|
||||
|
||||
def _has_code_files(root: Path) -> bool:
|
||||
"""Cheap, bounded check for source files in a repo's top two levels.
|
||||
|
||||
Lets a git repo of loose scripts (no manifest) still read as a code
|
||||
workspace while a bare notes/writing repo does not. Scans the root and its
|
||||
immediate subdirectories only, capped at ``_CODE_SCAN_MAX_ENTRIES`` stats —
|
||||
a handful of readdirs at session start, not a full walk.
|
||||
"""
|
||||
seen = 0
|
||||
stack = [(root, True)]
|
||||
while stack:
|
||||
directory, is_root = stack.pop()
|
||||
try:
|
||||
with os.scandir(directory) as entries:
|
||||
for entry in entries:
|
||||
seen += 1
|
||||
if seen > _CODE_SCAN_MAX_ENTRIES:
|
||||
return False
|
||||
name = entry.name
|
||||
try:
|
||||
if entry.is_file():
|
||||
if os.path.splitext(name)[1].lower() in _CODE_EXTENSIONS:
|
||||
return True
|
||||
elif is_root and entry.is_dir() and name not in _CODE_SCAN_SKIP_DIRS and not name.startswith("."):
|
||||
stack.append((Path(entry.path), False))
|
||||
except OSError:
|
||||
continue
|
||||
except OSError:
|
||||
continue
|
||||
return False
|
||||
|
||||
# Lockfile → package manager, checked in priority order.
|
||||
_PY_LOCKFILES = (("uv.lock", "uv"), ("poetry.lock", "poetry"), ("Pipfile.lock", "pipenv"))
|
||||
_JS_LOCKFILES = (
|
||||
@@ -368,10 +421,16 @@ def _detect_profile_name(mode: str, platform: str, cwd_str: str) -> str:
|
||||
if platform and platform.strip().lower() not in INTERACTIVE_CODING_PLATFORMS:
|
||||
return GENERAL_PROFILE.name
|
||||
cwd = Path(cwd_str)
|
||||
# A recognized project root (manifest / AGENTS.md / .cursorrules) is a code
|
||||
# workspace on its own — cheap stat checks, no scan.
|
||||
if _marker_root(cwd) is not None:
|
||||
return CODING_PROFILE.name
|
||||
git_root = _git_root(cwd)
|
||||
if git_root is not None and git_root == _home():
|
||||
git_root = None # dotfiles repo at $HOME — not a code workspace
|
||||
if git_root is not None or _marker_root(cwd) is not None:
|
||||
# A bare git repo only counts when it actually holds code, so `git init` on a
|
||||
# notes/writing/research folder stays in the general posture.
|
||||
if git_root is not None and _has_code_files(git_root):
|
||||
return CODING_PROFILE.name
|
||||
return GENERAL_PROFILE.name
|
||||
|
||||
@@ -635,25 +694,32 @@ def _read_small(path: Path) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
def _project_facts(root: Path) -> list[str]:
|
||||
"""Detected project facts for the workspace snapshot.
|
||||
@dataclass(frozen=True)
|
||||
class ProjectFacts:
|
||||
"""Structured project facts — the model's verify loop, detected once.
|
||||
|
||||
The point is to hand the model its *verify loop* up front — which manifest,
|
||||
which package manager, and the exact test/lint/build commands — instead of
|
||||
making it rediscover them every session. Cheap: stat calls plus reads of a
|
||||
couple of small files; built once at prompt-build time (cache-safe).
|
||||
The same data that feeds the workspace snapshot, exposed structurally so
|
||||
non-prompt consumers (e.g. the desktop verify UI) read it instead of
|
||||
re-detecting and drifting from the prompt.
|
||||
"""
|
||||
facts: list[str] = []
|
||||
|
||||
manifests: list[str]
|
||||
package_managers: list[str]
|
||||
verify_commands: list[str]
|
||||
context_files: list[str]
|
||||
|
||||
|
||||
def detect_project_facts(root: Path) -> ProjectFacts:
|
||||
"""Detect manifests, package manager(s), verify commands, and context files.
|
||||
|
||||
Cheap: stat calls plus reads of a couple of small files. The single source
|
||||
of truth for both the prompt snapshot (:func:`_project_facts`) and the
|
||||
gateway's ``project.facts`` — so the UI never re-sniffs verify commands.
|
||||
"""
|
||||
manifests = [m for m in _PROJECT_MARKERS if m not in _CONTEXT_FILES and (root / m).is_file()]
|
||||
package_managers = [
|
||||
pm for lock, pm in (*_PY_LOCKFILES, *_JS_LOCKFILES) if (root / lock).is_file()
|
||||
]
|
||||
if manifests:
|
||||
line = f"- Project: {', '.join(manifests[:6])}"
|
||||
if package_managers:
|
||||
line += f" ({'/'.join(dict.fromkeys(package_managers))})"
|
||||
facts.append(line)
|
||||
package_managers = list(
|
||||
dict.fromkeys(pm for lock, pm in (*_PY_LOCKFILES, *_JS_LOCKFILES) if (root / lock).is_file())
|
||||
)
|
||||
|
||||
verify: list[str] = []
|
||||
if (root / "scripts" / "run_tests.sh").is_file():
|
||||
@@ -673,17 +739,61 @@ def _project_facts(root: Path) -> list[str]:
|
||||
f"make {name}" for name in _VERIFY_TARGETS
|
||||
if re.search(rf"^{re.escape(name)}\s*:", makefile, re.MULTILINE)
|
||||
)
|
||||
if verify:
|
||||
deduped = list(dict.fromkeys(verify))[:_MAX_VERIFY_COMMANDS]
|
||||
facts.append(f"- Verify: {'; '.join(deduped)}")
|
||||
|
||||
context_files = [c for c in _CONTEXT_FILES if (root / c).is_file()]
|
||||
if context_files:
|
||||
facts.append(f"- Context files: {', '.join(context_files)}")
|
||||
return ProjectFacts(
|
||||
manifests=manifests,
|
||||
package_managers=package_managers,
|
||||
verify_commands=list(dict.fromkeys(verify))[:_MAX_VERIFY_COMMANDS],
|
||||
context_files=[c for c in _CONTEXT_FILES if (root / c).is_file()],
|
||||
)
|
||||
|
||||
|
||||
def _project_facts(root: Path) -> list[str]:
    """Format the detected project facts as bullet lines for the workspace snapshot.

    Calls :func:`detect_project_facts` once and renders up to three lines:
    the manifests (first six, with the package managers in parentheses when
    any were found), the verify commands, and the context files. Sections
    with nothing detected are omitted, so the result may be empty.

    The rendered text feeds the system prompt, so its exact bytes matter for
    prompt caching — keep separators and prefixes unchanged.
    """
    detected = detect_project_facts(root)
    lines: list[str] = []

    if detected.manifests:
        shown = ", ".join(detected.manifests[:6])
        project_line = f"- Project: {shown}"
        if detected.package_managers:
            managers = "/".join(detected.package_managers)
            project_line += f" ({managers})"
        lines.append(project_line)

    if detected.verify_commands:
        commands = "; ".join(detected.verify_commands)
        lines.append(f"- Verify: {commands}")

    if detected.context_files:
        files = ", ".join(detected.context_files)
        lines.append(f"- Context files: {files}")

    return lines
|
||||
|
||||
|
||||
def project_facts_for(cwd: Optional[str | Path] = None) -> Optional[dict[str, Any]]:
    """Return project facts for ``cwd`` as a plain dict, or ``None`` when no root is found.

    The workspace root is the git root of the resolved directory, falling
    back to the marker root. When neither exists the function returns
    ``None``. Otherwise :func:`detect_project_facts` runs on that root and
    its fields are exposed under camelCase keys (``root``, ``manifests``,
    ``packageManagers``, ``verifyCommands``, ``contextFiles``) for non-prompt
    consumers.
    """
    start = _resolve_cwd(cwd)

    # Prefer the git root; only consult the marker root when git gives nothing.
    workspace_root = _git_root(start)
    if not workspace_root:
        workspace_root = _marker_root(start)
    if workspace_root is None:
        return None

    detected = detect_project_facts(workspace_root)
    payload: dict[str, Any] = {"root": str(workspace_root)}
    payload["manifests"] = detected.manifests
    payload["packageManagers"] = detected.package_managers
    payload["verifyCommands"] = detected.verify_commands
    payload["contextFiles"] = detected.context_files
    return payload
|
||||
|
||||
|
||||
def build_coding_workspace_block(cwd: Optional[str | Path] = None) -> str:
|
||||
"""Workspace snapshot for the system prompt (empty outside a workspace).
|
||||
|
||||
|
||||
@@ -890,7 +890,15 @@ class ContextCompressor(ContextEngine):
|
||||
# This is independent of the abort_on_summary_failure config flag:
|
||||
# rotating on a broken credential is never the right behavior.
|
||||
self._last_summary_auth_failure: bool = False
|
||||
# Set when summary generation ultimately fails due to a transient
|
||||
# network/connection error (httpx/httpcore connection drop, premature
|
||||
# stream close, etc.) — distinct from auth failures but treated the
|
||||
# same way by compress(): ABORT and preserve the session unchanged
|
||||
# rather than destroy the middle window for a deterministic
|
||||
# "summary unavailable" marker. Retrying once the network recovers is
|
||||
# strictly better than discarding context for a transient blip
|
||||
# (#29559, #25585). Independent of abort_on_summary_failure.
|
||||
self._last_summary_network_failure: bool = False
|
||||
# When a user-configured summary model fails and we recover by
|
||||
# retrying on the main model, record the failure so gateway /
|
||||
# CLI callers can still warn the user even though compression
|
||||
# succeeded. Silent recovery would hide the broken config.
|
||||
@@ -1687,6 +1695,7 @@ This compaction should PRIORITISE preserving all information related to the focu
|
||||
self._summary_model_fallen_back = False
|
||||
self._last_summary_error = None
|
||||
self._last_summary_auth_failure = False
|
||||
self._last_summary_network_failure = False
|
||||
return self._with_summary_prefix(summary)
|
||||
except Exception as e:
|
||||
# ``call_llm`` raises ``RuntimeError`` for two very different cases:
|
||||
@@ -1819,6 +1828,15 @@ This compaction should PRIORITISE preserving all information related to the focu
|
||||
if len(err_text) > 220:
|
||||
err_text = err_text[:217].rstrip() + "..."
|
||||
self._last_summary_error = err_text
|
||||
# A terminal connection/network failure (we reach this branch only
|
||||
# after any main-model fallback has already been tried or is
|
||||
# unavailable). Flag it so compress() ABORTS and preserves the
|
||||
# session unchanged instead of destroying the middle window for a
|
||||
# placeholder marker — retrying once the network recovers is
|
||||
# strictly better than dropping context (#29559, #25585). Mirrors
|
||||
# the auth-failure carve-out; independent of abort_on_summary_failure.
|
||||
if _is_streaming_closed:
|
||||
self._last_summary_network_failure = True
|
||||
logger.warning(
|
||||
"Failed to generate context summary: %s. "
|
||||
"Further summary attempts paused for %d seconds.",
|
||||
@@ -2382,6 +2400,7 @@ This compaction should PRIORITISE preserving all information related to the focu
|
||||
self._last_aux_model_failure_model = None
|
||||
self._last_compress_aborted = False
|
||||
self._last_summary_auth_failure = False
|
||||
self._last_summary_network_failure = False
|
||||
|
||||
# Manual /compress (force=True) bypasses the failure cooldown so the
|
||||
# user can retry immediately after an auto-compress abort. Without
|
||||
@@ -2498,15 +2517,21 @@ This compaction should PRIORITISE preserving all information related to the focu
|
||||
# surface a warning.
|
||||
# Default is False (historical behavior).
|
||||
#
|
||||
# EXCEPTION — auth failures always abort. A 401/403 from the summary
|
||||
# call means the credential or endpoint is broken (invalid/blocked
|
||||
# key, or a token pointed at the wrong inference host). Rotating into
|
||||
# EXCEPTION — auth AND transient network failures always abort. A
|
||||
# 401/403 from the summary call means the credential or endpoint is
|
||||
# broken (invalid/blocked key, or a token pointed at the wrong
|
||||
# inference host). A connection/stream-close error means the network
|
||||
# blipped at the compaction moment (#29559). In BOTH cases rotating into
|
||||
# a child session with a placeholder summary strands the user on a
|
||||
# degraded session for zero benefit — a broken credential keeps failing
|
||||
# the same way, and a network blip is better retried once it clears. So
|
||||
# when the failure was an auth or network error we abort regardless of
|
||||
# abort_on_summary_failure, preserving the conversation unchanged.
|
||||
if not summary and (self.abort_on_summary_failure or self._last_summary_auth_failure):
|
||||
if not summary and (
|
||||
self.abort_on_summary_failure
|
||||
or self._last_summary_auth_failure
|
||||
or self._last_summary_network_failure
|
||||
):
|
||||
n_skipped = compress_end - compress_start
|
||||
self._last_summary_dropped_count = 0 # nothing actually dropped
|
||||
self._last_summary_fallback_used = False
|
||||
@@ -2521,6 +2546,15 @@ This compaction should PRIORITISE preserving all information related to the focu
|
||||
"with /compress or start fresh with /new.",
|
||||
n_skipped,
|
||||
)
|
||||
elif self._last_summary_network_failure:
|
||||
logger.warning(
|
||||
"Summary generation failed with a network/connection "
|
||||
"error — aborting compression. %d message(s) preserved "
|
||||
"unchanged; the session was NOT rotated. This is "
|
||||
"transient: retry with /compress once connectivity "
|
||||
"recovers, or continue the conversation as-is.",
|
||||
n_skipped,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Summary generation failed — aborting compression "
|
||||
|
||||
@@ -90,6 +90,7 @@ def check_compression_model_feasibility(agent: Any) -> None:
|
||||
try:
|
||||
from agent.auxiliary_client import (
|
||||
_resolve_task_provider_model,
|
||||
_try_configured_fallback_for_unavailable_client,
|
||||
get_text_auxiliary_client,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
@@ -97,10 +98,6 @@ def check_compression_model_feasibility(agent: Any) -> None:
|
||||
get_model_context_length,
|
||||
)
|
||||
|
||||
client, aux_model = get_text_auxiliary_client(
|
||||
"compression",
|
||||
main_runtime=agent._current_main_runtime(),
|
||||
)
|
||||
# Best-effort aux provider label for the warning message. The
|
||||
# configured provider may be "auto", in which case we fall back
|
||||
# to the client's base_url hostname so the user can still tell
|
||||
@@ -109,6 +106,19 @@ def check_compression_model_feasibility(agent: Any) -> None:
|
||||
_aux_cfg_provider, _, _, _, _ = _resolve_task_provider_model("compression")
|
||||
except Exception:
|
||||
_aux_cfg_provider = ""
|
||||
client, aux_model = get_text_auxiliary_client(
|
||||
"compression",
|
||||
main_runtime=agent._current_main_runtime(),
|
||||
)
|
||||
if client is None or not aux_model:
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client(
|
||||
"compression",
|
||||
_aux_cfg_provider,
|
||||
)
|
||||
if fb_client is not None and fb_model:
|
||||
client, aux_model = fb_client, fb_model
|
||||
if "(" in fb_label and fb_label.endswith(")"):
|
||||
_aux_cfg_provider = fb_label.rsplit("(", 1)[1][:-1]
|
||||
if client is None or not aux_model:
|
||||
if _aux_cfg_provider and _aux_cfg_provider != "auto":
|
||||
msg = (
|
||||
@@ -805,10 +815,11 @@ def try_shrink_image_parts_in_messages(
|
||||
Pillow couldn't help (caller should surface the original error).
|
||||
|
||||
Strategy: look for ``image_url`` / ``input_image`` parts carrying a
|
||||
``data:image/...;base64,...`` payload. For each one whose encoded
|
||||
size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
|
||||
ceiling with header overhead) or whose longest side exceeds
|
||||
``max_dimension``, write the base64 to a tempfile, call
|
||||
``data:image/...;base64,...`` payload, plus Anthropic-native
|
||||
``{"type": "image", "source": {"type": "base64", ...}}`` blocks.
|
||||
For each one whose encoded size exceeds 4 MB (a safe target that slides
|
||||
under Anthropic's 5 MB ceiling with header overhead) or whose longest side
|
||||
exceeds ``max_dimension``, write the base64 to a tempfile, call
|
||||
``vision_tools._resize_image_for_vision`` to produce a smaller data
|
||||
URL, and substitute it in place.
|
||||
|
||||
@@ -964,6 +975,28 @@ def try_shrink_image_parts_in_messages(
|
||||
logger.warning("image-shrink recovery: re-encode failed — %s", exc)
|
||||
return None, triggered_by is not None
|
||||
|
||||
def _source_to_data_url(source: Any) -> Optional[str]:
|
||||
if not isinstance(source, dict) or source.get("type") != "base64":
|
||||
return None
|
||||
data = source.get("data")
|
||||
if not isinstance(data, str) or not data:
|
||||
return None
|
||||
media_type = str(source.get("media_type") or "image/jpeg").strip()
|
||||
if not media_type.startswith("image/"):
|
||||
media_type = "image/jpeg"
|
||||
return f"data:{media_type};base64,{data}"
|
||||
|
||||
def _write_data_url_to_source(source: dict, data_url: str) -> None:
|
||||
header, _, data = data_url.partition(",")
|
||||
media_type = "image/jpeg"
|
||||
if header.startswith("data:"):
|
||||
candidate = header[len("data:"):].split(";", 1)[0].strip()
|
||||
if candidate.startswith("image/"):
|
||||
media_type = candidate
|
||||
source["type"] = "base64"
|
||||
source["media_type"] = media_type
|
||||
source["data"] = data
|
||||
|
||||
for msg in api_messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
@@ -974,6 +1007,16 @@ def try_shrink_image_parts_in_messages(
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
ptype = part.get("type")
|
||||
if ptype == "image":
|
||||
source = part.get("source")
|
||||
url = _source_to_data_url(source)
|
||||
resized, unshrinkable = _shrink_data_url(url or "")
|
||||
if resized and isinstance(source, dict):
|
||||
_write_data_url_to_source(source, resized)
|
||||
changed_count += 1
|
||||
elif unshrinkable:
|
||||
unshrinkable_oversized += 1
|
||||
continue
|
||||
if ptype not in {"image_url", "input_image"}:
|
||||
continue
|
||||
image_value = part.get("image_url")
|
||||
|
||||
@@ -35,6 +35,7 @@ from agent.turn_context import build_turn_context
|
||||
from agent.turn_retry_state import TurnRetryState
|
||||
from agent.memory_manager import build_memory_context_block
|
||||
from agent.message_sanitization import (
|
||||
close_interrupted_tool_sequence,
|
||||
_repair_tool_call_arguments,
|
||||
_sanitize_messages_non_ascii,
|
||||
_sanitize_messages_surrogates,
|
||||
@@ -55,7 +56,7 @@ from agent.model_metadata import (
|
||||
)
|
||||
from agent.process_bootstrap import _install_safe_stdio
|
||||
from agent.prompt_caching import apply_anthropic_cache_control
|
||||
from agent.retry_utils import jittered_backoff
|
||||
from agent.retry_utils import adaptive_rate_limit_backoff, jittered_backoff
|
||||
from agent.trajectory import has_incomplete_scratchpad
|
||||
from agent.usage_pricing import estimate_usage_cost, normalize_usage
|
||||
from hermes_constants import PARTIAL_STREAM_STUB_ID
|
||||
@@ -501,6 +502,7 @@ def run_conversation(
|
||||
stream_callback: Optional[callable] = None,
|
||||
persist_user_message: Optional[str] = None,
|
||||
persist_user_timestamp: Optional[float] = None,
|
||||
moa_config: Optional[dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Run a complete conversation with tool calling until completion.
|
||||
@@ -523,6 +525,19 @@ def run_conversation(
|
||||
Returns:
|
||||
Dict: Complete conversation result with final response and message history
|
||||
"""
|
||||
if moa_config is None:
|
||||
try:
|
||||
from hermes_cli.moa_config import decode_moa_turn
|
||||
|
||||
_decoded_message, _decoded_moa_config = decode_moa_turn(user_message)
|
||||
if _decoded_moa_config is not None:
|
||||
user_message = _decoded_message
|
||||
moa_config = _decoded_moa_config
|
||||
if persist_user_message is None:
|
||||
persist_user_message = _decoded_message
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ── Per-turn setup (the prologue) ──
|
||||
# All once-per-turn setup — stdio guarding, retry-counter resets, user
|
||||
# message sanitization, todo/nudge hydration, system-prompt restore-or-
|
||||
@@ -801,6 +816,29 @@ def run_conversation(
|
||||
if effective_system:
|
||||
api_messages = [{"role": "system", "content": effective_system}] + api_messages
|
||||
|
||||
if moa_config:
|
||||
try:
|
||||
from agent.moa_loop import aggregate_moa_context
|
||||
|
||||
_moa_context = aggregate_moa_context(
|
||||
user_prompt=original_user_message if isinstance(original_user_message, str) else str(original_user_message),
|
||||
api_messages=api_messages,
|
||||
reference_models=moa_config.get("reference_models") or [],
|
||||
aggregator=moa_config.get("aggregator") or {},
|
||||
temperature=float(moa_config.get("reference_temperature", 0.6) or 0.6),
|
||||
aggregator_temperature=float(moa_config.get("aggregator_temperature", 0.4) or 0.4),
|
||||
max_tokens=int(moa_config.get("max_tokens", 4096) or 4096),
|
||||
)
|
||||
if _moa_context:
|
||||
for _msg in reversed(api_messages):
|
||||
if _msg.get("role") == "user":
|
||||
_base = _msg.get("content", "")
|
||||
if isinstance(_base, str):
|
||||
_msg["content"] = _base + "\n\n" + _moa_context
|
||||
break
|
||||
except Exception as _moa_exc:
|
||||
logger.warning("MoA context aggregation failed: %s", _moa_exc)
|
||||
|
||||
# Inject ephemeral prefill messages right after the system prompt
|
||||
# but before conversation history. Same API-call-time-only pattern.
|
||||
if agent.prefill_messages:
|
||||
@@ -1122,7 +1160,7 @@ def run_conversation(
|
||||
# stream. Mirror the ACP exclusion used for Responses
|
||||
# API upgrade (lines ~1083-1085).
|
||||
elif (
|
||||
agent.provider == "copilot-acp"
|
||||
agent.provider in {"copilot-acp", "moa"}
|
||||
or str(agent.base_url or "").lower().startswith("acp://copilot")
|
||||
or str(agent.base_url or "").lower().startswith("acp+tcp://")
|
||||
):
|
||||
@@ -1396,10 +1434,12 @@ def run_conversation(
|
||||
while time.time() < sleep_end:
|
||||
if agent._interrupt_requested:
|
||||
agent._vprint(f"{agent.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
|
||||
_interrupt_text = f"Operation interrupted during retry ({_failure_hint}, attempt {retry_count}/{max_retries})."
|
||||
close_interrupted_tool_sequence(messages, _interrupt_text)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
agent.clear_interrupt()
|
||||
return {
|
||||
"final_response": f"Operation interrupted during retry ({_failure_hint}, attempt {retry_count}/{max_retries}).",
|
||||
"final_response": _interrupt_text,
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
@@ -2663,10 +2703,12 @@ def run_conversation(
|
||||
# Check for interrupt before deciding to retry
|
||||
if agent._interrupt_requested:
|
||||
agent._vprint(f"{agent.log_prefix}⚡ Interrupt detected during error handling, aborting retries.", force=True)
|
||||
_interrupt_text = f"Operation interrupted: handling API error ({error_type}: {agent._clean_error_message(str(api_error))})."
|
||||
close_interrupted_tool_sequence(messages, _interrupt_text)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
agent.clear_interrupt()
|
||||
return {
|
||||
"final_response": f"Operation interrupted: handling API error ({error_type}: {agent._clean_error_message(str(api_error))}).",
|
||||
"final_response": _interrupt_text,
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
@@ -3537,16 +3579,38 @@ def run_conversation(
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
|
||||
_backoff_policy = None
|
||||
if is_rate_limited and not _retry_after:
|
||||
wait_time, _backoff_policy = adaptive_rate_limit_backoff(
|
||||
retry_count,
|
||||
base_url=str(_base),
|
||||
model=_model,
|
||||
error=api_error,
|
||||
default_wait=wait_time,
|
||||
)
|
||||
if is_rate_limited:
|
||||
agent._buffer_status(f"⏱️ Rate limited. Waiting {wait_time:.1f}s (attempt {retry_count + 1}/{max_retries})...")
|
||||
_policy_note = ""
|
||||
if _backoff_policy == "zai_coding_overload_long":
|
||||
_policy_note = " (Z.AI Coding overload adaptive long backoff)"
|
||||
elif _backoff_policy == "zai_coding_overload_short":
|
||||
_policy_note = " (Z.AI Coding overload short retry)"
|
||||
_rate_limit_status = f"⏱️ Rate limited. Waiting {wait_time:.1f}s (attempt {retry_count + 1}/{max_retries}){_policy_note}..."
|
||||
# Normal retries are buffered to avoid noisy transient chatter. Long
|
||||
# Z.AI Coding waits are different: they can last minutes, so surface
|
||||
# progress immediately instead of making the TUI look frozen.
|
||||
if _backoff_policy == "zai_coding_overload_long":
|
||||
agent._emit_status(_rate_limit_status)
|
||||
else:
|
||||
agent._buffer_status(_rate_limit_status)
|
||||
else:
|
||||
agent._buffer_status(f"⏳ Retrying in {wait_time:.1f}s (attempt {retry_count}/{max_retries})...")
|
||||
logger.warning(
|
||||
"Retrying API call in %ss (attempt %s/%s) %s error=%s",
|
||||
"Retrying API call in %ss (attempt %s/%s) %s policy=%s error=%s",
|
||||
wait_time,
|
||||
retry_count,
|
||||
max_retries,
|
||||
agent._client_log_context(),
|
||||
_backoff_policy or "default",
|
||||
api_error,
|
||||
)
|
||||
# Sleep in small increments so we can respond to interrupts quickly
|
||||
@@ -3556,10 +3620,12 @@ def run_conversation(
|
||||
while time.time() < sleep_end:
|
||||
if agent._interrupt_requested:
|
||||
agent._vprint(f"{agent.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
|
||||
_interrupt_text = f"Operation interrupted: retrying API call after error (retry {retry_count}/{max_retries})."
|
||||
close_interrupted_tool_sequence(messages, _interrupt_text)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
agent.clear_interrupt()
|
||||
return {
|
||||
"final_response": f"Operation interrupted: retrying API call after error (retry {retry_count}/{max_retries}).",
|
||||
"final_response": _interrupt_text,
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
@@ -4050,6 +4116,19 @@ def run_conversation(
|
||||
|
||||
messages.append(assistant_msg)
|
||||
agent._emit_interim_assistant_message(assistant_msg)
|
||||
try:
|
||||
# Persist the assistant tool-call turn before any tool
|
||||
# side effects run. If a destructive tool restarts or
|
||||
# terminates Hermes mid-turn, resume logic still sees the
|
||||
# exact tool-call block that already executed.
|
||||
agent._flush_messages_to_session_db(messages, conversation_history)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Incremental tool-call persistence failed before execution "
|
||||
"(session=%s): %s",
|
||||
agent.session_id or "none",
|
||||
exc,
|
||||
)
|
||||
|
||||
# Close any open streaming display (response box, reasoning
|
||||
# box) before tool execution begins. Intermediate turns may
|
||||
@@ -4479,9 +4558,10 @@ def run_conversation(
|
||||
final_msg = agent._build_assistant_message(assistant_message, finish_reason)
|
||||
|
||||
# Pop thinking-only prefill and empty-response retry
|
||||
# scaffolding before appending the final response. These
|
||||
# internal turns are only for the next API retry and should
|
||||
# not become durable transcript context.
|
||||
# scaffolding before appending either a final response or a
|
||||
# verification-stop follow-up. These internal turns are only
|
||||
# for the next API retry and should not become durable
|
||||
# transcript context.
|
||||
while (
|
||||
messages
|
||||
and isinstance(messages[-1], dict)
|
||||
@@ -4493,6 +4573,44 @@ def run_conversation(
|
||||
):
|
||||
messages.pop()
|
||||
|
||||
try:
|
||||
from agent.verification_stop import (
|
||||
build_verify_on_stop_nudge,
|
||||
verify_on_stop_enabled,
|
||||
)
|
||||
|
||||
if verify_on_stop_enabled():
|
||||
_verify_nudge = build_verify_on_stop_nudge(
|
||||
session_id=getattr(agent, "session_id", None),
|
||||
changed_paths=getattr(agent, "_turn_file_mutation_paths", set()),
|
||||
attempts=getattr(agent, "_verification_stop_nudges", 0),
|
||||
)
|
||||
else:
|
||||
_verify_nudge = None
|
||||
except Exception:
|
||||
logger.debug("verification stop-loop check failed", exc_info=True)
|
||||
_verify_nudge = None
|
||||
|
||||
if _verify_nudge:
|
||||
agent._verification_stop_nudges = (
|
||||
getattr(agent, "_verification_stop_nudges", 0) + 1
|
||||
)
|
||||
final_msg["finish_reason"] = "verification_required"
|
||||
messages.append(final_msg)
|
||||
# Keep the attempted final answer in model history so the
|
||||
# synthetic user nudge preserves role alternation, but do
|
||||
# not surface it to the user as an interim answer. The
|
||||
# whole point of this guard is to prevent premature
|
||||
# "done" claims before checks run.
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": _verify_nudge,
|
||||
"_verification_stop_synthetic": True,
|
||||
})
|
||||
agent._session_messages = messages
|
||||
agent._emit_status("↻ Verification required before finishing")
|
||||
continue
|
||||
|
||||
messages.append(final_msg)
|
||||
|
||||
_turn_exit_reason = f"text_response(finish_reason={finish_reason})"
|
||||
|
||||
187
agent/display.py
187
agent/display.py
@@ -6,6 +6,7 @@ Used by AIAgent._execute_tool_calls for CLI feedback.
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
@@ -177,6 +178,167 @@ def _truncate_preview(text: str, max_len: int | None) -> str:
|
||||
return text
|
||||
|
||||
|
||||
_SHELL_SILENT_HEADS = {"cd", "pushd", "popd", "export", "set", "unset", "source", ".", "true", "false", ":"}
|
||||
_SHELL_PIPE_TAIL_HEADS = {"head", "tail", "wc", "sort", "uniq"}
|
||||
|
||||
|
||||
def _shell_basename(head: str) -> str:
|
||||
return head.rsplit("/", 1)[-1] if head else ""
|
||||
|
||||
|
||||
def _split_shell_words(segment: str) -> list[str]:
|
||||
words: list[str] = []
|
||||
buf: list[str] = []
|
||||
quote: str | None = None
|
||||
|
||||
for i, ch in enumerate(segment):
|
||||
if quote:
|
||||
buf.append(ch)
|
||||
if ch == quote and (i == 0 or segment[i - 1] != "\\"):
|
||||
quote = None
|
||||
continue
|
||||
|
||||
if ch in {"'", '"'}:
|
||||
quote = ch
|
||||
buf.append(ch)
|
||||
continue
|
||||
|
||||
if ch.isspace():
|
||||
if buf:
|
||||
words.append("".join(buf))
|
||||
buf = []
|
||||
continue
|
||||
|
||||
buf.append(ch)
|
||||
|
||||
if buf:
|
||||
words.append("".join(buf))
|
||||
|
||||
return words
|
||||
|
||||
|
||||
def _strip_shell_pipe_tail(segment: str) -> str:
    """Drop a trailing ``| head``-style filter from a segment.

    The segment is tokenized with ``_split_shell_words``. Everything from the
    first standalone ``|`` whose next word's basename is in
    ``_SHELL_PIPE_TAIL_HEADS`` is removed. The remaining words are re-joined
    with single spaces.
    """
    tokens = _split_shell_words(segment)

    cut = len(tokens)
    # A "|" in the last position has no following command, so it never cuts.
    for pos in range(len(tokens) - 1):
        if tokens[pos] != "|":
            continue
        if _shell_basename(tokens[pos + 1]) in _SHELL_PIPE_TAIL_HEADS:
            cut = pos
            break

    return " ".join(tokens[:cut]).strip()
|
||||
|
||||
|
||||
def _split_shell_compound(command: str) -> list[str]:
    """Split a compound command on ``&&``, ``||``, ``;`` and newlines.

    Separators inside single or double quotes are ignored. Each resulting
    segment is stripped, passed through ``_strip_shell_pipe_tail``, and kept
    only when non-empty.

    Backslash handling follows shell quoting rules. Inside single quotes a
    backslash is literal. Elsewhere it escapes the next character, so an
    escaped quote does not open or close a quoted region and an escaped
    separator character does not split the command. The previous
    look-behind check got both cases wrong (e.g. ``echo 'a\\' && ls`` was
    never split).

    Args:
        command: The shell command text.

    Returns:
        The non-empty segments in order.
    """
    segments: list[str] = []
    buf: list[str] = []
    quote: str | None = None
    i = 0
    end = len(command)

    def _flush() -> None:
        # Close the current segment: trim, drop display-only pipe tails, keep if non-empty.
        segment = _strip_shell_pipe_tail("".join(buf).strip())
        if segment:
            segments.append(segment)
        buf.clear()

    while i < end:
        ch = command[i]

        if ch == "\\" and quote != "'" and i + 1 < end:
            # Consume the backslash together with the character it escapes.
            buf.append(command[i:i + 2])
            i += 2
            continue

        if quote:
            buf.append(ch)
            if ch == quote:
                quote = None
            i += 1
            continue

        if ch in {"'", '"'}:
            quote = ch
            buf.append(ch)
            i += 1
            continue

        if command.startswith(("&&", "||"), i):
            op_len = 2
        elif ch in {";", "\n"}:
            op_len = 1
        else:
            op_len = 0

        if op_len:
            _flush()
            i += op_len
            continue

        buf.append(ch)
        i += 1

    _flush()
    return segments
|
||||
|
||||
|
||||
def _shell_head_word(segment: str) -> str:
    """Return the basename of the first word that is not a ``NAME=value`` prefix.

    Leading environment-style assignments (``FOO=bar cmd ...``) are skipped.
    When every word is an assignment, or there are no words, the result is
    the basename of the empty string.
    """
    for token in _split_shell_words(segment):
        if re.match(r"^[A-Za-z_]\w*=", token):
            continue
        return _shell_basename(token)
    return _shell_basename("")
|
||||
|
||||
|
||||
def _clean_shell_segment(segment: str) -> str:
    """Remove I/O redirections from a segment for display.

    Three redirection spellings are dropped:

    * a bare operator followed by its target as a separate word
      (``>``, ``>>``, ``<``, ``2>``, ``&>``, ``&>>``) — both words are removed;
    * file-descriptor duplication (``2>&1``, ``>&2``, ``<&3``);
    * an operator with the target attached (``2>/dev/null``, ``>out.log``,
      ``&>/dev/null``). Previously only the spaced form was stripped, so
      ``cmd 2>/dev/null`` kept its plumbing while ``cmd 2> /dev/null`` did not.

    Here-documents (``<<``), process substitution (``<(...)``), ``>|`` and
    comparison-looking tokens such as ``>=`` are left untouched. Quoted words
    are never matched because the tokenizer keeps their quotes.

    Args:
        segment: One shell command segment.

    Returns:
        The remaining words joined by single spaces.
    """
    # Operator part shared by the bare and attached forms.
    operator = r"(?:\d*(?:>>?|<)|&>>?)"

    words = _split_shell_words(segment)
    out: list[str] = []
    i = 0
    while i < len(words):
        word = words[i]
        if re.match(rf"^{operator}$", word):
            # Bare operator: skip it and the target word that follows.
            i += 2
            continue
        if re.match(r"^\d*(?:>&|<&)\d+$", word):
            i += 1
            continue
        if re.match(rf"^{operator}[^\s<>&(|=]\S*$", word):
            # Operator with its target attached in the same word.
            i += 1
            continue
        out.append(word)
        i += 1
    return " ".join(out).strip()
|
||||
|
||||
|
||||
def _is_shell_boundary_echo(segment: str) -> bool:
    """Tell whether a segment is an ``echo`` that only prints a marker or exit status.

    True when the first word's basename is ``echo`` and the remaining words
    contain a run of two or more dashes, ``_exit=``, a ``$?`` / ``${``
    expansion at the start or after whitespace or ``=``, or ``PIPESTATUS``.
    """
    tokens = _split_shell_words(segment)
    head = tokens[0] if tokens else ""
    if _shell_basename(head) == "echo":
        payload = " ".join(tokens[1:])
        return re.search(r"-{2,}|_exit=|(?:^|\s|=)\$[?{]|PIPESTATUS", payload) is not None
    return False
|
||||
|
||||
|
||||
def summarize_shell_command(command: str) -> str:
    """Produce a compact, display-only summary of a shell command.

    The command is flattened with ``_oneline`` and split into segments.
    A single segment is returned with its redirections stripped. With several
    segments, those whose head word is in ``_SHELL_SILENT_HEADS`` and marker
    echoes are dropped. The summary is the first remaining segment, followed
    by ``+ N command(s)`` when more remain. If nothing survives the filters,
    the flattened original is returned unchanged.
    """
    flat = _oneline(command)
    if not flat:
        return ""

    parts = _split_shell_compound(flat)
    if len(parts) <= 1:
        only = parts[0] if parts else flat
        return _clean_shell_segment(only) or flat

    kept: list[str] = []
    for part in parts:
        cleaned = _clean_shell_segment(part)
        if not cleaned:
            continue
        if _shell_head_word(cleaned) in _SHELL_SILENT_HEADS:
            continue
        if _is_shell_boundary_echo(cleaned):
            continue
        kept.append(cleaned)

    if not kept:
        return flat

    first = kept[0]
    count = len(kept) - 1
    if count == 0:
        return first

    noun = "command" if count == 1 else "commands"
    return f"{first} + {count} {noun}"
|
||||
|
||||
|
||||
def _read_file_line_label(args: dict) -> str:
|
||||
offset = args.get("offset")
|
||||
limit = args.get("limit")
|
||||
if not isinstance(offset, int) or offset <= 0:
|
||||
return ""
|
||||
if not isinstance(limit, int) or limit <= 1:
|
||||
return f"L{offset}"
|
||||
return f"L{offset}-{offset + limit - 1}"
|
||||
|
||||
|
||||
def _delegate_task_goal_parts(tasks: Any, *, per_goal_len: int) -> tuple[int, list[str]]:
|
||||
if not isinstance(tasks, list):
|
||||
return 0, []
|
||||
@@ -206,7 +368,7 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
|
||||
"search_files": "pattern", "browser_navigate": "url",
|
||||
"browser_click": "ref", "browser_type": "text",
|
||||
"image_generate": "prompt", "text_to_speech": "text",
|
||||
"vision_analyze": "question", "mixture_of_agents": "user_prompt",
|
||||
"vision_analyze": "question",
|
||||
"skill_view": "name", "skills_list": "category",
|
||||
"cronjob": "action",
|
||||
"execute_code": "code", "delegate_task": "goal",
|
||||
@@ -253,6 +415,23 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
|
||||
else:
|
||||
return f"planning {len(todos_arg)} task(s)"
|
||||
|
||||
if tool_name in {"terminal", "execute_code"}:
|
||||
key = "code" if tool_name == "execute_code" else "command"
|
||||
command = args.get(key)
|
||||
if command is None:
|
||||
return None
|
||||
preview = summarize_shell_command(str(command))
|
||||
return _truncate_preview(preview, max_len) if preview else None
|
||||
|
||||
if tool_name == "read_file":
|
||||
path = args.get("path") or args.get("file") or args.get("filepath")
|
||||
if path is None:
|
||||
return None
|
||||
label = Path(str(path).replace("\\", "/")).name or str(path)
|
||||
line_label = _read_file_line_label(args)
|
||||
preview = f"{label} {line_label}".strip()
|
||||
return _truncate_preview(preview, max_len) if preview else None
|
||||
|
||||
if tool_name == "session_search":
|
||||
query = _oneline(args.get("query", ""))
|
||||
return f"recall: \"{query[:25]}{'...' if len(query) > 25 else ''}\""
|
||||
@@ -943,7 +1122,7 @@ def get_cute_tool_message(
|
||||
return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}")
|
||||
return _wrap(f"┊ 📄 fetch pages {dur}")
|
||||
if tool_name == "terminal":
|
||||
return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}")
|
||||
return _wrap(f"┊ 💻 $ {_trunc(build_tool_preview(tool_name, args) or args.get('command', ''), 42)} {dur}")
|
||||
if tool_name == "process":
|
||||
action = args.get("action", "?")
|
||||
sid = args.get("session_id", "")[:12]
|
||||
@@ -951,7 +1130,7 @@ def get_cute_tool_message(
|
||||
"wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"}
|
||||
return _wrap(f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}")
|
||||
if tool_name == "read_file":
|
||||
return _wrap(f"┊ 📖 read {_path(args.get('path', ''))} {dur}")
|
||||
return _wrap(f"┊ 📖 read {_trunc(build_tool_preview(tool_name, args) or args.get('path', ''), 42)} {dur}")
|
||||
if tool_name == "write_file":
|
||||
return _wrap(f"┊ ✍️ write {_path(args.get('path', ''))} {dur}")
|
||||
if tool_name == "patch":
|
||||
@@ -1037,8 +1216,6 @@ def get_cute_tool_message(
|
||||
return _wrap(f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}")
|
||||
if tool_name == "vision_analyze":
|
||||
return _wrap(f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}")
|
||||
if tool_name == "mixture_of_agents":
|
||||
return _wrap(f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}")
|
||||
if tool_name == "send_message":
|
||||
return _wrap(f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}")
|
||||
if tool_name == "cronjob":
|
||||
|
||||
@@ -81,6 +81,19 @@ def _bar_chart(values: List[int], max_width: int = 20) -> List[str]:
|
||||
return ["█" * max(1, int(v / peak * max_width)) if v > 0 else "" for v in values]
|
||||
|
||||
|
||||
def _fmt_ms(ms: float) -> str:
|
||||
"""Compact human duration from milliseconds (e.g. 850ms, 2.4s, 1.5m)."""
|
||||
try:
|
||||
ms = float(ms or 0)
|
||||
except (TypeError, ValueError):
|
||||
return "0ms"
|
||||
if ms < 1000:
|
||||
return f"{int(ms)}ms"
|
||||
if ms < 60_000:
|
||||
return f"{ms / 1000:.1f}s"
|
||||
return f"{ms / 60_000:.1f}m"
|
||||
|
||||
|
||||
class InsightsEngine:
|
||||
"""
|
||||
Analyzes session history and produces usage insights.
|
||||
@@ -138,6 +151,7 @@ class InsightsEngine:
|
||||
},
|
||||
"activity": {},
|
||||
"top_sessions": [],
|
||||
"telemetry": {},
|
||||
}
|
||||
|
||||
# Compute insights
|
||||
@@ -148,6 +162,7 @@ class InsightsEngine:
|
||||
skills = self._compute_skill_breakdown(skill_usage)
|
||||
activity = self._compute_activity_patterns(sessions)
|
||||
top_sessions = self._compute_top_sessions(sessions)
|
||||
telemetry = self._compute_telemetry(cutoff)
|
||||
|
||||
return {
|
||||
"days": days,
|
||||
@@ -161,8 +176,37 @@ class InsightsEngine:
|
||||
"skills": skills,
|
||||
"activity": activity,
|
||||
"top_sessions": top_sessions,
|
||||
"telemetry": telemetry,
|
||||
}
|
||||
|
||||
# =========================================================================
|
||||
# Telemetry (observability) — from the tel_* tables (local telemetry)
|
||||
# =========================================================================
|
||||
|
||||
def _compute_telemetry(self, cutoff: float) -> Dict[str, Any]:
|
||||
"""Roll up the local telemetry tables for the same window.
|
||||
|
||||
Reuses the engine's existing connection. Fully fail-soft: if the tel_*
|
||||
tables are empty or absent (telemetry.local disabled, fresh install), this
|
||||
returns an empty dict and the renderer skips the section.
|
||||
"""
|
||||
try:
|
||||
from agent.telemetry import metrics
|
||||
except Exception:
|
||||
return {}
|
||||
try:
|
||||
since_ns = int(cutoff * 1e9)
|
||||
if not metrics.has_data(conn=self._conn):
|
||||
return {}
|
||||
return {
|
||||
"workflows": metrics.workflow_summary(since_ns=since_ns, conn=self._conn),
|
||||
"model_calls": metrics.model_call_summary(since_ns=since_ns, conn=self._conn),
|
||||
"tool_calls": metrics.tool_call_summary(conn=self._conn),
|
||||
"errors": metrics.error_summary(conn=self._conn),
|
||||
}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
# =========================================================================
|
||||
# Data gathering (SQL queries)
|
||||
# =========================================================================
|
||||
@@ -852,8 +896,80 @@ class InsightsEngine:
|
||||
lines.append(f" {ts['label']:<20} {ts['value']:<18} ({ts['date']}, {ts['session_id']})")
|
||||
lines.append("")
|
||||
|
||||
# Telemetry / observability (local telemetry) — only when data exists
|
||||
tel = report.get("telemetry") or {}
|
||||
if tel:
|
||||
self._append_telemetry_section(lines, tel)
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def _append_telemetry_section(self, lines: List[str], tel: Dict[str, Any]) -> None:
|
||||
"""Render the observability rollups (workflows, tools, providers, errors)."""
|
||||
wf = tel.get("workflows", {})
|
||||
mc = tel.get("model_calls", {})
|
||||
tc = tel.get("tool_calls", {})
|
||||
errs = tel.get("errors", {}).get("by_class", {})
|
||||
|
||||
lines.append(" 📡 Observability (local telemetry)")
|
||||
lines.append(" " + "─" * 56)
|
||||
|
||||
total_runs = wf.get("total_runs", 0)
|
||||
if total_runs:
|
||||
sr = wf.get("success_rate", 0.0) * 100
|
||||
p50 = wf.get("duration_ms_p50", 0)
|
||||
p95 = wf.get("duration_ms_p95", 0)
|
||||
lines.append(
|
||||
f" Workflows: {total_runs:,} Success: {sr:.1f}% "
|
||||
f"Duration p50/p95: {_fmt_ms(p50)} / {_fmt_ms(p95)}"
|
||||
)
|
||||
by_entry = wf.get("by_entrypoint", {})
|
||||
if by_entry:
|
||||
entry_str = ", ".join(
|
||||
f"{k}: {v}" for k, v in sorted(by_entry.items(), key=lambda x: -x[1])
|
||||
)
|
||||
lines.append(f" Entrypoints: {entry_str}")
|
||||
|
||||
# Tool reliability
|
||||
if tc.get("total"):
|
||||
fail_pct = tc.get("failure_rate", 0.0) * 100
|
||||
lines.append(
|
||||
f" Tool calls: {tc['total']:,} Failure rate: {fail_pct:.1f}%"
|
||||
)
|
||||
tools = tc.get("by_tool", {})
|
||||
fails = tc.get("failures_by_tool", {})
|
||||
top = sorted(tools.items(), key=lambda x: -x[1])[:6]
|
||||
if top:
|
||||
parts = []
|
||||
for name, n in top:
|
||||
f = fails.get(name, 0)
|
||||
parts.append(f"{name}: {n}" + (f" ({f} failed)" if f else ""))
|
||||
lines.append(" " + " ".join(parts))
|
||||
|
||||
# Provider / model mix + cache (real names)
|
||||
by_provider = mc.get("by_provider", {})
|
||||
if by_provider:
|
||||
prov_str = ", ".join(
|
||||
f"{k}: {v}" for k, v in sorted(by_provider.items(), key=lambda x: -x[1])
|
||||
)
|
||||
lines.append(f" Providers: {prov_str}")
|
||||
by_model = mc.get("by_model", {})
|
||||
if by_model:
|
||||
model_str = ", ".join(
|
||||
f"{k}: {v}" for k, v in sorted(by_model.items(), key=lambda x: -x[1])[:8]
|
||||
)
|
||||
cache = mc.get("cache_hit_rate", 0.0) * 100
|
||||
suffix = f" Cache hit: {cache:.1f}%" if cache else ""
|
||||
lines.append(f" Models: {model_str}{suffix}")
|
||||
|
||||
# Error classes
|
||||
if errs:
|
||||
err_str = ", ".join(
|
||||
f"{k}: {v}" for k, v in sorted(errs.items(), key=lambda x: -x[1])[:6]
|
||||
)
|
||||
lines.append(f" Errors: {err_str}")
|
||||
|
||||
lines.append("")
|
||||
|
||||
def format_gateway(self, report: Dict) -> str:
|
||||
"""Format the insights report for gateway/messaging (shorter)."""
|
||||
if report.get("empty"):
|
||||
|
||||
136
agent/learn_prompt.py
Normal file
136
agent/learn_prompt.py
Normal file
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env python3
|
||||
"""``/learn`` — build the standards-guided prompt that turns whatever the user
|
||||
described into a reusable skill.
|
||||
|
||||
``/learn`` is open-ended. The user can point it at anything they can describe:
|
||||
a directory of code, an API doc URL, a workflow they just walked the agent
|
||||
through in this conversation, or pasted notes. This module builds ONE prompt
|
||||
that instructs the live agent to:
|
||||
|
||||
1. Gather the sources the user named, using the tools it already has
|
||||
(``read_file`` / ``search_files`` for dirs, ``web_extract`` for URLs, the
|
||||
current conversation for "what I just did", the user's text for pasted
|
||||
material).
|
||||
2. Author a single ``SKILL.md`` via ``skill_manage`` that follows the Hermes
|
||||
skill-authoring standards (description <=60 chars, the modern section
|
||||
order, Hermes-tool framing, no invented commands).
|
||||
|
||||
There is no separate distillation engine and no model-tool footprint: the
|
||||
agent does the work with its existing toolset, so this works identically on
|
||||
local, Docker, and remote terminal backends. Every surface (CLI ``/learn``,
|
||||
gateway ``/learn``, the dashboard "Learn a skill" panel) calls
|
||||
:func:`build_learn_prompt` and feeds the result to the agent as a normal turn.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# The house-style rules, distilled from AGENTS.md "Skill authoring standards
|
||||
# (HARDLINE)" and the hermes-agent-dev new-skill salvage reference. Embedded in
|
||||
# the prompt so the agent authors skills the way a maintainer would by hand.
|
||||
_AUTHORING_STANDARDS = """\
|
||||
Follow the Hermes skill-authoring standards exactly. These are the same
|
||||
HARDLINE rules a maintainer enforces in review:
|
||||
|
||||
Frontmatter:
|
||||
- name: lowercase-hyphenated, <=64 chars, no spaces.
|
||||
- description: ONE sentence, **<=60 characters**, ends with a period. State the
|
||||
capability, not the implementation. No marketing words (powerful,
|
||||
comprehensive, seamless, advanced, robust). Do NOT repeat the skill name. If
|
||||
the description contains a colon, wrap the whole value in double quotes.
|
||||
This is the most-violated rule and it is NOT cosmetic: the system-prompt
|
||||
skill index truncates the description to 60 chars and loads it every
|
||||
session, so anything past char 60 is silently cut and never routes. After
|
||||
you write the description, COUNT the characters; if it is over 60, cut it
|
||||
down before saving — do not ship a sentence and hope.
|
||||
Good (<=60): `Search arXiv papers by keyword, author, or ID.`
|
||||
Bad (123): `A comprehensive skill that lets the agent search arXiv for
|
||||
academic papers using keywords, authors, and categories.`
|
||||
- version: 0.1.0
|
||||
- author: always the literal value `Hermes`. NEVER fill it from the host
|
||||
environment — the OS/login username (e.g. the `user=` line in your
|
||||
environment hints), git config, or any identity you can probe must not be
|
||||
written. Skills get shared and published, so an environment-derived name is
|
||||
a privacy leak the user never opted into; the skill names itself as Hermes.
|
||||
- platforms: declare `[macos]`, `[linux]`, and/or `[windows]` IF the skill
|
||||
uses OS-bound primitives (osascript/apt/systemctl => the matching OS; /proc,
|
||||
os.setsid, signal.SIGKILL => linux; fcntl/termios => POSIX). Prefer fixing it
|
||||
cross-platform first (tempfile.gettempdir(), pathlib.Path, psutil); gate only
|
||||
when the dependency is genuinely platform-bound. Omit the field for portable
|
||||
skills.
|
||||
- metadata.hermes.tags: a few Capitalized, Relevant, Tags.
|
||||
|
||||
Body section order (omit a section only if it genuinely has no content):
|
||||
1. "# <Human Title>" then a 2-3 sentence intro: what it does, what it does NOT
|
||||
do, and the key dependency stance (e.g. "stdlib only").
|
||||
2. "## When to Use" — bullet list of concrete trigger phrases.
|
||||
3. "## Prerequisites" — exact env vars, install steps, credentials.
|
||||
4. "## How to Run" — the canonical invocation, framed through Hermes tools.
|
||||
5. "## Quick Reference" — a flat command/endpoint list, no narration.
|
||||
6. "## Procedure" — numbered steps with copy-paste-exact commands.
|
||||
7. "## Pitfalls" — known limits, rate limits, things that look broken but aren't.
|
||||
8. "## Verification" — a single command/check that proves the skill worked.
|
||||
|
||||
Hermes-tool framing (this is what makes it a skill, not shell docs):
|
||||
- Frame running scripts as "invoke through the `terminal` tool".
|
||||
- Reference Hermes tools by name in backticks: `terminal`, `read_file`,
|
||||
`write_file`, `search_files`, `patch`, `web_extract`, `web_search`,
|
||||
`vision_analyze`, `browser_navigate`, `delegate_task`, `image_generate`,
|
||||
`text_to_speech`, `cronjob`, `memory`, `skill_view`, `execute_code`.
|
||||
- Do NOT name shell utilities the agent already has wrapped: say `read_file`
|
||||
not cat/head/tail, `search_files` not grep/rg/find/ls, `patch` not sed/awk,
|
||||
`web_extract` not curl-to-scrape, `write_file` not echo>file or heredocs.
|
||||
- Third-party CLIs (ffmpeg, gh, an SDK) are fine inside a script file, but the
|
||||
prose still frames them as "invoke through the `terminal` tool". If the
|
||||
skill needs an MCP server, name it and document its setup in Prerequisites.
|
||||
|
||||
Quality bar:
|
||||
- Prefer exact commands, endpoint URLs, function signatures, and config keys
|
||||
that appear VERBATIM in the source. NEVER invent flags, paths, or APIs — if
|
||||
you didn't see it in the source, don't write it.
|
||||
- Keep it tight and scannable: ~100 lines for a simple skill, ~200 for a
|
||||
complex one. Don't re-paste the source docs.
|
||||
- Don't write a router/index/hub skill that only points at other skills.
|
||||
- Larger scripts/parsers belong in a `scripts/` file (add via
|
||||
`skill_manage` write_file), referenced from SKILL.md by relative path — not
|
||||
inlined for the agent to re-type every run. References go in `references/`,
|
||||
templates in `templates/`."""
|
||||
|
||||
|
||||
def build_learn_prompt(user_request: str) -> str:
    """Assemble the instruction the agent runs for a ``/learn`` request.

    Args:
        user_request: Free text given after ``/learn``. When it is empty or
            whitespace-only, a default pointing at the current conversation's
            workflow is substituted.

    Returns:
        One prompt string: a header, the (stripped) request, the two-step
        gather/author procedure, the embedded ``_AUTHORING_STANDARDS`` text,
        and a closing instruction to report the skill name, category and a
        one-line summary.
    """
    source = (user_request or "").strip() or (
        "the workflow we just went through in this conversation — review "
        "the steps taken and distill them into a reusable skill"
    )

    header = (
        "[/learn] The user wants you to learn a reusable skill from the "
        "source(s) they described below, and save it.\n\n"
    )
    gather_step = (
        "1. Gather the material. Resolve whatever the user named using the "
        "tools you already have — `read_file`/`search_files` for local files "
        "or directories, `web_extract` for URLs, the current conversation "
        "history if they referred to something you just did, and the text "
        "they pasted as-is. If the request is ambiguous about scope, make a "
        "reasonable choice and note it; do not stall.\n"
    )
    author_step = (
        "2. Author ONE SKILL.md and save it with the `skill_manage` tool "
        "(action=\"create\"). Pick a sensible category. If the procedure needs "
        "a non-trivial script, add it under the skill's `scripts/` with "
        "`skill_manage` write_file and reference it by relative path.\n\n"
    )
    closing = (
        "When done, tell the user the skill name, its category, and a "
        "one-line summary of what it captured."
    )

    return "".join(
        [
            header,
            f"WHAT TO LEARN FROM:\n{source}\n\n",
            "Do this:\n",
            gather_step,
            author_step,
            f"{_AUTHORING_STANDARDS}\n\n",
            closing,
        ]
    )
|
||||
@@ -46,6 +46,39 @@ logger = logging.getLogger(__name__)
|
||||
_SYNC_DRAIN_TIMEOUT_S = 5.0
|
||||
|
||||
|
||||
def normalize_tool_schema(schema: Any) -> Optional[Dict[str, Any]]:
    """Reduce a provider tool schema to a bare function schema with a name.

    Two input shapes are accepted: the bare function schema
    (``{"name": ..., "description": ..., "parameters": ...}``) and an entry
    already in OpenAI tool form (``{"type": "function", "function": {...}}``),
    which is unwrapped one level. Callers wrap the result themselves, so
    passing the wrapped form through untouched would yield a doubly nested
    tool whose ``function`` has no top-level ``name``.

    Returns:
        The bare schema dict (the same object, not a copy), or ``None`` when
        the input is not a dict or has no non-empty string ``name`` — callers
        can then skip the entry instead of appending a nameless tool.
    """
    if not isinstance(schema, dict):
        return None

    inner = schema.get("function")
    already_wrapped = schema.get("type") == "function" and isinstance(inner, dict)
    candidate = inner if already_wrapped else schema

    tool_name = candidate.get("name", "")
    if tool_name and isinstance(tool_name, str):
        return candidate
    return None
|
||||
|
||||
|
||||
def memory_provider_tools_enabled(enabled_toolsets: Optional[List[str]]) -> bool:
|
||||
"""Return whether external memory-provider tools should be exposed."""
|
||||
if enabled_toolsets is None:
|
||||
@@ -92,11 +125,17 @@ def inject_memory_provider_tools(agent: Any) -> int:
|
||||
agent.valid_tool_names = valid_tool_names
|
||||
|
||||
added = 0
|
||||
for schema in get_schemas():
|
||||
if not isinstance(schema, dict):
|
||||
for raw_schema in get_schemas():
|
||||
schema = normalize_tool_schema(raw_schema)
|
||||
if schema is None:
|
||||
logger.warning(
|
||||
"Memory provider returned a tool schema with no resolvable "
|
||||
"name; skipping to avoid poisoning the request (%r)",
|
||||
raw_schema,
|
||||
)
|
||||
continue
|
||||
tool_name = schema.get("name", "")
|
||||
if not tool_name or tool_name in existing_tool_names:
|
||||
tool_name = schema["name"]
|
||||
if tool_name in existing_tool_names:
|
||||
continue
|
||||
tools.append({"type": "function", "function": schema})
|
||||
valid_tool_names.add(tool_name)
|
||||
@@ -370,8 +409,11 @@ class MemoryManager:
|
||||
_core_tool_names = set(_HERMES_CORE_TOOLS)
|
||||
|
||||
# Index tool names → provider for routing
|
||||
for schema in provider.get_tool_schemas():
|
||||
tool_name = schema.get("name", "")
|
||||
for raw_schema in provider.get_tool_schemas():
|
||||
schema = normalize_tool_schema(raw_schema)
|
||||
if schema is None:
|
||||
continue
|
||||
tool_name = schema["name"]
|
||||
if tool_name in _core_tool_names:
|
||||
logger.warning(
|
||||
"Memory provider '%s' tool '%s' shadows a reserved core "
|
||||
@@ -658,11 +700,19 @@ class MemoryManager:
|
||||
seen = set()
|
||||
for provider in self._providers:
|
||||
try:
|
||||
for schema in provider.get_tool_schemas():
|
||||
name = schema.get("name", "")
|
||||
for raw_schema in provider.get_tool_schemas():
|
||||
schema = normalize_tool_schema(raw_schema)
|
||||
if schema is None:
|
||||
logger.warning(
|
||||
"Memory provider '%s' returned a tool schema with "
|
||||
"no resolvable name; skipping (%r)",
|
||||
provider.name, raw_schema,
|
||||
)
|
||||
continue
|
||||
name = schema["name"]
|
||||
if name in _core_tool_names:
|
||||
continue
|
||||
if name and name not in seen:
|
||||
if name not in seen:
|
||||
schemas.append(schema)
|
||||
seen.add(name)
|
||||
except Exception as e:
|
||||
|
||||
@@ -279,6 +279,38 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
||||
return "{}"
|
||||
|
||||
|
||||
def close_interrupted_tool_sequence(messages: list, final_response: Any = None) -> bool:
    """Close a transcript that ends on a ``tool`` message with an assistant turn.

    An interrupted turn can leave ``messages`` ending on a raw ``tool``
    result. If the next user message were appended after it, the transcript
    would read ``tool -> user``, which breaks role alternation. This helper
    appends a synthetic assistant message so the sequence is closed.

    Args:
        messages: Transcript; mutated in place.
        final_response: Text for the closing turn. Non-string or blank values
            are replaced by the placeholder ``"Operation interrupted."`` so
            the assistant turn never has empty content.

    Returns:
        True if a closing turn was appended; False when ``messages`` is empty
        or its last entry is not a dict with role ``tool``.
    """
    tail = messages[-1] if messages else None
    if not (isinstance(tail, dict) and tail.get("role") == "tool"):
        return False

    closing = final_response.strip() if isinstance(final_response, str) else ""
    messages.append(
        {"role": "assistant", "content": closing or "Operation interrupted."}
    )
    return True
|
||||
|
||||
|
||||
def _strip_non_ascii(text: str) -> str:
|
||||
"""Remove non-ASCII characters, replacing with closest ASCII equivalent or removing.
|
||||
|
||||
@@ -431,6 +463,7 @@ def _sanitize_structure_non_ascii(payload: Any) -> bool:
|
||||
|
||||
__all__ = [
|
||||
"_SURROGATE_RE",
|
||||
"close_interrupted_tool_sequence",
|
||||
"_sanitize_surrogates",
|
||||
"_sanitize_structure_surrogates",
|
||||
"_sanitize_messages_surrogates",
|
||||
|
||||
306
agent/moa_loop.py
Normal file
306
agent/moa_loop.py
Normal file
@@ -0,0 +1,306 @@
|
||||
"""Mixture-of-Agents runtime helpers for /moa turns.
|
||||
|
||||
The slash command is deliberately not a model tool. It marks one user turn as
|
||||
MoA-enabled; the normal Hermes agent loop still owns tool calling and turn
|
||||
termination, while this module gathers reference-model context before each model
|
||||
iteration.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any
|
||||
|
||||
from agent.auxiliary_client import call_llm
|
||||
from agent.transports import get_transport
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Upper bound on concurrent reference-model calls. References are independent
|
||||
# advisory calls (no tools, no inter-dependence), so we fan them out the same
|
||||
# way delegate_task runs a batch: all in flight at once, results collected when
|
||||
# every reference finishes. Presets rarely list more than a handful of
|
||||
# references; this cap just protects against a pathologically large preset
|
||||
# opening dozens of sockets at once.
|
||||
_MAX_REFERENCE_WORKERS = 8
|
||||
|
||||
|
||||
def _slot_label(slot: dict[str, str]) -> str:
|
||||
return f"{slot.get('provider', '').strip()}:{slot.get('model', '').strip()}"
|
||||
|
||||
|
||||
def _run_reference(
    slot: dict[str, str],
    ref_messages: list[dict[str, Any]],
    *,
    temperature: float,
    max_tokens: int,
) -> tuple[str, str]:
    """Query a single reference model and return ``(label, text)``.

    Any exception raised while building the request, calling ``call_llm`` or
    extracting the text is logged as a warning and turned into a
    ``"[failed: ...]"`` note, so the aggregator can proceed with partial
    context. An empty reply becomes ``"(empty response)"``. ``call_llm`` is
    called synchronously, which is why callers run this in worker threads.
    """
    label = _slot_label(slot)
    try:
        request = dict(
            task="moa_reference",
            provider=slot["provider"],
            model=slot["model"],
            messages=ref_messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        reply = _extract_text(call_llm(**request))
    except Exception as exc:
        logger.warning("MoA reference model %s failed: %s", label, exc)
        return label, f"[failed: {exc}]"
    return label, reply or "(empty response)"
|
||||
|
||||
|
||||
def _run_references_parallel(
|
||||
reference_models: list[dict[str, str]],
|
||||
ref_messages: list[dict[str, Any]],
|
||||
*,
|
||||
temperature: float,
|
||||
max_tokens: int,
|
||||
) -> list[tuple[str, str]]:
|
||||
"""Fan out all reference models in parallel, returning outputs in order.
|
||||
|
||||
Like ``delegate_task``'s batch mode, every reference is dispatched at once
|
||||
and we block until all of them finish before handing the joined results to
|
||||
the aggregator. Output order matches ``reference_models`` so the
|
||||
``Reference {idx}`` labelling stays stable. MoA presets that reference
|
||||
another MoA preset are skipped here (recursion guard) with a labelled note.
|
||||
"""
|
||||
if not reference_models:
|
||||
return []
|
||||
|
||||
results: list[tuple[str, str] | None] = [None] * len(reference_models)
|
||||
futures = {}
|
||||
workers = min(_MAX_REFERENCE_WORKERS, len(reference_models))
|
||||
with ThreadPoolExecutor(max_workers=workers) as executor:
|
||||
for idx, slot in enumerate(reference_models):
|
||||
if slot.get("provider") == "moa":
|
||||
results[idx] = (
|
||||
_slot_label(slot),
|
||||
"[skipped: MoA presets cannot recursively reference MoA]",
|
||||
)
|
||||
continue
|
||||
futures[
|
||||
executor.submit(
|
||||
_run_reference,
|
||||
slot,
|
||||
ref_messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
] = idx
|
||||
# Collect every reference before returning — the aggregator needs the
|
||||
# complete set, so there is no early-exit / first-completed path here.
|
||||
for future, idx in futures.items():
|
||||
results[idx] = future.result()
|
||||
|
||||
return [r for r in results if r is not None]
|
||||
|
||||
|
||||
def _reference_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Build an advisory-safe view of the conversation for reference models.
|
||||
|
||||
Reference calls are advisory: they never call tools and never emit the
|
||||
``tool_calls`` the main model did. Replaying the full transcript verbatim
|
||||
(a) re-bills the ~8K-token Hermes system prompt per reference per
|
||||
iteration and (b) risks 400s from strict providers (Mistral, Fireworks)
|
||||
that reject orphan ``tool`` messages or ``tool_calls`` the reference never
|
||||
produced. We keep only the user/assistant *text* turns, dropping the
|
||||
system prompt, any ``tool``-role messages, and any ``tool_calls`` payloads.
|
||||
"""
|
||||
trimmed: list[dict[str, Any]] = []
|
||||
for msg in messages:
|
||||
role = msg.get("role")
|
||||
if role not in ("user", "assistant"):
|
||||
# Drop system prompt and tool-result messages.
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, str):
|
||||
# Skip non-text (multimodal/tool-call-only) assistant turns.
|
||||
if not content:
|
||||
continue
|
||||
text = content if isinstance(content, str) else ""
|
||||
if role == "assistant" and not text.strip():
|
||||
# Assistant turn that was purely tool calls — nothing advisory.
|
||||
continue
|
||||
trimmed.append({"role": role, "content": text})
|
||||
if not trimmed:
|
||||
# Degenerate case (e.g. first turn was stripped): fall back to a
|
||||
# minimal user turn so the reference still has something to answer.
|
||||
for msg in reversed(messages):
|
||||
if msg.get("role") == "user" and isinstance(msg.get("content"), str):
|
||||
return [{"role": "user", "content": msg["content"]}]
|
||||
return trimmed
|
||||
|
||||
|
||||
|
||||
def _extract_text(response: Any) -> str:
|
||||
try:
|
||||
transport = get_transport("chat_completions")
|
||||
if transport is None:
|
||||
raise RuntimeError("chat_completions transport unavailable")
|
||||
normalized = transport.normalize_response(response)
|
||||
text = (normalized.content or "").strip()
|
||||
if text:
|
||||
return text
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
content = response.choices[0].message.content
|
||||
return (content or "").strip()
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def aggregate_moa_context(
    *,
    user_prompt: str,
    api_messages: list[dict[str, Any]],
    reference_models: list[dict[str, str]],
    aggregator: dict[str, str],
    temperature: float = 0.6,
    aggregator_temperature: float = 0.4,
    max_tokens: int = 4096,
) -> str:
    """Collect reference-model advice and have the aggregator synthesize it.

    The reference models are run on a text-only view of ``api_messages``;
    their labelled outputs are joined and sent to the aggregator together
    with ``user_prompt``. If the aggregator call fails (logged as a warning)
    or returns no text, the joined reference outputs are used as the
    synthesis instead, so the main agent loop still receives context.

    Returns:
        A guidance block naming the aggregator and the reference slots,
        followed by the stripped synthesis.
    """
    outputs = _run_references_parallel(
        reference_models,
        _reference_messages(api_messages),
        temperature=temperature,
        max_tokens=max_tokens,
    )

    sections = [
        f"Reference {number} — {label}:\n{text}"
        for number, (label, text) in enumerate(outputs, start=1)
    ]
    joined = "\n\n".join(sections)

    instructions = (
        "You are the aggregator in a Mixture of Agents process. Synthesize the "
        "reference responses into concise, actionable guidance for the main "
        "Hermes agent. Focus on next steps, tool-use strategy, risks, and any "
        "disagreements. Do not answer the user directly unless that is all that "
        "is needed; produce context the main agent should use in its normal loop.\n\n"
    )
    synth_prompt = (
        instructions
        + f"Original user prompt:\n{user_prompt}\n\n"
        + f"Reference responses:\n{joined}"
    )

    agg_label = _slot_label(aggregator)
    try:
        synthesis = _extract_text(
            call_llm(
                task="moa_aggregator",
                provider=aggregator["provider"],
                model=aggregator["model"],
                messages=[{"role": "user", "content": synth_prompt}],
                temperature=aggregator_temperature,
                max_tokens=max_tokens,
            )
        )
    except Exception as exc:
        logger.warning("MoA aggregator model %s failed: %s", agg_label, exc)
        synthesis = ""

    reference_labels = ", ".join(_slot_label(slot) for slot in reference_models)
    # Fall back to the raw reference outputs when the aggregator gave nothing.
    body = (synthesis or joined).strip()

    banner = (
        "[Mixture of Agents context — use this as private guidance for the "
        "normal Hermes agent loop. You may call tools, continue reasoning, or "
        "finish normally.]\n"
    )
    return (
        banner
        + f"Aggregator: {agg_label}\n"
        + f"References: {reference_labels}\n\n"
        + body
    )
|
||||
|
||||
|
||||
class MoAChatCompletions:
    """OpenAI-chat-compatible facade where the aggregator is the acting model.

    ``create`` fans the conversation out to the preset's reference models,
    folds their answers into the most recent plain-text user turn as private
    context, and then lets the preset's aggregator produce the real response.
    """

    def __init__(self, preset_name: str):
        # An empty / missing name selects the "default" preset.
        self.preset_name = preset_name or "default"

    @staticmethod
    def _coerce_float(value: Any, default: float) -> float:
        """Return *value* as a float, using *default* only when it is unset.

        "Unset" means ``None`` or an empty string. An explicit ``0`` is kept,
        so a preset can request fully deterministic sampling.
        """
        if value is None or value == "":
            return float(default)
        return float(value)

    def create(self, **api_kwargs: Any) -> Any:
        """Run one MoA turn and return the aggregator's ``call_llm`` response.

        Args:
            **api_kwargs: Chat-completions style keyword arguments. Only
                ``messages``, ``max_tokens``, ``temperature``, ``tools`` and
                ``extra_body`` are read.

        Raises:
            RuntimeError: if the preset's aggregator is itself a MoA preset.
        """
        from hermes_cli.config import load_config
        from hermes_cli.moa_config import resolve_moa_preset

        preset = resolve_moa_preset(load_config().get("moa") or {}, self.preset_name)
        messages = list(api_kwargs.get("messages") or [])
        reference_models = preset.get("reference_models") or []
        aggregator = preset.get("aggregator") or {}

        # Fail fast: reject a recursive preset before spending any reference calls.
        if aggregator.get("provider") == "moa":
            raise RuntimeError("MoA aggregator cannot be another MoA preset")

        max_tokens = int(preset.get("max_tokens", api_kwargs.get("max_tokens") or 4096) or 4096)
        temperature = self._coerce_float(preset.get("reference_temperature"), 0.6)

        # Aggregator temperature: preset value, else the caller's, else 0.4.
        agg_temperature_source = preset.get("aggregator_temperature")
        if agg_temperature_source is None or agg_temperature_source == "":
            agg_temperature_source = api_kwargs.get("temperature")
        aggregator_temperature = self._coerce_float(agg_temperature_source, 0.4)

        # When the preset is disabled, skip the reference fan-out and let the
        # configured aggregator act alone — it is the preset's acting model, so
        # a disabled MoA preset is simply "use the aggregator directly."
        if not preset.get("enabled", True):
            reference_models = []

        reference_outputs: list[tuple[str, str]] = _run_references_parallel(
            reference_models,
            _reference_messages(messages),
            temperature=temperature,
            max_tokens=max_tokens,
        )

        # Shallow-copy each message so the caller's dicts are never mutated.
        agg_messages = [dict(m) for m in messages]
        if reference_outputs:
            joined = "\n\n".join(
                f"Reference {idx} — {label}:\n{text}"
                for idx, (label, text) in enumerate(reference_outputs, start=1)
            )
            guidance = (
                "[Mixture of Agents reference context]\n"
                f"Preset: {self.preset_name}\n"
                f"Aggregator/acting model: {_slot_label(aggregator)}\n"
                f"References: {', '.join(label for label, _ in reference_outputs)}\n\n"
                "Use the reference responses below as private context. You are the aggregator and acting model: "
                "answer the user directly or call tools as needed.\n\n"
                f"{joined}"
            )
            # Attach the guidance to the newest user turn with string content;
            # if there is none, add it as a fresh user message.
            for msg in reversed(agg_messages):
                if msg.get("role") == "user" and isinstance(msg.get("content"), str):
                    msg["content"] = msg["content"] + "\n\n" + guidance
                    break
            else:
                agg_messages.append({"role": "user", "content": guidance})

        # The acting call uses the caller's own max_tokens/tools/extra_body; the
        # preset's max_tokens only bounds the reference calls above.
        return call_llm(
            task="moa_aggregator",
            provider=aggregator.get("provider"),
            model=aggregator.get("model"),
            messages=agg_messages,
            temperature=aggregator_temperature,
            max_tokens=api_kwargs.get("max_tokens"),
            tools=api_kwargs.get("tools"),
            extra_body=api_kwargs.get("extra_body"),
        )
|
||||
|
||||
|
||||
class MoAClient:
    """Minimal client object exposing ``client.chat.completions`` for a preset."""

    def __init__(self, preset_name: str):
        # Build an empty ad-hoc namespace object to carry ``completions``.
        chat_holder = type("_MoAChat", (), {})()
        chat_holder.completions = MoAChatCompletions(preset_name)
        self.chat = chat_holder
|
||||
158
agent/oneshot.py
Normal file
158
agent/oneshot.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""Shared one-off LLM requests for non-conversational helpers.
|
||||
|
||||
A "one-shot" is a single, stateless model call that runs *outside* any
|
||||
conversation: it never touches a session's history, never breaks prompt
|
||||
caching, and returns plain text. UI surfaces use it for small generative
|
||||
chores — a commit message from a diff, a rename suggestion, a summary —
|
||||
where spinning up an agent turn would be wrong (it would pollute the thread)
|
||||
and hand-rolling an LLM call at every call site would be worse.
|
||||
|
||||
Two ways to call it:
|
||||
|
||||
* ``run_oneshot(instructions=..., user_input=...)`` — caller supplies the
|
||||
full prompt.
|
||||
* ``run_oneshot(template="commit_message", variables={...})`` — caller
|
||||
names a registered template and passes its variables; the template owns
|
||||
the prompt engineering so it stays consistent across CLI/TUI/desktop.
|
||||
|
||||
Model selection rides the same auxiliary plumbing as title generation
|
||||
(:func:`agent.auxiliary_client.call_llm`): pass ``main_runtime`` to inherit
|
||||
the live session's provider/model, otherwise the configured ``task`` (default
|
||||
``title_generation``) resolves a cheap/fast backend.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
|
||||
from agent.auxiliary_client import call_llm, extract_content_or_reasoning
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# A template turns a variables dict into a (instructions, user_input) pair.
|
||||
# Templates are plain callables (not str.format) so diff/code payloads with
|
||||
# literal "{" / "}" pass through untouched.
|
||||
PromptTemplate = Callable[[Dict[str, Any]], Tuple[str, str]]
|
||||
|
||||
|
||||
def _truncate(text: str, limit: int) -> str:
|
||||
text = text or ""
|
||||
if len(text) <= limit:
|
||||
return text
|
||||
return text[:limit].rstrip() + "\n…(truncated)"
|
||||
|
||||
|
||||
_COMMIT_INSTRUCTIONS = (
|
||||
"You write git commit messages. Given a diff of staged changes, write ONE "
|
||||
"concise Conventional Commits message describing what the change does and why.\n"
|
||||
"Rules:\n"
|
||||
"- Subject line: type(scope): summary — imperative mood, lower-case, no "
|
||||
"trailing period, ≤ 72 characters. Types: feat, fix, refactor, perf, docs, "
|
||||
"test, build, chore, style, ci.\n"
|
||||
"- Omit the scope if it isn't obvious.\n"
|
||||
"- Add a short body (wrapped at ~72 cols) ONLY when the change needs "
|
||||
"explanation; skip it for small/obvious changes.\n"
|
||||
"- Describe the actual change, never restate the diff line-by-line.\n"
|
||||
"- Return ONLY the commit message text — no quotes, no markdown fences, no "
|
||||
"preamble."
|
||||
)
|
||||
|
||||
|
||||
def _commit_message_template(variables: Dict[str, Any]) -> Tuple[str, str]:
|
||||
diff = _truncate(str(variables.get("diff") or ""), 12000)
|
||||
recent = _truncate(str(variables.get("recent_commits") or ""), 1500)
|
||||
|
||||
parts = []
|
||||
if recent.strip():
|
||||
parts.append(
|
||||
"Recent commit subjects from this repo (match their style/conventions):\n"
|
||||
f"{recent}"
|
||||
)
|
||||
parts.append("Diff to describe:\n" + (diff or "(no textual diff available)"))
|
||||
|
||||
# "Regenerate" must yield something new even on models that decode greedily
|
||||
# / pin temperature server-side. A trailing nonce isn't enough, so we hand
|
||||
# back the previous message and require a genuinely different one.
|
||||
avoid = _truncate(str(variables.get("avoid") or "").strip(), 1000)
|
||||
if avoid:
|
||||
parts.append(
|
||||
"You already proposed the message below and the user wants a "
|
||||
"different one. Write a NEW message with different wording (and, if "
|
||||
"reasonable, a different emphasis or scope framing) — do not repeat "
|
||||
f"it:\n{avoid}"
|
||||
)
|
||||
|
||||
return _COMMIT_INSTRUCTIONS, "\n\n".join(parts)
|
||||
|
||||
|
||||
# Name → template registry. Registering a template here lets any surface reuse
# the same prompt by name instead of carrying its own copy of the prompt text.
PROMPT_TEMPLATES: Dict[str, PromptTemplate] = dict(
    commit_message=_commit_message_template,
)
|
||||
|
||||
|
||||
def render_template(name: str, variables: Optional[Dict[str, Any]] = None) -> Tuple[str, str]:
    """Look up the template called *name* and render it with *variables*.

    Returns the template's ``(instructions, user_input)`` pair. A missing
    *variables* argument is passed to the template as an empty dict.

    Raises:
        KeyError: when no template is registered under *name*, so callers fail
            loudly instead of silently sending an empty prompt.
    """
    builder = PROMPT_TEMPLATES.get(name)
    if builder is not None:
        return builder(variables if variables else {})
    raise KeyError(f"unknown one-shot template: {name}")
|
||||
|
||||
|
||||
def run_oneshot(
    *,
    instructions: str = "",
    user_input: str = "",
    template: Optional[str] = None,
    variables: Optional[Dict[str, Any]] = None,
    task: str = "title_generation",
    max_tokens: int = 1024,
    temperature: Optional[float] = 0.3,
    timeout: float = 60.0,
    main_runtime: Optional[Dict[str, Any]] = None,
) -> str:
    """Issue one stateless LLM request and return the text it produced.

    Pass either a registered ``template`` name (with ``variables``) or an
    explicit ``instructions`` / ``user_input`` pair; a template, when given,
    replaces both explicit strings. The returned text is whitespace-stripped
    and has a single wrapping code fence removed.

    Raises:
        ValueError: when neither instructions nor user input has any content.
        KeyError: for an unknown template name (from :func:`render_template`).
        RuntimeError: surfaced from :func:`call_llm` when no LLM provider is
            configured.
    """
    if template:
        instructions, user_input = render_template(template, variables)

    user_text = user_input or ""
    has_system = bool((instructions or "").strip())
    if not has_system and not user_text.strip():
        raise ValueError("run_oneshot requires a template or instructions/user_input")

    # The system message is sent only when there are real instructions; the
    # user message is always sent, even if empty.
    messages = [{"role": "system", "content": instructions}] if has_system else []
    messages.append({"role": "user", "content": user_text})

    response = call_llm(
        task=task,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        timeout=timeout,
        main_runtime=main_runtime,
    )

    answer = extract_content_or_reasoning(response) or ""
    return _strip_code_fence(answer.strip())
|
||||
|
||||
|
||||
def _strip_code_fence(text: str) -> str:
|
||||
"""Drop a single wrapping ``` fence the model may have added."""
|
||||
if not text.startswith("```"):
|
||||
return text
|
||||
lines = text.splitlines()
|
||||
if len(lines) >= 2 and lines[0].startswith("```") and lines[-1].strip() == "```":
|
||||
return "\n".join(lines[1:-1]).strip()
|
||||
return text
|
||||
51
agent/pet/__init__.py
Normal file
51
agent/pet/__init__.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""Petdex pet engine — shared core for the CLI, TUI, and desktop surfaces.
|
||||
|
||||
Petdex (https://github.com/crafter-station/petdex) is a public gallery of
|
||||
animated sprite "pets" for coding agents. Each pet is a ``pet.json`` plus a
|
||||
``spritesheet.{webp,png}`` of 192×208 px cells. Current Codex/petdex sheets use
|
||||
an 8-column × 9-row atlas; older Hermes/petdex sheets used an 8-row atlas.
|
||||
Hermes infers the row taxonomy from the sheet and maps agent activity onto
|
||||
idle/run/review/failed/wave/jump.
|
||||
|
||||
This package is the **single source of truth** for the feature so the base
|
||||
CLI (Python) and TUI (Ink, via ``tui_gateway``) never duplicate the hard
|
||||
parts:
|
||||
|
||||
- :mod:`agent.pet.constants` — frame geometry + the :class:`PetState` enum.
|
||||
- :mod:`agent.pet.state` — map agent activity → a :class:`PetState`.
|
||||
- :mod:`agent.pet.manifest` — fetch the public petdex manifest.
|
||||
- :mod:`agent.pet.store` — install / list / resolve pets on disk
|
||||
(profile-aware via ``get_hermes_home()``).
|
||||
- :mod:`agent.pet.render` — decode a spritesheet and encode frames for a
|
||||
terminal (kitty / iTerm2 / sixel graphics
|
||||
protocols, with a Unicode half-block
|
||||
fallback).
|
||||
|
||||
Rendering in the Electron desktop is necessarily TypeScript (canvas), but it
|
||||
reuses the same on-disk store and the same state semantics.
|
||||
|
||||
The whole feature is a *display* concern: it adds no model tool, mutates no
|
||||
system prompt or toolset, and therefore has zero effect on prompt caching.
|
||||
"""
|
||||
|
||||
from agent.pet.constants import (
|
||||
DEFAULT_SCALE,
|
||||
FRAME_H,
|
||||
FRAME_W,
|
||||
FRAMES_PER_STATE,
|
||||
LOOP_MS,
|
||||
STATE_ROWS,
|
||||
PetState,
|
||||
)
|
||||
from agent.pet.state import derive_pet_state
|
||||
|
||||
__all__ = [
|
||||
"DEFAULT_SCALE",
|
||||
"FRAME_H",
|
||||
"FRAME_W",
|
||||
"FRAMES_PER_STATE",
|
||||
"LOOP_MS",
|
||||
"STATE_ROWS",
|
||||
"PetState",
|
||||
"derive_pet_state",
|
||||
]
|
||||
167
agent/pet/constants.py
Normal file
167
agent/pet/constants.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""Pet sprite geometry + animation-state taxonomy.
|
||||
|
||||
These values are the common petdex/Codex pet geometry. The real ``pet.json``
|
||||
usually only carries ``id``/``displayName``/``description``/``spritesheetPath``;
|
||||
row taxonomy is inferred from the atlas shape so Hermes can render both legacy
|
||||
8-row sheets and current 9-row Codex sheets.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
|
||||
# Native size of one sprite cell, in pixels. Two atlas layouts exist: current
# Codex/petdex sheets are 8 columns x 9 rows (1536x1872), older Hermes/petdex
# sheets were 9 columns x 8 rows (1728x1664). Renderers read the row taxonomy
# and the real column count off the concrete sheet, so both layouts work.
FRAME_W = 192
FRAME_H = 208

# How many frames are stepped through per animation state (the petdex web app
# uses CSS ``steps(6)``). A sheet may hold more columns; only the first
# ``FRAMES_PER_STATE`` are used.
FRAMES_PER_STATE = 6

# Length of one full animation loop for a state, in milliseconds (petdex default).
LOOP_MS = 1100

# Default on-screen scale relative to the native frame size.
# ``display.pet.scale`` is the single master scalar: the desktop canvas
# multiplies its native pixels by it, and every terminal surface derives its
# half-block/kitty column width from it (see :func:`cols_for_scale`), so one
# number resizes all three interfaces together. petdex's own clients render at
# 0.7; the default here is smaller so the kitty/GUI mascot stays a glanceable
# corner sprite. The half-block fallback cannot shrink that far — it clamps to
# its legibility floor instead (see ``UNICODE_MIN_COLS``).
DEFAULT_SCALE = 0.33

# Bounds for a user-chosen scale (``/pet scale``, desktop slider). The floor
# keeps the pet clickable/visible; the ceiling stops a fat-fingered value from
# filling the screen. The unicode fallback additionally clamps to
# ``UNICODE_MIN_COLS``.
MIN_SCALE = 0.1
MAX_SCALE = 3.0
|
||||
|
||||
|
||||
def clamp_scale(scale: float) -> float:
    """Bound *scale* to the ``[MIN_SCALE, MAX_SCALE]`` range (the single validation point)."""
    capped = scale if scale < MAX_SCALE else MAX_SCALE
    return capped if capped > MIN_SCALE else MIN_SCALE
|
||||
|
||||
# Width, in terminal cells, of one native frame at ``scale == 1.0``. With a
# cell roughly 8px wide and a frame ``FRAME_W`` (192) px wide this is 24 cells,
# mirroring the kitty graphics placement (``scaled_px // 8``) so every renderer
# agrees at full scale.
BASE_UNICODE_COLS = FRAME_W // 8

# Smallest legible width for the half-block fallback. A half-block cell samples
# the sprite at only 1 horizontal + 2 vertical taps, so below this width a
# 192×208 pet collapses into an unreadable blob *regardless* of scale. kitty/GUI
# draw true pixels and have no such floor — which is why the same
# ``scale: 0.33`` is crisp there but mush in half-blocks. ``scale`` shrinks the
# unicode pet down TO this floor (and grows it above), never past it into noise.
UNICODE_MIN_COLS = 16
|
||||
|
||||
|
||||
def cols_for_scale(scale: float) -> int:
    """Half-block column width for *scale*, never below the legibility floor.

    The width is ``BASE_UNICODE_COLS`` multiplied by the scale and rounded to
    the nearest integer; a falsy scale falls back to ``DEFAULT_SCALE``. Results
    under ``UNICODE_MIN_COLS`` are raised to that floor so the sprite stays
    readable rather than devolving into a blob.
    """
    effective_scale = scale if scale else DEFAULT_SCALE
    width = round(BASE_UNICODE_COLS * effective_scale)
    return width if width > UNICODE_MIN_COLS else UNICODE_MIN_COLS
|
||||
|
||||
|
||||
def resolve_cols(scale: float, unicode_cols: int = 0) -> int:
    """Pick the terminal width: a positive *unicode_cols* wins, else derive it from *scale*."""
    if unicode_cols:
        explicit = int(unicode_cols)
        if explicit > 0:
            return explicit
    return cols_for_scale(scale)
|
||||
|
||||
|
||||
class PetState(str, Enum):
    """Activity states Hermes can display a pet in.

    The values are Hermes' own short names and do not always match the row
    names of the source atlas: Codex-format pets label rows ``jumping`` /
    ``running`` while the UI keeps ``jump`` / ``run``.
    """

    IDLE = "idle"
    WAVE = "wave"
    RUN = "run"
    FAILED = "failed"
    REVIEW = "review"
    JUMP = "jump"
    WAITING = "waiting"
|
||||
|
||||
|
||||
# Row order (top -> bottom) of the older Hermes/petdex atlas shape: 8 rows,
# 9 columns. The last two rows carry no Hermes state.
LEGACY_STATE_ROWS: list[str] = [
    *(
        member.value
        for member in (
            PetState.IDLE,
            PetState.WAVE,
            PetState.RUN,
            PetState.FAILED,
            PetState.REVIEW,
            PetState.JUMP,
        )
    ),
    "extra1",
    "extra2",
]

# Row order (top -> bottom) of current Petdex 1536x1872 atlases:
# 8 columns x 9 rows of 192x208 cells.
CODEX_STATE_ROWS: list[str] = [
    PetState.IDLE.value,
    "running-right",
    "running-left",
    "waving",
    "jumping",
    PetState.FAILED.value,
    PetState.WAITING.value,
    "running",
    PetState.REVIEW.value,
]

# Fallback taxonomy for callers that have no sheet. The 9-row Codex format is
# preferred because generated pets and the public Codex pet contract use it.
STATE_ROWS: list[str] = CODEX_STATE_ROWS

# Hermes activity name -> row names accepted for it, most preferred first.
# Internal names stay stable (`wave`/`jump`/`run`) while still matching
# Petdex's current `waving`/`jumping`/`running` taxonomy.
STATE_ALIASES: dict[str, tuple[str, ...]] = {
    member.value: (member.value, *synonyms)
    for member, synonyms in (
        (PetState.IDLE, ()),
        (PetState.WAVE, ("waving",)),
        (PetState.JUMP, ("jumping",)),
        (PetState.RUN, ("running",)),
        (PetState.FAILED, ()),
        (PetState.REVIEW, ()),
        (PetState.WAITING, ()),
    )
}
|
||||
|
||||
|
||||
def state_aliases_for(state: "PetState | str") -> tuple[str, ...]:
    """Row names accepted for *state*; the result is never empty.

    A state without a ``STATE_ALIASES`` entry maps to a one-element tuple
    holding its own name.
    """
    key = state.value if isinstance(state, PetState) else str(state)
    known = STATE_ALIASES.get(key)
    if not known:
        return (key,)
    return known
|
||||
|
||||
|
||||
def state_rows_for_grid(row_count: int | None) -> list[str]:
    """Choose the row taxonomy for a spritesheet with *row_count* rows.

    Sheets with at least as many rows as ``CODEX_STATE_ROWS`` use the Codex
    layout; anything smaller uses the legacy layout. A missing or non-numeric
    count is treated as zero rows.
    """
    try:
        count = int(row_count or 0)
    except (TypeError, ValueError):
        count = 0
    return CODEX_STATE_ROWS if count >= len(CODEX_STATE_ROWS) else LEGACY_STATE_ROWS
|
||||
|
||||
|
||||
def state_row_index(state: "PetState | str", row_count: int | None = None) -> int:
    """Spritesheet row index for *state*, falling back to row 0 (idle).

    Each alias of the state is tried in preference order against the taxonomy
    that :func:`state_rows_for_grid` selects for *row_count*; the first one
    present decides the row.
    """
    # NOTE(review): with no row_count this resolves against whatever
    # state_rows_for_grid(None) returns, which may differ from STATE_ROWS —
    # confirm that is the intended default.
    layout = state_rows_for_grid(row_count)
    for alias in state_aliases_for(state):
        if alias in layout:
            return layout.index(alias)
    return 0  # no alias matched: fall back to the idle row
|
||||
29
agent/pet/generate/__init__.py
Normal file
29
agent/pet/generate/__init__.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""Pet generation — base-draft → hatch pipeline.
|
||||
|
||||
Public surface used by the gateway RPCs, the CLI ``hermes pets generate``
|
||||
command, and tests:
|
||||
|
||||
- :func:`generate_base_drafts` / :func:`hatch_pet` — the two-step flow.
|
||||
- :class:`HatchResult`, :class:`GenerationError`.
|
||||
- :mod:`atlas` — deterministic frame extraction + atlas composition/validation.
|
||||
|
||||
Image generation is delegated to the active reference-capable
|
||||
:class:`~agent.image_gen_provider.ImageGenProvider` (OpenAI gpt-image-2 or Krea);
|
||||
atlas assembly is fully deterministic so it's testable without any API calls.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from agent.pet.generate.imagegen import GenerationError
|
||||
from agent.pet.generate.orchestrate import (
|
||||
HatchResult,
|
||||
generate_base_drafts,
|
||||
hatch_pet,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"GenerationError",
|
||||
"HatchResult",
|
||||
"generate_base_drafts",
|
||||
"hatch_pet",
|
||||
]
|
||||
1183
agent/pet/generate/atlas.py
Normal file
1183
agent/pet/generate/atlas.py
Normal file
File diff suppressed because it is too large
Load Diff
251
agent/pet/generate/imagegen.py
Normal file
251
agent/pet/generate/imagegen.py
Normal file
@@ -0,0 +1,251 @@
|
||||
"""Thin image-generation layer for pet sprites.
|
||||
|
||||
Wraps the active :class:`~agent.image_gen_provider.ImageGenProvider` with the
|
||||
two things sprite generation needs that the agent-facing ``image_generate`` tool
|
||||
doesn't expose: **N variants** (loop) and **reference-image grounding** (so each
|
||||
animation row stays the same character as the chosen base).
|
||||
|
||||
Reference grounding only works on providers that support it — currently OpenAI
|
||||
``gpt-image-2`` (image edits) and Krea (style references). We resolve to one of
|
||||
those and surface a clear, actionable error otherwise rather than silently
|
||||
producing an ungrounded, drifting pet.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Providers that can ground generation on a reference image, in preference order
|
||||
# (Nous Portal → OpenAI → OpenRouter → …). OpenRouter/Nous run a quality-first
|
||||
# model chain and may fall back depending on account access and endpoint behavior,
|
||||
# so fidelity can vary by configured backend + model availability.
|
||||
_REF_CAPABLE = ("nous", "openai", "openai-codex", "openrouter", "krea")
|
||||
|
||||
# Friendly display label per reference-capable provider, surfaced in the desktop
|
||||
# pet-gen picker.
|
||||
_PROVIDER_LABELS: dict[str, str] = {
|
||||
"nous": "Nous Portal",
|
||||
"openrouter": "OpenRouter",
|
||||
"openai": "OpenAI",
|
||||
"openai-codex": "OpenAI (Codex)",
|
||||
"krea": "Krea",
|
||||
}
|
||||
|
||||
|
||||
def _forced_provider_from_env() -> str | None:
|
||||
"""Optional QA override to force a pet-gen backend.
|
||||
|
||||
`HERMES_PET_IMAGE_PROVIDER=<name>` (e.g. `openrouter`) bypasses the normal
|
||||
active/default provider resolution for pet generation only. Unknown values are
|
||||
ignored so existing users are unaffected.
|
||||
"""
|
||||
forced = os.environ.get("HERMES_PET_IMAGE_PROVIDER", "").strip().lower()
|
||||
return forced if forced in _REF_CAPABLE else None
|
||||
|
||||
|
||||
class GenerationError(RuntimeError):
    """Signals that sprite image generation failed (no provider, API error, IO)."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SpriteProvider:
    """An image provider chosen for sprite work, with its reference capability."""

    # Registry name of the provider.
    name: str
    # The provider object itself.
    provider: object
    # Whether reference images may be sent to this provider.
    supports_references: bool
|
||||
|
||||
|
||||
def _discover() -> None:
|
||||
try:
|
||||
from hermes_cli.plugins import _ensure_plugins_discovered
|
||||
|
||||
_ensure_plugins_discovered()
|
||||
except Exception as exc: # noqa: BLE001 - discovery is best-effort
|
||||
logger.debug("image-gen plugin discovery failed: %s", exc)
|
||||
|
||||
|
||||
def resolve_provider(*, require_references: bool = True, prefer: str | None = None) -> SpriteProvider:
    """Choose the image provider used for sprite generation.

    Candidates are tried in this order, and the first available one wins:

    1. the QA override from the environment;
    2. the explicit *prefer* pick (the desktop pet-gen picker), when it is
       reference-capable;
    3. the configured/active provider, when it is reference-capable;
    4. the first available provider in ``_REF_CAPABLE``;
    5. only with *require_references* off (prompt-only base drafts): the active
       provider, returned without reference support.

    Raises:
        GenerationError: when none of the above yields a usable provider.
    """
    _discover()
    from agent.image_gen_registry import get_active_provider, get_provider

    def _ready(candidate: object) -> bool:
        return candidate is not None and candidate.is_available()

    def _grounded(provider_name: str, candidate: object) -> SpriteProvider:
        return SpriteProvider(name=provider_name, provider=candidate, supports_references=True)

    # QA override: pins one provider for pet-gen iteration regardless of the
    # globally active image_gen backend.
    override = _forced_provider_from_env()
    if override:
        pinned = get_provider(override)
        if _ready(pinned):
            return _grounded(override, pinned)

    # An explicit user pick wins when it is reference-capable and available;
    # otherwise it is ignored and resolution continues.
    if prefer:
        picked = get_provider(prefer)
        if prefer in _REF_CAPABLE and _ready(picked):
            return _grounded(prefer, picked)

    # Configured / active provider next; a lookup failure counts as "none".
    try:
        active = get_active_provider()
    except Exception:  # noqa: BLE001
        active = None
    if active is not None:
        active_name = getattr(active, "name", "")
        if active_name in _REF_CAPABLE and active.is_available():
            return _grounded(active_name, active)

    # Otherwise the first available reference-capable provider.
    for candidate_name in _REF_CAPABLE:
        candidate = get_provider(candidate_name)
        if _ready(candidate):
            return _grounded(candidate_name, candidate)

    # Prompt-only work may use the active provider without reference support.
    if not require_references and _ready(active):
        return SpriteProvider(
            name=getattr(active, "name", "unknown"), provider=active, supports_references=False
        )

    raise GenerationError(
        "Pet generation needs an image backend that supports reference images. "
        "Open `hermes tools` → Image Generation and configure Nous Portal, "
        "OpenRouter, or OpenAI (gpt-image-2) with an API key."
    )
|
||||
|
||||
|
||||
def list_sprite_providers() -> list[dict]:
    """List the reference-capable providers available for pet generation.

    Each entry is ``{name, label, default}``. Entries follow ``_REF_CAPABLE``
    order and cover only providers that are registered and available.
    ``default`` marks the one :func:`resolve_provider` would choose with no
    explicit preference. The list is empty when none is configured (the picker
    hides itself).
    """
    _discover()
    from agent.image_gen_registry import get_provider

    # A failed resolution means there is no default to mark.
    try:
        default_name = resolve_provider(require_references=True).name
    except GenerationError:
        default_name = ""

    entries: list[dict] = []
    for candidate_name in _REF_CAPABLE:
        candidate = get_provider(candidate_name)
        if candidate is not None and candidate.is_available():
            entries.append(
                {
                    "name": candidate_name,
                    "label": _PROVIDER_LABELS.get(candidate_name, candidate_name),
                    "default": candidate_name == default_name,
                }
            )
    return entries
|
||||
|
||||
|
||||
def _save_local(image_ref: str, *, prefix: str) -> Path:
|
||||
"""Return a local path for *image_ref*, downloading it if it's a URL."""
|
||||
if image_ref.startswith(("http://", "https://")):
|
||||
from agent.image_gen_provider import save_url_image
|
||||
|
||||
return Path(save_url_image(image_ref, prefix=prefix))
|
||||
return Path(image_ref)
|
||||
|
||||
|
||||
def _rejected_background(error: str) -> bool:
|
||||
"""True when a provider error is specifically about the ``background`` param.
|
||||
|
||||
Transparent backgrounds are a per-model capability (e.g. some gpt-image tiers
|
||||
reject ``background=transparent`` outright). We detect that one rejection so
|
||||
we can retry without the flag rather than failing the whole pet — our chroma
|
||||
key pass makes the result transparent regardless.
|
||||
"""
|
||||
lowered = (error or "").lower()
|
||||
return "background" in lowered and ("not supported" in lowered or "transparent" in lowered)
|
||||
|
||||
|
||||
def generate(
    prompt: str,
    *,
    n: int = 1,
    reference_images: list[Path] | None = None,
    provider: SpriteProvider | None = None,
    prefix: str = "pet_gen",
    aspect_ratio: str = "square",
) -> list[Path]:
    """Produce up to *n* sprite images and return their local paths.

    *reference_images* grounds the output on a base image (required for rows).
    *aspect_ratio* selects the canvas: ``"square"`` for single-character base
    drafts, ``"landscape"`` for multi-frame row strips (the wider 1536px canvas
    gives every frame real horizontal room so winged poses don't have to be
    shrunk to avoid touching their neighbors).

    A transparent background is requested first. If the model rejects that
    flag, the call is retried without it, and the flag is dropped for all
    remaining variants; the opaque result is cleaned up downstream by the
    chroma-key pass. At least one attempt is always made, even when *n* < 1.

    Raises:
        GenerationError: when the resolved backend cannot take the requested
            reference images, or when no attempt produced a usable image.
    """
    sprite = provider or resolve_provider(require_references=bool(reference_images))
    if reference_images and not sprite.supports_references:
        raise GenerationError(
            f"image backend '{sprite.name}' cannot use reference images; "
            "configure OpenAI gpt-image-2 or Krea for pet generation"
        )

    ref_paths = [str(item) for item in (reference_images or [])]

    def _attempt(extra: dict) -> tuple[Path | None, str]:
        """Run one provider call; return ``(path, "")`` or ``(None, error)``."""
        call_kwargs: dict = {"aspect_ratio": aspect_ratio, **extra}
        if ref_paths:
            # Providers disagree on the ref kwarg name: the OpenRouter/Nous
            # backends read ``reference_images``, OpenAI's gpt-image-2 reads
            # ``reference_image_urls``. Both are sent; each ignores the other.
            call_kwargs["reference_images"] = ref_paths
            call_kwargs["reference_image_urls"] = ref_paths

        try:
            result = sprite.provider.generate(prompt, **call_kwargs)
        except Exception as exc:  # noqa: BLE001 - normalize provider crashes
            logger.debug("provider.generate crashed: %s", exc)
            return None, str(exc)

        if not isinstance(result, dict):
            return None, "no result"
        if not result.get("success"):
            return None, result.get("error", "unknown error")

        image_ref = result.get("image")
        if not image_ref:
            return None, "provider returned no image"

        try:
            saved = _save_local(str(image_ref), prefix=prefix)
        except Exception as exc:  # noqa: BLE001
            return None, f"could not save generated image: {exc}"
        return saved, ""

    produced: list[Path] = []
    last_error = ""
    want_transparent = True
    for _ in range(max(1, n)):
        flags = {"background": "transparent"} if want_transparent else {}
        path, err = _attempt(flags)
        # The model rejected the transparent flag: drop it for this and every
        # remaining variant (no point re-probing a disproved capability).
        if path is None and want_transparent and _rejected_background(err):
            want_transparent = False
            path, err = _attempt({})
        if path is None:
            last_error = err
        else:
            produced.append(path)

    if produced:
        return produced
    raise GenerationError(last_error or "image generation produced no output")
|
||||
358
agent/pet/generate/orchestrate.py
Normal file
358
agent/pet/generate/orchestrate.py
Normal file
@@ -0,0 +1,358 @@
|
||||
"""Pet generation orchestration — the base-draft → hatch flow.
|
||||
|
||||
Two steps, mirroring the UX across every surface:
|
||||
|
||||
1. :func:`generate_base_drafts` — a handful of prompt-only "what should this pet
|
||||
look like" variants. Cheap; the user picks one (or retries for a fresh set).
|
||||
2. :func:`hatch_pet` — takes the chosen base and generates one grounded row
|
||||
strip per Hermes state, slices each into frames, composes the atlas, validates
|
||||
it, and writes the pet into the store.
|
||||
|
||||
Splitting it this way bounds cost (4 cheap base calls per round; the ~8 row
|
||||
calls happen once, on the pet you actually keep) and gives each UI a natural
|
||||
preview/loading point.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
from agent.pet.generate import atlas, imagegen, prompts
|
||||
from agent.pet.generate.imagegen import GenerationError, SpriteProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Progress callback signature: (event, detail). hatch_pet emits
# ("row", "<state>:<done>:<total>"), ("row", "idle-fallback"), ("compose", "")
# and ("save", "<slug>").
ProgressFn = Callable[[str, str], None]

# Image generations are independent network calls, so we fan them out instead of
# blocking on each in turn — a hatch is ~8 row calls that would otherwise run
# back-to-back and routinely blow past the client's RPC timeout. Capped so we
# don't hammer the provider's rate limit (one cold call can still be slow).
_MAX_PARALLEL_GENERATIONS = 4
# How many times to (re)generate a single row before accepting a best-effort
# slice. Early attempts demand clean per-pose gutters; the last is lenient so a
# stubborn row still yields frames instead of dropping out entirely.
_ROW_GEN_ATTEMPTS = 3
# Acceptance thresholds checked by hatch_pet after atlas validation: a hatch is
# rejected when fewer than this many states ended up filled ...
_MIN_FILLED_STATES = 6
# ... or when any of these states is missing from the validated atlas.
_REQUIRED_STATES = frozenset({"idle", "running-right", "waving"})
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class HatchResult:
    """Outcome of a successful :func:`hatch_pet`.

    Assembled from the pet record returned by ``store.register_local_pet`` and
    the report returned by ``atlas.validate_atlas``.
    """

    # Slug of the registered pet, as reported back by the store.
    slug: str
    # Display name of the registered pet, as reported back by the store.
    display_name: str
    # Spritesheet location reported by the store for the registered pet.
    spritesheet: Path
    # The validation report's ``filled_states`` value.
    states: list[str]
    # The full report returned by ``atlas.validate_atlas`` for the composed sheet.
    validation: dict
|
||||
|
||||
|
||||
def _harden_transparency(path: Path) -> Path:
    """Strip any painted backdrop from *path* and store the result as an RGBA PNG.

    Providers are asked for ``background=transparent`` but do not always comply
    (a flat, often near-white backdrop is common). The image is pushed through
    ``atlas.remove_background`` and then ``atlas._clear_transparent_rgb`` so the
    draft is a clean cutout without a coloured halo on the dark UI.

    Returns the ``.png`` sibling of *path* on success. Any failure while
    decoding, keying or saving is logged at debug level and the original
    *path* is returned unchanged.
    """
    from PIL import Image

    try:
        with Image.open(path) as source:
            rgba = source.convert("RGBA")
            cutout = atlas.remove_background(rgba)
        # Wipe the RGB of leftover semi-transparent edge pixels (halo removal).
        cutout = atlas._clear_transparent_rgb(cutout)
        png_path = path.with_suffix(".png")
        cutout.save(png_path, format="PNG")
    except Exception as exc:  # noqa: BLE001 - cosmetic; fall back to the raw image
        logger.debug("base draft transparency hardening failed for %s: %s", path, exc)
        return path
    return png_path
|
||||
|
||||
|
||||
def generate_base_drafts(
    concept: str,
    *,
    n: int = 4,
    style: str = "auto",
    reference_images: list[Path] | None = None,
    provider: SpriteProvider | None = None,
    on_draft: Callable[[int, Path], None] | None = None,
    is_cancelled: Callable[[], bool] | None = None,
) -> list[Path]:
    """Produce up to *n* candidate base looks for *concept* and return their paths.

    Every draft is an independent one-shot generation submitted to a thread
    pool (at most ``_MAX_PARALLEL_GENERATIONS`` workers) and post-processed by
    :func:`_harden_transparency`. Results are returned ordered by draft index.

    *on_draft(index, path)* is invoked from the calling thread as each draft
    completes, so previews can be streamed; an exception raised by the callback
    is logged and ignored.

    *is_cancelled* is polled cooperatively: drafts that have not started are
    skipped, queued futures are cancelled, and results that arrive after the
    flag trips are dropped (in-flight provider calls cannot be interrupted).

    Raises :class:`GenerationError` when no draft succeeded and the run was not
    cancelled; the message is derived from the most frequent failure.
    """
    # User-supplied reference images ground every draft, which requires a
    # reference-capable provider.
    refs = reference_images or None
    sprite = provider or imagegen.resolve_provider(require_references=bool(refs))
    cancelled = is_cancelled or (lambda: False)

    logger.info("pet generate: drafting %d base looks for %r (style=%s)", n, concept, style)

    def _draft(slot: int) -> tuple[int, Path | None, str | None]:
        """Generate one draft; returns (slot, path-or-None, error-or-None)."""
        if cancelled():
            return slot, None, None
        started = time.monotonic()
        # Rotate through the variation nudges so the drafts are not near-duplicates.
        nudges = prompts.BASE_VARIATIONS
        prompt = prompts.build_base_prompt(concept, style=style, variation=nudges[slot % len(nudges)])
        try:
            images = imagegen.generate(prompt, n=1, reference_images=refs, provider=sprite, prefix="pet_base")
        except Exception as exc:  # noqa: BLE001 - tolerate a single failed draft
            logger.warning("pet generate: draft %d failed after %.1fs: %s", slot, time.monotonic() - started, exc)
            return slot, None, str(exc)
        if not images:
            logger.warning("pet generate: draft %d produced no image", slot)
            return slot, None, "the image provider returned no image"
        logger.info("pet generate: draft %d ready in %.1fs", slot, time.monotonic() - started)
        return slot, _harden_transparency(images[0]), None

    finished: dict[int, Path] = {}
    failures: list[str] = []
    pool_size = max(1, min(n, _MAX_PARALLEL_GENERATIONS))
    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        futures = [pool.submit(_draft, slot) for slot in range(n)]
        # Results are consumed on the caller's thread, so on_draft (and anything
        # it emits) runs there rather than on a worker thread.
        for fut in as_completed(futures):
            if cancelled():
                logger.info("pet generate: cancelled — dropping remaining drafts")
                for queued in futures:
                    queued.cancel()
                break
            slot, path, err = fut.result()
            if path is not None:
                finished[slot] = path
                if on_draft is not None:
                    try:
                        on_draft(slot, path)
                    except Exception as exc:  # noqa: BLE001 - progress is best-effort
                        logger.debug("on_draft callback failed: %s", exc)
            elif err:
                failures.append(err)

    drafts = [finished[slot] for slot in sorted(finished)]
    if drafts or cancelled():
        return drafts
    # Nothing usable and nobody cancelled: report the representative cause
    # (content-policy refusal, auth problem, ...) instead of a generic message.
    raise GenerationError(_drafts_failed_reason(failures))
|
||||
|
||||
|
||||
def _drafts_failed_reason(errors: list[str]) -> str:
|
||||
"""The representative reason a draft round produced nothing, humanized."""
|
||||
if not errors:
|
||||
return "image generation produced no usable drafts"
|
||||
from collections import Counter
|
||||
|
||||
return _humanize_image_error(Counter(errors).most_common(1)[0][0])
|
||||
|
||||
|
||||
def _humanize_image_error(error: str) -> str:
|
||||
"""Turn a raw provider error into a friendly, actionable sentence.
|
||||
|
||||
The big one is moderation: image models refuse trademarked characters and
|
||||
real people (e.g. "minion"), which reads as an opaque 400 otherwise.
|
||||
"""
|
||||
low = error.lower()
|
||||
if any(s in low for s in ("moderation_blocked", "safety system", "content policy", "content_policy")):
|
||||
return (
|
||||
"The image provider blocked this prompt — its safety filter rejects "
|
||||
"trademarked characters and real people. Try an original description."
|
||||
)
|
||||
if any(s in low for s in ("api key", "unauthorized", "401", "auth")):
|
||||
return "The image provider rejected the request — check your API key in Settings → Providers."
|
||||
if "rate limit" in low or "429" in low:
|
||||
return "The image provider is rate-limiting — wait a moment and try again."
|
||||
# Otherwise the first line, trimmed of the noisy provider envelope.
|
||||
return error.splitlines()[0].strip()[:200]
|
||||
|
||||
|
||||
def hatch_pet(
    *,
    base_image: str | Path,
    slug: str,
    display_name: str = "",
    description: str = "",
    concept: str = "",
    style: str = "auto",
    on_progress: ProgressFn | None = None,
    provider: SpriteProvider | None = None,
    is_cancelled: Callable[[], bool] | None = None,
) -> HatchResult:
    """Turn an approved base image into a full, installed Hermes pet.

    Generates a grounded row strip per state (concurrently, each retried up to
    ``_ROW_GEN_ATTEMPTS`` times), extracts frames, mirrors ``running-right``
    into ``running-left``, composes and validates the atlas, and registers the
    pet in the store. If the idle row is missing, a single frame cut from the
    base image is used in its place.

    *on_progress(event, detail)* is best-effort: it is called from the calling
    thread, and an exception it raises is logged and ignored so a broken
    listener cannot discard rows that were already generated.

    *is_cancelled*, when supplied, is polled cooperatively: rows that haven't
    started are skipped, queued rows are cancelled, and once the pool has
    drained we abort before composing/saving so a stopped hatch never writes a
    half-built pet.

    Raises :class:`GenerationError` when the base image is missing, the hatch
    was cancelled, atlas validation fails, a required state is missing, or
    fewer than ``_MIN_FILLED_STATES`` states were usable.
    """
    base = Path(base_image)
    if not base.is_file():
        raise GenerationError(f"base image not found: {base}")

    sprite = provider or imagegen.resolve_provider(require_references=True)
    cancelled = is_cancelled or (lambda: False)
    label = concept or display_name or slug

    def progress(event: str, detail: str) -> None:
        # Progress reporting must never sink the hatch: an exception escaping
        # inside the pool loop would first wait for every in-flight row and
        # then throw the finished rows away. Mirrors how generate_base_drafts
        # guards its on_draft callback.
        if on_progress is None:
            return
        try:
            on_progress(event, detail)
        except Exception as exc:  # noqa: BLE001 - progress is best-effort
            logger.debug("on_progress callback failed: %s", exc)

    frames_by_state: dict[str, list] = {}
    total_rows = len(atlas.ROW_SPECS)
    logger.info("pet hatch %r: generating %d animation rows", slug, total_rows)

    # Generate every state's row strip concurrently — they're independent
    # grounded calls, so the hatch waits for the slowest row, not their sum. A
    # single row failing is tolerated (idle is guaranteed below).
    def _gen_row(spec: tuple[str, int, int]) -> tuple[str, list | None]:
        state, _row, count = spec
        if cancelled():
            return state, None
        t0 = time.monotonic()
        last_exc: Exception | None = None
        # Self-healing: a model occasionally returns a row whose poses are touching
        # (no clean gutters), which slices badly. We retry such rolls; only the
        # final attempt falls back to lenient ``auto`` slicing so a stubborn row
        # still yields *something* rather than dropping the whole row.
        for attempt in range(_ROW_GEN_ATTEMPTS):
            if cancelled():
                return state, None
            strict = attempt < _ROW_GEN_ATTEMPTS - 1
            try:
                strips = imagegen.generate(
                    prompts.build_row_prompt(state, count, label, style=style),
                    n=1,
                    reference_images=[base],
                    provider=sprite,
                    prefix=f"pet_row_{state}",
                    # Wider canvas → each frame gets real horizontal room, so winged
                    # poses keep a full, healthy size and still leave clean gutters.
                    aspect_ratio="landscape",
                )
                # ``components`` requires clean per-pose gutters (raises otherwise),
                # so a touching roll is rejected and regenerated; the last attempt
                # uses ``auto`` (equal-slot fallback, never raises). Raw (fit=False)
                # so normalize_cells registers the whole pet at once.
                method = "components" if strict else "auto"
                frames = atlas.extract_strip_frames(strips[0], count, method=method, fit=False)
                logger.info(
                    "pet hatch %r: row %r ready in %.1fs (attempt %d)",
                    slug, state, time.monotonic() - t0, attempt + 1,
                )
                return state, frames
            except Exception as exc:  # noqa: BLE001 - retried; one bad row is tolerated
                last_exc = exc
                logger.warning(
                    "pet hatch %r: row %r attempt %d/%d failed: %s",
                    slug, state, attempt + 1, _ROW_GEN_ATTEMPTS, exc,
                )
        logger.warning(
            "pet hatch %r: row %r gave up after %.1fs: %s",
            slug, state, time.monotonic() - t0, last_exc,
        )
        return state, None

    # running-left is derived by mirroring running-right (guaranteed-consistent
    # and one fewer generation), so we don't generate it directly.
    generated_specs = [spec for spec in atlas.ROW_SPECS if spec[0] != "running-left"]

    workers = max(1, min(len(generated_specs), _MAX_PARALLEL_GENERATIONS))
    done = 0
    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = [pool.submit(_gen_row, spec) for spec in generated_specs]
        # as_completed runs on the caller (request) thread, so progress events
        # emitted here inherit the request transport — unlike the worker threads.
        for fut in as_completed(futures):
            if cancelled():
                logger.info("pet hatch %r: cancelled — dropping remaining rows", slug)
                for pending in futures:
                    pending.cancel()
                break
            state, frames = fut.result()
            done += 1
            progress("row", f"{state}:{done}:{total_rows}")
            if frames:
                frames_by_state[state] = frames

    if cancelled():
        raise GenerationError("hatch cancelled")

    # Derive running-left from the approved running-right row (per-frame mirror,
    # preserving order/timing). Missing running-right is rejected below; a pet
    # without its canonical walk cycle is a failed hatch, not a shippable mascot.
    right = frames_by_state.get("running-right")
    if right:
        done += 1
        progress("row", f"running-left:{done}:{total_rows}")
        frames_by_state["running-left"] = atlas.mirror_frames(right)
        logger.info("pet hatch %r: row 'running-left' mirrored from running-right", slug)
    else:
        logger.warning("pet hatch %r: no running-right to mirror; left walk left empty", slug)

    # Idle is the resting state the renderer falls back to — guarantee it.
    if not frames_by_state.get("idle"):
        progress("row", "idle-fallback")
        frames_by_state["idle"] = [atlas.single_frame(base, fit=False)]

    progress("compose", "")
    logger.info("pet hatch %r: composing atlas from %d states", slug, len(frames_by_state))
    # One shared scale + baseline across every state so the pet never slides or
    # pulses size between frames; compose just packs the normalized cells.
    sheet = atlas.compose_atlas(atlas.normalize_cells(frames_by_state))
    validation = atlas.validate_atlas(sheet)
    if not validation["ok"]:
        raise GenerationError("; ".join(validation["errors"]) or "atlas validation failed")
    filled_states = set(validation["filled_states"])
    missing_required = sorted(_REQUIRED_STATES - filled_states)
    if missing_required:
        raise GenerationError(f"missing required animation row(s): {', '.join(missing_required)}")
    if len(filled_states) < _MIN_FILLED_STATES:
        raise GenerationError(
            f"only {len(filled_states)}/{len(atlas.ROW_SPECS)} animation rows were usable; regenerate"
        )

    # Imported lazily, only once there is a validated sheet to register.
    from agent.pet import store

    progress("save", slug)
    logger.info("pet hatch %r: saving pet", slug)
    pet = store.register_local_pet(
        sheet,
        slug=slug,
        display_name=display_name or slug,
        description=description,
    )
    return HatchResult(
        slug=pet.slug,
        display_name=pet.display_name,
        spritesheet=pet.spritesheet,
        states=validation["filled_states"],
        validation=validation,
    )
|
||||
183
agent/pet/generate/prompts.py
Normal file
183
agent/pet/generate/prompts.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""Prompt builders for pet generation.
|
||||
|
||||
Two prompt shapes: a *base* prompt (prompt-only, produces the canonical look the
|
||||
user picks between) and per-*state* *row* prompts (grounded on the chosen base,
|
||||
produce one horizontal strip of N poses). Prompts stay concise and
|
||||
sprite-production oriented; the identity lock and "one transparent row" framing
|
||||
matter more than flowery description.
|
||||
|
||||
We generate the full petdex/Codex nine-state set (see
|
||||
:data:`agent.pet.generate.atlas.ROW_SPECS`) so a hatched pet is a valid
|
||||
``petdex submit`` spritesheet.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# What each petdex/Codex state should depict (kept short — these go straight into
# the row prompt). Phrased to avoid the common sprite-gen failure modes (detached
# effects, motion lines, shadows). Critical distinction: ``running`` is the
# *working* state (in place), while ``running-right`` / ``running-left`` are the
# actual directional walk/run cycles.
#
# build_row_prompt looks the state up here and substitutes "a simple idle pose"
# for a key that is not listed.
STATE_ACTIONS: dict[str, str] = {
    "idle": "a calm idle loop: subtle breathing, a tiny blink or gentle bob, no big gestures",
    "running-right": (
        "a sideways walk/run locomotion cycle moving to the RIGHT: the character "
        "faces and travels right with clear directional steps, a smooth gait loop"
    ),
    "running-left": (
        "a sideways walk/run locomotion cycle moving to the LEFT: the character "
        "faces and travels left with clear directional steps (the mirror of the "
        "right-facing run)"
    ),
    "waving": "a friendly greeting: raising a paw/hand/limb to wave, clear up-and-down gesture",
    "jumping": "a happy celebration jump: anticipation, lift off the ground, peak, and land",
    "failed": "a sad or deflated reaction: slumped, dejected, small frown — readable but not noisy",
    "waiting": (
        "an expectant 'waiting on you' pose: looking up/out as if asking for input "
        "or approval — distinct from idle and review"
    ),
    "running": (
        "focused active work, staying IN PLACE (NOT walking or foot-running): "
        "leaning in, concentrating, busy 'thinking / processing / typing' energy"
    ),
    "review": "careful inspection: a focused lean, head tilt, studying something intently",
}
|
||||
|
||||
# Style keyword → sentence appended to the end of a prompt. Every value starts
# with a space because the builders concatenate it directly after _BACKGROUND.
# Looked up (case-insensitively, via style_hint) with "" for unknown keywords.
_STYLE_HINTS: dict[str, str] = {
    # Default to the popular petdex look: crisp 16-bit PIXEL ART, not the smooth
    # 2D illustration (let alone 3D render) gpt-image reaches for by default.
    "auto": (
        " Style: crisp 16-bit PIXEL-ART game sprite — visible square pixels, a small "
        "limited palette, clean dark outline, flat cel shading, chunky chibi "
        "proportions, like a classic SNES/JRPG party member or a petdex.dev mascot. "
        "Absolutely NOT 3D-rendered, NOT a smooth painted or vector illustration, "
        "NOT photorealistic — no soft gradients, no realistic lighting, no figurine look."
    ),
    "pixel": " Render in clean 16-bit pixel-art style with visible square pixels and a limited palette.",
    "plush": " Render as a soft plush toy.",
    "clay": " Render as a claymation / soft 3D clay figure.",
    "sticker": " Render as a glossy die-cut sticker.",
    "flat-vector": " Render in flat vector mascot style.",
    "3d-toy": " Render as a glossy 3D toy.",
    "painterly": " Render in a soft painterly style.",
}
|
||||
|
||||
# Shared backdrop instruction appended to both the base prompt and the row
# prompts: asks for one flat chroma-key colour (magenta, or green when magenta
# is part of the character) with nothing else drawn behind the character.
_BACKGROUND = (
    "Center the character on a SINGLE flat, uniform, high-contrast chroma-key "
    "background — pure hot magenta #FF00FF (only if magenta appears on the "
    "character, use pure green #00FF00 instead). The background is ONE continuous "
    "even color that completely surrounds the character with NO gradient, "
    "vignette, texture, pattern, scenery, shadow, ground line, frame, border, "
    "panel, comic cell, gutter line, grid, or divider of any kind, so it keys out "
    "cleanly. The background color must not appear anywhere on the character. "
    "No text, no labels, no speech bubbles, no UI."
)
|
||||
|
||||
|
||||
def style_hint(style: str | None) -> str:
    """Return the prompt suffix for *style*.

    ``None`` or an empty value means ``"auto"``. The keyword is stripped and
    lower-cased before lookup; an unrecognised keyword yields ``""``.
    """
    keyword = (style or "auto").strip().lower()
    return _STYLE_HINTS.get(keyword, "")
|
||||
|
||||
|
||||
# Row strips are generated on the wider landscape canvas (see imagegen.generate /
|
||||
# orchestrate). The extra width is what lets each pose stay a healthy size AND
|
||||
# leave a real gutter — used here only to cite concrete pixel numbers.
|
||||
_ASSUMED_STRIP_WIDTH = 1536
|
||||
|
||||
|
||||
def _spacing_spec(frame_count: int) -> tuple[int, int]:
|
||||
"""(per-pose width px, gap px) for a row of *frame_count* poses.
|
||||
|
||||
Pixel counts alone don't hold — the model fills each slot edge-to-edge with
|
||||
the full wingspan, so neighbors touch even when bodies are spaced. The lever
|
||||
that works is proportional containment on a wide canvas: give each pose its
|
||||
own equal cell and keep the ENTIRE silhouette (wings/tail/halo included)
|
||||
inside it. On the 1536px landscape strip ~70% occupancy still leaves a
|
||||
generous gutter, so the pet stays a normal, good-looking size — no shrinking.
|
||||
"""
|
||||
slots = max(1, frame_count)
|
||||
slot_w = _ASSUMED_STRIP_WIDTH / slots
|
||||
pose_px = round(slot_w * 0.7)
|
||||
gap_px = max(48, round(slot_w * 0.3))
|
||||
return pose_px, gap_px
|
||||
|
||||
|
||||
# Per-draft nudges so the 4 base options are actually distinct — gpt-image returns
# near-duplicates for a single prompt. We vary the *look* (palette, build,
# expression, accents), NOT the pose, so the chosen base still grounds clean,
# consistent animation rows.
#
# Drafts pick their entry by ``index % len(BASE_VARIATIONS)``. The first entry
# is empty on purpose: build_base_prompt adds no nudge for an empty variation,
# so draft 0 is the unmodified concept.
BASE_VARIATIONS: tuple[str, ...] = (
    "",
    "a distinctly different colour palette and markings",
    "a heavier, broader silhouette with sturdier proportions",
    "a different facial structure and expression matching the concept tone, with unique accent/accessory details",
    "a leaner, taller build and an alternate colour scheme",
    "bolder, more saturated colours and a stronger expression matching the concept tone",
)
|
||||
|
||||
|
||||
def build_base_prompt(concept: str, *, style: str | None = "auto", variation: str = "") -> str:
    """Build the prompt for a base look: one clean, centered, full-body mascot.

    *concept* falls back to a generic creature when empty. *variation*, when
    non-empty, adds a "make this design distinct" sentence so sibling drafts
    differ (see :data:`BASE_VARIATIONS`). The shared background instruction and
    the style hint for *style* are appended at the end.
    """
    subject = (concept or "a distinctive mascot creature").strip()
    segments = [
        f"A stylized mascot pet character: {subject}. ",
        "Honor the requested tone and mood exactly (cute, eerie, scary, menacing, whimsical, etc.) ",
        "while staying non-graphic. ",
        "Compact, whole-body silhouette that reads clearly at small size, ",
        "clear readable facial features, simple consistent palette. ",
        # A neutral, symmetric, at-rest stance makes the cleanest identity anchor.
        "Neutral front-facing standing pose, upright and symmetric, arms/limbs ",
        "relaxed at the sides, feet together on the ground, any cape/accessories ",
        "hanging straight and still.",
    ]
    if variation:
        segments.append(f" Make this design distinct: {variation}.")
    # Single separating space, then the backdrop rules and the style suffix.
    segments.append(" ")
    segments.append(_BACKGROUND)
    segments.append(style_hint(style))
    return "".join(segments)
|
||||
|
||||
|
||||
def build_row_prompt(state: str, frame_count: int, concept: str, *, style: str | None = "auto") -> str:
    """Build the prompt for a row strip: *frame_count* poses of the SAME character.

    The attached base image is the identity source of truth; the prompt locks
    species, palette, face, and props to it.

    Parameters:
        state: key into :data:`STATE_ACTIONS`; an unknown state is described as
            "a simple idle pose".
        frame_count: number of poses requested in the strip; also drives the
            pixel figures quoted in the spacing section.
        concept: accepted for interface compatibility. It is not interpolated
            into the prompt text — identity comes from the reference image.
            NOTE(review): confirm this omission is intentional.
        style: style keyword resolved through :func:`style_hint`.

    Returns the complete prompt string.
    """
    action = STATE_ACTIONS.get(state, "a simple idle pose")
    pose_px, gap_px = _spacing_spec(frame_count)
    return (
        f"Using the attached reference image as the exact same character "
        f"(same species, face, colors, markings, proportions, and props), "
        "preserving the same emotional tone/mood (e.g., scary stays scary, cute stays cute), "
        f"draw a single WIDE horizontal strip of {frame_count} animation frames showing {action}. "
        f"LAYOUT: arrange {frame_count} poses in ONE horizontal row at equal spacing, "
        "each pose centered in its own imaginary equal region. Draw NO panel borders, "
        "NO comic cells, NO boxes, NO vertical divider/gutter lines, NO grid, NO frame "
        "outlines between poses — the backdrop is one unbroken flat field behind all of them. "
        "Fill the WHOLE strip with the SAME single flat chroma-key color as the attached "
        "reference image's background (identical hue in every frame, no per-pose color shifts). "
        f"SPACING (critical): draw each pose at a consistent, healthy, clearly "
        f"visible size (roughly {pose_px}px wide on a {_ASSUMED_STRIP_WIDTH}px "
        f"strip) — do NOT shrink it tiny — but keep its ENTIRE silhouette "
        f"(wings, tail, halo, horns, cape, every appendage) fully INSIDE its own "
        f"cell. Leave at least {gap_px}px of empty chroma-key background between "
        f"neighboring silhouettes at their closest point (wingtip to wingtip), and "
        f"the same empty margin before the first pose and after the last. If a wing, "
        f"cape, or tail would reach into a neighbor, FOLD or angle it inward rather "
        f"than letting it cross the gap. Silhouettes must NEVER touch, overlap, "
        f"share a shadow, share a ground line, share motion trails, or merge into "
        f"one connected shape. "
        # Registration: a clean sprite sheet keeps the character locked in place
        # so only the action moves — this is what stops the loop sliding/pulsing.
        "REGISTRATION (critical): the character is the SAME height and SAME width "
        "in every frame, drawn at the SAME scale, centered over the SAME point, "
        "with all feet aligned to the SAME invisible horizontal baseline across the "
        "whole strip — this baseline is conceptual ONLY: draw NO ground line, floor, "
        "platform, horizon, or contact shadow beneath the feet. Keep the body's center, size, and stance fixed frame to "
        "frame — ONLY the limbs/features the action needs may move. Capes, cloaks, "
        "bags, and scarves stay in the SAME place and shape every frame (no "
        "swinging, flowing, or drifting) unless the action itself requires it. No "
        "pose is cropped at the strip edges. "
        f"{_BACKGROUND}{style_hint(style)}"
    )
|
||||
165
agent/pet/manifest.py
Normal file
165
agent/pet/manifest.py
Normal file
@@ -0,0 +1,165 @@
|
||||
"""Fetch the public petdex manifest.
|
||||
|
||||
``https://petdex.dev/api/manifest`` 307-redirects to a JSON document on R2:
|
||||
|
||||
{
|
||||
"generatedAt": "...",
|
||||
"total": 2926,
|
||||
"pets": [
|
||||
{"slug": "boba", "displayName": "Boba", "kind": "creature",
|
||||
"submittedBy": "railly",
|
||||
"spritesheetUrl": "https://assets.petdex.dev/.../spritesheet.webp",
|
||||
"petJsonUrl": "https://assets.petdex.dev/.../pet.json",
|
||||
"zipUrl": "https://assets.petdex.dev/.../boba.zip"},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
Read-only and unauthenticated; no credentials involved.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Public manifest endpoint; fetch_manifest requests it with redirects enabled.
MANIFEST_URL = "https://petdex.dev/api/manifest"

# Default ``timeout`` handed to httpx.get — presumably seconds; confirm against httpx.
_DEFAULT_TIMEOUT = 10.0

# In-process cache for the (large, slow, identical-per-call) manifest. The list
# is a static CDN object that barely changes, yet a single session can ask for
# it many times — every gallery open, plus a full re-fetch per install/select
# (``find_entry``). A short TTL collapses those into one network hit without
# going stale for long. Cleared by :func:`clear_cache` (tests).
_MANIFEST_TTL = 300.0
# (time.monotonic() stamp of the last successful fetch, parsed entries), or
# None while nothing has been fetched yet.
_cache: tuple[float, list[ManifestEntry]] | None = None

# Single-flight guard for prefetch(): the flag is raised under the lock before
# the worker thread is started and lowered by the worker when it finishes.
_prefetch_lock = threading.Lock()
_prefetching = False
|
||||
|
||||
|
||||
def clear_cache() -> None:
    """Drop the cached manifest (forces the next fetch to hit the network).

    Only resets the module-level ``_cache``; a prefetch that is already running
    is not interrupted and will repopulate the cache when it completes.
    """
    global _cache
    _cache = None
|
||||
|
||||
|
||||
def _cache_is_warm() -> bool:
    """Report whether a manifest is cached and still younger than ``_MANIFEST_TTL``."""
    if _cache is None:
        return False
    age = time.monotonic() - _cache[0]
    return age < _MANIFEST_TTL
|
||||
|
||||
|
||||
def prefetch(*, timeout: float = _DEFAULT_TIMEOUT) -> None:
    """Warm the manifest cache in a daemon thread — idempotent, never blocks.

    The desktop picker calls this when it loads the (instant) local-only gallery
    so the full petdex catalog is usually cached by the time it's requested,
    without ever holding up the user's own pets on a network round-trip.

    Does nothing when the cache is already warm or a prefetch is in flight.
    Failures — of the fetch itself or of starting the worker thread — are
    logged at debug level and never raised to the caller.
    """
    global _prefetching

    if _cache_is_warm():
        return

    # Claim the single in-flight slot; a concurrent caller backs off.
    with _prefetch_lock:
        if _prefetching:
            return
        _prefetching = True

    def _run() -> None:
        global _prefetching
        try:
            fetch_manifest(timeout=timeout)
        except Exception as exc:  # noqa: BLE001 - best-effort warm
            logger.debug("petdex manifest prefetch failed: %s", exc)
        finally:
            _prefetching = False

    try:
        threading.Thread(target=_run, name="petdex-prefetch", daemon=True).start()
    except Exception as exc:  # noqa: BLE001 - best-effort warm
        # The worker never ran, so its ``finally`` will not release the slot.
        # Release it here, otherwise every later prefetch() would return early
        # forever (e.g. after a transient "can't start new thread").
        _prefetching = False
        logger.debug("petdex manifest prefetch could not start: %s", exc)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ManifestEntry:
    """A single pet's row in the manifest."""

    # Pet identifier, stripped of surrounding whitespace ("" when absent/null).
    slug: str
    # Human-readable name; falls back to the raw slug value when missing.
    display_name: str
    # Manifest "kind" field; "pet" when missing or empty.
    kind: str
    # Manifest "submittedBy" field ("" when missing).
    submitted_by: str
    # URLs copied from the manifest ("" when missing).
    spritesheet_url: str
    pet_json_url: str
    zip_url: str

    @classmethod
    def from_dict(cls, data: dict) -> "ManifestEntry":
        """Build an entry from one raw manifest object.

        Missing, empty and JSON ``null`` values are all normalised to the
        field's default. In particular a ``null`` slug becomes ``""`` rather
        than the literal string ``"None"``, so callers that filter on a
        truthy slug correctly drop such rows.
        """
        raw_slug = data.get("slug")
        slug = "" if raw_slug is None else str(raw_slug).strip()
        # Prefer the display name; otherwise reuse the slug exactly as supplied.
        raw_name = data.get("displayName") or raw_slug
        display_name = "" if raw_name is None else str(raw_name)
        return cls(
            slug=slug,
            display_name=display_name,
            kind=str(data.get("kind", "") or "pet"),
            submitted_by=str(data.get("submittedBy", "") or ""),
            spritesheet_url=str(data.get("spritesheetUrl", "") or ""),
            pet_json_url=str(data.get("petJsonUrl", "") or ""),
            zip_url=str(data.get("zipUrl", "") or ""),
        )
|
||||
|
||||
|
||||
class ManifestError(RuntimeError):
    """Raised when the manifest can't be fetched or parsed.

    ``fetch_manifest`` funnels every network/HTTP/JSON failure, a missing
    ``httpx`` import, and a document without a ``pets`` array into this type.
    """
|
||||
|
||||
|
||||
def fetch_manifest(*, timeout: float = _DEFAULT_TIMEOUT, force: bool = False) -> list[ManifestEntry]:
    """Return every usable pet entry from the public manifest.

    A cached result younger than ``_MANIFEST_TTL`` is returned as-is unless
    ``force=True``. Otherwise the manifest is requested with redirects
    followed, parsed, and cached. Rows that are not objects, or that lack a
    slug or spritesheet URL, are skipped.

    Raises :class:`ManifestError` on any network/parse failure, or when the
    document has no ``pets`` array, so callers can surface a clean message.
    """
    global _cache

    if not force and _cache_is_warm():
        return _cache[1]

    try:
        import httpx
    except ImportError as exc:  # pragma: no cover - httpx is a core dep
        raise ManifestError("httpx is required to fetch the petdex manifest") from exc

    try:
        response = httpx.get(
            MANIFEST_URL,
            timeout=timeout,
            follow_redirects=True,
            headers={"User-Agent": "hermes-agent-petdex"},
        )
        response.raise_for_status()
        document = response.json()
    except Exception as exc:  # noqa: BLE001 - normalize to one error type
        raise ManifestError(f"could not fetch petdex manifest: {exc}") from exc

    listed = document.get("pets") if isinstance(document, dict) else None
    if not isinstance(listed, list):
        raise ManifestError("petdex manifest had no 'pets' array")

    # Parse object rows lazily, then keep only those that can be displayed.
    parsed = (ManifestEntry.from_dict(item) for item in listed if isinstance(item, dict))
    entries = [entry for entry in parsed if entry.slug and entry.spritesheet_url]

    _cache = (time.monotonic(), entries)
    return entries
|
||||
|
||||
|
||||
def find_entry(slug: str, *, timeout: float = _DEFAULT_TIMEOUT) -> ManifestEntry | None:
    """Look up *slug* in the manifest (case-insensitive, whitespace-trimmed).

    Returns the first matching entry, or ``None`` when the manifest does not
    list it. Errors from :func:`fetch_manifest` propagate unchanged.
    """
    wanted = slug.strip().lower()
    matches = (item for item in fetch_manifest(timeout=timeout) if item.slug.lower() == wanted)
    return next(matches, None)
|
||||
618
agent/pet/render.py
Normal file
618
agent/pet/render.py
Normal file
@@ -0,0 +1,618 @@
|
||||
"""Decode a pet spritesheet and encode frames for a terminal.
|
||||
|
||||
Shared by the base CLI (writes the escape bytes to its own stdout) and the
|
||||
TUI (``tui_gateway`` ships the encoded bytes to Ink, which writes them) so the
|
||||
decode + capability-detection + protocol-encoding logic exists exactly once.
|
||||
|
||||
Supported output modes, in fidelity order:
|
||||
|
||||
- ``kitty`` — the kitty graphics protocol (kitty, Ghostty, WezTerm).
|
||||
- ``iterm`` — iTerm2 inline images (iTerm2, WezTerm).
|
||||
- ``sixel`` — DEC sixel (xterm -ti vt340, foot, mlterm, WezTerm, …).
|
||||
- ``unicode`` — 24-bit half-block downscale; works in any truecolor terminal.
|
||||
|
||||
Frame decoding requires Pillow (a core Hermes dependency). If Pillow or the
|
||||
spritesheet is unavailable the renderer degrades to ``unicode`` text or an
|
||||
empty string rather than raising.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
from agent.pet.constants import (
|
||||
DEFAULT_SCALE,
|
||||
FRAME_H,
|
||||
FRAME_W,
|
||||
FRAMES_PER_STATE,
|
||||
PetState,
|
||||
state_row_index,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)

# Every value ``display.pet.render_mode`` may take. ``auto`` means "detect from
# the environment"; ``off`` disables rendering.
RENDER_MODES = (
    "auto",
    "kitty",
    "iterm",
    "sixel",
    "unicode",
    "off",
)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# Terminal capability detection
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def detect_terminal_graphics() -> str:
    """Guess the richest image protocol the current terminal supports.

    Only environment variables are inspected, so this never blocks on a
    terminal query. The result is one of ``kitty``, ``iterm``, ``sixel`` or
    ``unicode``; anything unrecognized gets ``unicode``.
    """
    env = os.environ
    term_name = env.get("TERM", "").lower()
    program = env.get("TERM_PROGRAM", "").lower()

    # The VS Code / Cursor integrated terminal reports TERM_PROGRAM=vscode but
    # keeps inherited markers from the emulator it was launched from
    # (ITERM_SESSION_ID, KITTY_WINDOW_ID, ...). Trusting those would emit an
    # image protocol its embedded xterm.js cannot show, so this check comes
    # first and settles on half-blocks. Users who enabled inline images can pin
    # display.pet.render_mode explicitly.
    if program == "vscode":
        return "unicode"

    # kitty graphics protocol: kitty itself, Ghostty, and WezTerm (which also
    # speaks the iTerm protocol, but kitty is preferred for richer placement).
    speaks_kitty = (
        bool(env.get("KITTY_WINDOW_ID"))
        or any(tag in term_name for tag in ("kitty", "ghostty"))
        or program in ("ghostty", "wezterm")
        or bool(env.get("WEZTERM_PANE"))
    )
    if speaks_kitty:
        return "kitty"

    # iTerm2 inline images.
    if program == "iterm.app" or env.get("ITERM_SESSION_ID"):
        return "iterm"

    # Sixel-capable terminals, judged by environment heuristics only.
    if program == "mintty" or any(tag in term_name for tag in ("foot", "mlterm", "sixel")):
        return "sixel"

    return "unicode"
|
||||
|
||||
|
||||
def resolve_mode(configured: str | None, *, stream=None) -> str:
    """Turn the configured render mode into the one that will actually be used.

    ``configured`` is the ``display.pet.render_mode`` setting; ``None``, empty
    or unrecognized values are treated as ``auto``. The answer is ``off`` when
    the setting says so, or when *stream* (default ``sys.stdout``) is not a
    TTY. ``auto`` defers to :func:`detect_terminal_graphics`; any other valid
    name is returned as given (lower-cased and trimmed).
    """
    requested = (configured or "auto").strip().lower()
    if requested not in RENDER_MODES:
        requested = "auto"
    if requested == "off":
        return "off"

    target = stream or sys.stdout
    try:
        attached = hasattr(target, "isatty") and target.isatty()
    except (ValueError, OSError):
        # e.g. a closed stream: treat it like a non-terminal.
        attached = False
    if not attached:
        return "off"

    return detect_terminal_graphics() if requested == "auto" else requested
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# Frame decoding
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _open_sheet(path: Path):
    """Load the spritesheet at *path* and return it as an RGBA image.

    Pillow is imported lazily so the module itself imports without it. Any
    error Pillow raises while opening or decoding propagates to the caller.
    """
    from PIL import Image

    # ``Image.open`` is lazy and keeps the underlying file open. ``convert``
    # forces a full decode into a new in-memory image, so the source handle can
    # be closed deterministically instead of lingering until garbage collection.
    with Image.open(path) as source:
        return source.convert("RGBA")
|
||||
|
||||
|
||||
# Alpha ceiling for "this frame is empty padding". petdex sheets are packed to
# the left: when a state has fewer real frames than ``FRAMES_PER_STATE``, the
# remaining columns are fully transparent cells. Stepping into one would flash
# the pet blank, so a row is cut off at the first frame whose strongest alpha
# is at or below this value.
_BLANK_ALPHA: int = 8
|
||||
|
||||
|
||||
def _frame_is_blank(frame) -> bool:
    """Report whether *frame* is transparent padding.

    A frame is blank when its most opaque pixel has alpha ``<= _BLANK_ALPHA``.
    """
    _lowest, strongest_alpha = frame.getchannel("A").getextrema()
    return strongest_alpha <= _BLANK_ALPHA
|
||||
|
||||
|
||||
@lru_cache(maxsize=16)
def _raw_frames(
    sheet_path: str,
    state_value: str,
    frame_w: int,
    frame_h: int,
    frames_per_state: int,
) -> tuple:
    """Slice one state's row out of the sheet at native size.

    Frames are cropped left to right and collection stops at the first blank
    cell, so states with fewer real frames than the row has columns never
    include empty padding. The result is memoized per argument tuple. Any
    failure is logged at debug level and yields ``()``.
    """
    try:
        sheet = _open_sheet(Path(sheet_path))
        column_count = max(1, sheet.width // frame_w)
        row_count = max(1, sheet.height // frame_h)

        y0 = state_row_index(state_value, row_count) * frame_h
        # Some pets ship fewer rows than the 8 the taxonomy reserves; keep the
        # crop window inside the sheet.
        if y0 + frame_h > sheet.height:
            y0 = max(0, sheet.height - frame_h)

        kept = []
        x0 = 0
        for _ in range(min(frames_per_state, column_count)):
            cell = sheet.crop((x0, y0, x0 + frame_w, y0 + frame_h))
            if _frame_is_blank(cell):
                break  # trailing transparent padding — real frames end here
            kept.append(cell)
            x0 += frame_w
        return tuple(kept)
    except Exception as exc:  # noqa: BLE001 - cosmetic feature, never fatal
        logger.debug("pet frame decode failed (%s, %s): %s", sheet_path, state_value, exc)
        return ()
|
||||
|
||||
|
||||
@lru_cache(maxsize=8)
def _frames_for(
    sheet_path: str,
    state_value: str,
    frame_w: int,
    frame_h: int,
    frames_per_state: int,
    scale_w: int,
    scale_h: int,
):
    """Return one state's trimmed frames resized to ``scale_w`` × ``scale_h``.

    Delegates slicing to :func:`_raw_frames` and only adds the resize step.
    When there are no frames, or the requested size already equals the native
    frame size, the raw frames are returned as a list without resampling.
    Results are memoized per argument tuple.
    """
    source = _raw_frames(sheet_path, state_value, frame_w, frame_h, frames_per_state)
    wanted = (scale_w, scale_h)
    if source and wanted != (frame_w, frame_h):
        from PIL import Image

        return [tile.resize(wanted, Image.LANCZOS) for tile in source]
    return list(source)
|
||||
|
||||
|
||||
def state_frame_counts(
    sheet_path: str | Path,
    *,
    frame_w: int = FRAME_W,
    frame_h: int = FRAME_H,
    frames_per_state: int = FRAMES_PER_STATE,
) -> dict[str, int]:
    """Count the real (padding-trimmed) frames of every :class:`PetState`.

    Returns a ``{state.value: count}`` mapping built from :func:`_raw_frames`,
    so it always agrees with the frame lists the renderers consume. A state
    whose row cannot be decoded is reported as ``0``.
    """
    location = str(sheet_path)
    counts: dict[str, int] = {}
    for state in PetState:
        trimmed = _raw_frames(location, state.value, frame_w, frame_h, frames_per_state)
        counts[state.value] = len(trimmed)
    return counts
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# Encoders
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _png_bytes(frame) -> bytes:
    """Serialize *frame* to PNG and return the encoded bytes."""
    with io.BytesIO() as sink:
        frame.save(sink, format="PNG")
        return sink.getvalue()
|
||||
|
||||
|
||||
def _kitty_apc(ctrl: str, data: str) -> str:
    """Wrap *data* in kitty graphics APC escapes, at most 4096 characters each.

    The first escape carries the control keys in *ctrl*; later ones carry only
    the continuation flag. ``m=1`` marks "more chunks follow" and ``m=0`` marks
    the final chunk. Empty *data* still produces a single ``m=0`` escape.
    """
    limit = 4096
    pieces = [data[start:start + limit] for start in range(0, len(data), limit)] or [""]
    final = len(pieces) - 1

    escapes = []
    for position, piece in enumerate(pieces):
        more = 0 if position == final else 1
        header = f"{ctrl},m={more}" if position == 0 else f"m={more}"
        escapes.append(f"\x1b_G{header};{piece}\x1b\\")
    return "".join(escapes)
|
||||
|
||||
|
||||
def _encode_kitty(frame, *, cell_cols: int | None = None, cell_rows: int | None = None) -> str:
    """Encode one frame as a kitty graphics escape that transmits and displays it.

    The control keys are ``f=100`` (PNG payload), ``a=T`` (transmit and show at
    the cursor) and ``q=2`` (suppress terminal replies). When given and
    non-zero, ``cell_cols`` / ``cell_rows`` are sent as ``c`` / ``r`` to request
    a display box in terminal cells, so successive frames cover the same area.
    """
    keys = ["f=100", "a=T", "q=2"]
    if cell_cols:
        keys.append(f"c={cell_cols}")
    if cell_rows:
        keys.append(f"r={cell_rows}")
    encoded = base64.standard_b64encode(_png_bytes(frame)).decode("ascii")
    return _kitty_apc(",".join(keys), encoded)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# kitty Unicode placeholders
|
||||
#
|
||||
# Ink (the TUI's React-for-terminal layer) owns the screen and measures every
|
||||
# cell's width, so it can't host raw kitty image escapes (no width to count,
|
||||
# clobbered on the next repaint). kitty's *Unicode placeholder* protocol is the
|
||||
# grid-safe path: transmit the image once (q=2, virtual placement U=1), then the
|
||||
# host app prints ordinary-width placeholder cells (U+10EEEE + diacritics) whose
|
||||
# foreground color encodes the image id. Ink counts those as width-1 text, so
|
||||
# layout stays correct and the terminal paints the image underneath.
|
||||
# https://sw.kovidgoyal.net/kitty/graphics-protocol/#unicode-placeholders
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
# The code point kitty treats as an image placeholder cell.
_KITTY_PLACEHOLDER = "\U0010eeee"

# Combining marks used to number placeholder rows/columns: the mark at index i
# encodes the number i. Copied verbatim from kitty's
# gen/rowcolumn-diacritics.txt (Unicode 6.0.0, combining class 230). This
# module only ever uses it for the row index.
_ROWCOL_DIACRITICS: tuple[int, ...] = (
    0x0305, 0x030D, 0x030E, 0x0310, 0x0312, 0x033D, 0x033E, 0x033F, 0x0346, 0x034A,
    0x034B, 0x034C, 0x0350, 0x0351, 0x0352, 0x0357, 0x035B, 0x0363, 0x0364, 0x0365,
    0x0366, 0x0367, 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
    0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 0x0592, 0x0593, 0x0594, 0x0595, 0x0597,
    0x0598, 0x0599, 0x059C, 0x059D, 0x059E, 0x059F, 0x05A0, 0x05A1, 0x05A8, 0x05A9,
    0x05AB, 0x05AC, 0x05AF, 0x05C4, 0x0610, 0x0611, 0x0612, 0x0613, 0x0614, 0x0615,
    0x0616, 0x0617, 0x0657, 0x0658, 0x0659, 0x065A, 0x065B, 0x065D, 0x065E, 0x06D6,
    0x06D7, 0x06D8, 0x06D9, 0x06DA, 0x06DB, 0x06DC, 0x06DF, 0x06E0, 0x06E1, 0x06E2,
    0x06E4, 0x06E7, 0x06E8, 0x06EB, 0x06EC, 0x0730, 0x0732, 0x0733, 0x0735, 0x0736,
    0x073A, 0x073D, 0x073F, 0x0740, 0x0741, 0x0743, 0x0745, 0x0747, 0x0749, 0x074A,
    0x07EB, 0x07EC, 0x07ED, 0x07EE, 0x07EF, 0x07F0, 0x07F1, 0x07F3, 0x0816, 0x0817,
    0x0818, 0x0819, 0x081B, 0x081C, 0x081D, 0x081E, 0x081F, 0x0820, 0x0821, 0x0822,
    0x0823, 0x0825, 0x0826, 0x0827, 0x0829, 0x082A, 0x082B, 0x082C, 0x082D, 0x0951,
    0x0953, 0x0954, 0x0F82, 0x0F83, 0x0F86, 0x0F87, 0x135D, 0x135E, 0x135F, 0x17DD,
    0x193A, 0x1A17, 0x1A75, 0x1A76, 0x1A77, 0x1A78, 0x1A79, 0x1A7A, 0x1A7B, 0x1A7C,
    0x1B6B, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73, 0x1CD0, 0x1CD1,
    0x1CD2, 0x1CDA, 0x1CDB, 0x1CE0, 0x1DC0, 0x1DC1, 0x1DC3, 0x1DC4, 0x1DC5, 0x1DC6,
    0x1DC7, 0x1DC8, 0x1DC9, 0x1DCB, 0x1DCC, 0x1DD1, 0x1DD2, 0x1DD3, 0x1DD4, 0x1DD5,
    0x1DD6, 0x1DD7, 0x1DD8, 0x1DD9, 0x1DDA, 0x1DDB, 0x1DDC, 0x1DDD, 0x1DDE, 0x1DDF,
    0x1DE0, 0x1DE1, 0x1DE2, 0x1DE3, 0x1DE4, 0x1DE5, 0x1DE6, 0x1DFE, 0x20D0, 0x20D1,
    0x20D4, 0x20D5, 0x20D6, 0x20D7, 0x20DB, 0x20DC, 0x20E1, 0x20E7, 0x20E9, 0x20F0,
    0x2CEF, 0x2CF0, 0x2CF1, 0x2DE0, 0x2DE1, 0x2DE2, 0x2DE3, 0x2DE4, 0x2DE5, 0x2DE6,
    0x2DE7, 0x2DE8, 0x2DE9, 0x2DEA, 0x2DEB, 0x2DEC, 0x2DED, 0x2DEE, 0x2DEF, 0x2DF0,
    0x2DF1, 0x2DF2, 0x2DF3, 0x2DF4, 0x2DF5, 0x2DF6, 0x2DF7, 0x2DF8, 0x2DF9, 0x2DFA,
    0x2DFB, 0x2DFC, 0x2DFD, 0x2DFE, 0x2DFF, 0xA66F, 0xA67C, 0xA67D, 0xA6F0, 0xA6F1,
    0xA8E0, 0xA8E1, 0xA8E2, 0xA8E3, 0xA8E4, 0xA8E5, 0xA8E6, 0xA8E7, 0xA8E8, 0xA8E9,
    0xA8EA, 0xA8EB, 0xA8EC, 0xA8ED, 0xA8EE, 0xA8EF, 0xA8F0, 0xA8F1, 0xAAB0, 0xAAB2,
    0xAAB3, 0xAAB7, 0xAAB8, 0xAABE, 0xAABF, 0xAAC1, 0xFE20, 0xFE21, 0xFE22, 0xFE23,
    0xFE24, 0xFE25, 0xFE26, 0x10A0F, 0x10A38, 0x1D185, 0x1D186, 0x1D187, 0x1D188,
    0x1D189, 0x1D1AA, 0x1D1AB, 0x1D1AC, 0x1D1AD, 0x1D242, 0x1D243, 0x1D244,
)
|
||||
|
||||
|
||||
def kitty_image_id(slug: str) -> int:
    """Derive a stable, non-zero kitty image id for *slug*.

    The id is carried in the placeholder's 24-bit foreground color, so it has
    to be non-zero and fit below ``0xFFFFFF``. It is the CRC-32 of the UTF-8
    slug reduced modulo ``0x7FFE`` plus one, i.e. a value in ``[1, 0x7FFE]``.
    The same slug always maps to the same id (so re-renders reuse the
    terminal-side image), while two different pets are unlikely to collide.
    """
    import zlib

    checksum = zlib.crc32(slug.encode("utf-8"))
    return 1 + checksum % 0x7FFE
|
||||
|
||||
|
||||
def kitty_color_hex(image_id: int) -> str:
    """Format the low 24 bits of *image_id* as the ``#rrggbb`` color kitty reads it from."""
    low_bits = image_id & 0xFFFFFF
    return f"#{low_bits:06x}"
|
||||
|
||||
|
||||
def kitty_placeholder_rows(cols: int, rows: int) -> list[str]:
    """Produce the placeholder text lines for an image *rows* tall and *cols* wide.

    Each line starts with a placeholder cell tagged with that row's diacritic
    (its column defaults to 0), followed by ``cols - 1`` untagged placeholders
    whose column the terminal fills in by auto-increment. Both dimensions are
    clamped to at least 1, and rows beyond the diacritic table reuse its last
    entry. The foreground color that carries the image id is applied by the
    caller, not here.
    """
    width = max(1, cols)
    last_mark = len(_ROWCOL_DIACRITICS) - 1
    untagged_tail = _KITTY_PLACEHOLDER * (width - 1)
    return [
        _KITTY_PLACEHOLDER + chr(_ROWCOL_DIACRITICS[min(line_no, last_mark)]) + untagged_tail
        for line_no in range(max(1, rows))
    ]
|
||||
|
||||
|
||||
def _encode_kitty_virtual(frame, *, image_id: int, cols: int, rows: int) -> str:
    """Encode *frame* as a kitty *virtual* placement for Unicode placeholders.

    Control keys: ``a=T`` transmits and places in one step; ``U=1`` makes the
    placement virtual (nothing is drawn and the cursor does not move);
    ``i`` / ``c`` / ``r`` carry the image id and the cell box; ``f=100`` marks
    a PNG payload; ``q=2`` silences the terminal's replies, which would
    otherwise corrupt the host app's output. Sending again with the same ``i``
    replaces the image, which is how static placeholder cells animate.
    """
    keys = ("a=T", "U=1", f"i={image_id}", f"c={cols}", f"r={rows}", "f=100", "q=2")
    encoded = base64.standard_b64encode(_png_bytes(frame)).decode("ascii")
    return _kitty_apc(",".join(keys), encoded)
|
||||
|
||||
|
||||
def _encode_iterm(frame, *, cell_cols: int | None = None, cell_rows: int | None = None) -> str:
    """Encode one frame as an iTerm2 inline image (``OSC 1337 ; File=…``).

    Args:
        frame: Image to encode; it is serialized to PNG.
        cell_cols: Optional display width in character cells (``width=``).
        cell_rows: Optional display height in character cells (``height=``).

    Returns:
        The complete escape sequence, terminated with BEL.
    """
    png = _png_bytes(frame)
    payload = base64.standard_b64encode(png).decode("ascii")
    # ``size`` is the size of the file itself in bytes, not the length of its
    # base64 text (which is roughly a third larger).
    args = ["inline=1", f"size={len(png)}", "preserveAspectRatio=1"]
    if cell_cols:
        args.append(f"width={cell_cols}")
    if cell_rows:
        args.append(f"height={cell_rows}")
    return f"\x1b]1337;File={';'.join(args)}:{payload}\x07"
|
||||
|
||||
|
||||
def _encode_sixel(frame) -> str:
    """Encode one frame as a DEC sixel escape sequence.

    The frame's RGB data is quantized to at most 255 palette colors
    (median cut), then written as 6-pixel-tall bands with one pass per palette
    color. Pixels whose alpha is 32 or lower are never set in any pass, so the
    background shows through. Pillow has no sixel writer, hence this compact
    hand-written encoder.
    """
    from PIL import Image

    indexed = frame.convert("RGB").quantize(colors=255, method=Image.MEDIANCUT)
    rgb_table = indexed.getpalette() or []
    color_at = indexed.load()
    alpha_at = frame.getchannel("A").load()
    width, height = indexed.size

    def channel(offset: int) -> int:
        # Palette lookup that tolerates a short palette list.
        return rgb_table[offset] if offset < len(rgb_table) else 0

    def run_length(symbols: list) -> str:
        # Collapse runs longer than three into sixel's "!<count><char>" form.
        encoded = []
        start = 0
        while start < len(symbols):
            stop = start
            while stop < len(symbols) and symbols[stop] == symbols[start]:
                stop += 1
            count = stop - start
            if count > 3:
                encoded.append("!%d%s" % (count, symbols[start]))
            else:
                encoded.append(symbols[start] * count)
            start = stop
        return "".join(encoded)

    registers = sorted({color_at[x, y] for y in range(height) for x in range(width)})

    # Introducer, then raster attributes (1:1 pixel aspect, image size).
    parts = ["\x1bP0;1;0q", '"1;1;%d;%d' % (width, height)]

    # Color register definitions; sixel expresses each channel on a 0..100 scale.
    for register in registers:
        base = register * 3
        parts.append(
            "#%d;2;%d;%d;%d"
            % (
                register,
                channel(base) * 100 // 255,
                channel(base + 1) * 100 // 255,
                channel(base + 2) * 100 // 255,
            )
        )

    # Pixel data, six rows at a time.
    for band_top in range(0, height, 6):
        band_rows = range(band_top, min(band_top + 6, height))
        for register in registers:
            symbols = []
            for x in range(width):
                mask = 0
                for y in band_rows:
                    if alpha_at[x, y] > 32 and color_at[x, y] == register:
                        mask |= 1 << (y - band_top)
                symbols.append(chr(63 + mask))
            # "$" returns to the start of the band for the next color pass.
            parts.append("#%d" % register + run_length(symbols) + "$")
        parts.append("-")  # advance to the next band

    parts.append("\x1b\\")
    return "".join(parts)
|
||||
|
||||
|
||||
# Upper-half block: its foreground paints the top pixel of a cell and its
# background the bottom pixel.
_HALF_BLOCK = "▀"

# One half-block cell: (top pixel, bottom pixel), each an (r, g, b, a) tuple.
Cell = tuple[
    tuple[int, int, int, int],
    tuple[int, int, int, int],
]
|
||||
|
||||
|
||||
def _downscale_cells(frame, *, target_cols: int) -> list[list[Cell]]:
    """Shrink *frame* into a grid of half-block cells.

    The frame is resampled to ``target_cols`` columns (at least 4) and an even
    number of pixel rows (at least 2) that follows its aspect ratio. Every grid
    row then pairs two pixel rows: each cell is
    ``((tr, tg, tb, ta), (br, bg, bb, ba))``. This framework-neutral shape is
    shared by the ANSI encoder (CLI) and the structured ``cells`` API (Ink).
    """
    from PIL import Image

    columns = max(4, target_cols)
    ratio = frame.height / max(1, frame.width)
    pixel_rows = max(2, int(round(columns * ratio * 0.5)) * 2)
    shrunk = frame.resize((columns, pixel_rows), Image.LANCZOS).convert("RGBA")
    pixel = shrunk.load()
    clear = (0, 0, 0, 0)

    return [
        [
            (pixel[x, y], pixel[x, y + 1] if y + 1 < pixel_rows else clear)
            for x in range(columns)
        ]
        for y in range(0, pixel_rows, 2)
    ]
|
||||
|
||||
|
||||
def _encode_unicode(frame, *, target_cols: int) -> str:
    """Render *frame* as truecolor ANSI half-blocks (one character = 2 vertical pixels).

    Args:
        frame: Image to render.
        target_cols: Requested width in terminal columns, passed through to
            :func:`_downscale_cells`.

    Returns:
        Newline-joined text lines, each ending with an SGR reset. A pixel with
        alpha below 32 counts as transparent and is left as the terminal's own
        background rather than painted.
    """
    lower_half = "\u2584"  # "▄": the foreground paints only the bottom pixel
    lines: list[str] = []
    for row in _downscale_cells(frame, target_cols=target_cols):
        cells: list[str] = []
        for (tr, tg, tb, ta), (br, bg, bb, ba) in row:
            top_clear = ta < 32
            bottom_clear = ba < 32
            if top_clear and bottom_clear:
                cells.append("\x1b[0m ")  # fully transparent → blank
            elif bottom_clear:
                # Only the top pixel is visible. Reset the background (SGR 49)
                # so the transparent half is not filled with that pixel's RGB.
                cells.append(f"\x1b[38;2;{tr};{tg};{tb}m\x1b[49m{_HALF_BLOCK}")
            elif top_clear:
                # Only the bottom pixel is visible: draw it as a lower half
                # block over the default background.
                cells.append(f"\x1b[38;2;{br};{bg};{bb}m\x1b[49m{lower_half}")
            else:
                cells.append(f"\x1b[38;2;{tr};{tg};{tb}m\x1b[48;2;{br};{bg};{bb}m{_HALF_BLOCK}")
        lines.append("".join(cells) + "\x1b[0m")
    return "\n".join(lines)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# Public renderer
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class PetRenderer:
    """Turns one pet's spritesheet into encoded frames, addressed by (state, index).

    Build one instance per pet and call :meth:`frame` from an animation timer.
    Repeated calls are cheap because decoded frames come from the cached
    module-level helpers.
    """

    def __init__(
        self,
        spritesheet: str | Path,
        *,
        mode: str = "unicode",
        scale: float = DEFAULT_SCALE,
        unicode_cols: int = 20,
        frame_w: int = FRAME_W,
        frame_h: int = FRAME_H,
        frames_per_state: int = FRAMES_PER_STATE,
    ) -> None:
        # Sheet geometry.
        self.frame_w = frame_w
        self.frame_h = frame_h
        self.frames_per_state = frames_per_state
        # Output settings; an unrecognized mode falls back to "unicode".
        self.mode = mode if mode in RENDER_MODES else "unicode"
        self.scale = scale
        self.unicode_cols = unicode_cols
        # Stored as a string so it can serve as a cache key.
        self.spritesheet = str(spritesheet)

    @property
    def available(self) -> bool:
        """True when rendering is enabled and the spritesheet file exists."""
        if self.mode == "off":
            return False
        return Path(self.spritesheet).is_file()

    def frame_count(self, state: PetState | str) -> int:
        """Number of real (padding-trimmed) frames available for *state*."""
        return len(self._frames(state))

    def _frames(self, state: PetState | str):
        """Fetch the scaled frames for *state* from the cached helper."""
        key = state.value if isinstance(state, PetState) else str(state)
        scaled = tuple(
            max(1, int(side * self.scale)) for side in (self.frame_w, self.frame_h)
        )
        return _frames_for(
            self.spritesheet,
            key,
            self.frame_w,
            self.frame_h,
            self.frames_per_state,
            *scaled,
        )

    def cells(self, state: PetState | str, index: int, *, cols: int | None = None) -> list[list[Cell]]:
        """Return one frame as a half-block cell grid (framework-neutral).

        The TUI draws this grid with native Ink color props instead of raw
        ANSI. ``index`` wraps around the frame count; ``cols`` defaults to
        ``unicode_cols``. Returns ``[]`` when the state has no frames.
        """
        available = self._frames(state)
        if available:
            picked = available[index % len(available)]
            return _downscale_cells(picked, target_cols=cols or self.unicode_cols)
        return []

    def _cell_box(self, frame) -> tuple[int, int]:
        """Terminal cell box ``(cols, rows)`` for a scaled frame (~8×16 px per cell).

        Must agree with the sizing :meth:`frame` uses: kitty stretches the
        image to fill ``c``×``r`` cells, so the box has to follow the scaled
        pixel dimensions rather than a native-aspect column count (which would
        upscale small pets).
        """
        box_cols = max(1, frame.width // 8)
        box_rows = max(1, frame.height // 16)
        return box_cols, box_rows

    def kitty_payload(self, state: PetState | str, *, image_id: int) -> dict | None:
        """Build the kitty Unicode-placeholder payload for one state.

        Returns ``{cols, rows, placeholder, frames}``: ``frames`` holds one
        transmit escape per animation frame (all sharing ``image_id``) and
        ``placeholder`` is the static text grid Ink paints. The geometry comes
        from the first scaled frame via :meth:`_cell_box`, not from
        ``unicode_cols``. Returns ``None`` when the state has no frames.
        """
        available = self._frames(state)
        if not available:
            return None
        box_cols, box_rows = self._cell_box(available[0])
        transmits = [
            _encode_kitty_virtual(item, image_id=image_id, cols=box_cols, rows=box_rows)
            for item in available
        ]
        return {
            "cols": box_cols,
            "rows": box_rows,
            "placeholder": kitty_placeholder_rows(box_cols, box_rows),
            "frames": transmits,
        }

    def frame(self, state: PetState | str, index: int) -> str:
        """Return the encoded escape string for one frame, or ``""``.

        ``index`` wraps around the available frame count, so callers may pass a
        free-running counter. An empty string is returned when the mode is
        ``off``, when the state has no frames, or when encoding fails.
        """
        if self.mode == "off":
            return ""
        available = self._frames(state)
        if not available:
            return ""
        picked = available[index % len(available)]
        box_cols, box_rows = self._cell_box(picked)

        try:
            if self.mode == "kitty":
                encoded = _encode_kitty(picked, cell_cols=box_cols, cell_rows=box_rows)
            elif self.mode == "iterm":
                encoded = _encode_iterm(picked, cell_cols=box_cols, cell_rows=box_rows)
            elif self.mode == "sixel":
                encoded = _encode_sixel(picked)
            else:
                encoded = _encode_unicode(picked, target_cols=self.unicode_cols)
            return encoded
        except Exception as exc:  # noqa: BLE001 - degrade silently
            logger.debug("pet frame encode failed (mode=%s): %s", self.mode, exc)
            return ""
|
||||
|
||||
|
||||
def build_renderer(
    spritesheet: str | Path,
    *,
    configured_mode: str | None = None,
    scale: float = DEFAULT_SCALE,
    unicode_cols: int = 20,
    stream=None,
) -> PetRenderer:
    """Create a :class:`PetRenderer` whose mode is resolved from config and environment.

    ``configured_mode`` and ``stream`` are handed to :func:`resolve_mode`; the
    remaining arguments go straight to the renderer.
    """
    effective_mode = resolve_mode(configured_mode, stream=stream)
    renderer = PetRenderer(
        spritesheet,
        mode=effective_mode,
        scale=scale,
        unicode_cols=unicode_cols,
    )
    return renderer
|
||||
81
agent/pet/state.py
Normal file
81
agent/pet/state.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""Map agent activity → a :class:`PetState`.
|
||||
|
||||
This is the one place the "what is the agent doing right now?" → "which
|
||||
animation row?" decision lives. Each surface feeds it the signals it already
|
||||
tracks:
|
||||
|
||||
- CLI — ``KawaiiSpinner`` waiting/thinking state + tool outcomes.
|
||||
- TUI — gateway ``tool.start/complete`` + ``message.delta/complete`` events.
|
||||
- Desktop — the ``$busy``/``$awaitingResponse``/tool-event nanostores
|
||||
(re-implemented in TS, but mirroring this priority order).
|
||||
|
||||
Keeping the priority order here (and documenting it) lets the TypeScript
|
||||
mirror stay faithful without a second design.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable
|
||||
from typing import Any
|
||||
|
||||
from agent.pet.constants import PetState
|
||||
|
||||
|
||||
def todos_all_done(todos: Iterable[Any] | None) -> bool:
    """Return True only when at least one todo exists and all are finished.

    A todo is finished when its status is ``"completed"`` or ``"cancelled"``.
    Items may be dicts (``{"status": ...}``) or objects exposing a ``status``
    attribute; an item without one counts as unfinished. This is the trigger
    for the "celebrate" beat (``JUMP``) and mirrors the TUI's ``isTodoDone``.
    """
    entries = list(todos or [])
    if not entries:
        return False

    finished = ("completed", "cancelled")
    for entry in entries:
        if isinstance(entry, dict):
            status = entry.get("status")
        else:
            status = getattr(entry, "status", None)
        if status not in finished:
            return False
    return True
|
||||
|
||||
|
||||
def derive_pet_state(
    *,
    busy: bool = False,
    awaiting_input: bool = False,
    error: bool = False,
    celebrate: bool = False,
    just_completed: bool = False,
    tool_running: bool = False,
    reasoning: bool = False,
) -> PetState:
    """Pick the animation state for the current activity signals.

    Only one row can play at a time, so the first truthy signal in this order
    wins:

    1. ``error``          → ``FAILED``  (a tool/turn just failed)
    2. ``celebrate``      → ``JUMP``    (explicit success beat, e.g. todos done)
    3. ``just_completed`` → ``WAVE``    (turn finished cleanly / greeting)
    4. ``awaiting_input`` → ``WAITING`` (a clarify/approval prompt is open; it
       outranks the in-flight signals below because the turn is paused on the
       user even though a tool is technically mid-call)
    5. ``tool_running``   → ``RUN``     (a tool is executing)
    6. ``reasoning``      → ``REVIEW``  (model is thinking / reading)
    7. ``busy``           → ``RUN``     (turn in flight, unspecified work)
    8. nothing set        → ``IDLE``
    """
    # Ordered (signal, state) pairs: the order of this table IS the priority.
    ladder = (
        (error, PetState.FAILED),
        (celebrate, PetState.JUMP),
        (just_completed, PetState.WAVE),
        (awaiting_input, PetState.WAITING),
        (tool_running, PetState.RUN),
        (reasoning, PetState.REVIEW),
        (busy, PetState.RUN),
    )
    for active, state in ladder:
        if active:
            return state
    return PetState.IDLE
|
||||
503
agent/pet/store.py
Normal file
503
agent/pet/store.py
Normal file
@@ -0,0 +1,503 @@
|
||||
"""On-disk pet store — install / list / resolve pets.
|
||||
|
||||
Pets live under ``get_hermes_home()/pets/<slug>/`` so every profile gets its
|
||||
own set (we deliberately do **not** reuse petdex's ``~/.codex/pets`` default —
|
||||
that's owned by the petdex npm CLI and isn't profile-aware). Each installed
|
||||
pet directory holds:
|
||||
|
||||
pets/<slug>/
|
||||
pet.json # {id, displayName, description, spritesheetPath}
|
||||
spritesheet.webp # (or .png)
|
||||
|
||||
The active pet is resolved from the caller-supplied ``display.pet.slug`` config
|
||||
value (falling back to the first installed pet), so this module stays free of
|
||||
the config loader.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
logger = logging.getLogger(__name__)

# NOTE(review): presumably the download timeout in seconds; the code that uses
# it is not visible in this part of the file — confirm.
_DOWNLOAD_TIMEOUT: float = 60.0
|
||||
|
||||
|
||||
class PetStoreError(RuntimeError):
    """Signals an install or disk I/O failure in the pet store."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class InstalledPet:
    """Immutable description of a pet that lives on disk."""

    slug: str
    display_name: str
    description: str
    directory: Path
    spritesheet: Path
    # "generator" for pets hatched locally; "" for petdex installs.
    created_by: str = ""

    @property
    def exists(self) -> bool:
        """True when the spritesheet path points at a regular file."""
        sheet = self.spritesheet
        return sheet.is_file()

    @property
    def generated(self) -> bool:
        """True when ``created_by`` is ``"generator"``."""
        origin = self.created_by
        return origin == "generator"
|
||||
|
||||
|
||||
def pets_dir() -> Path:
    """Return ``get_hermes_home()/pets``, creating it (and parents) if missing."""
    root = get_hermes_home() / "pets"
    root.mkdir(parents=True, exist_ok=True)
    return root
|
||||
|
||||
|
||||
def _read_pet_json(directory: Path) -> dict:
|
||||
pet_json = directory / "pet.json"
|
||||
if not pet_json.is_file():
|
||||
return {}
|
||||
try:
|
||||
return json.loads(pet_json.read_text(encoding="utf-8"))
|
||||
except (OSError, ValueError) as exc:
|
||||
logger.debug("unreadable pet.json in %s: %s", directory, exc)
|
||||
return {}
|
||||
|
||||
|
||||
def _resolve_spritesheet(directory: Path, meta: dict) -> Path:
|
||||
"""Find the spritesheet for a pet dir.
|
||||
|
||||
Honors ``spritesheetPath`` from pet.json, else probes the conventional
|
||||
filenames (``spritesheet.{webp,png}`` and petdex R2's ``sprite.webp``).
|
||||
"""
|
||||
declared = str(meta.get("spritesheetPath", "") or "").strip()
|
||||
if declared:
|
||||
candidate = directory / declared
|
||||
if candidate.is_file():
|
||||
return candidate
|
||||
for name in ("spritesheet.webp", "spritesheet.png", "sprite.webp", "sprite.png"):
|
||||
candidate = directory / name
|
||||
if candidate.is_file():
|
||||
return candidate
|
||||
# Default expectation even if missing, so callers get a stable path.
|
||||
return directory / "spritesheet.webp"
|
||||
|
||||
|
||||
def _safe_slug(slug: str) -> str:
|
||||
"""Normalize a slug to a single bare path segment.
|
||||
|
||||
Pet slugs index into ``pets_dir()/<slug>/`` for load/remove, so a value
|
||||
carrying path separators (``../``, absolute paths) could escape the pets
|
||||
directory. Strip every separator and reject ``.``/``..`` so callers can
|
||||
only ever name a direct child of the pets directory.
|
||||
"""
|
||||
segment = Path(str(slug).strip()).name
|
||||
if segment in ("", ".", ".."):
|
||||
return ""
|
||||
return segment
|
||||
|
||||
|
||||
def load_pet(slug: str) -> InstalledPet | None:
    """Build the :class:`InstalledPet` for *slug*.

    Returns ``None`` when the slug is unusable after sanitising or when no
    directory of that name exists under the pets directory. The returned pet
    may still lack a spritesheet; check ``.exists``.
    """
    safe = _safe_slug(slug)
    if not safe:
        return None
    pet_dir = pets_dir() / safe
    if not pet_dir.is_dir():
        return None

    meta = _read_pet_json(pet_dir)
    # Missing/empty metadata falls back to the slug for the display name.
    name = str(meta.get("displayName", "") or safe)
    blurb = str(meta.get("description", "") or "")
    sheet = _resolve_spritesheet(pet_dir, meta)
    origin = str(meta.get("createdBy", "") or "")
    return InstalledPet(
        slug=safe,
        display_name=name,
        description=blurb,
        directory=pet_dir,
        spritesheet=sheet,
        created_by=origin,
    )
|
||||
|
||||
|
||||
def installed_pets() -> list[InstalledPet]:
    """Return every installed pet (dirs containing a usable spritesheet).

    Results are ordered by directory path. Dot-prefixed directories are
    skipped: ``.thumbs`` is the thumbnail cache, and it holds
    ``<slug>.png`` files, so a pet named ``sprite`` or ``spritesheet`` would
    otherwise make the cache itself look like a pet with a valid spritesheet.
    """
    out: list[InstalledPet] = []
    for child in sorted(pets_dir().iterdir()):
        if child.name.startswith(".") or not child.is_dir():
            continue
        pet = load_pet(child.name)
        if pet is not None and pet.exists:
            out.append(pet)
    return out
|
||||
|
||||
|
||||
def resolve_active_pet(configured_slug: str | None = None) -> InstalledPet | None:
    """Pick the pet to display.

    The configured slug (``display.pet.slug``) wins when it is installed with
    a spritesheet on disk. Otherwise the first entry of
    :func:`installed_pets` is used, or ``None`` when nothing is installed.
    """
    if configured_slug:
        preferred = load_pet(configured_slug.strip())
        if preferred is not None and preferred.exists:
            return preferred
    return next(iter(installed_pets()), None)
|
||||
|
||||
|
||||
def install_pet(slug: str, *, force: bool = False, timeout: float = _DOWNLOAD_TIMEOUT) -> InstalledPet:
    """Fetch the pet named *slug* from the petdex manifest into the pets directory.

    A pet that already has a spritesheet on disk is returned untouched unless
    *force* is true, so repeated calls are idempotent.

    Raises :class:`PetStoreError` for an invalid slug, a slug missing from the
    manifest, a spritesheet URL on a non-petdex host, a failed download, or
    when no spritesheet exists afterwards. Errors from
    :func:`agent.pet.manifest.find_entry` (``ManifestError``) propagate.
    """
    from agent.pet.manifest import find_entry

    slug = _safe_slug(slug)
    if not slug:
        raise PetStoreError("invalid pet slug")

    current = load_pet(slug)
    already_installed = current is not None and current.exists
    if already_installed and not force:
        return current

    entry = find_entry(slug, timeout=timeout)
    if entry is None:
        raise PetStoreError(f"pet '{slug}' is not in the petdex manifest")

    # The manifest itself is trusted, but asset hosts are pinned as well so a
    # compromised or spoofed manifest cannot point the download elsewhere.
    if not _is_petdex_host(entry.spritesheet_url):
        raise PetStoreError(f"refusing non-petdex spritesheet host for '{slug}'")

    pet_dir = pets_dir() / slug
    pet_dir.mkdir(parents=True, exist_ok=True)

    # Extension is taken from the URL path with any query string removed.
    url_without_query = entry.spritesheet_url.lower().split("?")[0]
    sprite_ext = ".png" if url_without_query.endswith(".png") else ".webp"
    sprite_path = pet_dir / f"spritesheet{sprite_ext}"

    _download(entry.spritesheet_url, sprite_path, timeout=timeout)

    # Prefer the upstream pet.json; a fetch failure is non-fatal and falls
    # back to a minimal synthesized document below.
    meta: dict = {}
    if entry.pet_json_url and _is_petdex_host(entry.pet_json_url):
        try:
            meta = _download_json(entry.pet_json_url, timeout=timeout)
        except Exception as exc:  # noqa: BLE001 - non-fatal, fall back below
            logger.debug("pet.json fetch failed for %s: %s", slug, exc)
    if not isinstance(meta, dict) or not meta:
        meta = {"id": slug, "displayName": entry.display_name, "description": ""}

    # Always point at the file just written; only fill id/displayName if absent.
    meta["spritesheetPath"] = sprite_path.name
    meta.setdefault("id", slug)
    meta.setdefault("displayName", entry.display_name)
    serialized = json.dumps(meta, indent=2)
    (pet_dir / "pet.json").write_text(serialized, encoding="utf-8")

    installed = load_pet(slug)
    if installed is None or not installed.exists:
        raise PetStoreError(f"install of '{slug}' did not produce a spritesheet")
    return installed
|
||||
|
||||
|
||||
def slugify(name: str) -> str:
    """Turn a display name into a lowercase, hyphen-separated filesystem slug.

    Every run of characters outside ``a-z0-9`` becomes a single ``-``; leading
    and trailing hyphens are dropped. Falls back to ``"pet"`` when nothing
    usable remains (including ``None`` or blank input).
    """
    lowered = (name or "").strip().lower()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    trimmed = hyphenated.strip("-")
    return trimmed if trimmed else "pet"
|
||||
|
||||
|
||||
def unique_slug(name: str) -> str:
    """Return a :func:`slugify` result that names no existing entry in the pets dir.

    Tries the plain slug first, then ``<slug>-2``, ``<slug>-3``, ... until a
    free name is found.
    """
    base = slugify(name)
    attempt = 1
    while True:
        candidate = base if attempt == 1 else f"{base}-{attempt}"
        if not (pets_dir() / candidate).exists():
            return candidate
        attempt += 1
|
||||
|
||||
|
||||
def _write_spritesheet(source, dest: Path) -> None:
|
||||
"""Write *source* (PIL image, bytes, or path) as a lossless WebP at *dest*."""
|
||||
if isinstance(source, (bytes, bytearray)):
|
||||
dest.write_bytes(bytes(source))
|
||||
return
|
||||
|
||||
from PIL import Image
|
||||
|
||||
if isinstance(source, (str, Path)):
|
||||
with Image.open(source) as opened:
|
||||
image = opened.convert("RGBA")
|
||||
else:
|
||||
image = source.convert("RGBA")
|
||||
image.save(dest, format="WEBP", lossless=True, quality=100, method=6, exact=True)
|
||||
|
||||
|
||||
def register_local_pet(
    spritesheet,
    *,
    slug: str,
    display_name: str = "",
    description: str = "",
) -> InstalledPet:
    """Store a locally generated pet on disk and return its record.

    *spritesheet* may be a PIL image, raw WebP/PNG bytes, or a path. *slug* is
    normalised with :func:`slugify` before use. The pet is written with
    ``createdBy: "generator"`` and shows up in :func:`installed_pets` right
    away. Because :func:`install_pet` returns an on-disk pet before consulting
    the manifest, it can be adopted (``pet.select`` / ``/pet <slug>``) without
    a manifest entry.

    Raises :class:`PetStoreError` when the spritesheet cannot be written or is
    not present afterwards.
    """
    slug = slugify(slug)
    pet_dir = pets_dir() / slug
    pet_dir.mkdir(parents=True, exist_ok=True)
    sprite_path = pet_dir / "spritesheet.webp"

    try:
        _write_spritesheet(spritesheet, sprite_path)
    except Exception as exc:  # noqa: BLE001 - normalize to one error type
        raise PetStoreError(f"could not write spritesheet for '{slug}': {exc}") from exc

    # Key order matters: it is the order the fields appear in pet.json.
    manifest = dict(
        id=slug,
        displayName=display_name or slug,
        description=description or "",
        spritesheetPath=sprite_path.name,
        createdBy="generator",
    )
    (pet_dir / "pet.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")

    registered = load_pet(slug)
    if registered is None or not registered.exists:
        raise PetStoreError(f"register of generated pet '{slug}' did not produce a spritesheet")
    return registered
|
||||
|
||||
|
||||
def export_pet(slug: str) -> tuple[str, bytes]:
    """Zip an installed pet's folder and return ``(filename, zip_bytes)``.

    Only regular, non-dot files directly inside the pet directory are
    archived (under ``<dir name>/``), so cached thumbs and backups stay out of
    the package. Raises :class:`PetStoreError` if the pet is not installed.
    """
    import io
    import zipfile

    root = pets_dir()
    directory = root / slug.strip()
    # Traversal guard: after resolving, the target must be a direct child of
    # the pets directory, and it must actually be a directory.
    is_direct_child = directory.resolve().parent == root.resolve()
    if not (is_direct_child and directory.is_dir()):
        raise PetStoreError(f"pet '{slug}' is not installed")

    folder = directory.name
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
        for member in sorted(directory.iterdir()):
            if member.name.startswith(".") or not member.is_file():
                continue
            archive.write(member, f"{folder}/{member.name}")
    return f"{folder}.zip", buffer.getvalue()
|
||||
|
||||
|
||||
_THUMB_FRAME_W = 192
|
||||
_THUMB_FRAME_H = 208
|
||||
_THUMB_W = 96 # rendered ~40px; 2x+ keeps it crisp on HiDPI
|
||||
|
||||
|
||||
def _thumbs_dir() -> Path:
    """Return ``pets_dir()/.thumbs`` (the thumbnail cache), creating it if needed."""
    cache_root = pets_dir() / ".thumbs"
    cache_root.mkdir(parents=True, exist_ok=True)
    return cache_root
|
||||
|
||||
|
||||
def _is_petdex_host(url: str) -> bool:
|
||||
"""True only for petdex.dev hosts — bounds server-side fetch (anti-SSRF)."""
|
||||
from urllib.parse import urlparse
|
||||
|
||||
try:
|
||||
host = (urlparse(url).hostname or "").lower()
|
||||
except ValueError:
|
||||
return False
|
||||
return host == "petdex.dev" or host.endswith(".petdex.dev")
|
||||
|
||||
|
||||
def thumbnail_png(slug: str, *, source_url: str = "", timeout: float = 30.0) -> bytes | None:
    """Return a small idle-frame PNG for *slug*, cached on disk.

    Crops the top-left (idle, frame 0) cell of the spritesheet and downsamples
    it to a thumbnail. Source preference: an installed spritesheet on disk, else
    *source_url* — but only when it points at petdex (so the gateway never
    fetches an arbitrary client-supplied URL). Returns ``None`` when there's no
    usable source or Pillow/network fails; callers render a placeholder.

    *slug* is reduced to a bare path segment before it is used to build the
    cache filename. Without that, a value such as ``../../x`` would let the
    cache lookup read — and the cache write create — ``x.png`` outside the
    thumbnail directory.

    Doing this server-side sidesteps the renderer's CSP / R2 hotlink limits that
    break a direct ``<img src=cdn>`` and lets the result ride the authenticated
    gateway as a same-origin data URL.
    """
    slug = _safe_slug(slug)
    if not slug:
        return None

    cache = _thumbs_dir() / f"{slug}.png"
    if cache.is_file():
        try:
            return cache.read_bytes()
        except OSError:
            # Unreadable cache entry: fall through and regenerate it.
            pass

    sheet_bytes: bytes | None = None
    pet = load_pet(slug)
    if pet is not None and pet.exists:
        try:
            sheet_bytes = pet.spritesheet.read_bytes()
        except OSError:
            sheet_bytes = None

    if sheet_bytes is None and source_url and _is_petdex_host(source_url):
        try:
            import httpx

            resp = httpx.get(
                source_url,
                timeout=timeout,
                follow_redirects=True,
                headers={"User-Agent": "hermes-agent-petdex"},
            )
            resp.raise_for_status()
            sheet_bytes = resp.content
        except Exception as exc:  # noqa: BLE001 - cosmetic, degrade to placeholder
            logger.debug("thumb fetch failed for %s: %s", slug, exc)

    if not sheet_bytes:
        return None

    try:
        import io

        from PIL import Image

        with Image.open(io.BytesIO(sheet_bytes)) as im:
            # Clamp the crop box so sheets smaller than one frame still work.
            frame = im.convert("RGBA").crop(
                (0, 0, min(_THUMB_FRAME_W, im.width), min(_THUMB_FRAME_H, im.height))
            )
        height = round(_THUMB_W * _THUMB_FRAME_H / _THUMB_FRAME_W)
        frame = frame.resize((_THUMB_W, height), Image.NEAREST)
        buf = io.BytesIO()
        frame.save(buf, format="PNG")
        data = buf.getvalue()
    except Exception as exc:  # noqa: BLE001
        logger.debug("thumb crop failed for %s: %s", slug, exc)
        return None

    try:
        cache.write_bytes(data)
    except OSError:
        # Caching is best-effort; the freshly rendered bytes are still returned.
        pass
    return data
|
||||
|
||||
|
||||
def remove_pet(slug: str) -> bool:
    """Delete an installed pet directory.

    Returns ``True`` only when the pet directory existed and is gone
    afterwards. The cached thumbnail is removed as a best-effort side step
    whether or not the directory exists.
    """
    import shutil

    slug = _safe_slug(slug)
    if not slug:
        return False

    # The thumbnail is stored at pets/.thumbs/<slug>.png, i.e. outside the pet
    # directory, so rmtree below would miss it. Leaving it would make a later
    # pet that reuses this slug show the old pet's thumbnail.
    try:
        stale_thumb = _thumbs_dir() / f"{slug}.png"
        stale_thumb.unlink(missing_ok=True)
    except OSError:
        pass

    target = pets_dir() / slug
    if target.is_dir():
        shutil.rmtree(target, ignore_errors=True)
        return not target.exists()
    return False
|
||||
|
||||
|
||||
def rename_pet(slug: str, display_name: str) -> str | None:
    """Set a pet's ``displayName`` and, when possible, move it to a matching slug.

    Generated pets start under a provisional, prompt-derived slug. When the
    user names the pet, that name becomes its identity: the directory is
    renamed to ``slugify(display_name)`` (and the cached thumbnail moved with
    it) whenever that slug differs from the current one and is not taken.
    Otherwise the slug stays as it is.

    Returns the slug the pet ends up under, or ``None`` when the inputs are
    empty, the pet has no ``pet.json``, or the updated ``pet.json`` cannot be
    written.
    """
    slug = _safe_slug(slug)
    display_name = (display_name or "").strip()
    if not (slug and display_name):
        return None

    directory = pets_dir() / slug
    pet_json = directory / "pet.json"
    if not pet_json.is_file():
        return None

    # Unreadable or non-object metadata is replaced with a fresh dict.
    try:
        loaded = json.loads(pet_json.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        loaded = {}
    meta = loaded if isinstance(loaded, dict) else {}
    meta["displayName"] = display_name

    final_slug = slug
    desired = slugify(display_name)
    if desired and desired != slug and not (pets_dir() / desired).exists():
        try:
            directory.rename(pets_dir() / desired)
            # Moving the thumbnail is optional; a missing one is fine.
            try:
                (_thumbs_dir() / f"{slug}.png").rename(_thumbs_dir() / f"{desired}.png")
            except OSError:
                pass
            directory = pets_dir() / desired
            pet_json = directory / "pet.json"
            final_slug = desired
            meta["id"] = final_slug
        except OSError:
            final_slug = slug  # keep the provisional slug if the move fails

    try:
        pet_json.write_text(json.dumps(meta, indent=2), encoding="utf-8")
    except OSError:
        return None
    return final_slug
|
||||
|
||||
|
||||
def _download(url: str, dest: Path, *, timeout: float) -> None:
    """Stream *url* to *dest* via a ``.part`` temp file in the same directory.

    The body is written to ``<dest name>.part`` and only moved over *dest*
    once the whole response has been received, so *dest* is never left
    half-written. On any failure the temp file is removed (best effort) — it
    is not a dotfile, so a leftover would otherwise be picked up when the pet
    directory is exported — and :class:`PetStoreError` is raised.
    """
    import httpx

    tmp = dest.parent / (dest.name + ".part")
    try:
        with httpx.stream(
            "GET",
            url,
            timeout=timeout,
            follow_redirects=True,
            headers={"User-Agent": "hermes-agent-petdex"},
        ) as resp:
            resp.raise_for_status()
            with tmp.open("wb") as fh:
                for chunk in resp.iter_bytes():
                    fh.write(chunk)
        tmp.replace(dest)
    except Exception as exc:  # noqa: BLE001
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            # Cleanup must not mask the original failure.
            pass
        raise PetStoreError(f"download failed for {url}: {exc}") from exc
|
||||
|
||||
|
||||
def _download_json(url: str, *, timeout: float) -> dict:
    """GET *url* and return its JSON body when that body is an object.

    Any other JSON value yields ``{}``. HTTP errors (via ``raise_for_status``)
    and JSON decoding errors propagate to the caller.
    """
    import httpx

    request_headers = {"User-Agent": "hermes-agent-petdex"}
    response = httpx.get(
        url,
        timeout=timeout,
        follow_redirects=True,
        headers=request_headers,
    )
    response.raise_for_status()
    payload = response.json()
    if isinstance(payload, dict):
        return payload
    return {}
|
||||
@@ -243,7 +243,10 @@ KANBAN_GUIDANCE = (
|
||||
"- **Workspace.** `cd $HERMES_KANBAN_WORKSPACE` first. For a `worktree` kind "
|
||||
"with no `.git`, `git worktree add <path> "
|
||||
"${HERMES_KANBAN_BRANCH:-wt/$HERMES_KANBAN_TASK}` from the main repo, then "
|
||||
"cd there.\n"
|
||||
"cd there. For a project-linked task the workspace is a fresh "
|
||||
"`<repo>/.worktrees/<task-id>` and `$HERMES_KANBAN_BRANCH` a deterministic "
|
||||
"`<project-slug>/<task-id>` — the main repo is two levels up, so run "
|
||||
"`git worktree add` from there.\n"
|
||||
"- **Deliverables.** Files a human wants go in "
|
||||
"`kanban_complete(artifacts=[<absolute paths>])` (top-level param; paths in "
|
||||
"`metadata` are NOT uploaded). Files must exist at completion.\n"
|
||||
@@ -709,7 +712,24 @@ PLATFORM_HINTS = {
|
||||
"(those are only intercepted on messaging platforms like Telegram, "
|
||||
"Discord, Slack, etc.; on the CLI they render as literal text). "
|
||||
"When referring to a file you created or changed, just state its "
|
||||
"absolute path in plain text; the user can open it from there."
|
||||
"absolute path in plain text; the user can open it from there. "
|
||||
"Cron jobs scheduled from this session are LOCAL-ONLY: their output is "
|
||||
"saved (viewable via cronjob action='list') but is NOT delivered back "
|
||||
"into this terminal — there is no live-delivery channel here. If the "
|
||||
"user wants to be notified when a job runs, the job's `deliver` must "
|
||||
"target a gateway-connected messaging platform (e.g. deliver='telegram' "
|
||||
"or 'all'). Do not promise the user that a deliver='origin' or "
|
||||
"default-deliver cron job will message them in this session."
|
||||
),
|
||||
"tui": (
|
||||
"You are running in the Hermes terminal UI (TUI). "
|
||||
"Cron jobs scheduled from this session are LOCAL-ONLY: their output is "
|
||||
"saved (viewable via cronjob action='list') but is NOT delivered back "
|
||||
"into this TUI session — there is no live-delivery channel here. If the "
|
||||
"user wants to be notified when a job runs, the job's `deliver` must "
|
||||
"target a gateway-connected messaging platform (e.g. deliver='telegram' "
|
||||
"or 'all'). Do not promise the user that a deliver='origin' or "
|
||||
"default-deliver cron job will message them in this session."
|
||||
),
|
||||
"sms": (
|
||||
"You are communicating via SMS. Keep responses concise and use plain text "
|
||||
|
||||
@@ -8,6 +8,7 @@ rate-limited provider concurrently.
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
# Monotonic counter for jitter seed uniqueness within the same process.
|
||||
# Protected by a lock to avoid race conditions in concurrent retry paths
|
||||
@@ -15,6 +16,14 @@ import time
|
||||
_jitter_counter = 0
|
||||
_jitter_lock = threading.Lock()
|
||||
|
||||
# Z.AI Coding Plan's GLM-5.2 endpoint often returns HTTP 429 code 1305
|
||||
# ("The service may be temporarily overloaded...") for otherwise valid
|
||||
# Hermes requests. Short retries tend to hammer the same overloaded window;
|
||||
# after a few normal retries, progressively widen the wait window. Keep the
|
||||
# cap interactive-friendly: a simple TUI message should fail visibly in minutes,
|
||||
# not sit silent for 20+ minutes.
|
||||
_ZAI_CODING_OVERLOAD_LONG_BACKOFF = (30.0, 60.0, 90.0, 120.0)
|
||||
|
||||
|
||||
def jittered_backoff(
|
||||
attempt: int,
|
||||
@@ -55,3 +64,66 @@ def jittered_backoff(
|
||||
jitter = rng.uniform(0, jitter_ratio * delay)
|
||||
|
||||
return delay + jitter
|
||||
|
||||
|
||||
def _error_text(error: Any) -> str:
|
||||
"""Best-effort flattened provider error text for retry classification."""
|
||||
parts = [
|
||||
error,
|
||||
getattr(error, "message", None),
|
||||
getattr(error, "body", None),
|
||||
getattr(error, "response", None),
|
||||
]
|
||||
return " ".join(str(part) for part in parts if part is not None).lower()
|
||||
|
||||
|
||||
def is_zai_coding_overload_error(*, base_url: str | None, model: str | None, error: Any) -> bool:
    """Detect the Z.AI Coding Plan transient-overload 429.

    All four conditions must hold: ``error.status_code`` is 429, *base_url*
    contains the coding-plan endpoint path, *model* contains ``glm-5.2``, and
    the flattened error text contains ``1305`` or ``temporarily overloaded``.
    Matching is case-insensitive. The shape is kept this narrow so ordinary
    quota/billing 429s still fail fast through the existing classifier.
    """
    endpoint = (base_url or "").lower()
    model_id = (model or "").lower()
    http_status = getattr(error, "status_code", None)
    flattened = _error_text(error)

    looks_overloaded = "1305" in flattened or "temporarily overloaded" in flattened
    checks = (
        http_status == 429,
        "api.z.ai/api/coding/paas/v4" in endpoint,
        "glm-5.2" in model_id,
        looks_overloaded,
    )
    return all(checks)
|
||||
|
||||
|
||||
def adaptive_rate_limit_backoff(
    attempt: int,
    *,
    base_url: str | None,
    model: str | None,
    error: Any,
    default_wait: float,
    short_attempts: int = 3,
) -> tuple[float, str | None]:
    """Choose a rate-limit wait, with a special case for Z.AI overloads.

    Returns ``(wait_seconds, reason_label)``. For anything that is not a Z.AI
    Coding Plan GLM-5.2 overload, that is ``(default_wait, None)``. For such
    an overload, the first ``short_attempts`` attempts keep ``default_wait``
    (label ``zai_coding_overload_short``); later attempts step through
    ``_ZAI_CODING_OVERLOAD_LONG_BACKOFF``, staying on its last entry once it is
    exhausted, with jitter added (label ``zai_coding_overload_long``).

    ``attempt`` is 1-based, matching the retry loop's logged attempt number.
    """
    overloaded = is_zai_coding_overload_error(base_url=base_url, model=model, error=error)
    if not overloaded:
        return default_wait, None
    if attempt <= short_attempts:
        return default_wait, "zai_coding_overload_short"

    # First long attempt maps to index 0; clamp so later ones reuse the cap.
    step = attempt - short_attempts - 1
    last_step = len(_ZAI_CODING_OVERLOAD_LONG_BACKOFF) - 1
    base_delay = _ZAI_CODING_OVERLOAD_LONG_BACKOFF[min(step, last_step)]
    # A modest jitter ratio keeps long waits readable while still spreading
    # out retries from concurrent sessions.
    wait = jittered_backoff(1, base_delay=base_delay, max_delay=base_delay, jitter_ratio=0.2)
    return wait, "zai_coding_overload_long"
|
||||
|
||||
30
agent/telemetry/__init__.py
Normal file
30
agent/telemetry/__init__.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""Hermes telemetry & observability.
|
||||
|
||||
Local-first observability, on by default. The ``telemetry`` plugin registers Hermes
|
||||
lifecycle hooks and hands typed events to the fire-and-forget ``emitter`` (queue ->
|
||||
background writer -> JSONL + state.db ``tel_*`` index). The emitter never blocks or
|
||||
raises into a model/tool call (the hot-path invariant).
|
||||
|
||||
Events record the observed model ids, provider names, and tool names. ``metrics``
|
||||
derives rollups for /usage and /insights; ``rollup`` builds the per-run summaries shown
|
||||
by ``hermes telemetry preview``. ``redaction`` + ``exporter_bulk`` + ``otlp_exporter``
|
||||
handle export to an operator-chosen destination. ``policy`` holds the consent
|
||||
constants and the aggregate upload gate (no uploader ships).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from . import emitter, events, metrics, policy, spans
|
||||
|
||||
emit = emitter.emit
|
||||
get_emitter = emitter.get_emitter
|
||||
|
||||
__all__ = [
|
||||
"emitter",
|
||||
"events",
|
||||
"metrics",
|
||||
"policy",
|
||||
"spans",
|
||||
"emit",
|
||||
"get_emitter",
|
||||
]
|
||||
317
agent/telemetry/emitter.py
Normal file
317
agent/telemetry/emitter.py
Normal file
@@ -0,0 +1,317 @@
|
||||
"""Local telemetry emitter: fire-and-forget queue + background writer.
|
||||
|
||||
The emitter is the single seam between instrumentation (the telemetry plugin's hook
|
||||
callbacks) and durable storage. Its contract is the hot-path invariant:
|
||||
|
||||
``emit()`` MUST return in O(microseconds), MUST NOT block on disk/network, and
|
||||
MUST NEVER raise into the caller. A telemetry failure is logged locally and
|
||||
dropped — it can never affect a model call, a tool call, or a session.
|
||||
|
||||
Mechanism:
|
||||
* ``emit(event)`` does a non-blocking ``queue.put_nowait`` wrapped in a catch-all ``except Exception``.
|
||||
On a full queue it drops the *oldest* event and counts the drop.
|
||||
* A daemon thread drains the queue and writes each event to two places:
|
||||
1. the append-only JSONL log (source of truth)
|
||||
2. the ``tel_*`` SQLite tables in state.db (rebuildable index)
|
||||
* The writer uses its own sqlite connection to state.db, separate from SessionDB,
|
||||
so telemetry writes never contend with or corrupt session writes.
|
||||
|
||||
Local telemetry only. Nothing here uploads anywhere.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import queue
|
||||
import sqlite3
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_MAX_QUEUE = 10_000 # ring-buffer depth; oldest dropped when full
|
||||
_DRAIN_BATCH = 256
|
||||
|
||||
|
||||
def _default_dir() -> Path:
    """Return the ``telemetry`` directory under the active HERMES_HOME."""
    # Imported at call time, as in the rest of this module's path helpers.
    from hermes_constants import get_hermes_home

    home = get_hermes_home()
    return home / "telemetry"
|
||||
|
||||
|
||||
def _default_db_path() -> Path:
    """Return the path of ``state.db`` under the active HERMES_HOME."""
    # Imported at call time, as in the rest of this module's path helpers.
    from hermes_constants import get_hermes_home

    home = get_hermes_home()
    return home / "state.db"
|
||||
|
||||
|
||||
# Map a telemetry event dict (its "event" tag) to (table, column-ordered insert).
|
||||
# Only the columns the indexer knows about are written; unknown keys are ignored,
|
||||
# so an event carrying extra fields never breaks the insert.
|
||||
_TABLE_COLUMNS: Dict[str, tuple] = {
|
||||
"run": (
|
||||
"tel_runs",
|
||||
("run_id", "trace_id", "session_id", "profile_id", "entrypoint",
|
||||
"platform", "start_ns", "end_ns", "end_reason",
|
||||
"model_call_count", "tool_call_count", "error_count",
|
||||
"estimated_cost_usd", "cost_status"),
|
||||
),
|
||||
"model_call": (
|
||||
"tel_model_calls",
|
||||
("span_id", "run_id", "provider", "model", "base_url",
|
||||
"input_tokens", "output_tokens", "cache_read_tokens",
|
||||
"cache_write_tokens", "reasoning_tokens", "latency_ms", "ttft_ms",
|
||||
"estimated_cost_usd", "cost_status", "cost_source", "end_reason",
|
||||
"retry_count"),
|
||||
),
|
||||
"tool_call": (
|
||||
"tel_tool_calls",
|
||||
("span_id", "run_id", "tool_name", "backend",
|
||||
"duration_ms", "result_class", "retry_count", "approval"),
|
||||
),
|
||||
"error": (
|
||||
"tel_error_events",
|
||||
("run_id", "error_class", "subsystem", "recovery", "ts_ns"),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
class TelemetryEmitter:
|
||||
"""Owns the queue, the writer thread, and the telemetry sqlite connection."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
events_path: Optional[Path] = None,
|
||||
db_path: Optional[Path] = None,
|
||||
enabled: bool = True,
|
||||
) -> None:
|
||||
self._dir = (events_path.parent if events_path else _default_dir())
|
||||
self._events_path = events_path or (self._dir / "events.jsonl")
|
||||
self._db_path = db_path or _default_db_path()
|
||||
self._enabled = enabled
|
||||
self._q: "queue.Queue[Dict[str, Any]]" = queue.Queue(maxsize=_MAX_QUEUE)
|
||||
self._dropped = 0
|
||||
self._written = 0
|
||||
self._stop = threading.Event()
|
||||
self._started = False
|
||||
self._lock = threading.Lock()
|
||||
self._conn: Optional[sqlite3.Connection] = None
|
||||
self._thread: Optional[threading.Thread] = None
|
||||
# Optional live subscribers (e.g. OTLP exporter). Called from the writer
|
||||
# thread AFTER durable writes, fully fail-isolated — a subscriber that
|
||||
# raises or blocks can never affect the JSONL/SQLite source of truth or
|
||||
# the hot path. Each subscriber is callable(batch: list[dict]).
|
||||
self._subscribers: list = []
|
||||
|
||||
# ── public API (hot path) ───────────────────────────────────────────────
|
||||
def emit(self, event: Any) -> None:
|
||||
"""Enqueue an event. Never blocks, never raises.
|
||||
|
||||
``event`` may be a dataclass with ``to_dict()`` or a plain dict.
|
||||
"""
|
||||
if not self._enabled:
|
||||
return
|
||||
try:
|
||||
payload = event.to_dict() if hasattr(event, "to_dict") else dict(event)
|
||||
payload.setdefault("ts_ns", time.time_ns())
|
||||
self._ensure_started()
|
||||
try:
|
||||
self._q.put_nowait(payload)
|
||||
except queue.Full:
|
||||
# Drop oldest to make room — bounded memory, newest-wins.
|
||||
try:
|
||||
self._q.get_nowait()
|
||||
self._dropped += 1
|
||||
self._q.put_nowait(payload)
|
||||
except Exception:
|
||||
self._dropped += 1
|
||||
except Exception: # the hot-path invariant: never propagate
|
||||
logger.debug("telemetry emit failed", exc_info=True)
|
||||
|
||||
# ── lifecycle ───────────────────────────────────────────────────────────
|
||||
def _ensure_started(self) -> None:
|
||||
if self._started:
|
||||
return
|
||||
with self._lock:
|
||||
if self._started:
|
||||
return
|
||||
try:
|
||||
self._dir.mkdir(parents=True, exist_ok=True)
|
||||
except Exception:
|
||||
logger.debug("telemetry dir create failed", exc_info=True)
|
||||
self._thread = threading.Thread(
|
||||
target=self._run, name="hermes-telemetry-writer", daemon=True
|
||||
)
|
||||
self._thread.start()
|
||||
self._started = True
|
||||
|
||||
def _open_conn(self) -> Optional[sqlite3.Connection]:
|
||||
if self._conn is not None:
|
||||
return self._conn
|
||||
try:
|
||||
conn = sqlite3.connect(str(self._db_path), isolation_level=None, timeout=5.0)
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
conn.execute("PRAGMA busy_timeout=5000")
|
||||
self._conn = conn
|
||||
except Exception:
|
||||
logger.debug("telemetry db open failed", exc_info=True)
|
||||
self._conn = None
|
||||
return self._conn
|
||||
|
||||
def _run(self) -> None:
|
||||
while not self._stop.is_set():
|
||||
try:
|
||||
first = self._q.get(timeout=0.5)
|
||||
except queue.Empty:
|
||||
continue
|
||||
batch = [first]
|
||||
while len(batch) < _DRAIN_BATCH:
|
||||
try:
|
||||
batch.append(self._q.get_nowait())
|
||||
except queue.Empty:
|
||||
break
|
||||
self._write_batch(batch)
|
||||
|
||||
def _write_batch(self, batch) -> None:
|
||||
# JSONL append (source of truth) — best effort.
|
||||
try:
|
||||
with open(self._events_path, "a", encoding="utf-8") as fh:
|
||||
for ev in batch:
|
||||
fh.write(json.dumps(ev, ensure_ascii=False) + "\n")
|
||||
except Exception:
|
||||
logger.debug("telemetry jsonl append failed", exc_info=True)
|
||||
|
||||
# SQLite index — best effort, per-event so one bad row can't lose the batch.
|
||||
conn = self._open_conn()
|
||||
if conn is None:
|
||||
return
|
||||
for ev in batch:
|
||||
try:
|
||||
self._index_one(conn, ev)
|
||||
self._written += 1
|
||||
except Exception:
|
||||
logger.debug("telemetry index row failed", exc_info=True)
|
||||
|
||||
# Live fan-out (e.g. OTLP) — AFTER durable writes, fully fail-isolated.
|
||||
# A slow/raising subscriber never affects JSONL/SQLite or the hot path.
|
||||
for sub in self._subscribers:
|
||||
try:
|
||||
sub(batch)
|
||||
except Exception:
|
||||
logger.debug("telemetry subscriber failed", exc_info=True)
|
||||
|
||||
def subscribe(self, callback) -> None:
    """Add ``callback`` to the live batch subscribers (no-op if already present).

    Subscribers are invoked as ``callback(batch)`` by ``_write_batch`` after
    the JSONL and SQLite writes; an exception raised by one is logged and
    does not propagate. The OTLP exporter uses this for continuous streaming.
    """
    already_registered = callback in self._subscribers
    if already_registered:
        return
    self._subscribers.append(callback)
|
||||
|
||||
def unsubscribe(self, callback) -> None:
    """Remove ``callback`` from the subscribers; an unknown callback is ignored."""
    try:
        self._subscribers.remove(callback)
    except ValueError:
        # Not registered (or already removed) — nothing to do.
        return
|
||||
|
||||
def _index_one(self, conn: sqlite3.Connection, ev: Dict[str, Any]) -> None:
    """Upsert one event into the table mapped to its ``event`` kind.

    ``_TABLE_COLUMNS`` maps the kind to a ``(table, columns)`` pair. Events
    whose kind has no mapping are skipped. Columns the event lacks are bound
    as ``None``.
    """
    spec = _TABLE_COLUMNS.get(ev.get("event"))
    if spec is None:
        return
    table, cols = spec
    params = [ev.get(name) for name in cols]
    marks = ", ".join(["?"] * len(cols))
    column_sql = ", ".join(cols)
    # Table and column names come from the module-level mapping, not from the
    # event; the event's values are bound as parameters.
    conn.execute(
        f"INSERT OR REPLACE INTO {table} ({column_sql}) VALUES ({marks})",
        params,
    )
|
||||
|
||||
# ── introspection / shutdown (tests, CLI) ───────────────────────────────
|
||||
def flush(self, timeout: float = 2.0) -> None:
    """Wait until the queue looks drained, or until ``timeout`` seconds pass.

    Helper for tests and the CLI, not the hot path. Returns ``None`` either
    way; callers cannot tell a drain from a timeout.
    """
    give_up_at = time.monotonic() + timeout
    while time.monotonic() < give_up_at:
        if not self._q.empty():
            time.sleep(0.02)
            continue
        # Queue is empty: give the writer a tick to finish the in-flight batch,
        # then confirm nothing new arrived in the meantime.
        time.sleep(0.05)
        if self._q.empty():
            return
        time.sleep(0.02)
|
||||
|
||||
def stats(self) -> Dict[str, int]:
    """Return a snapshot of the queue depth and the written/dropped counters."""
    snapshot: Dict[str, int] = {}
    snapshot["queued"] = self._q.qsize()
    snapshot["written"] = self._written
    snapshot["dropped"] = self._dropped
    return snapshot
|
||||
|
||||
def close(self) -> None:
    """Stop the writer thread and release the SQLite connection.

    Sets the stop flag, waits up to 2 s for the writer thread to exit, then
    closes the cached connection (errors ignored) and marks the emitter as
    not started.
    """
    self._stop.set()

    worker = self._thread
    if worker is not None:
        worker.join(timeout=2.0)

    conn = self._conn
    if conn is not None:
        try:
            conn.close()
        except Exception:
            pass
        self._conn = None

    self._started = False
|
||||
|
||||
|
||||
# ── process-wide singleton ──────────────────────────────────────────────────
|
||||
# The process-wide emitter instance; created on first get_emitter() call.
_EMITTER: Optional[TelemetryEmitter] = None
# Guards creation and replacement of _EMITTER.
_EMITTER_LOCK = threading.Lock()


def get_emitter() -> TelemetryEmitter:
    """Return the process-wide emitter, creating it on first use.

    The new emitter's ``enabled`` flag comes from ``_local_enabled()``, i.e.
    the ``telemetry.local`` config value.
    """
    global _EMITTER
    current = _EMITTER
    if current is None:
        with _EMITTER_LOCK:
            # Re-check under the lock: another thread may have created it
            # between the unlocked read above and acquiring the lock.
            if _EMITTER is None:
                _EMITTER = TelemetryEmitter(enabled=_local_enabled())
            current = _EMITTER
    return current
|
||||
|
||||
|
||||
def _local_enabled() -> bool:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
tel = cfg.get("telemetry") if isinstance(cfg, dict) else {}
|
||||
return bool((tel or {}).get("local", True))
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
|
||||
def emit(event: Any) -> None:
    """Module-level shortcut: forward ``event`` to the singleton emitter."""
    emitter = get_emitter()
    emitter.emit(event)
|
||||
|
||||
|
||||
def reset_emitter_for_tests(emitter: Optional[TelemetryEmitter] = None) -> None:
    """Replace the singleton emitter (tests only).

    The previous emitter, if there is one and it is not the replacement, is
    closed first; errors from that close are ignored. Passing ``None`` clears
    the singleton so the next ``get_emitter()`` builds a fresh one.
    """
    global _EMITTER
    with _EMITTER_LOCK:
        previous = _EMITTER
        must_close = previous is not None and previous is not emitter
        if must_close:
            try:
                previous.close()
            except Exception:
                pass
        _EMITTER = emitter
|
||||
|
||||
|
||||
# Public names of the emitter module.
__all__ = ["TelemetryEmitter", "get_emitter", "emit", "reset_emitter_for_tests"]
|
||||
99
agent/telemetry/events.py
Normal file
99
agent/telemetry/events.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""Typed local telemetry events.
|
||||
|
||||
These dataclasses are the rows written to the local JSONL log and the ``tel_*``
|
||||
SQLite tables. They record the values observed for each run — model id, provider, tool
|
||||
name, token counts, durations — and stay on the machine unless explicitly exported.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
# ── local telemetry events (real values) ────────────────────────────────────
|
||||
|
||||
|
||||
def _now_ns() -> int:
|
||||
return time.time_ns()
|
||||
|
||||
|
||||
@dataclass(slots=True)
class RunEvent:
    """Telemetry row for one top-level workflow execution (a trace root).

    ``to_dict()`` yields the dataclass fields plus ``"event": "run"``.
    """

    # Required identifiers.
    run_id: str
    trace_id: str
    entrypoint: str
    # Optional context.
    session_id: Optional[str] = None
    profile_id: Optional[str] = None
    platform: Optional[str] = None
    # start_ns defaults to the construction time from _now_ns().
    start_ns: int = field(default_factory=_now_ns)
    end_ns: Optional[int] = None
    end_reason: Optional[str] = None
    # Per-run counters.
    model_call_count: int = 0
    tool_call_count: int = 0
    error_count: int = 0
    # Cost estimate and its status.
    estimated_cost_usd: Optional[float] = None
    cost_status: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the event as a plain dict, with the ``event`` tag first."""
        payload: Dict[str, Any] = {"event": "run"}
        payload.update(asdict(self))
        return payload
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ModelCallEvent:
|
||||
span_id: str
|
||||
run_id: str
|
||||
provider: Optional[str] = None # raw provider, e.g. "anthropic"
|
||||
model: Optional[str] = None # raw model id, e.g. "claude-opus-4"
|
||||
base_url: Optional[str] = None
|
||||
input_tokens: int = 0
|
||||
output_tokens: int = 0
|
||||
cache_read_tokens: int = 0
|
||||
cache_write_tokens: int = 0
|
||||
reasoning_tokens: int = 0
|
||||
latency_ms: Optional[int] = None
|
||||
ttft_ms: Optional[int] = None
|
||||
estimated_cost_usd: Optional[float] = None
|
||||
cost_status: Optional[str] = None
|
||||
cost_source: Optional[str] = None
|
||||
end_reason: Optional[str] = None
|
||||
retry_count: int = 0
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {"event": "model_call", **asdict(self)}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ToolCallEvent:
|
||||
span_id: str
|
||||
run_id: str
|
||||
tool_name: Optional[str] = None # raw tool name, e.g. "web_search"
|
||||
backend: Optional[str] = None
|
||||
duration_ms: Optional[int] = None
|
||||
result_class: Optional[str] = None
|
||||
retry_count: int = 0
|
||||
approval: Optional[str] = None
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {"event": "tool_call", **asdict(self)}
|
||||
|
||||
|
||||
@dataclass(slots=True)
class ErrorEvent:
    """Telemetry row for one recorded error.

    ``to_dict()`` yields the dataclass fields plus ``"event": "error"``.
    """

    # run_id is required positionally but may be None.
    run_id: Optional[str]
    error_class: str
    subsystem: str
    recovery: Optional[str] = None
    # ts_ns defaults to the construction time from _now_ns().
    ts_ns: int = field(default_factory=_now_ns)

    def to_dict(self) -> Dict[str, Any]:
        """Return the event as a plain dict, with the ``event`` tag first."""
        payload: Dict[str, Any] = {"event": "error"}
        payload.update(asdict(self))
        return payload
|
||||
|
||||
|
||||
# Public event types of this module.
__all__ = ["RunEvent", "ModelCallEvent", "ToolCallEvent", "ErrorEvent"]
|
||||
139
agent/telemetry/exporter_bulk.py
Normal file
139
agent/telemetry/exporter_bulk.py
Normal file
@@ -0,0 +1,139 @@
|
||||
"""Export telemetry (and optionally session content) to a file or stream.
|
||||
|
||||
Two data domains, both written to an operator-chosen destination:
|
||||
|
||||
* Telemetry: the tel_* rows + events.jsonl (structural observability).
|
||||
* Content (opt-in via telemetry.trajectories): sessions + messages, with every
|
||||
content field (message body, reasoning, raw tool-call args) passed through the
|
||||
redaction pipeline (secrets always stripped; PII per content_redaction).
|
||||
|
||||
Formats: ndjson (default) and json. OTLP streaming export lives in otlp_exporter.py.
|
||||
|
||||
Content export is gated by ``redaction.content_export_enabled``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterator, List, Optional, TextIO
|
||||
|
||||
from . import redaction
|
||||
|
||||
_TEL_TABLES = (
|
||||
"tel_runs", "tel_model_calls", "tel_tool_calls", "tel_error_events",
|
||||
)
|
||||
|
||||
|
||||
def _open(db_path: Optional[Path]) -> sqlite3.Connection:
|
||||
if db_path is None:
|
||||
from hermes_constants import get_hermes_home
|
||||
db_path = get_hermes_home() / "state.db"
|
||||
c = sqlite3.connect(str(db_path), timeout=5.0)
|
||||
c.row_factory = sqlite3.Row
|
||||
return c
|
||||
|
||||
|
||||
def _iter_telemetry(conn: sqlite3.Connection, since_ns: Optional[int]) -> Iterator[Dict[str, Any]]:
    """Yield every row of each telemetry table as a dict tagged with ``_kind``.

    ``_kind`` is the source table name. ``since_ns`` filters ``tel_runs`` only
    (``start_ns >= since_ns``); rows of the other tables are always yielded
    in full.
    NOTE(review): the child tables are not windowed by run — confirm whether
    that is intended before relying on ``since_ns`` for a partial export.
    """
    for table in _TEL_TABLES:
        windowed = table == "tel_runs" and since_ns
        if windowed:
            cursor = conn.execute(
                f"SELECT * FROM {table} WHERE start_ns >= ?", (int(since_ns),)
            )
        else:
            cursor = conn.execute(f"SELECT * FROM {table}")
        for row in cursor.fetchall():
            record = dict(row)
            record["_kind"] = table
            yield record
|
||||
|
||||
|
||||
def _iter_content(
    db_path: Optional[Path],
    *,
    config: Optional[Dict[str, Any]],
    include_content: bool,
) -> Iterator[Dict[str, Any]]:
    """Yield one ``_kind: "session"`` record per session in the session DB.

    Every message goes through ``redaction.redact_message`` with the content
    mode derived from ``config`` and the ``include_content`` flag. The session
    title is added, redacted, only when ``include_content`` is true and a
    title exists. The ``SessionDB`` is closed when iteration ends or fails.
    """
    from hermes_state import SessionDB

    content_mode = redaction.content_mode_for(config)
    db = SessionDB(db_path=db_path) if db_path else SessionDB()
    try:
        for session in db.export_all():
            raw_messages = session.get("messages", []) or []
            redacted = []
            for message in raw_messages:
                redacted.append(
                    redaction.redact_message(
                        message,
                        content_mode=content_mode,
                        include_content=include_content,
                    )
                )

            # Structural session metadata is copied as-is.
            record: Dict[str, Any] = {"_kind": "session"}
            for key in (
                "id",
                "source",
                "model",
                "started_at",
                "ended_at",
                "message_count",
                "tool_call_count",
            ):
                record[key] = session.get(key)
            record["messages"] = redacted

            # The free-text title counts as content: export it only when
            # content is included, and only after redaction.
            if include_content and session.get("title"):
                record["title"] = redaction.redact_for_export(
                    session["title"], content_mode=content_mode
                )
            yield record
    finally:
        db.close()
|
||||
|
||||
|
||||
def export(
    out: TextIO,
    *,
    fmt: str = "ndjson",
    since_ns: Optional[int] = None,
    include_content: bool = False,
    config: Optional[Dict[str, Any]] = None,
    db_path: Optional[Path] = None,
) -> Dict[str, int]:
    """Write telemetry rows and session records to ``out``; return counts.

    With ``fmt="ndjson"`` each record is written as one JSON line as soon as
    it is read. Any other ``fmt`` value buffers all records and writes a single
    ``{"records": [...]}`` JSON document at the end.

    ``include_content`` only takes effect when
    ``redaction.content_export_enabled(config)`` also allows it; otherwise
    content is forced off.

    Returns ``{"telemetry": n, "sessions": m, "content_included": 0 or 1}``.
    """
    # Trajectories gate: a flag cannot override the config setting.
    content_allowed = include_content and redaction.content_export_enabled(config)
    counts = {"telemetry": 0, "sessions": 0, "content_included": int(content_allowed)}

    streaming = fmt == "ndjson"
    buffered: List[Dict[str, Any]] = []

    def _emit(record: Dict[str, Any]) -> None:
        # Stream line by line for ndjson, otherwise collect for the final dump.
        if streaming:
            out.write(json.dumps(record, ensure_ascii=False) + "\n")
        else:
            buffered.append(record)

    conn = _open(db_path)
    try:
        for record in _iter_telemetry(conn, since_ns):
            counts["telemetry"] += 1
            _emit(record)
    finally:
        conn.close()

    # Content/session domain (separate connection via SessionDB).
    for record in _iter_content(db_path, config=config, include_content=content_allowed):
        counts["sessions"] += 1
        _emit(record)

    if not streaming:
        json.dump({"records": buffered}, out, ensure_ascii=False, indent=2)

    return counts
|
||||
|
||||
|
||||
# Public API of this module: only the export() entry point.
__all__ = ["export"]
|
||||
219
agent/telemetry/metrics.py
Normal file
219
agent/telemetry/metrics.py
Normal file
@@ -0,0 +1,219 @@
|
||||
"""Derive metric rollups from the local telemetry tables.
|
||||
|
||||
Reads the ``tel_*`` tables in state.db and returns aggregates for /usage, /insights,
|
||||
and local dashboards. Metrics are computed by querying the event log rather than being
|
||||
emitted on the hot path.
|
||||
|
||||
Each function accepts either an open caller-owned ``conn`` (reused, not closed) or a
|
||||
``db_path`` (opened and closed internally). InsightsEngine passes its existing
|
||||
connection; a standalone dashboard passes a path.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _cursor(
|
||||
conn: Optional[sqlite3.Connection], db_path: Optional[Path]
|
||||
) -> Iterator[sqlite3.Connection]:
|
||||
"""Yield a Row-factory connection. Closes it only if we opened it."""
|
||||
if conn is not None:
|
||||
prev_factory = conn.row_factory
|
||||
conn.row_factory = sqlite3.Row
|
||||
try:
|
||||
yield conn
|
||||
finally:
|
||||
conn.row_factory = prev_factory
|
||||
return
|
||||
if db_path is None:
|
||||
from hermes_constants import get_hermes_home
|
||||
db_path = get_hermes_home() / "state.db"
|
||||
c = sqlite3.connect(str(db_path), timeout=5.0)
|
||||
c.row_factory = sqlite3.Row
|
||||
try:
|
||||
yield c
|
||||
finally:
|
||||
c.close()
|
||||
|
||||
|
||||
def _since_clause(since_ns: Optional[int], col: str = "start_ns") -> str:
|
||||
return f" WHERE {col} >= {int(since_ns)}" if since_ns else ""
|
||||
|
||||
|
||||
def workflow_summary(
    db_path: Optional[Path] = None,
    since_ns: Optional[int] = None,
    *,
    conn: Optional[sqlite3.Connection] = None,
) -> Dict[str, Any]:
    """Aggregate ``tel_runs``: totals, groupings, duration percentiles.

    Returns ``total_runs``, counts by ``end_reason`` and by ``entrypoint``
    (NULL grouped as ``"unknown"``), p50/p95 run duration in milliseconds over
    runs that have an ``end_ns``, and ``success_rate`` — the share of runs
    whose ``end_reason`` is ``"completed"``. ``since_ns`` limits everything to
    runs with ``start_ns >= since_ns``.
    """
    with _cursor(conn, db_path) as c:
        where = _since_clause(since_ns)

        def _counts_by(column: str) -> Dict[str, Any]:
            # One GROUP BY query per column; NULL keys collapse to "unknown".
            grouped = c.execute(
                f"SELECT {column}, COUNT(*) n FROM tel_runs{where} GROUP BY {column}"
            ).fetchall()
            return {row[column] or "unknown": row["n"] for row in grouped}

        total = c.execute(f"SELECT COUNT(*) n FROM tel_runs{where}").fetchone()["n"]
        by_reason = _counts_by("end_reason")
        by_entry = _counts_by("entrypoint")

        # Only finished runs have a duration.
        joiner = " AND" if where else " WHERE"
        dur_where = f"{where}{joiner} end_ns IS NOT NULL"
        durations = []
        for row in c.execute(
            f"SELECT start_ns, end_ns FROM tel_runs{dur_where}"
        ).fetchall():
            durations.append((row["end_ns"] - row["start_ns"]) / 1e6)

        if total:
            success_rate = round(by_reason.get("completed", 0) / total, 4)
        else:
            success_rate = 0.0

        return {
            "total_runs": total,
            "by_end_reason": by_reason,
            "by_entrypoint": by_entry,
            "duration_ms_p50": _pct(durations, 50),
            "duration_ms_p95": _pct(durations, 95),
            "success_rate": success_rate,
        }
|
||||
|
||||
|
||||
def model_call_summary(
    db_path: Optional[Path] = None,
    since_ns: Optional[int] = None,
    *,
    conn: Optional[sqlite3.Connection] = None,
) -> Dict[str, Any]:
    """Aggregate ``tel_model_calls`` per provider/model.

    Returns call counts by provider and by model (NULL grouped as
    ``"unknown"``), summed input/output/cache-read tokens, ``cache_hit_rate``
    (cache-read tokens over cache-read plus input tokens) and a per
    provider/model ``breakdown`` with the average latency.

    ``since_ns`` restricts the rollup to calls belonging to runs whose
    ``start_ns >= since_ns``, matching the window ``workflow_summary`` applies.
    It used to be accepted but ignored.
    """
    # Window by run join: model-call rows have no start timestamp of their own.
    # NOTE(review): assumes tel_model_calls has a run_id column referencing
    # tel_runs.run_id (ModelCallEvent carries run_id) — confirm against the schema.
    window = ""
    params: tuple = ()
    if since_ns:
        window = " WHERE run_id IN (SELECT run_id FROM tel_runs WHERE start_ns >= ?)"
        params = (int(since_ns),)

    with _cursor(conn, db_path) as c:
        rows = c.execute(
            "SELECT provider, model, COUNT(*) n, "
            "SUM(input_tokens) inp, SUM(output_tokens) outp, "
            "SUM(cache_read_tokens) cache, AVG(latency_ms) avg_latency "
            "FROM tel_model_calls" + window + " GROUP BY provider, model",
            params,
        ).fetchall()
        by_provider: Dict[str, int] = {}
        by_model: Dict[str, int] = {}
        tokens = {"input": 0, "output": 0, "cache_read": 0}
        breakdown: List[Dict[str, Any]] = []
        for r in rows:
            prov = r["provider"] or "unknown"
            mdl = r["model"] or "unknown"
            by_provider[prov] = by_provider.get(prov, 0) + r["n"]
            by_model[mdl] = by_model.get(mdl, 0) + r["n"]
            # SUM() over zero non-NULL values is NULL; treat it as 0.
            tokens["input"] += r["inp"] or 0
            tokens["output"] += r["outp"] or 0
            tokens["cache_read"] += r["cache"] or 0
            breakdown.append({
                "provider": r["provider"],
                "model": r["model"],
                "calls": r["n"],
                "avg_latency_ms": round(r["avg_latency"] or 0, 1),
            })
        cache_total = tokens["cache_read"] + tokens["input"]
        return {
            "by_provider": by_provider,
            "by_model": by_model,
            "tokens": tokens,
            "cache_hit_rate": round(tokens["cache_read"] / cache_total, 4) if cache_total else 0.0,
            "breakdown": breakdown,
        }
|
||||
|
||||
|
||||
def tool_call_summary(
    db_path: Optional[Path] = None,
    *,
    conn: Optional[sqlite3.Connection] = None,
) -> Dict[str, Any]:
    """Aggregate ``tel_tool_calls``: calls and failures per tool.

    A call counts as a failure when its ``result_class`` is ``error``,
    ``timeout`` or ``blocked``. NULL tool names are grouped as ``"unknown"``.
    """
    with _cursor(conn, db_path) as c:

        def _per_tool(sql: str) -> Dict[str, Any]:
            # Map tool name -> count for a GROUP BY tool_name query.
            return {
                row["tool_name"] or "unknown": row["n"]
                for row in c.execute(sql).fetchall()
            }

        by_tool = _per_tool(
            "SELECT tool_name, COUNT(*) n FROM tel_tool_calls GROUP BY tool_name"
        )
        fails = _per_tool(
            "SELECT tool_name, COUNT(*) n FROM tel_tool_calls "
            "WHERE result_class IN ('error','timeout','blocked') GROUP BY tool_name"
        )
        total = sum(by_tool.values())
        total_fail = sum(fails.values())
        if total:
            failure_rate = round(total_fail / total, 4)
        else:
            failure_rate = 0.0
        return {
            "by_tool": by_tool,
            "failures_by_tool": fails,
            "total": total,
            "failure_rate": failure_rate,
        }
|
||||
|
||||
|
||||
def error_summary(
    db_path: Optional[Path] = None,
    *,
    conn: Optional[sqlite3.Connection] = None,
) -> Dict[str, Any]:
    """Count ``tel_error_events`` rows per ``error_class`` (NULL → ``"unknown"``)."""
    with _cursor(conn, db_path) as c:
        rows = c.execute(
            "SELECT error_class, COUNT(*) n FROM tel_error_events GROUP BY error_class"
        ).fetchall()
        by_class: Dict[str, Any] = {}
        for row in rows:
            by_class[row["error_class"] or "unknown"] = row["n"]
        return {"by_class": by_class}
|
||||
|
||||
|
||||
def _pct(values: List[float], p: int) -> float:
|
||||
if not values:
|
||||
return 0.0
|
||||
s = sorted(values)
|
||||
k = (len(s) - 1) * (p / 100)
|
||||
lo = int(k)
|
||||
hi = min(lo + 1, len(s) - 1)
|
||||
frac = k - lo
|
||||
return round(s[lo] + (s[hi] - s[lo]) * frac, 2)
|
||||
|
||||
|
||||
def overview(
    db_path: Optional[Path] = None,
    since_ns: Optional[int] = None,
    *,
    conn: Optional[sqlite3.Connection] = None,
) -> Dict[str, Any]:
    """Return all four rollups in one dict, for a dashboard.

    ``since_ns`` is forwarded to the workflow and model-call rollups; the
    tool-call and error rollups take no time window.
    """
    shared = {"conn": conn}
    report: Dict[str, Any] = {}
    report["workflows"] = workflow_summary(db_path, since_ns, **shared)
    report["model_calls"] = model_call_summary(db_path, since_ns, **shared)
    report["tool_calls"] = tool_call_summary(db_path, **shared)
    report["errors"] = error_summary(db_path, **shared)
    return report
|
||||
|
||||
|
||||
def has_data(
    db_path: Optional[Path] = None,
    *,
    conn: Optional[sqlite3.Connection] = None,
) -> bool:
    """Return ``True`` when ``tel_runs`` holds at least one row.

    A cheap guard for /insights rendering: any error (missing table,
    unreadable database, ...) is reported as ``False`` rather than raised.
    """
    try:
        with _cursor(conn, db_path) as c:
            first_row = c.execute("SELECT 1 FROM tel_runs LIMIT 1").fetchone()
    except Exception:
        return False
    return first_row is not None
|
||||
|
||||
|
||||
# Public rollup functions of this module.
__all__ = [
    "workflow_summary", "model_call_summary", "tool_call_summary",
    "error_summary", "overview", "has_data",
]
|
||||
282
agent/telemetry/otlp_exporter.py
Normal file
282
agent/telemetry/otlp_exporter.py
Normal file
@@ -0,0 +1,282 @@
|
||||
"""Export telemetry to an OpenTelemetry Collector over OTLP/HTTP.
|
||||
|
||||
Maps telemetry events (which carry trace_id/run_id/span_id/parent_span_id) to OTel
|
||||
spans and sends them to the endpoint configured under ``telemetry.export.otlp``. Lets
|
||||
an operator stream Hermes telemetry into their own observability stack.
|
||||
|
||||
Notes:
|
||||
* The destination is operator-configured; this module only sends to that endpoint.
|
||||
It does not import or interact with any aggregate-metrics path.
|
||||
* ``opentelemetry-sdk`` + ``opentelemetry-exporter-otlp-proto-http`` are an optional
|
||||
extra (``pip install hermes-agent[otlp]``), imported lazily so the dependency is
|
||||
only required when OTLP export is actually used.
|
||||
* ``headers_env`` maps a header name to an environment variable name; values are read
|
||||
from the environment at export time and never logged or stored.
|
||||
* The continuous subscriber runs in the emitter's writer thread after durable writes
|
||||
and is fail-isolated, so an export error cannot affect a run.
|
||||
|
||||
Spans carry structural telemetry by default. Message content is included only when the
|
||||
``telemetry.trajectories`` setting is enabled, and always passes through the export redaction pipeline.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OTLPUnavailable(RuntimeError):
    """The optional OpenTelemetry SDK is not installed or cannot be imported."""
|
||||
|
||||
|
||||
def _require_sdk(*, auto_install: bool = True, prompt: bool = True):
    """Import the OTel SDK and return its key classes, keyed by name.

    When ``auto_install`` is true, ``tools.lazy_deps.ensure("export.otlp")``
    is tried first so a missing SDK can go through the standard lazy-install
    flow. Any failure there is ignored, because the import attempt below is
    what decides the outcome.

    ``prompt`` is forwarded to the lazy installer; pass ``False`` from
    non-interactive contexts such as the continuous streamer.

    Returns a dict with ``TracerProvider``, ``BatchSpanProcessor``,
    ``Resource``, ``OTLPSpanExporter`` and ``SpanKind``.

    Raises ``OTLPUnavailable`` (with a manual install hint) when the SDK
    cannot be imported.
    """
    if auto_install:
        try:
            from tools.lazy_deps import ensure as _lazy_ensure

            _lazy_ensure("export.otlp", prompt=prompt)
        except Exception:
            # lazy_deps missing, or the install was disabled, declined or
            # failed — fall through; the import below raises OTLPUnavailable.
            pass

    try:
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.export import BatchSpanProcessor
        from opentelemetry.sdk.resources import Resource
        from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
            OTLPSpanExporter,
        )
        from opentelemetry.trace import SpanKind
    except Exception as e:  # ImportError or partial install
        raise OTLPUnavailable(
            "OTLP export requires the optional dependency. Install with:\n"
            " pip install 'hermes-agent[otlp]'\n"
            f"(import error: {e})"
        )

    sdk = {}
    sdk["TracerProvider"] = TracerProvider
    sdk["BatchSpanProcessor"] = BatchSpanProcessor
    sdk["Resource"] = Resource
    sdk["OTLPSpanExporter"] = OTLPSpanExporter
    sdk["SpanKind"] = SpanKind
    return sdk
|
||||
|
||||
|
||||
def _resolve_headers(headers_env: Optional[Dict[str, str]]) -> Dict[str, str]:
|
||||
"""Resolve {header_name: ENV_VAR_NAME} -> {header_name: value} from env.
|
||||
|
||||
The config stores environment variable names, not secret values; values are read
|
||||
from the environment here. Missing variables are skipped (and noted at debug level
|
||||
without the value).
|
||||
"""
|
||||
resolved: Dict[str, str] = {}
|
||||
for header_name, env_name in (headers_env or {}).items():
|
||||
val = os.environ.get(str(env_name))
|
||||
if val:
|
||||
resolved[str(header_name)] = val
|
||||
else:
|
||||
logger.debug("OTLP header %s: env var %s not set; skipping",
|
||||
header_name, env_name)
|
||||
return resolved
|
||||
|
||||
|
||||
def _otlp_config(config: Dict[str, Any]) -> Dict[str, Any]:
|
||||
tel = (config or {}).get("telemetry") or {}
|
||||
export = tel.get("export") or {}
|
||||
return export.get("otlp") or {}
|
||||
|
||||
|
||||
def build_exporter(config: Dict[str, Any]):
    """Build an OTLP span exporter from ``telemetry.export.otlp``.

    Headers are resolved from the environment via ``headers_env``; when none
    resolve, ``headers=None`` is passed.

    Raises ``OTLPUnavailable`` if the SDK cannot be loaded, and ``ValueError``
    if no endpoint is configured.
    """
    sdk = _require_sdk()
    settings = _otlp_config(config)
    endpoint = settings.get("endpoint")
    if endpoint:
        headers = _resolve_headers(settings.get("headers_env"))
        exporter_cls = sdk["OTLPSpanExporter"]
        return exporter_cls(endpoint=endpoint, headers=headers or None)
    raise ValueError("telemetry.export.otlp.endpoint is not set")
|
||||
|
||||
|
||||
def _make_provider(config: Dict[str, Any]):
    """Create a tracer provider wired to a batch processor and OTLP exporter.

    Returns ``(provider, processor)`` so callers can flush the processor and
    shut the provider down.
    """
    sdk = _require_sdk()
    resource_attrs = {
        "service.name": "hermes-agent",
        "telemetry.scope": "local",  # never aggregate metrics
    }
    provider = sdk["TracerProvider"](resource=sdk["Resource"].create(resource_attrs))
    processor = sdk["BatchSpanProcessor"](build_exporter(config))
    provider.add_span_processor(processor)
    return provider, processor
|
||||
|
||||
|
||||
# ── event -> span attribute mapping (real values) ───────────────────────────
|
||||
def _span_attrs(ev: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Span attributes for an event — the real recorded values (local telemetry)."""
|
||||
kind = ev.get("event")
|
||||
attrs: Dict[str, Any] = {"hermes.event": kind or "unknown"}
|
||||
keep_by_kind = {
|
||||
"run": ("entrypoint", "platform", "end_reason",
|
||||
"model_call_count", "tool_call_count", "error_count",
|
||||
"estimated_cost_usd", "cost_status"),
|
||||
"model_call": ("provider", "model", "base_url",
|
||||
"input_tokens", "output_tokens", "cache_read_tokens",
|
||||
"cache_write_tokens", "reasoning_tokens", "latency_ms",
|
||||
"ttft_ms", "end_reason"),
|
||||
"tool_call": ("tool_name", "backend", "duration_ms", "result_class"),
|
||||
"error": ("error_class", "subsystem", "recovery"),
|
||||
}
|
||||
for col in keep_by_kind.get(kind, ()): # type: ignore[arg-type]
|
||||
v = ev.get(col)
|
||||
if v is not None:
|
||||
attrs[f"hermes.{col}"] = v
|
||||
return attrs
|
||||
|
||||
|
||||
def export_batch(provider, batch: List[Dict[str, Any]]) -> int:
    """Create one OTel span per event in ``batch``; return how many succeeded.

    Each span is named ``hermes.<event kind>``, carries the attributes from
    ``_span_attrs`` and is ended immediately. An event that fails to map is
    logged at debug level and skipped, without affecting the rest.
    """
    tracer = provider.get_tracer("hermes.telemetry")
    created = 0
    for ev in batch:
        try:
            span_name = f"hermes.{ev.get('event', 'event')}"
            tracer.start_span(span_name, attributes=_span_attrs(ev)).end()
        except Exception:
            logger.debug("OTLP span map failed", exc_info=True)
        else:
            created += 1
    return created
|
||||
|
||||
|
||||
# ── one-shot drain (export current local rows) ──────────────────────────────
|
||||
def export_once(
    config: Dict[str, Any],
    *,
    db_path: Optional[Path] = None,
    since_ns: Optional[int] = None,
) -> int:
    """Send the current local ``tel_*`` rows to the configured OTLP endpoint once.

    Returns the number of spans created. The provider is always shut down
    afterwards; errors from that shutdown are ignored.
    """
    provider, processor = _make_provider(config)
    try:
        exported = export_batch(provider, _read_events(db_path, since_ns))
        processor.force_flush()
        return exported
    finally:
        try:
            provider.shutdown()
        except Exception:
            pass
|
||||
|
||||
|
||||
def _read_events(db_path: Optional[Path], since_ns: Optional[int]) -> List[Dict[str, Any]]:
|
||||
if db_path is None:
|
||||
from hermes_constants import get_hermes_home
|
||||
db_path = get_hermes_home() / "state.db"
|
||||
c = sqlite3.connect(str(db_path), timeout=5.0)
|
||||
c.row_factory = sqlite3.Row
|
||||
out: List[Dict[str, Any]] = []
|
||||
try:
|
||||
table_event = {
|
||||
"tel_runs": "run", "tel_model_calls": "model_call",
|
||||
"tel_tool_calls": "tool_call", "tel_error_events": "error",
|
||||
}
|
||||
for table, evkind in table_event.items():
|
||||
where = ""
|
||||
if table == "tel_runs" and since_ns:
|
||||
where = f" WHERE start_ns >= {int(since_ns)}"
|
||||
for r in c.execute(f"SELECT * FROM {table}{where}").fetchall():
|
||||
d = dict(r)
|
||||
d["event"] = evkind
|
||||
out.append(d)
|
||||
finally:
|
||||
c.close()
|
||||
return out
|
||||
|
||||
|
||||
# ── continuous streaming subscriber ─────────────────────────────────────────
|
||||
class OTLPStreamer:
    """A live subscriber that pushes each emitter batch to OTLP as it lands.

    Register with ``emitter.subscribe(streamer)``. Fail-isolated by the emitter.
    ``exported`` counts the spans created so far.
    """

    def __init__(self, config: Dict[str, Any]):
        """Build the provider/processor pair from ``config`` (may raise)."""
        self._provider, self._processor = _make_provider(config)
        self.exported = 0

    def __call__(self, batch: List[Dict[str, Any]]) -> None:
        """Map one emitter batch to spans and add the result to ``exported``."""
        self.exported += export_batch(self._provider, batch)

    def shutdown(self) -> None:
        """Flush pending spans, then shut the provider down. Never raises.

        The two steps are guarded separately so a failing flush cannot skip
        the provider shutdown and leave the provider running.
        """
        try:
            self._processor.force_flush()
        except Exception:
            pass  # best effort: still shut the provider down below
        try:
            self._provider.shutdown()
        except Exception:
            pass
|
||||
|
||||
|
||||
def is_available() -> bool:
    """Report whether the OTel SDK can be imported right now.

    A pure check (e.g. for status display): it never triggers an install.
    """
    try:
        _require_sdk(auto_install=False)
    except OTLPUnavailable:
        return False
    return True
|
||||
|
||||
|
||||
def is_enabled(config: Dict[str, Any]) -> bool:
    """True when ``telemetry.export.otlp`` has both ``enabled`` and ``endpoint`` set."""
    otlp = _otlp_config(config)
    if not otlp.get("enabled"):
        return False
    return bool(otlp.get("endpoint"))
|
||||
|
||||
|
||||
def start_streaming(config: Dict[str, Any]) -> Optional[OTLPStreamer]:
    """If OTLP is enabled, attach a streamer to the singleton emitter.

    Non-interactive context (startup): attempts a lazy install with prompt=False
    so a configured-but-missing SDK is installed once (gated by
    security.allow_lazy_installs), then streams.

    Returns the attached streamer, or ``None`` when OTLP is disabled, the SDK
    cannot be loaded, or the streamer cannot be built or attached. Failures
    are logged as warnings and never raised into startup.
    """
    if not is_enabled(config):
        return None
    try:
        _require_sdk(prompt=False)
    except OTLPUnavailable:
        logger.warning("telemetry.export.otlp.enabled but the OTel SDK could not "
                       "be installed/imported; install 'hermes-agent[otlp]'")
        return None

    streamer: Optional[OTLPStreamer] = None
    try:
        from agent.telemetry.emitter import get_emitter

        # Building the exporter/provider or subscribing can still fail (for
        # example on a bad endpoint value); that must not abort startup.
        streamer = OTLPStreamer(config)
        get_emitter().subscribe(streamer)
    except Exception:
        logger.warning("OTLP streaming could not be started; continuing without it",
                       exc_info=True)
        if streamer is not None:
            # Built but not attached: release it before giving up.
            streamer.shutdown()
        return None
    return streamer
|
||||
|
||||
|
||||
# Public names of the OTLP exporter module.
__all__ = [
    "OTLPUnavailable", "OTLPStreamer",
    "build_exporter", "export_once", "export_batch",
    "is_available", "is_enabled", "start_streaming",
]
|
||||
70
agent/telemetry/policy.py
Normal file
70
agent/telemetry/policy.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Telemetry consent posture and the aggregate-metrics gate.
|
||||
|
||||
Consent is a single config field, ``telemetry.consent_state``:
|
||||
|
||||
* "unknown" — no choice recorded; never uploads (the default).
|
||||
* "local" — declined aggregate metrics; local telemetry only.
|
||||
* "aggregate" — opted in to aggregate metrics.
|
||||
|
||||
The config file is the source of truth: set ``telemetry.consent_state`` with
|
||||
``hermes config set`` (or a managed-scope pin). Callers that gate behavior read
|
||||
``telemetry.*`` directly from config; this module only provides the consent
|
||||
constants, the install-id helper, and the upload gate a future uploader must
|
||||
consult.
|
||||
|
||||
``allow_aggregate`` is the hard gate. An administrator pins
|
||||
``telemetry.allow_aggregate: false`` through the managed-scope layer
|
||||
(``/etc/hermes/config.yaml``), which takes precedence over the user's config; when
|
||||
it is false, aggregate metrics are off regardless of ``consent_state``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from typing import Any, Dict
|
||||
|
||||
# Values accepted for ``telemetry.consent_state``.
CONSENT_UNKNOWN = "unknown"      # no choice recorded
CONSENT_LOCAL = "local"          # declined aggregate metrics
CONSENT_AGGREGATE = "aggregate"  # opted in to aggregate metrics
VALID_CONSENT_STATES = {
    CONSENT_UNKNOWN,
    CONSENT_LOCAL,
    CONSENT_AGGREGATE,
}
|
||||
|
||||
|
||||
def _telemetry_cfg(config: Dict[str, Any]) -> Dict[str, Any]:
|
||||
cfg = config.get("telemetry") if isinstance(config, dict) else None
|
||||
return cfg if isinstance(cfg, dict) else {}
|
||||
|
||||
|
||||
def ensure_install_id(config: Dict[str, Any]) -> str:
    """Return the configured install id, or mint a fresh one.

    The stored ``telemetry.install_id`` is returned unchanged when it is a
    string containing at least one non-whitespace character. Otherwise a new
    ``uuid4`` string is generated. Nothing is written back to ``config`` here;
    persisting the returned value is left to the caller.
    """
    stored = _telemetry_cfg(config).get("install_id")
    has_value = isinstance(stored, str) and bool(stored.strip())
    return stored if has_value else str(uuid.uuid4())
|
||||
|
||||
|
||||
def may_upload_aggregate(config: Dict[str, Any]) -> bool:
    """Whether aggregate metrics may upload — the gate a future uploader consults.

    True only when the admin hard gate (``telemetry.allow_aggregate``) allows it
    AND ``telemetry.consent_state`` equals ``CONSENT_AGGREGATE``.

    ``allow_aggregate`` defaults to allowed when the key is absent. A string
    value is parsed rather than truth-tested: only ``true``/``1``/``yes``/``on``
    (case-insensitive, surrounding whitespace ignored) allow uploads. Plain
    ``bool("false")`` is ``True``, which would silently disable the hard gate if
    the setting ever arrives as text, so any other string fails closed.
    Non-string values keep ordinary truthiness.
    """
    tel = _telemetry_cfg(config)
    raw_gate = tel.get("allow_aggregate", True)
    if isinstance(raw_gate, str):
        # Fail closed on anything that is not an explicit affirmative.
        allow_aggregate = raw_gate.strip().lower() in ("true", "1", "yes", "on")
    else:
        allow_aggregate = bool(raw_gate)
    state = tel.get("consent_state", CONSENT_UNKNOWN)
    return allow_aggregate and state == CONSENT_AGGREGATE
|
||||
|
||||
|
||||
__all__ = [
|
||||
"CONSENT_UNKNOWN",
|
||||
"CONSENT_LOCAL",
|
||||
"CONSENT_AGGREGATE",
|
||||
"VALID_CONSENT_STATES",
|
||||
"may_upload_aggregate",
|
||||
"ensure_install_id",
|
||||
]
|
||||
187
agent/telemetry/redaction.py
Normal file
187
agent/telemetry/redaction.py
Normal file
@@ -0,0 +1,187 @@
|
||||
"""Redaction applied to telemetry data on export.
|
||||
|
||||
Two independent controls:
|
||||
|
||||
* Secrets are always redacted, on every export and in every mode; no setting
|
||||
disables this. Wraps ``agent/redact.py::redact_sensitive_text(force=True)``.
|
||||
|
||||
* Whether message bodies, reasoning, and raw tool arguments are exportable at all is
|
||||
governed by the trajectories setting (``telemetry.trajectories.enabled``, default
|
||||
off, admin-pinnable), not by a redaction mode. With trajectories off, content is
|
||||
dropped. With it on, content is exportable and ``content_redaction`` (none|pii)
|
||||
controls how much is scrubbed; secrets are still always stripped.
|
||||
|
||||
This applies to the local and trajectory export paths. It is unrelated to any
|
||||
aggregate-metrics path.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Content-redaction strengths for any content that IS exported.
|
||||
CONTENT_NONE = "none" # no PII scrubbing; secrets are still stripped (dropping content is decided by include_content, not this mode)
|
||||
CONTENT_PII = "pii" # codec-aware PII redaction on exported content
|
||||
CONTENT_MODES = {CONTENT_NONE, CONTENT_PII}
|
||||
|
||||
# ── PII patterns (applied only in CONTENT_PII mode, on content that is exported) ──
|
||||
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}")
|
||||
# E.164-ish and common separators; conservative to avoid nuking code/IDs.
|
||||
_PHONE_RE = re.compile(
|
||||
r"(?<!\w)(?:\+?\d{1,3}[\s.\-]?)?(?:\(\d{2,4}\)[\s.\-]?)?\d{3}[\s.\-]?\d{3,4}(?:[\s.\-]?\d{2,4})?(?!\w)"
|
||||
)
|
||||
# Long opaque hex/uuid-ish user identifiers.
|
||||
_UUID_RE = re.compile(r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b")
|
||||
|
||||
|
||||
def _secret_redact(text: Optional[str]) -> Optional[str]:
|
||||
"""Always-on secret redaction. force=True so user config can't disable it."""
|
||||
if text is None:
|
||||
return None
|
||||
try:
|
||||
from agent.redact import redact_sensitive_text
|
||||
return redact_sensitive_text(str(text), force=True)
|
||||
except Exception:
|
||||
# Fail CLOSED: if the redactor can't run, do not emit the raw string.
|
||||
return "[redaction-unavailable]"
|
||||
|
||||
|
||||
def _pii_redact(text: str) -> str:
    """Replace email, UUID and phone-number matches with fixed placeholders.

    Substitutions run in this order: emails, then UUIDs, then phone numbers.
    """
    substitutions = (
        (_EMAIL_RE, "[email]"),
        (_UUID_RE, "[id]"),
        (_PHONE_RE, "[phone]"),
    )
    for pattern, placeholder in substitutions:
        text = pattern.sub(placeholder, text)
    return text
|
||||
|
||||
|
||||
def redact_for_export(
    text: Optional[str],
    *,
    content_mode: str = CONTENT_NONE,
) -> Optional[str]:
    """Scrub one content string that the caller has decided to export.

    Secret redaction always runs first. PII redaction is applied on top only
    when ``content_mode`` equals ``CONTENT_PII``. A ``None`` input yields
    ``None``. This function does not decide whether content may be exported at
    all; callers make that decision (see ``content_export_enabled``).
    """
    scrubbed = _secret_redact(text)
    if scrubbed is not None and content_mode == CONTENT_PII:
        scrubbed = _pii_redact(scrubbed)
    return scrubbed
|
||||
|
||||
|
||||
def content_export_enabled(config: Optional[Dict[str, Any]]) -> bool:
    """True only when ``telemetry.trajectories.enabled`` is explicitly enabled.

    This is the consent gate for exporting message bodies / reasoning / raw tool
    args, and it defaults to off. A missing config, a missing section, or a
    malformed structure (e.g. ``trajectories`` not being a mapping) all yield
    ``False``.

    A string value is parsed rather than truth-tested: only
    ``true``/``1``/``yes``/``on`` (case-insensitive, whitespace ignored) enable
    export. ``bool("false")`` is ``True``, so truth-testing a textual setting
    would open the gate by accident. Non-string values keep ordinary
    truthiness.
    """
    try:
        tel = (config or {}).get("telemetry") or {}
        traj = tel.get("trajectories") or {}
        enabled = traj.get("enabled", False)
    except Exception:
        # Any structural surprise means "not explicitly enabled".
        return False
    if isinstance(enabled, str):
        return enabled.strip().lower() in ("true", "1", "yes", "on")
    return bool(enabled)
|
||||
|
||||
|
||||
def content_mode_for(config: Optional[Dict[str, Any]]) -> str:
    """Return the configured ``telemetry.content_redaction`` mode.

    Falls back to ``CONTENT_NONE`` when the setting is absent, is not one of
    ``CONTENT_MODES``, or when reading the config raises.
    """
    try:
        telemetry = (config or {}).get("telemetry") or {}
        requested = telemetry.get("content_redaction", CONTENT_NONE)
        if requested in CONTENT_MODES:
            return requested
    except Exception:
        pass
    return CONTENT_NONE
|
||||
|
||||
|
||||
# ── Codec-aware message redaction (NeMo pattern) ─────────────────────────────
|
||||
# Redact the right fields of a provider message shape rather than regex-blasting
|
||||
# the whole blob. Structure (roles, names, counts) is preserved; only the
|
||||
# free-text content fields are scrubbed.
|
||||
|
||||
def redact_message(
    msg: Dict[str, Any],
    *,
    content_mode: str = CONTENT_NONE,
    include_content: bool = False,
) -> Dict[str, Any]:
    """Build the exportable form of one chat message dict.

    ``role`` is always copied; ``tool_name`` and ``name`` are copied when they
    are not ``None``.

    With ``include_content`` False, no text is emitted. Only sizes and counts
    are recorded: ``content_chars``, ``reasoning_chars`` and
    ``tool_call_count``. With ``include_content`` True, ``content``,
    ``reasoning_content`` and ``tool_calls`` are kept after being passed through
    the redaction helpers with ``content_mode``.
    """
    redacted: Dict[str, Any] = {"role": msg.get("role")}

    # Structural fields survive in both modes.
    for key in ("tool_name", "name"):
        value = msg.get(key)
        if value is not None:
            redacted[key] = value

    body = msg.get("content")
    reasoning = msg.get("reasoning_content")
    calls = msg.get("tool_calls")

    if include_content:
        # Content included: scrub each free-text field, then keep it.
        if body is not None:
            redacted["content"] = redact_for_export(body, content_mode=content_mode)
        if reasoning:
            redacted["reasoning_content"] = redact_for_export(
                reasoning, content_mode=content_mode
            )
        if calls:
            redacted["tool_calls"] = _redact_tool_calls(calls, content_mode=content_mode)
        return redacted

    # Structural only: record presence/size, never the bytes themselves.
    if body is not None:
        redacted["content_chars"] = len(str(body))
    if reasoning:
        redacted["reasoning_chars"] = len(str(reasoning))
    if calls:
        redacted["tool_call_count"] = _count_tool_calls(calls)
    return redacted
|
||||
|
||||
|
||||
def _count_tool_calls(tool_calls: Any) -> int:
|
||||
try:
|
||||
import json
|
||||
tc = json.loads(tool_calls) if isinstance(tool_calls, str) else tool_calls
|
||||
return len(tc) if isinstance(tc, list) else (1 if tc else 0)
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
|
||||
def _redact_tool_calls(tool_calls: Any, *, content_mode: str) -> Any:
|
||||
"""Redact raw tool-call arguments (free text) while keeping function names."""
|
||||
import json
|
||||
try:
|
||||
tc = json.loads(tool_calls) if isinstance(tool_calls, str) else tool_calls
|
||||
except Exception:
|
||||
return "[unparseable-tool-calls]"
|
||||
if not isinstance(tc, list):
|
||||
return []
|
||||
out: List[Dict[str, Any]] = []
|
||||
for call in tc:
|
||||
if not isinstance(call, dict):
|
||||
continue
|
||||
fn = (call.get("function") or {}) if isinstance(call.get("function"), dict) else {}
|
||||
name = fn.get("name") or call.get("name")
|
||||
args = fn.get("arguments")
|
||||
red_args = redact_for_export(args, content_mode=content_mode) if args is not None else None
|
||||
out.append({"name": name, "arguments": red_args})
|
||||
return out
|
||||
|
||||
|
||||
__all__ = [
|
||||
"CONTENT_NONE",
|
||||
"CONTENT_PII",
|
||||
"CONTENT_MODES",
|
||||
"redact_for_export",
|
||||
"content_export_enabled",
|
||||
"content_mode_for",
|
||||
"redact_message",
|
||||
]
|
||||
145
agent/telemetry/rollup.py
Normal file
145
agent/telemetry/rollup.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""Build per-run summary events from the local telemetry tables.
|
||||
|
||||
Reads the ``tel_*`` tables and projects each completed run into a summary dict holding
|
||||
the recorded values: provider, models used, tool names, token totals, duration, and
|
||||
cost. Powers ``hermes telemetry preview``. No aggregation or bucketing is applied here.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import platform
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
def _os_family() -> str:
|
||||
s = platform.system().lower()
|
||||
if s.startswith("lin"):
|
||||
return "linux"
|
||||
if s == "darwin":
|
||||
return "macos"
|
||||
if s.startswith("win"):
|
||||
return "windows"
|
||||
return "other"
|
||||
|
||||
|
||||
def _hermes_version() -> str:
|
||||
try:
|
||||
from hermes_cli import __version__
|
||||
return str(__version__)
|
||||
except Exception:
|
||||
return "0.0.0"
|
||||
|
||||
|
||||
def _open(db_path: Optional[Path], conn: Optional[sqlite3.Connection]):
|
||||
if conn is not None:
|
||||
prev = conn.row_factory
|
||||
conn.row_factory = sqlite3.Row
|
||||
return conn, prev, False
|
||||
if db_path is None:
|
||||
from hermes_constants import get_hermes_home
|
||||
db_path = get_hermes_home() / "state.db"
|
||||
c = sqlite3.connect(str(db_path), timeout=5.0)
|
||||
c.row_factory = sqlite3.Row
|
||||
return c, None, True
|
||||
|
||||
|
||||
def _run_events(c: sqlite3.Connection, since_ns: Optional[int]) -> List[Dict[str, Any]]:
|
||||
"""Project completed runs into per-run summary dicts."""
|
||||
where = " WHERE end_ns IS NOT NULL"
|
||||
if since_ns:
|
||||
where += f" AND start_ns >= {int(since_ns)}"
|
||||
rows = c.execute(
|
||||
"SELECT run_id, entrypoint, platform, end_reason, start_ns, end_ns, "
|
||||
"model_call_count, tool_call_count, error_count, estimated_cost_usd "
|
||||
"FROM tel_runs" + where
|
||||
).fetchall()
|
||||
|
||||
events: List[Dict[str, Any]] = []
|
||||
for r in rows:
|
||||
# Models actually used in this run (real ids), with token totals.
|
||||
models = [
|
||||
{"provider": m["provider"], "model": m["model"],
|
||||
"calls": m["n"], "input_tokens": int(m["inp"] or 0),
|
||||
"output_tokens": int(m["outp"] or 0)}
|
||||
for m in c.execute(
|
||||
"SELECT provider, model, COUNT(*) n, SUM(input_tokens) inp, "
|
||||
"SUM(output_tokens) outp FROM tel_model_calls WHERE run_id = ? "
|
||||
"GROUP BY provider, model ORDER BY n DESC",
|
||||
(r["run_id"],),
|
||||
).fetchall()
|
||||
]
|
||||
tools = [
|
||||
row["tool_name"]
|
||||
for row in c.execute(
|
||||
"SELECT DISTINCT tool_name FROM tel_tool_calls WHERE run_id = ?",
|
||||
(r["run_id"],),
|
||||
).fetchall()
|
||||
if row["tool_name"]
|
||||
]
|
||||
trow = c.execute(
|
||||
"SELECT SUM(input_tokens) inp, SUM(output_tokens) outp "
|
||||
"FROM tel_model_calls WHERE run_id = ?",
|
||||
(r["run_id"],),
|
||||
).fetchone()
|
||||
duration_ms = (r["end_ns"] - r["start_ns"]) / 1e6 if r["end_ns"] else None
|
||||
events.append({
|
||||
"event_name": "workflow_completed",
|
||||
"run_id": r["run_id"],
|
||||
"entrypoint": r["entrypoint"] or "cli",
|
||||
"platform": r["platform"],
|
||||
"end_reason": r["end_reason"] or "completed",
|
||||
"models_used": models,
|
||||
"tools_used": tools,
|
||||
"model_call_count": r["model_call_count"] or 0,
|
||||
"tool_call_count": r["tool_call_count"] or 0,
|
||||
"error_count": r["error_count"] or 0,
|
||||
"duration_ms": round(duration_ms, 1) if duration_ms is not None else None,
|
||||
"input_tokens": int((trow["inp"] if trow else 0) or 0),
|
||||
"output_tokens": int((trow["outp"] if trow else 0) or 0),
|
||||
"estimated_cost_usd": r["estimated_cost_usd"],
|
||||
})
|
||||
return events
|
||||
|
||||
|
||||
def build_aggregate_events(
    *,
    install_id: str,
    db_path: Optional[Path] = None,
    since_ns: Optional[int] = None,
    conn: Optional[sqlite3.Connection] = None,
    include_heartbeat: bool = True,
) -> List[Dict[str, Any]]:
    """Return per-run summary events plus an optional heartbeat.

    Args:
        install_id: Value copied into the heartbeat event.
        db_path: Database to open when ``conn`` is not given.
        since_ns: Passed through to the run query as a lower bound on start time.
        conn: Existing connection to borrow instead of opening one.
        include_heartbeat: Append a ``heartbeat`` event after the run events.

    A connection opened here is closed before returning. A borrowed connection
    is left open and always gets its original ``row_factory`` back, including
    when that original value was ``None`` (sqlite3's default). Previously that
    case was skipped, which left the caller's connection on ``sqlite3.Row``.
    """
    c, prev_factory, owned = _open(db_path, conn)
    try:
        events = _run_events(c, since_ns)
        if include_heartbeat:
            events.append({
                "event_name": "heartbeat",
                "install_id": install_id,
                "hermes_version": _hermes_version(),
                "os_family": _os_family(),
                "entrypoint": "cli",
            })
        return events
    finally:
        if owned:
            c.close()
        else:
            # Restore unconditionally: None is a legitimate previous factory.
            c.row_factory = prev_factory
|
||||
|
||||
|
||||
def summarize(events: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Summarize events for status/preview output.

    Returns a dict with the event total, a per-``event_name`` count (events
    without a name are counted under ``"?"``), and the sorted union of all keys
    seen across the events.
    """
    counts: Dict[str, int] = {}
    seen_fields = set()
    for event in events:
        label = event.get("event_name", "?")
        counts[label] = counts.get(label, 0) + 1
        seen_fields |= event.keys()
    return {
        "total": len(events),
        "by_event_name": counts,
        "fields_present": sorted(seen_fields),
    }
|
||||
|
||||
|
||||
__all__ = ["build_aggregate_events", "summarize"]
|
||||
83
agent/telemetry/spans.py
Normal file
83
agent/telemetry/spans.py
Normal file
@@ -0,0 +1,83 @@
|
||||
"""Trace / run / span id propagation via contextvars.
|
||||
|
||||
Telemetry events share IDs so a workflow can be reconstructed: one ``trace_id`` per
|
||||
workflow, one ``run_id`` per top-level execution, ``span_id`` per timed operation, and
|
||||
``parent_span_id`` for nesting. These live in contextvars so async tool calls and
|
||||
spawned subagents inherit the lineage automatically.
|
||||
|
||||
Provides helpers to start/clear a run context and mint child span ids. The telemetry
|
||||
plugin sets the run context on session start and reads it in each hook callback.
|
||||
Nothing here writes to storage — it only carries ids.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextvars
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
_trace_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
|
||||
"hermes_tel_trace_id", default=None
|
||||
)
|
||||
_run_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
|
||||
"hermes_tel_run_id", default=None
|
||||
)
|
||||
_parent_span_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
|
||||
"hermes_tel_parent_span_id", default=None
|
||||
)
|
||||
|
||||
|
||||
def new_id() -> str:
    """Return a fresh random id: the hex form of a ``uuid4``."""
    fresh = uuid.uuid4()
    return fresh.hex
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RunContext:
|
||||
trace_id: str
|
||||
run_id: str
|
||||
|
||||
|
||||
def start_run(trace_id: Optional[str] = None, run_id: Optional[str] = None) -> RunContext:
    """Begin a run context and publish its ids to the contextvars.

    Missing (or empty) ids are minted with ``new_id``. The trace and run
    contextvars are set to the chosen ids and the parent-span pointer is reset
    to ``None``. The chosen ids are returned as a ``RunContext``.
    """
    context = RunContext(trace_id=trace_id or new_id(), run_id=run_id or new_id())
    _trace_id.set(context.trace_id)
    _run_id.set(context.run_id)
    _parent_span_id.set(None)
    return context
|
||||
|
||||
|
||||
def current_trace_id() -> Optional[str]:
    """Return the trace id held in the current context, or ``None`` if unset."""
    active_trace = _trace_id.get()
    return active_trace
|
||||
|
||||
|
||||
def current_run_id() -> Optional[str]:
    """Return the run id held in the current context, or ``None`` if unset."""
    active_run = _run_id.get()
    return active_run
|
||||
|
||||
|
||||
def current_parent_span_id() -> Optional[str]:
    """Return the parent span id held in the current context, or ``None`` if unset."""
    active_parent = _parent_span_id.get()
    return active_parent
|
||||
|
||||
|
||||
def new_span_id() -> str:
    """Return a newly minted span id; the parent-span pointer is left untouched."""
    span_id = new_id()
    return span_id
|
||||
|
||||
|
||||
def clear_run() -> None:
    """Reset the trace, run and parent-span contextvars to ``None``."""
    for variable in (_trace_id, _run_id, _parent_span_id):
        variable.set(None)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"RunContext",
|
||||
"new_id",
|
||||
"start_run",
|
||||
"current_trace_id",
|
||||
"current_run_id",
|
||||
"current_parent_span_id",
|
||||
"new_span_id",
|
||||
"clear_run",
|
||||
]
|
||||
@@ -11,7 +11,8 @@ Pure module-level utilities extracted from ``run_agent.py``:
|
||||
``_append_subdir_hint_to_multimodal`` — envelope helpers for the
|
||||
``{"_multimodal": True, "content": [...], "text_summary": ...}`` dict
|
||||
shape returned by tools like ``computer_use``.
|
||||
* ``_extract_file_mutation_targets`` / ``_extract_error_preview`` —
|
||||
* ``_extract_file_mutation_targets`` / ``_extract_landed_file_mutation_paths`` /
|
||||
``_extract_error_preview`` —
|
||||
per-turn file-mutation verifier inputs.
|
||||
* ``_trajectory_normalize_msg`` — strip image blobs from a message for
|
||||
trajectory saving.
|
||||
@@ -269,6 +270,35 @@ def _extract_file_mutation_targets(tool_name: str, args: Dict[str, Any]) -> List
|
||||
return []
|
||||
|
||||
|
||||
def _extract_landed_file_mutation_paths(
    tool_name: str,
    args: Dict[str, Any],
    result: Any,
) -> List[str]:
    """Return the concrete file paths a successful mutation reports.

    The targets derived from the call arguments are the fallback. They are
    returned whenever the tool is not a file-mutating tool, the result is not a
    string, the result is not a JSON object, or the object reports no paths.
    Otherwise a non-empty ``files_modified`` list wins (falsy entries dropped,
    the rest stringified), followed by a truthy ``resolved_path``.
    """
    fallback = _extract_file_mutation_targets(tool_name, args)
    if tool_name not in _FILE_MUTATING_TOOLS or not isinstance(result, str):
        return fallback

    try:
        payload = json.loads(result.strip())
    except Exception:
        return fallback
    if not isinstance(payload, dict):
        return fallback

    reported = payload.get("files_modified")
    if isinstance(reported, list):
        paths = [str(entry) for entry in reported if entry]
        if paths:
            return paths

    single = payload.get("resolved_path")
    return [str(single)] if single else fallback
|
||||
|
||||
|
||||
def _extract_error_preview(result: Any, max_len: int = 180) -> str:
|
||||
"""Pull a one-line error summary out of a tool result for footer display."""
|
||||
text = _multimodal_text_summary(result) if result is not None else ""
|
||||
@@ -411,6 +441,7 @@ __all__ = [
|
||||
"_multimodal_text_summary",
|
||||
"_append_subdir_hint_to_multimodal",
|
||||
"_extract_file_mutation_targets",
|
||||
"_extract_landed_file_mutation_paths",
|
||||
"_extract_error_preview",
|
||||
"_trajectory_normalize_msg",
|
||||
"make_tool_result_message",
|
||||
|
||||
@@ -69,12 +69,35 @@ def _budget_for_agent(agent) -> BudgetConfig:
|
||||
_MAX_TOOL_WORKERS = 8
|
||||
|
||||
|
||||
def _flush_session_db_after_tool_progress(
|
||||
agent,
|
||||
messages: list,
|
||||
*,
|
||||
stage: str,
|
||||
) -> None:
|
||||
"""Best-effort incremental SessionDB flush for tool-call progress.
|
||||
|
||||
Tool execution can perform side effects that terminate or restart the
|
||||
current Hermes process before the normal turn-end persistence path runs.
|
||||
Flush the already-appended assistant/tool messages immediately so the
|
||||
transcript survives destructive-but-valid tool calls.
|
||||
"""
|
||||
try:
|
||||
agent._flush_messages_to_session_db(messages)
|
||||
except Exception as exc:
|
||||
logger.warning("Incremental tool-call persistence failed after %s: %s", stage, exc)
|
||||
|
||||
|
||||
def _ra():
    """Import ``run_agent`` at call time and return the module.

    Resolving it lazily keeps patches such as ``run_agent._set_interrupt``
    effective.
    """
    import run_agent as agent_module

    return agent_module
|
||||
|
||||
|
||||
def _is_interpreter_shutdown_submit_error(exc: RuntimeError) -> bool:
|
||||
return "cannot schedule new futures after interpreter shutdown" in str(exc)
|
||||
|
||||
|
||||
def _emit_terminal_post_tool_call(
|
||||
agent,
|
||||
*,
|
||||
@@ -279,6 +302,11 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
|
||||
f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]",
|
||||
tc.id,
|
||||
))
|
||||
_flush_session_db_after_tool_progress(
|
||||
agent,
|
||||
messages,
|
||||
stage=f"cancelled tool result {tc.function.name}",
|
||||
)
|
||||
return
|
||||
|
||||
# ── Parse args + pre-execution bookkeeping ───────────────────────
|
||||
@@ -581,13 +609,40 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
|
||||
if runnable_calls:
|
||||
max_workers = min(len(runnable_calls), _MAX_TOOL_WORKERS)
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
for i, tc, name, args in runnable_calls:
|
||||
for submit_index, (i, tc, name, args) in enumerate(runnable_calls):
|
||||
# Propagate the agent turn's ContextVars (e.g.
|
||||
# _approval_session_key) AND thread-local approval/sudo
|
||||
# callbacks into the worker thread; clears callbacks on exit.
|
||||
f = executor.submit(
|
||||
propagate_context_to_thread(_run_tool), i, tc, name, args, parsed_calls[i][3]
|
||||
)
|
||||
try:
|
||||
f = executor.submit(
|
||||
propagate_context_to_thread(_run_tool), i, tc, name, args, parsed_calls[i][3]
|
||||
)
|
||||
except RuntimeError as submit_error:
|
||||
if not _is_interpreter_shutdown_submit_error(submit_error):
|
||||
raise
|
||||
skipped_calls = runnable_calls[submit_index:]
|
||||
logger.warning(
|
||||
"interpreter shutdown while scheduling concurrent tools; "
|
||||
"skipping %d unsubmitted tool(s)",
|
||||
len(skipped_calls),
|
||||
)
|
||||
for skipped_i, _tc, skipped_name, skipped_args in skipped_calls:
|
||||
if results[skipped_i] is None:
|
||||
middleware_trace = parsed_calls[skipped_i][3]
|
||||
result = (
|
||||
f"Error executing tool '{skipped_name}': "
|
||||
"Python interpreter is shutting down; tool was not started"
|
||||
)
|
||||
results[skipped_i] = (
|
||||
skipped_name,
|
||||
skipped_args,
|
||||
result,
|
||||
0.0,
|
||||
True,
|
||||
False,
|
||||
middleware_trace,
|
||||
)
|
||||
break
|
||||
futures.append(f)
|
||||
|
||||
# Wait for all to complete with periodic heartbeats so the
|
||||
@@ -768,6 +823,11 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
|
||||
# String results pass through unchanged.
|
||||
_tool_content = agent._tool_result_content_for_active_model(name, function_result)
|
||||
messages.append(make_tool_result_message(name, _tool_content, tc.id))
|
||||
_flush_session_db_after_tool_progress(
|
||||
agent,
|
||||
messages,
|
||||
stage=f"tool result {name}",
|
||||
)
|
||||
|
||||
# ── Per-tool /steer drain ───────────────────────────────────
|
||||
# Same as the sequential path: drain between each collected
|
||||
@@ -803,13 +863,16 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)", force=True)
|
||||
for skipped_tc in remaining_calls:
|
||||
skipped_name = skipped_tc.function.name
|
||||
skip_msg = {
|
||||
"role": "tool",
|
||||
"name": skipped_name,
|
||||
"content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
|
||||
"tool_call_id": skipped_tc.id,
|
||||
}
|
||||
messages.append(skip_msg)
|
||||
messages.append(make_tool_result_message(
|
||||
skipped_name,
|
||||
f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
|
||||
skipped_tc.id,
|
||||
))
|
||||
_flush_session_db_after_tool_progress(
|
||||
agent,
|
||||
messages,
|
||||
stage=f"cancelled tool result {skipped_name}",
|
||||
)
|
||||
break
|
||||
|
||||
function_name = tool_call.function.name
|
||||
@@ -1402,6 +1465,11 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
# (see parallel path for rationale). String results pass through.
|
||||
_tool_content = agent._tool_result_content_for_active_model(function_name, function_result)
|
||||
messages.append(make_tool_result_message(function_name, _tool_content, tool_call.id))
|
||||
_flush_session_db_after_tool_progress(
|
||||
agent,
|
||||
messages,
|
||||
stage=f"tool result {function_name}",
|
||||
)
|
||||
|
||||
# ── Per-tool /steer drain ───────────────────────────────────
|
||||
# Drain pending steer BETWEEN individual tool calls so the
|
||||
@@ -1428,6 +1496,11 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
|
||||
skipped_tc.id,
|
||||
))
|
||||
_flush_session_db_after_tool_progress(
|
||||
agent,
|
||||
messages,
|
||||
stage=f"skipped tool result {skipped_name}",
|
||||
)
|
||||
break
|
||||
|
||||
if agent.tool_delay > 0 and i < len(assistant_message.tool_calls):
|
||||
|
||||
@@ -5,12 +5,47 @@ This transport owns format conversion and normalization — NOT client lifecycle
|
||||
streaming, or the _run_codex_stream() call path.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall
|
||||
|
||||
|
||||
def _content_cache_key(instructions: str, tools: Optional[List[Dict[str, Any]]]) -> Optional[str]:
|
||||
"""Content-address the prompt cache key from the static request prefix.
|
||||
|
||||
Returns ``pck_<sha256[:24]>`` of (instructions + sorted tool schemas), or
|
||||
None when there is nothing static to key on. The cache key is a routing
|
||||
hint only — never a correctness boundary — so two requests sharing a system
|
||||
prompt and tool set intentionally resolve to the same warm prefix bucket.
|
||||
|
||||
The fix this exists for: recurring cron jobs build session_id as
|
||||
``cron_<id>_<timestamp>``, so using session_id as the cache key made every
|
||||
fire cache-cold. The static prefix (identity + tools) is identical across
|
||||
fires, so hashing it gives a stable key that stays warm within the
|
||||
provider's cache TTL. Sorting tools by name keeps the hash insertion-order
|
||||
independent.
|
||||
"""
|
||||
if not instructions and not tools:
|
||||
return None
|
||||
tools_part = ""
|
||||
if tools:
|
||||
sorted_tools = sorted(
|
||||
(t for t in tools if isinstance(t, dict)),
|
||||
key=lambda t: str(t.get("name") or t.get("type") or ""),
|
||||
)
|
||||
tools_part = json.dumps(
|
||||
sorted_tools, sort_keys=True, ensure_ascii=False, separators=(",", ":")
|
||||
)
|
||||
# \x00 separator so instructions ending in the tool JSON can't collide with
|
||||
# a request whose instructions contain that JSON and whose tools are empty.
|
||||
content = f"{instructions or ''}\x00{tools_part}"
|
||||
digest = hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()[:24]
|
||||
return f"pck_{digest}"
|
||||
|
||||
|
||||
class ResponsesApiTransport(ProviderTransport):
|
||||
"""Transport for api_mode='codex_responses'.
|
||||
|
||||
@@ -71,7 +106,10 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
params:
|
||||
instructions: str — system prompt (extracted from messages[0] if not given)
|
||||
reasoning_config: dict | None — {effort, enabled}
|
||||
session_id: str | None — used for prompt_cache_key + xAI conv header
|
||||
session_id: str | None — transcript/session id; drives the xAI
|
||||
x-grok-conv-id header and the Codex cache-scope headers, and is
|
||||
the fallback prompt_cache_key when there is no static prefix to
|
||||
content-address
|
||||
max_tokens: int | None — max_output_tokens
|
||||
timeout: float | None — per-request timeout forwarded to the SDK
|
||||
request_overrides: dict | None — extra kwargs merged in
|
||||
@@ -212,10 +250,17 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
kwargs["parallel_tool_calls"] = True
|
||||
|
||||
session_id = params.get("session_id")
|
||||
# prompt_cache_key is content-addressed from the static prefix
|
||||
# (instructions + tools), NOT session_id — recurring cron jobs carry a
|
||||
# per-fire timestamp in session_id (cron_<id>_<ts>) that made every run
|
||||
# cache-cold. session_id is left untouched for transcript isolation and
|
||||
# the cache-scope routing headers below. Falls back to session_id when
|
||||
# there is no static content to hash.
|
||||
cache_key = _content_cache_key(instructions, response_tools) or session_id
|
||||
# xAI Responses takes prompt_cache_key in extra_body (set further
|
||||
# down); GitHub Models opts out of cache-key routing entirely.
|
||||
if not is_github_responses and not is_xai_responses and session_id:
|
||||
kwargs["prompt_cache_key"] = session_id
|
||||
if not is_github_responses and not is_xai_responses and cache_key:
|
||||
kwargs["prompt_cache_key"] = cache_key
|
||||
|
||||
if reasoning_enabled and is_xai_responses:
|
||||
from agent.model_metadata import grok_supports_reasoning_effort
|
||||
@@ -326,7 +371,7 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
merged_extra_body: Dict[str, Any] = {}
|
||||
if isinstance(existing_extra_body, dict):
|
||||
merged_extra_body.update(existing_extra_body)
|
||||
merged_extra_body.setdefault("prompt_cache_key", session_id)
|
||||
merged_extra_body.setdefault("prompt_cache_key", cache_key)
|
||||
kwargs["extra_body"] = merged_extra_body
|
||||
|
||||
return kwargs
|
||||
|
||||
@@ -29,7 +29,10 @@ from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.iteration_budget import IterationBudget
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
from agent.model_metadata import (
|
||||
estimate_messages_tokens_rough,
|
||||
estimate_request_tokens_rough,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -57,6 +60,34 @@ def _compression_made_progress(
|
||||
return orig_tokens > 0 and new_tokens < orig_tokens * 0.95
|
||||
|
||||
|
||||
def _should_run_preflight_estimate(
|
||||
messages: List[Dict[str, Any]],
|
||||
protect_first_n: int,
|
||||
protect_last_n: int,
|
||||
threshold_tokens: int,
|
||||
) -> bool:
|
||||
"""Cheap gate for the (expensive) full preflight token estimate.
|
||||
|
||||
Returns ``True`` when either:
|
||||
(a) message count exceeds the protected ranges (the historical gate), or
|
||||
(b) a cheap char-based estimate already crosses the configured threshold
|
||||
— the few-but-huge case from issue #27405 that the count-only gate
|
||||
would silently skip (a handful of very large messages never trips
|
||||
the count condition, so compression was never attempted and the
|
||||
turn hit a hard context-overflow error).
|
||||
|
||||
Branch (b) uses ``estimate_messages_tokens_rough`` (the shared char-based
|
||||
estimator) so a single large base64 image isn't mistaken for ~250K tokens.
|
||||
It intentionally undercounts vs. the full request estimate — it omits the
|
||||
system prompt and tool schemas — because it is only a *hint* deciding
|
||||
whether to pay for the authoritative ``estimate_request_tokens_rough``,
|
||||
which (together with ``should_compress``) makes the real decision.
|
||||
"""
|
||||
if len(messages) > protect_first_n + protect_last_n + 1:
|
||||
return True
|
||||
return estimate_messages_tokens_rough(messages) >= threshold_tokens
|
||||
|
||||
|
||||
@dataclass
|
||||
class TurnContext:
|
||||
"""Values produced by the turn prologue and consumed by the turn loop."""
|
||||
@@ -111,7 +142,13 @@ def build_turn_context(
|
||||
# Guard stdio against OSError from broken pipes (systemd/headless/daemon).
|
||||
install_safe_stdio()
|
||||
|
||||
agent._ensure_db_session()
|
||||
# NOTE: the DB session row is created later, AFTER the system prompt is
|
||||
# restored/built (see _ensure_db_session() below the system-prompt block).
|
||||
# Creating it here — before _cached_system_prompt is populated — inserts a
|
||||
# row with system_prompt=NULL on a fresh API/gateway agent that carries
|
||||
# client-managed history, which then trips the "stored system prompt is
|
||||
# null; rebuilding from scratch" warning and a needless first-turn prefix
|
||||
# cache miss. (Issue #45499.)
|
||||
|
||||
# Tell auxiliary_client what the live main provider/model are for this turn.
|
||||
try:
|
||||
@@ -278,6 +315,11 @@ def build_turn_context(
|
||||
|
||||
active_system_prompt = agent._cached_system_prompt
|
||||
|
||||
# Create the DB session row now that _cached_system_prompt is populated, so
|
||||
# the persisted snapshot is written non-NULL on the first turn (Issue
|
||||
# #45499). Idempotent: _ensure_db_session() no-ops once the row exists.
|
||||
agent._ensure_db_session()
|
||||
|
||||
# Crash-resilience: persist the inbound user turn as soon as the session row exists.
|
||||
try:
|
||||
agent._persist_session(messages, conversation_history)
|
||||
@@ -289,10 +331,14 @@ def build_turn_context(
|
||||
)
|
||||
|
||||
# ── Preflight context compression ──
|
||||
if (
|
||||
agent.compression_enabled
|
||||
and len(messages) > agent.context_compressor.protect_first_n
|
||||
+ agent.context_compressor.protect_last_n + 1
|
||||
# Gate the (expensive) full token estimate behind a cheap pre-check.
|
||||
# See ``_should_run_preflight_estimate`` for the OR semantics that fix
|
||||
# issue #27405 (a few very large messages slipping past the count gate).
|
||||
if agent.compression_enabled and _should_run_preflight_estimate(
|
||||
messages,
|
||||
agent.context_compressor.protect_first_n,
|
||||
agent.context_compressor.protect_last_n,
|
||||
agent.context_compressor.threshold_tokens,
|
||||
):
|
||||
_preflight_tokens = estimate_request_tokens_rough(
|
||||
messages,
|
||||
@@ -392,6 +438,8 @@ def build_turn_context(
|
||||
|
||||
# Per-turn file-mutation verifier state.
|
||||
agent._turn_failed_file_mutations = {}
|
||||
agent._turn_file_mutation_paths = set()
|
||||
agent._verification_stop_nudges = 0
|
||||
|
||||
# Record the execution thread so interrupt()/clear_interrupt() can scope
|
||||
# the tool-level interrupt signal to THIS agent's thread only.
|
||||
|
||||
@@ -122,10 +122,14 @@ def finalize_turn(
|
||||
)
|
||||
|
||||
# Determine if conversation completed successfully
|
||||
normal_text_response = str(_turn_exit_reason).startswith("text_response(")
|
||||
completed = (
|
||||
final_response is not None
|
||||
and api_call_count < agent.max_iterations
|
||||
and not failed
|
||||
and (
|
||||
api_call_count < agent.max_iterations
|
||||
or normal_text_response
|
||||
)
|
||||
)
|
||||
|
||||
# Post-loop cleanup must never lose the response. Trajectory save,
|
||||
@@ -162,6 +166,25 @@ def finalize_turn(
|
||||
# same empty-response loop again.
|
||||
try:
|
||||
agent._drop_trailing_empty_response_scaffolding(messages)
|
||||
|
||||
# When the turn was interrupted and the last message is a tool
|
||||
# result, append a synthetic assistant message to close the
|
||||
# tool-call sequence. Without this, the session persists a
|
||||
# ``tool → user`` alternation that strict providers (Gemini,
|
||||
# Claude) reject, causing them to hallucinate a continuation of
|
||||
# the user's message on the next turn (#48879).
|
||||
#
|
||||
# ``_drop_trailing_empty_response_scaffolding`` only rewinds the
|
||||
# tool tail when an empty-response scaffolding flag is present; a
|
||||
# clean ``/stop`` interrupt after a successful tool sets no such
|
||||
# flag, so the tool result survives as the tail and we close it
|
||||
# here instead. On an interrupt ``final_response`` is typically
|
||||
# empty, so fall back to an explicit placeholder rather than
|
||||
# persisting an empty-content assistant turn.
|
||||
if interrupted:
|
||||
from agent.message_sanitization import close_interrupted_tool_sequence
|
||||
close_interrupted_tool_sequence(messages, final_response)
|
||||
|
||||
agent._persist_session(messages, conversation_history)
|
||||
except Exception as _persist_err:
|
||||
_cleanup_errors.append(f"persist_session: {_persist_err}")
|
||||
|
||||
618
agent/verification_evidence.py
Normal file
618
agent/verification_evidence.py
Normal file
@@ -0,0 +1,618 @@
|
||||
"""Coding verification evidence ledger.
|
||||
|
||||
This module records what the agent actually proved while working in a code
|
||||
workspace. It is deliberately passive: it never decides to run a suite, never
|
||||
blocks completion, and never upgrades targeted checks into "repo green".
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import shlex
|
||||
import sqlite3
|
||||
import tempfile
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
_DB_LOCK = threading.Lock()
|
||||
_MAX_OUTPUT_SUMMARY_CHARS = 2000
|
||||
_MAX_EVIDENCE_AGE_DAYS = 30
|
||||
_MAX_EVENTS_PER_SESSION_ROOT = 100
|
||||
_MAX_TOTAL_UNREFERENCED_EVENTS = 10_000
|
||||
_AD_HOC_SCRIPT_NAME_PREFIXES = ("hermes-verify-", "hermes-ad-hoc-")
|
||||
_VERIFY_SCHEMA_VERSION = 1
|
||||
_SHELL_SPLIT_RE = re.compile(r"\s*(?:&&|\|\||;)\s*")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VerificationEvidence:
    """Immutable description of one command result classified as verification."""

    # Command text as it was handed to the classifier.
    command: str
    # Project-level command it was matched to (or the ad-hoc script label).
    canonical_command: str
    # Category such as "test", "lint", "typecheck", "build", "format",
    # "check" or "ad_hoc".
    kind: str
    # "targeted" or "full".
    scope: str
    # "passed" or "failed".
    status: str
    # Exit code of the command.
    exit_code: int
    # Working directory the command ran in.
    cwd: str
    # Workspace root the evidence is attributed to.
    root: str
    # Session that produced the evidence.
    session_id: str
    # Possibly truncated command output.
    output_summary: str = ""
|
||||
|
||||
|
||||
def _utc_now() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _retention_cutoff() -> str:
    """Return the ISO-8601 UTC timestamp ``_MAX_EVIDENCE_AGE_DAYS`` days ago."""
    max_age = timedelta(days=_MAX_EVIDENCE_AGE_DAYS)
    oldest_kept = datetime.now(timezone.utc) - max_age
    return oldest_kept.isoformat()
|
||||
|
||||
|
||||
def _db_path() -> Path:
    """Return the path of the ledger database inside the Hermes home directory."""
    hermes_home = get_hermes_home()
    return hermes_home / "verification_evidence.db"
|
||||
|
||||
|
||||
def _connect() -> sqlite3.Connection:
    """Open the ledger database, creating its directory and schema when needed.

    Returns:
        A connection configured with WAL journaling, a 5000 ms busy timeout
        and ``sqlite3.Row`` as row factory. The caller owns the connection and
        is responsible for closing it.

    Raises:
        OSError: If the parent directory cannot be created.
        sqlite3.Error: If opening or preparing the database fails; the
            half-configured connection is closed before the error propagates.
    """
    path = _db_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(path)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA busy_timeout=5000")
        conn.row_factory = sqlite3.Row
        _ensure_schema(conn)
    except Exception:
        # Setup failed half-way: release the handle instead of leaking it.
        conn.close()
        raise
    return conn
|
||||
|
||||
|
||||
def _ensure_schema(conn: sqlite3.Connection) -> None:
    """Create the ledger tables and index if missing and stamp the schema version.

    Every statement is idempotent (``IF NOT EXISTS`` / ``INSERT OR REPLACE``).
    The changes are committed on ``conn`` before returning.
    """
    ddl_statements = (
        """
        CREATE TABLE IF NOT EXISTS meta (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS verification_events (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            created_at TEXT NOT NULL,
            session_id TEXT NOT NULL,
            cwd TEXT NOT NULL,
            root TEXT NOT NULL,
            command TEXT NOT NULL,
            canonical_command TEXT NOT NULL,
            kind TEXT NOT NULL,
            scope TEXT NOT NULL,
            status TEXT NOT NULL,
            exit_code INTEGER NOT NULL,
            output_summary TEXT NOT NULL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS verification_state (
            session_id TEXT NOT NULL,
            root TEXT NOT NULL,
            last_event_id INTEGER,
            last_edit_at TEXT,
            changed_paths_json TEXT NOT NULL DEFAULT '[]',
            PRIMARY KEY (session_id, root)
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_verification_events_session_root
        ON verification_events(session_id, root, id DESC)
        """,
    )
    for ddl in ddl_statements:
        conn.execute(ddl)

    version = str(_VERIFY_SCHEMA_VERSION)
    conn.execute(
        "INSERT OR REPLACE INTO meta(key, value) VALUES ('schema_version', ?)",
        (version,),
    )
    conn.commit()
|
||||
|
||||
|
||||
def _split_segment_tokens(command: str) -> list[list[str]]:
    """Split ``command`` on ``&&`` / ``||`` / ``;`` and tokenize each piece.

    Pieces that are empty, tokenize to nothing, or that ``shlex`` rejects
    (``ValueError``) are left out of the result.
    """
    tokenized: list[list[str]] = []
    for piece in _SHELL_SPLIT_RE.split(command.strip()):
        if piece:
            try:
                words = shlex.split(piece)
            except ValueError:
                # shlex could not parse this piece; ignore it.
                continue
            if words:
                tokenized.append(words)
    return tokenized
|
||||
|
||||
|
||||
def _clean_token(token: str) -> str:
|
||||
token = token.strip()
|
||||
while token.startswith("./"):
|
||||
token = token[2:]
|
||||
return token
|
||||
|
||||
|
||||
def _canonical_tokens(canonical: str) -> list[str]:
    """Tokenize a canonical command and normalize each token.

    Returns an empty list when ``shlex`` cannot parse ``canonical``.
    """
    try:
        raw_tokens = shlex.split(canonical)
    except ValueError:
        return []
    return [_clean_token(tok) for tok in raw_tokens if tok]
|
||||
|
||||
|
||||
def _find_subsequence(tokens: list[str], needle: list[str]) -> Optional[int]:
    """Return the first index at which ``needle`` occurs contiguously in ``tokens``.

    ``tokens`` are normalized with ``_clean_token`` before comparing; ``needle``
    is compared as given. Returns ``None`` when either list is empty, the
    needle is longer than the tokens, or no occurrence exists.
    """
    if not tokens or not needle:
        return None
    width = len(needle)
    if width > len(tokens):
        return None
    normalized = [_clean_token(tok) for tok in tokens]
    starts = range(len(normalized) - width + 1)
    return next(
        (start for start in starts if normalized[start:start + width] == needle),
        None,
    )
|
||||
|
||||
|
||||
def _strip_command_prefix(tokens: list[str]) -> list[str]:
|
||||
"""Remove harmless command prefixes before matching canonical commands."""
|
||||
remaining = list(tokens)
|
||||
if remaining and remaining[0] == "env":
|
||||
remaining = remaining[1:]
|
||||
while remaining and "=" in remaining[0] and not remaining[0].startswith("-"):
|
||||
remaining = remaining[1:]
|
||||
while remaining and remaining[0] in {"command", "time", "noglob"}:
|
||||
remaining = remaining[1:]
|
||||
return remaining
|
||||
|
||||
|
||||
def _equivalent_needles(needle: list[str]) -> list[list[str]]:
|
||||
"""Return command spellings equivalent to the detected canonical command."""
|
||||
candidates = [needle]
|
||||
if len(needle) >= 3 and needle[1] == "run":
|
||||
package_manager = needle[0]
|
||||
script_name = needle[2]
|
||||
if package_manager in {"npm", "pnpm", "yarn", "bun"}:
|
||||
candidates.append([package_manager, script_name])
|
||||
if len(needle) == 1 and "/" in needle[0]:
|
||||
candidates.extend([["bash", needle[0]], ["sh", needle[0]]])
|
||||
if needle == ["pytest"]:
|
||||
candidates.extend(
|
||||
[
|
||||
["python", "-m", "pytest"],
|
||||
["python3", "-m", "pytest"],
|
||||
["uv", "run", "pytest"],
|
||||
["poetry", "run", "pytest"],
|
||||
["pipenv", "run", "pytest"],
|
||||
]
|
||||
)
|
||||
return candidates
|
||||
|
||||
|
||||
def _find_canonical_match(command: str, canonical_commands: list[str]) -> Optional[tuple[str, list[str]]]:
    """Return ``(canonical, trailing_args)`` for the first detected command.

    ``command`` is split into shell segments; each segment has its harmless
    prefix removed and is then compared, as a prefix match, against every
    equivalent spelling of each canonical command. Canonical commands are
    tried in the given order, segments in command order.

    Both sides of the comparison are normalized with ``_clean_token``. The
    canonical side always was, so without normalizing the command side a
    command typed as ``./scripts/test.sh`` could never match. The returned
    trailing arguments are the segment's original, un-normalized tokens.

    Returns ``None`` when nothing matches.
    """
    # Prepare every segment once: (original tokens, normalized tokens).
    prepared = []
    for tokens in _split_segment_tokens(command):
        candidate_tokens = _strip_command_prefix(tokens)
        prepared.append((candidate_tokens, [_clean_token(t) for t in candidate_tokens]))

    for canonical in canonical_commands:
        needle = _canonical_tokens(canonical)
        if not needle:
            continue
        spellings = _equivalent_needles(needle)
        for candidate_tokens, normalized in prepared:
            for candidate in spellings:
                if normalized[:len(candidate)] == candidate:
                    return canonical, candidate_tokens[len(candidate):]
    return None
|
||||
|
||||
|
||||
def _kind_for_command(canonical: str) -> str:
|
||||
lowered = canonical.lower()
|
||||
if any(word in lowered for word in ("lint", "eslint", "ruff")):
|
||||
return "lint"
|
||||
if any(word in lowered for word in ("typecheck", "tsc", "mypy", "pyright", "ty")):
|
||||
return "typecheck"
|
||||
if "build" in lowered:
|
||||
return "build"
|
||||
if "fmt" in lowered or "format" in lowered:
|
||||
return "format"
|
||||
if "check" in lowered and "test" not in lowered:
|
||||
return "check"
|
||||
return "test"
|
||||
|
||||
|
||||
def _looks_like_target(arg: str) -> bool:
|
||||
if not arg or arg.startswith("-") or "=" in arg:
|
||||
return False
|
||||
return (
|
||||
"/" in arg
|
||||
or "\\" in arg
|
||||
or "::" in arg
|
||||
or arg.endswith((".py", ".js", ".jsx", ".ts", ".tsx", ".rs", ".go", ".java"))
|
||||
or arg.startswith(("test_", "tests", "spec", "__tests__"))
|
||||
)
|
||||
|
||||
|
||||
def _scope_for_args(args: list[str]) -> str:
    """Return ``"targeted"`` if any argument looks like a target, else ``"full"``."""
    for arg in args:
        if _looks_like_target(arg):
            return "targeted"
    return "full"
|
||||
|
||||
|
||||
def _is_under_temp_dir(token: str) -> bool:
|
||||
if not token or token.startswith("-"):
|
||||
return False
|
||||
try:
|
||||
path = Path(token).expanduser()
|
||||
if not path.is_absolute():
|
||||
return False
|
||||
resolved = path.resolve()
|
||||
temp_root = Path(tempfile.gettempdir()).resolve()
|
||||
return resolved == temp_root or temp_root in resolved.parents
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _is_under_root(token: str, root: str | Path | None) -> bool:
|
||||
if not root:
|
||||
return False
|
||||
try:
|
||||
path = Path(token).expanduser().resolve()
|
||||
root_path = Path(root).expanduser().resolve()
|
||||
return path == root_path or root_path in path.parents
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _is_temp_script_path(token: str, root: str | Path | None) -> bool:
    """Tell whether ``token`` is an ad-hoc verification script in the temp dir.

    All three conditions must hold: the file name starts with one of
    ``_AD_HOC_SCRIPT_NAME_PREFIXES``, the path lies under the system temp
    directory, and it does not lie under the workspace ``root``.
    """
    try:
        filename = Path(token).expanduser().name
    except Exception:
        return False
    if not filename.startswith(_AD_HOC_SCRIPT_NAME_PREFIXES):
        return False
    if not _is_under_temp_dir(token):
        return False
    return not _is_under_root(token, root)
|
||||
|
||||
|
||||
def _ad_hoc_script_args(tokens: list[str], root: str | Path | None) -> Optional[list[str]]:
    """Return the arguments passed to an ad-hoc temp script, if the segment runs one.

    Two forms are recognised after harmless prefixes are stripped: the script
    invoked directly, or the script given to a known interpreter. In the
    interpreter form, options and ``--`` before the script are skipped, but
    the first non-option word must be the temp script itself.

    Returns ``None`` when the segment does not run such a script.
    """
    stripped = _strip_command_prefix(tokens)
    if not stripped:
        return None

    head, rest = stripped[0], stripped[1:]
    if _is_temp_script_path(head, root):
        return rest
    if head not in {"python", "python3", "node", "bash", "sh", "ruby", "perl"}:
        return None

    for position, word in enumerate(rest, start=1):
        if word == "--":
            continue
        if _is_temp_script_path(word, root):
            return stripped[position + 1:]
        if not word.startswith("-"):
            # First positional argument is some other script or value.
            return None
    return None
|
||||
|
||||
|
||||
def _find_ad_hoc_match(command: str, root: str | Path | None) -> Optional[list[str]]:
    """Return the script arguments of the first segment running an ad-hoc temp script.

    Segments are examined in command order; ``None`` means no segment matched.
    """
    per_segment = (
        _ad_hoc_script_args(tokens, root)
        for tokens in _split_segment_tokens(command)
    )
    return next((args for args in per_segment if args is not None), None)
|
||||
|
||||
|
||||
def _summarize_output(output: str) -> str:
    """Trim ``output`` and shorten it to head + marker + tail when too long.

    Text of at most ``_MAX_OUTPUT_SUMMARY_CHARS`` characters is returned
    stripped but otherwise unchanged. Longer text keeps the first third of the
    budget and the remaining budget from the end, joined by a marker stating
    how many characters were left out.
    """
    trimmed = (output or "").strip()
    budget = _MAX_OUTPUT_SUMMARY_CHARS
    if len(trimmed) <= budget:
        return trimmed

    head_len = budget // 3
    tail_len = budget - head_len
    omitted = len(trimmed) - budget
    marker = f"\n... [{omitted} chars omitted] ...\n"
    return "".join((trimmed[:head_len], marker, trimmed[-tail_len:]))
|
||||
|
||||
|
||||
def _prune_old_events(conn: sqlite3.Connection, *, session_id: str, root: str) -> None:
    """Bound ledger growth without deleting the current state pointer.

    Four deletions run on ``conn`` in this order (no commit is issued here):

    1. For ``(session_id, root)`` keep only the newest
       ``_MAX_EVENTS_PER_SESSION_ROOT`` events.
    2. Drop state rows whose last edit is older than the retention cutoff,
       or, when no edit is recorded, whose last event is older.
    3. Drop events older than the cutoff that no state row references.
    4. Drop events outside the newest ``_MAX_TOTAL_UNREFERENCED_EVENTS``
       overall that no state row references.
    """
    cutoff = _retention_cutoff()
    scope = (session_id, root)

    steps = (
        (
            """
            DELETE FROM verification_events
            WHERE session_id = ?
            AND root = ?
            AND id NOT IN (
                SELECT id FROM verification_events
                WHERE session_id = ? AND root = ?
                ORDER BY id DESC
                LIMIT ?
            )
            """,
            (*scope, *scope, _MAX_EVENTS_PER_SESSION_ROOT),
        ),
        (
            """
            DELETE FROM verification_state
            WHERE (
                last_edit_at IS NOT NULL
                AND last_edit_at < ?
            )
            OR (
                last_edit_at IS NULL
                AND last_event_id IN (
                    SELECT id FROM verification_events
                    WHERE created_at < ?
                )
            )
            """,
            (cutoff, cutoff),
        ),
        (
            """
            DELETE FROM verification_events
            WHERE created_at < ?
            AND id NOT IN (
                SELECT last_event_id FROM verification_state
                WHERE last_event_id IS NOT NULL
            )
            """,
            (cutoff,),
        ),
        (
            """
            DELETE FROM verification_events
            WHERE id NOT IN (
                SELECT id FROM verification_events
                ORDER BY id DESC
                LIMIT ?
            )
            AND id NOT IN (
                SELECT last_event_id FROM verification_state
                WHERE last_event_id IS NOT NULL
            )
            """,
            (_MAX_TOTAL_UNREFERENCED_EVENTS,),
        ),
    )
    for statement, params in steps:
        conn.execute(statement, params)
|
||||
|
||||
|
||||
def classify_verification_command(
    command: str,
    *,
    cwd: str | Path | None = None,
    session_id: str | None = None,
    exit_code: int = 0,
    output: str = "",
) -> Optional[VerificationEvidence]:
    """Classify a terminal command as verification evidence, if applicable.

    Args:
        command: Command line that was executed.
        cwd: Directory it ran in; also used to look up project facts.
        session_id: Owning session; ``"default"`` is used when missing.
        exit_code: Exit status of the command; ``0`` means passed.
        output: Captured output, stored in summarized form.

    Returns:
        A ``VerificationEvidence`` when the command matches one of the
        project's verify commands, or, only when the project declares no
        verify commands at all, when it runs an ad-hoc temp verification
        script. ``None`` otherwise, including when ``command`` is empty or
        not a string, or when project facts are unavailable.
    """
    if not command or not isinstance(command, str):
        return None

    facts = None
    try:
        from agent.coding_context import project_facts_for

        facts = project_facts_for(cwd)
    except Exception:
        # Lookup is best-effort: no facts means nothing to classify against.
        facts = None
    if not facts:
        return None

    project_root = facts.get("root")
    known_commands = list(facts.get("verifyCommands") or [])

    found = _find_canonical_match(command, known_commands)
    ad_hoc = False
    if found is None and not known_commands:
        script_args = _find_ad_hoc_match(command, project_root)
        if script_args is not None:
            found = ("ad-hoc verification script", script_args)
            ad_hoc = True
    if found is None:
        return None

    canonical, trailing_args = found
    if ad_hoc:
        kind, scope = "ad_hoc", "targeted"
    else:
        kind, scope = _kind_for_command(canonical), _scope_for_args(trailing_args)

    code = int(exit_code)
    resolved_cwd = Path(cwd or ".").resolve()
    return VerificationEvidence(
        command=command,
        canonical_command=canonical,
        kind=kind,
        scope=scope,
        status="passed" if code == 0 else "failed",
        exit_code=code,
        cwd=str(resolved_cwd),
        root=str(project_root or resolved_cwd),
        session_id=str(session_id or "default"),
        output_summary=_summarize_output(output),
    )
|
||||
|
||||
|
||||
def record_terminal_result(
    *,
    command: str,
    cwd: str | Path | None,
    session_id: str | None,
    exit_code: int,
    output: str = "",
) -> Optional[dict[str, Any]]:
    """Record a foreground terminal result when it is verification evidence.

    The command is classified first; results that are not verification
    evidence are ignored. Otherwise an event row is inserted, the
    ``(session_id, root)`` state row is pointed at it with the pending-edit
    marker and changed-path list cleared, and old events are pruned.

    Returns:
        ``None`` when the command is not verification evidence; otherwise a
        dict with the new event ``id``, every ``VerificationEvidence`` field
        and ``created_at``.

    Raises:
        RuntimeError: If the insert does not report a row id.
        sqlite3.Error: On database failures.
    """
    from contextlib import closing

    evidence = classify_verification_command(
        command,
        cwd=cwd,
        session_id=session_id,
        exit_code=exit_code,
        output=output,
    )
    if evidence is None:
        return None

    created_at = _utc_now()
    with _DB_LOCK:
        # sqlite3's own context manager only ends the transaction; wrapping
        # the connection in closing() also releases the handle on every call.
        with closing(_connect()) as conn, conn:
            cur = conn.execute(
                """
                INSERT INTO verification_events(
                    created_at, session_id, cwd, root, command, canonical_command,
                    kind, scope, status, exit_code, output_summary
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    created_at,
                    evidence.session_id,
                    evidence.cwd,
                    evidence.root,
                    evidence.command,
                    evidence.canonical_command,
                    evidence.kind,
                    evidence.scope,
                    evidence.status,
                    evidence.exit_code,
                    evidence.output_summary,
                ),
            )
            if cur.lastrowid is None:
                raise RuntimeError("verification event insert did not return an id")
            event_id = int(cur.lastrowid)
            # Fresh evidence supersedes any pending edit marker for this root.
            conn.execute(
                """
                INSERT INTO verification_state(
                    session_id, root, last_event_id, last_edit_at, changed_paths_json
                ) VALUES (?, ?, ?, NULL, '[]')
                ON CONFLICT(session_id, root) DO UPDATE SET
                    last_event_id = excluded.last_event_id,
                    last_edit_at = NULL,
                    changed_paths_json = '[]'
                """,
                (evidence.session_id, evidence.root, event_id),
            )
            _prune_old_events(conn, session_id=evidence.session_id, root=evidence.root)
            conn.commit()

    return {"id": event_id, **evidence.__dict__, "created_at": created_at}
|
||||
|
||||
|
||||
def mark_workspace_edited(
    *,
    session_id: str | None,
    cwd: str | Path | None,
    paths: list[str] | tuple[str, ...] | None = None,
) -> Optional[dict[str, Any]]:
    """Mark verification evidence stale after a successful file edit.

    Stamps the ``(session_id, root)`` state row with the current time as
    ``last_edit_at`` and merges ``paths`` into its stored changed-path list.
    The stored list is sorted and capped at 200 entries. An existing
    ``last_event_id`` is left untouched.

    Returns:
        ``None`` when project facts are unavailable for ``cwd``; otherwise a
        dict with ``session_id``, ``root``, ``last_edit_at`` and
        ``changed_paths`` (the paths passed to this call, de-duplicated and
        sorted, not the merged list).

    Raises:
        sqlite3.Error: On database failures.
    """
    from contextlib import closing

    try:
        from agent.coding_context import project_facts_for

        facts = project_facts_for(cwd)
    except Exception:
        facts = None
    if not facts:
        return None

    sid = str(session_id or "default")
    root = str(facts.get("root") or Path(cwd or ".").resolve())
    changed_paths = sorted({str(p) for p in (paths or []) if p})
    edited_at = _utc_now()

    with _DB_LOCK:
        # closing() releases the handle; the inner ``conn`` context manager
        # only ends the transaction.
        with closing(_connect()) as conn, conn:
            row = conn.execute(
                """
                SELECT changed_paths_json FROM verification_state
                WHERE session_id = ? AND root = ?
                """,
                (sid, root),
            ).fetchone()
            existing: set[str] = set()
            if row is not None:
                try:
                    existing = set(json.loads(row["changed_paths_json"] or "[]"))
                except (TypeError, ValueError):
                    # Unreadable stored list: start over from this edit.
                    existing = set()
            merged = sorted(existing | set(changed_paths))[-200:]
            conn.execute(
                """
                INSERT INTO verification_state(
                    session_id, root, last_event_id, last_edit_at, changed_paths_json
                ) VALUES (?, ?, NULL, ?, ?)
                ON CONFLICT(session_id, root) DO UPDATE SET
                    last_edit_at = excluded.last_edit_at,
                    changed_paths_json = excluded.changed_paths_json
                """,
                (sid, root, edited_at, json.dumps(merged)),
            )
            conn.commit()

    return {"session_id": sid, "root": root, "last_edit_at": edited_at, "changed_paths": changed_paths}
|
||||
|
||||
|
||||
def verification_status(
    *,
    session_id: str | None,
    cwd: str | Path | None,
) -> dict[str, Any]:
    """Return the best known verification state for a session/workspace.

    Returns:
        A dict whose ``status`` is one of:

        * ``"not_applicable"``: no project facts for ``cwd`` (only ``status``
          and ``evidence`` keys are present);
        * ``"unverified"``: no state row, or no event behind it;
        * ``"stale"``: an edit was recorded after the last event;
        * otherwise the last event's own status (``"passed"`` / ``"failed"``).

        Apart from the ``not_applicable`` case the dict also carries
        ``evidence`` (the event row as a dict, or ``None``), ``root``,
        ``session_id`` and ``changed_paths``.

    Raises:
        sqlite3.Error: On database failures.
    """
    from contextlib import closing

    try:
        from agent.coding_context import project_facts_for

        facts = project_facts_for(cwd)
    except Exception:
        facts = None
    if not facts:
        return {"status": "not_applicable", "evidence": None}

    sid = str(session_id or "default")
    root = str(facts.get("root") or Path(cwd or ".").resolve())
    with _DB_LOCK:
        # closing() makes sure the handle is released on every return path;
        # the sqlite3 context manager alone would leave it open.
        with closing(_connect()) as conn, conn:
            state = conn.execute(
                """
                SELECT last_event_id, last_edit_at, changed_paths_json
                FROM verification_state
                WHERE session_id = ? AND root = ?
                """,
                (sid, root),
            ).fetchone()
            if state is None:
                return {
                    "status": "unverified",
                    "evidence": None,
                    "root": root,
                    "session_id": sid,
                    "changed_paths": [],
                }
            event = None
            if state["last_event_id"] is not None:
                event = conn.execute(
                    "SELECT * FROM verification_events WHERE id = ?",
                    (state["last_event_id"],),
                ).fetchone()

            changed_paths: list[str] = []
            try:
                changed_paths = json.loads(state["changed_paths_json"] or "[]")
            except (TypeError, ValueError):
                changed_paths = []

            if event is None:
                return {
                    "status": "unverified",
                    "evidence": None,
                    "root": root,
                    "session_id": sid,
                    "changed_paths": changed_paths,
                }

            evidence = dict(event)
            # Both timestamps are ISO-8601 UTC strings, compared as text.
            if state["last_edit_at"] and state["last_edit_at"] > evidence["created_at"]:
                status = "stale"
            else:
                status = evidence["status"]
            return {
                "status": status,
                "evidence": evidence,
                "root": root,
                "session_id": sid,
                "changed_paths": changed_paths,
            }
|
||||
164
agent/verification_stop.py
Normal file
164
agent/verification_stop.py
Normal file
@@ -0,0 +1,164 @@
|
||||
"""Turn-end verification guard for coding edits.
|
||||
|
||||
This module is intentionally policy-only. It never runs checks itself; it turns
|
||||
the passive verification ledger into a bounded follow-up when the model tries to
|
||||
finish immediately after editing code without fresh evidence.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
|
||||
_MAX_CHANGED_PATHS_IN_NUDGE = 8
|
||||
|
||||
|
||||
def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
    """Return whether edit -> verify-before-finish behavior is enabled.

    Precedence: the ``HERMES_VERIFY_ON_STOP`` environment variable (anything
    other than ``0`` / ``false`` / ``no`` / ``off``, case-insensitive, enables
    it), then ``config["agent"]["verify_on_stop"]``, then the default ``True``.
    When ``config`` is ``None`` it is loaded via ``hermes_cli.config``; a
    failure to load is treated as an empty config.
    """
    override = os.environ.get("HERMES_VERIFY_ON_STOP")
    if override is not None:
        return override.strip().lower() not in {"0", "false", "no", "off"}

    if config is None:
        try:
            from hermes_cli.config import load_config

            config = load_config()
        except Exception:
            config = {}

    section = (config or {}).get("agent") if isinstance(config, dict) else None
    if not isinstance(section, dict) or "verify_on_stop" not in section:
        return True
    return bool(section.get("verify_on_stop"))
|
||||
|
||||
|
||||
def _candidate_cwds(paths: Iterable[str]) -> list[Path]:
|
||||
candidates: list[Path] = []
|
||||
seen: set[str] = set()
|
||||
for raw in paths:
|
||||
if not raw:
|
||||
continue
|
||||
try:
|
||||
path = Path(raw).expanduser()
|
||||
candidate = path if path.is_dir() else path.parent
|
||||
resolved = str(candidate.resolve())
|
||||
except Exception:
|
||||
continue
|
||||
if resolved not in seen:
|
||||
seen.add(resolved)
|
||||
candidates.append(Path(resolved))
|
||||
return candidates
|
||||
|
||||
|
||||
def _verification_snapshot(
|
||||
*,
|
||||
session_id: str | None,
|
||||
changed_paths: list[str],
|
||||
) -> tuple[dict[str, Any], dict[str, Any]] | None:
|
||||
"""Return ``(status, facts)`` for the first edited workspace needing proof."""
|
||||
try:
|
||||
from agent.coding_context import project_facts_for
|
||||
from agent.verification_evidence import verification_status
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
first_snapshot: tuple[dict[str, Any], dict[str, Any]] | None = None
|
||||
for cwd in _candidate_cwds(changed_paths):
|
||||
facts = project_facts_for(cwd)
|
||||
if not facts:
|
||||
continue
|
||||
status = verification_status(session_id=session_id, cwd=cwd)
|
||||
snapshot = (status, facts)
|
||||
if first_snapshot is None:
|
||||
first_snapshot = snapshot
|
||||
if str(status.get("status") or "unverified") != "passed":
|
||||
return snapshot
|
||||
return first_snapshot
|
||||
|
||||
|
||||
def _format_changed_paths(paths: list[str]) -> str:
    """Render ``paths`` as a bullet list, eliding entries past the display cap.

    At most ``_MAX_CHANGED_PATHS_IN_NUDGE`` paths are listed; a final bullet
    states how many more were left out.
    """
    visible = paths[:_MAX_CHANGED_PATHS_IN_NUDGE]
    bullets = ["- `" + str(path) + "`" for path in visible]
    hidden = len(paths) - len(visible)
    if hidden > 0:
        bullets.append(f"- ... and {hidden} more")
    return "\n".join(bullets)
|
||||
|
||||
|
||||
def _status_detail(status: dict[str, Any]) -> str:
|
||||
state = str(status.get("status") or "unverified")
|
||||
evidence = status.get("evidence") if isinstance(status.get("evidence"), dict) else None
|
||||
if not evidence:
|
||||
return state
|
||||
|
||||
command = evidence.get("canonical_command") or evidence.get("command")
|
||||
summary = str(evidence.get("output_summary") or "").strip()
|
||||
parts = [state]
|
||||
if command:
|
||||
parts.append(f"last command `{command}`")
|
||||
if summary:
|
||||
max_summary = 1200
|
||||
if len(summary) > max_summary:
|
||||
summary = summary[:max_summary].rstrip() + "\n... [truncated]"
|
||||
parts.append(f"last output:\n{summary}")
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def build_verify_on_stop_nudge(
    *,
    session_id: str | None,
    changed_paths: Iterable[str],
    attempts: int = 0,
    max_attempts: int = 2,
) -> str | None:
    """Return a synthetic follow-up when edited code lacks fresh verification.

    Args:
        session_id: Session whose ledger state is consulted.
        changed_paths: Paths edited in this turn; falsy entries are ignored.
        attempts: Number of nudges already issued.
        max_attempts: Nudge budget; no nudge once ``attempts`` reaches it.

    Returns:
        The nudge text, or ``None`` when there are no changed paths, the
        budget is spent, no workspace snapshot is available, or the workspace
        status is already ``"passed"``.
    """
    paths = sorted({str(p) for p in changed_paths if p})
    if not paths or attempts >= max_attempts:
        return None

    snapshot = _verification_snapshot(session_id=session_id, changed_paths=paths)
    if snapshot is None:
        return None
    status, facts = snapshot

    if str(status.get("status") or "unverified") == "passed":
        return None

    verify_commands = []
    for cmd in facts.get("verifyCommands") or []:
        text = str(cmd).strip()
        if text:
            verify_commands.append(text)

    if verify_commands:
        # Only the first three commands are listed, to keep the nudge short.
        listed = ", ".join(f"`{cmd}`" for cmd in verify_commands[:3])
        overflow = ", ..." if len(verify_commands) > 3 else ""
        command_instruction = (
            "Run the relevant verification command now ("
            + listed
            + overflow
            + "), read any failure, repair the code, and summarize what passed."
        )
    else:
        temp_dir = tempfile.gettempdir()
        command_instruction = (
            "No canonical test/lint/build command was detected. Create a focused "
            f"temporary verification script under `{temp_dir}` using an OS-safe "
            "`tempfile` path with a `hermes-verify-` filename prefix, run it "
            "against the changed behavior, clean it up when possible, and "
            "summarize it explicitly as ad-hoc verification rather than suite "
            "green."
        )

    sections = [
        "[System: You edited code in this turn, but the workspace does not have "
        "fresh passing verification evidence yet.",
        f"Verification status: {_status_detail(status)}",
        f"Changed paths:\n{_format_changed_paths(paths)}",
        f"{command_instruction} If verification is not possible, explain the "
        "concrete blocker instead of claiming the work is fully verified.]",
    ]
    return "\n\n".join(sections)
|
||||
|
||||
|
||||
__all__ = ["build_verify_on_stop_nudge", "verify_on_stop_enabled"]
|
||||
@@ -17,5 +17,5 @@
|
||||
"lib": "@/lib",
|
||||
"hooks": "@/hooks"
|
||||
},
|
||||
"iconLibrary": "lucide"
|
||||
"iconLibrary": "tabler"
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
const fs = require('node:fs')
|
||||
|
||||
const _READY_RE = /^HERMES_DASHBOARD_READY port=(\d+)/m
|
||||
|
||||
// The announcement clock starts the instant the backend process is spawned —
|
||||
@@ -94,8 +96,75 @@ function waitForDashboardPort(child, timeoutMs = resolvePortAnnounceTimeoutMs())
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Read the port announced in a dashboard ready file.
 *
 * Returns the port when the file holds JSON whose `port` converts to a
 * positive integer. Returns null when no path is given, the file cannot be
 * read or parsed, or the port is missing or invalid.
 */
function readDashboardReadyFile(readyFile) {
  if (!readyFile) return null

  let announced
  try {
    announced = JSON.parse(fs.readFileSync(readyFile, 'utf8'))
  } catch {
    // Missing, unreadable or not-yet-valid JSON all count as "not ready".
    return null
  }

  const port = Number(announced?.port)
  if (!Number.isInteger(port) || port <= 0) return null
  return port
}
|
||||
|
||||
/**
 * Poll a ready file until the backend announces its port.
 *
 * Resolves with the port as soon as `readDashboardReadyFile` returns one
 * (checked immediately, then every 50 ms). Rejects when the child exits or
 * errors first, or when `timeoutMs` elapses. All timers and child listeners
 * are removed once the promise settles.
 */
function waitForDashboardReadyFile(readyFile, child, timeoutMs = resolvePortAnnounceTimeoutMs()) {
  return new Promise((resolve, reject) => {
    let settled = false
    let pollHandle = null

    // Idempotent teardown: only the first call does any work.
    const teardown = () => {
      if (settled) return
      settled = true
      clearTimeout(deadline)
      if (pollHandle) clearInterval(pollHandle)
      child.off('exit', handleExit)
      child.off('error', handleError)
    }

    const poll = () => {
      const port = readDashboardReadyFile(readyFile)
      if (!port) return
      teardown()
      resolve(port)
    }

    const handleExit = (code, signal) => {
      teardown()
      reject(new Error(`Hermes backend: exited before port announcement (${signal || code})`))
    }

    const handleError = err => {
      teardown()
      reject(err)
    }

    const deadline = setTimeout(() => {
      teardown()
      reject(new Error(`Timed out waiting for Hermes backend port announcement (${timeoutMs}ms)`))
    }, timeoutMs)

    child.on('exit', handleExit)
    child.on('error', handleError)

    pollHandle = setInterval(poll, 50)
    if (typeof pollHandle.unref === 'function') pollHandle.unref()

    // Do not wait for the first tick if the file is already there.
    poll()
  })
}
|
||||
|
||||
// Wait for the backend's port announcement using whichever channel the caller
// configured: the ready file when `options.readyFile` is set, otherwise the
// child's own announcement via waitForDashboardPort. `options.timeoutMs`
// overrides the resolved default timeout.
function waitForDashboardPortAnnouncement(child, options = {}) {
  const timeoutMs = options.timeoutMs ?? resolvePortAnnounceTimeoutMs()
  const { readyFile } = options

  return readyFile
    ? waitForDashboardReadyFile(readyFile, child, timeoutMs)
    : waitForDashboardPort(child, timeoutMs)
}
|
||||
|
||||
module.exports = {
|
||||
waitForDashboardPort,
|
||||
waitForDashboardPortAnnouncement,
|
||||
waitForDashboardReadyFile,
|
||||
readDashboardReadyFile,
|
||||
resolvePortAnnounceTimeoutMs,
|
||||
DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
|
||||
MIN_PORT_ANNOUNCE_TIMEOUT_MS,
|
||||
|
||||
@@ -14,9 +14,15 @@
|
||||
const test = require('node:test')
|
||||
const assert = require('node:assert/strict')
|
||||
const { EventEmitter } = require('node:events')
|
||||
const fs = require('node:fs')
|
||||
const os = require('node:os')
|
||||
const path = require('node:path')
|
||||
|
||||
const {
|
||||
readDashboardReadyFile,
|
||||
waitForDashboardPort,
|
||||
waitForDashboardPortAnnouncement,
|
||||
waitForDashboardReadyFile,
|
||||
resolvePortAnnounceTimeoutMs,
|
||||
DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
|
||||
MIN_PORT_ANNOUNCE_TIMEOUT_MS,
|
||||
@@ -119,3 +125,75 @@ test('a late announcement after timeout does not throw (listeners torn down)', a
|
||||
child.stdout.emit('data', 'HERMES_DASHBOARD_READY port=9999\n')
|
||||
})
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ready-file port announcement
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Create a unique temp directory for one test and return:
//   dir     - the directory path
//   file    - the (not yet created) ready.json path inside it
//   cleanup - removes the directory and everything in it
function mkTmpReadyFile() {
  const prefix = path.join(os.tmpdir(), 'hermes-ready-test-')
  const dir = fs.mkdtempSync(prefix)
  const file = path.join(dir, 'ready.json')

  const cleanup = () => fs.rmSync(dir, { recursive: true, force: true })

  return { dir, file, cleanup }
}
|
||||
|
||||
// Run `body` against a fresh temp ready-file fixture and always remove the
// fixture afterwards, whether the body passes or throws.
async function withTmpReadyFile(body) {
  const tmp = mkTmpReadyFile()
  try {
    return await body(tmp)
  } finally {
    tmp.cleanup()
  }
}

// Write a `{ port }` announcement to `file` after a short delay, standing in
// for the backend becoming ready.
function announceLater(file, port, delayMs = 20) {
  setTimeout(() => fs.writeFileSync(file, JSON.stringify({ port })), delayMs)
}

test('readDashboardReadyFile returns a valid port from JSON', () =>
  withTmpReadyFile(({ file }) => {
    fs.writeFileSync(file, JSON.stringify({ port: 4567 }))
    assert.equal(readDashboardReadyFile(file), 4567)
  }))

test('readDashboardReadyFile ignores missing, malformed, or invalid files', () =>
  withTmpReadyFile(({ file }) => {
    // Missing file.
    assert.equal(readDashboardReadyFile(file), null)

    // Malformed JSON.
    fs.writeFileSync(file, '{')
    assert.equal(readDashboardReadyFile(file), null)

    // Well-formed JSON carrying an unusable port.
    fs.writeFileSync(file, JSON.stringify({ port: 0 }))
    assert.equal(readDashboardReadyFile(file), null)
  }))

test('waitForDashboardReadyFile resolves when the ready file appears', () =>
  withTmpReadyFile(async ({ file }) => {
    const child = makeFakeChild()
    const pending = waitForDashboardReadyFile(file, child, 1000)
    announceLater(file, 8765)
    assert.equal(await pending, 8765)
  }))

test('waitForDashboardPortAnnouncement uses ready file when provided', () =>
  withTmpReadyFile(async ({ file }) => {
    const child = makeFakeChild()
    const pending = waitForDashboardPortAnnouncement(child, { readyFile: file, timeoutMs: 1000 })
    announceLater(file, 9876)
    assert.equal(await pending, 9876)
  }))

test('waitForDashboardReadyFile rejects when the child exits before file readiness', () =>
  withTmpReadyFile(async ({ file }) => {
    const child = makeFakeChild()
    const pending = waitForDashboardReadyFile(file, child, 1000)
    child.emit('exit', 1, null)
    await assert.rejects(pending, /exited before port announcement/)
  }))
|
||||
|
||||
98
apps/desktop/electron/git-repo-scan.cjs
Normal file
98
apps/desktop/electron/git-repo-scan.cjs
Normal file
@@ -0,0 +1,98 @@
|
||||
'use strict'
|
||||
|
||||
// Repo-first discovery: walk bounded roots for git repos using only Node's `fs`
|
||||
// — no native addon, so it just works for anyone who pulls main (no
|
||||
// electron-rebuild). Mirrors how GitHub Desktop scans: stop at the first `.git`
|
||||
// (don't descend into a repo), cap depth, and skip heavy non-repo trees so the
|
||||
// first scan stays fast. Results are cached by the backend after the first run.
|
||||
|
||||
const fs = require('node:fs')
|
||||
const os = require('node:os')
|
||||
const path = require('node:path')
|
||||
|
||||
const fsp = fs.promises
|
||||
|
||||
// Default recursion cap for the scan. Kept shallow deliberately: projects
// normally sit a few levels below home (`~/www/repo`, `~/code/org/repo`), while
// deeper `.git` dirs tend to be fixtures, vendored code, or eval checkouts
// (e.g. `~/www/ha-evals/tasks/*/repo`). Deeper repos that are actually in use
// still show up through session-derived discovery, so the cap only trims noise.
const DEFAULT_MAX_DEPTH = 3

// Upper bound on directory reads in flight per mapLimit call.
const MAX_CONCURRENCY = 32

// Non-hidden heavyweight trees that are never repos themselves and are not
// worth walking. Hidden directories (.cache, .Trash, .npm, ...) are skipped
// wholesale by the walker, so they do not need to be listed here.
const JUNK_DIRS = new Set([
  'Applications',
  'Library',
  'node_modules',
  'site-packages',
  'vendor',
  'venv'
])
|
||||
|
||||
// Run `fn` over every element of `items` with at most `limit` calls in flight.
// Results are discarded; the returned promise resolves once every item has been
// processed and rejects as soon as any `fn` call rejects.
async function mapLimit(items, limit, fn) {
  let next = 0

  // Each lane keeps claiming the next unprocessed index until none remain.
  const drain = async () => {
    while (next < items.length) {
      const item = items[next++]
      await fn(item)
    }
  }

  const lanes = Array.from({ length: Math.min(limit, items.length) }, () => drain())
  await Promise.all(lanes)
}
|
||||
|
||||
/**
 * Walk `roots` (falling back to the home directory when none are given) looking
 * for git repositories, and return one `{ label, root }` entry per distinct
 * repo root. `options.maxDepth` limits how deep the walk goes (default 3).
 */
async function scanGitRepos(roots, options = {}) {
  const depthLimit = Number(options.maxDepth) || DEFAULT_MAX_DEPTH
  const requested = Array.isArray(roots) && roots.length > 0 ? roots : [os.homedir()]
  const startDirs = requested.map(root => String(root || '').trim()).filter(Boolean)

  // repo root -> display label; the Map de-duplicates roots reached twice.
  const repos = new Map()

  const visit = async (dir, depth) => {
    if (depth > depthLimit) return

    let listing
    try {
      listing = await fsp.readdir(dir, { withFileTypes: true })
    } catch {
      // Unreadable or permission denied: nothing to report under this dir.
      return
    }

    // Only a `.git` DIRECTORY counts as a repo root (a main checkout). A `.git`
    // FILE marks a linked worktree or submodule, which belongs to its parent
    // repo, so it is not listed and the walk continues below it in case a real
    // repo sits deeper.
    const isRepoRoot = listing.some(entry => entry.name === '.git' && entry.isDirectory())
    if (isRepoRoot) {
      const root = dir.replace(/[/\\]+$/, '')
      repos.set(root, path.basename(root) || root)
      // Never descend into a repo.
      return
    }

    // Descend into real directories only (symlinks are skipped to avoid
    // loops), ignoring hidden dirs and the known heavy trees.
    const children = listing
      .filter(entry => entry.isDirectory() && !entry.name.startsWith('.') && !JUNK_DIRS.has(entry.name))
      .map(entry => path.join(dir, entry.name))

    await mapLimit(children, MAX_CONCURRENCY, child => visit(child, depth + 1))
  }

  await mapLimit(startDirs, MAX_CONCURRENCY, dir => visit(dir, 0))

  return Array.from(repos, ([root, label]) => ({ label, root }))
}
|
||||
|
||||
module.exports = { scanGitRepos }
|
||||
679
apps/desktop/electron/git-review-ops.cjs
Normal file
679
apps/desktop/electron/git-review-ops.cjs
Normal file
@@ -0,0 +1,679 @@
|
||||
'use strict'
|
||||
|
||||
// Git ops backing the coding rail + Codex-style review pane. Built on `simple-git`
|
||||
// (a maintained wrapper around the system git binary — same git the rest of the
|
||||
// app shells to, no native build) so we read structured status()/diffSummary()
|
||||
// results instead of hand-parsing porcelain. Reads degrade to null/empty on a
|
||||
// non-repo / remote backend; mutations reject so the renderer can toast.
|
||||
|
||||
const { execFile } = require('node:child_process')
|
||||
const fs = require('node:fs/promises')
|
||||
const path = require('node:path')
|
||||
|
||||
const simpleGit = require('simple-git')
|
||||
|
||||
const { resolveRequestedPathForIpc } = require('./hardening.cjs')
|
||||
|
||||
// Character cap applied to the diff handed to commit-message generation.
const COMMIT_CONTEXT_DIFF_MAX_CHARS = 120_000

// Maximum number of untracked file names listed in the commit context.
const COMMIT_CONTEXT_UNTRACKED_MAX = 80

// Number of untracked files whose lines are counted in parallel per batch.
const UNTRACKED_LINE_COUNT_CONCURRENCY = 16

// Untracked files larger than this (1 MiB) are not read for line counting.
const UNTRACKED_LINE_COUNT_MAX_BYTES = 1024 * 1024
|
||||
|
||||
// Build the environment used to spawn `gh`. A GUI-launched Electron app on
// macOS gets only a minimal PATH (no /opt/homebrew/bin or /usr/local/bin), so
// neither `gh` nor the `git` it shells out to would be found. The directory of
// the resolved gh binary and the common package-manager bin dirs are therefore
// put in front of the inherited PATH.
function ghEnv(ghBin) {
  const candidates = ['/opt/homebrew/bin', '/usr/local/bin', '/usr/bin']

  if (ghBin) {
    candidates.unshift(path.dirname(ghBin))
  }

  // A bare command name has dirname '.', which must not end up on PATH.
  const extra = candidates.filter(dir => dir && dir !== '.')
  const searchPath = [...extra, process.env.PATH].filter(Boolean).join(path.delimiter)

  return { ...process.env, PATH: searchPath }
}
|
||||
|
||||
// Invoke the `gh` CLI with `args` inside `cwd`. Always resolves (never rejects)
// with `{ ok, stdout }`, so callers can branch on availability/auth without a
// try/catch: any spawn or exit error (gh missing, unauthenticated, timeout,
// ...) yields `ok: false`.
function runGh(args, cwd, ghBin) {
  return new Promise(resolve => {
    const binary = ghBin || 'gh'
    const spawnOptions = {
      cwd,
      env: ghEnv(ghBin),
      windowsHide: true,
      timeout: 30_000,
      maxBuffer: 8 * 1024 * 1024
    }

    execFile(binary, args, spawnOptions, (err, stdout) => {
      resolve({ ok: !err, stdout: String(stdout || '') })
    })
  })
}
|
||||
|
||||
// Create a simple-git client rooted at `cwd`, using `gitBin` when provided and
// the `git` found on PATH otherwise.
function gitFor(cwd, gitBin) {
  const clientOptions = {
    baseDir: cwd,
    binary: gitBin || 'git',
    maxConcurrentProcesses: 4,
    trimmed: false
  }

  return simpleGit(clientOptions)
}
|
||||
|
||||
// Normalise a path as reported by simple-git. Renames arrive either as
// `old => new` or in brace form `dir/{old => new}/file`; both are resolved to
// the NEW path so a row addresses the file that actually exists for
// diff/stage. Paths without a rename marker are returned trimmed.
function resolveRenamePath(raw) {
  const ARROW = ' => '
  const text = String(raw || '').trim()

  if (text.indexOf(ARROW) === -1) {
    return text
  }

  const braced = /^(.*)\{(.*) => (.*)\}(.*)$/.exec(text)

  if (braced === null) {
    // Plain `old => new`: keep whatever follows the last arrow.
    return text.split(ARROW).pop().trim()
  }

  const head = braced[1]
  const renamedTo = braced[3]
  const tail = braced[4]

  // An empty side of the brace (`{lib => }`) leaves a doubled slash behind.
  return (head + renamedTo + tail).replace(/\/{2,}/g, '/')
}
|
||||
|
||||
// Index a diff summary by (rename-resolved) path: Map<path, { added, removed }>.
// Binary entries carry no line delta, so they are recorded as 0/0.
function countsByPath(summary) {
  const pairs = summary.files.map(entry => {
    const textual = !entry.binary
    const delta = {
      added: textual ? entry.insertions : 0,
      removed: textual ? entry.deletions : 0
    }

    return [resolveRenamePath(entry.file), delta]
  })

  return new Map(pairs)
}
|
||||
|
||||
// Count the lines of an untracked file so the review tree can show +N for it
// (untracked files never appear in diffSummary()). The count is the number of
// newline bytes, plus one when the file ends without a trailing newline.
// Returns 0 for anything that is not a regular file, exceeds
// UNTRACKED_LINE_COUNT_MAX_BYTES, contains a NUL byte (treated as binary, like
// git numstat's "-"), or cannot be read.
async function untrackedInsertions(cwd, relPath) {
  const NEWLINE = 10

  try {
    const target = path.join(cwd, relPath)
    const info = await fs.stat(target)
    const countable = info.isFile() && info.size <= UNTRACKED_LINE_COUNT_MAX_BYTES

    if (!countable) {
      return 0
    }

    const data = await fs.readFile(target)

    if (data.indexOf(0) !== -1) {
      return 0
    }

    let newlines = 0

    for (let at = data.indexOf(NEWLINE); at !== -1; at = data.indexOf(NEWLINE, at + 1)) {
      newlines += 1
    }

    const endsMidLine = data.length > 0 && data[data.length - 1] !== NEWLINE

    return endsMidLine ? newlines + 1 : newlines
  } catch {
    return 0
  }
}
|
||||
|
||||
// Limit `text` to `maxChars` characters. Text within the limit is returned
// unchanged; longer text is cut and followed by a `# <label>: N chars omitted`
// marker line. Nullish/empty input yields ''.
function capText(text, maxChars, label = 'truncated') {
  const full = String(text || '')
  const overflow = full.length - maxChars

  if (overflow <= 0) {
    return full
  }

  return full.slice(0, maxChars) + '\n# ' + label + ': ' + overflow + ' chars omitted\n'
}
|
||||
|
||||
// Fill in `added` for untracked rows ('?' status with no counts yet) by
// counting lines on disk. Rows are mutated in place, processed in batches of
// UNTRACKED_LINE_COUNT_CONCURRENCY.
async function fillUntrackedCounts(cwd, files) {
  const needsCount = file => file.status === '?' && file.added === 0 && file.removed === 0
  const queue = files.filter(needsCount)
  let start = 0

  while (start < queue.length) {
    const batch = queue.slice(start, start + UNTRACKED_LINE_COUNT_CONCURRENCY)

    await Promise.all(
      batch.map(file =>
        untrackedInsertions(cwd, file.path).then(count => {
          file.added = count
        })
      )
    )

    start += UNTRACKED_LINE_COUNT_CONCURRENCY
  }
}
|
||||
|
||||
// Find the commit that "all branch changes" are measured against: the
// merge-base of HEAD with the remote default branch (origin/HEAD) when that is
// configured, otherwise with the first common trunk name that resolves.
// Returns the merge-base sha, or null when no candidate works.
async function branchBase(git) {
  // Run one git query; any failure (e.g. an unknown ref) counts as "no answer".
  const ask = async run => {
    try {
      return (await run()).trim()
    } catch {
      return ''
    }
  }

  const remoteHead = await ask(() => git.revparse(['--abbrev-ref', 'origin/HEAD']))
  const candidates = [remoteHead, 'origin/main', 'origin/master', 'main', 'master'].filter(Boolean)

  for (const ref of candidates) {
    const base = await ask(() => git.raw(['merge-base', 'HEAD', ref]))

    if (base) {
      return base
    }
  }

  return null
}
|
||||
|
||||
// Resolve the repo's default branch NAME ("main" / "master" / ...), preferring
// the remote's HEAD, then common trunk names. Returns null when none is found
// (e.g. a fresh repo with only a feature branch). Used to offer "branch off the
// trunk" regardless of which branch is currently checked out.
async function defaultBranchName(git) {
  try {
    const head = (await git.revparse(['--abbrev-ref', 'origin/HEAD'])).trim()

    // "origin/main" -> "main"; skip the bare "origin/HEAD" placeholder.
    if (head && head !== 'origin/HEAD') {
      return head.replace(/^origin\//, '')
    }
  } catch {
    // No origin/HEAD configured.
  }

  // Prefer a local trunk, then a remote-only one (the clean name is returned
  // either way) so "branch off main" works before main is checked out locally.
  const candidates = [
    'refs/heads/main',
    'refs/heads/master',
    'refs/remotes/origin/main',
    'refs/remotes/origin/master'
  ]

  for (const ref of candidates) {
    try {
      // `--quiet` makes rev-parse fail silently: non-zero exit, nothing on
      // stderr. simple-git's default error detection only rejects when a
      // non-zero exit comes with stderr output, so a missing ref can resolve
      // with empty output instead of throwing. Existence is therefore decided
      // by the printed object name, not by the absence of an exception.
      const sha = String((await git.raw(['rev-parse', '--verify', '--quiet', ref])) || '').trim()

      if (sha) {
        return ref.replace(/^refs\/(?:heads|remotes\/origin)\//, '')
      }
    } catch {
      // Ref doesn't exist; try the next candidate.
    }
  }

  return null
}
|
||||
|
||||
// Collapse a status entry to one upper-case classification letter. Untracked
// ('?' on either side) wins; otherwise the staged (index) code is preferred
// over the worktree code, and 'M' is the fallback when neither is set.
function statusLetter(file) {
  const { index, working_dir: worktree } = file

  if ([index, worktree].includes('?')) {
    return '?'
  }

  const hasIndexCode = index && index !== ' '
  const chosen = hasIndexCode ? index : worktree

  return (chosen || 'M').toUpperCase()
}
|
||||
|
||||
// True when the entry's index code is set to something other than blank (' ')
// or untracked ('?').
const isStaged = file => {
  const code = file.index

  return Boolean(code) && code !== ' ' && code !== '?'
}
|
||||
|
||||
// List changed files for the review pane as rows of
// `{ path, added, removed, status, staged }`, sorted by path.
//
//   scope 'branch'   - everything on this branch since the merge-base with the
//                      trunk (see branchBase); `base` is that merge-base.
//   scope 'lastTurn' - changes relative to `baseRef`, plus untracked files;
//                      `base` echoes `baseRef`.
//   anything else    - uncommitted work (staged + unstaged + untracked), one
//                      row per path; `base` is null.
//
// A read: an unresolvable path, a missing base, or any git failure yields
// `{ files: [], base: null }` instead of rejecting.
async function reviewList(repoPath, scope, baseRef, gitBin) {
  const empty = () => ({ files: [], base: null })
  const byPath = (left, right) => left.path.localeCompare(right.path)
  // Binary entries carry no line delta.
  const lineDelta = entry =>
    entry.binary ? { added: 0, removed: 0 } : { added: entry.insertions, removed: entry.deletions }

  let cwd

  try {
    cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Review list' })
  } catch {
    return empty()
  }

  const git = gitFor(cwd, gitBin)

  try {
    if (scope !== 'branch' && scope !== 'lastTurn') {
      const [status, staged, unstaged] = await Promise.all([
        git.status(),
        git.diffSummary(['--cached']),
        git.diffSummary([])
      ])
      const stagedCounts = countsByPath(staged)
      const unstagedCounts = countsByPath(unstaged)
      const none = { added: 0, removed: 0 }

      const rows = status.files.map(file => {
        const filePath = resolveRenamePath(file.path)
        const inIndex = stagedCounts.get(filePath) || none
        const inTree = unstagedCounts.get(filePath) || none

        return {
          path: filePath,
          added: inIndex.added + inTree.added,
          removed: inIndex.removed + inTree.removed,
          status: statusLetter(file),
          staged: isStaged(file)
        }
      })

      rows.sort(byPath)
      await fillUntrackedCounts(cwd, rows)

      return { files: rows, base: null }
    }

    const wholeBranch = scope === 'branch'
    const base = wholeBranch ? await branchBase(git) : baseRef

    if (!base) {
      return empty()
    }

    const summary = await git.diffSummary([wholeBranch ? `${base}...HEAD` : base])
    const rows = summary.files.map(entry => ({
      path: resolveRenamePath(entry.file),
      ...lineDelta(entry),
      status: 'M',
      staged: false
    }))

    // "Last turn" also surfaces files created since the baseline (untracked),
    // skipping any path that already has a row.
    if (!wholeBranch) {
      const listed = new Set(rows.map(row => row.path))
      const status = await git.status()

      for (const created of status.not_added) {
        if (listed.has(created)) {
          continue
        }

        listed.add(created)
        rows.push({ path: created, added: 0, removed: 0, status: '?', staged: false })
      }
    }

    rows.sort(byPath)
    await fillUntrackedCounts(cwd, rows)

    return { files: rows, base }
  } catch {
    return empty()
  }
}
|
||||
|
||||
// Return the unified diff text for ONE file in the given review scope:
//   'branch'   - merge-base...HEAD
//   'lastTurn' - `baseRef` vs the working tree
//   otherwise  - the staged diff when `staged` is set, else the worktree diff,
//                falling back to a synthesized all-add diff when the worktree
//                diff is empty (the untracked-file case).
// A read: failures and missing bases produce '' rather than a rejection.
async function reviewDiff(repoPath, filePath, scope, baseRef, staged, gitBin) {
  let cwd

  try {
    cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Review diff' })
  } catch {
    return ''
  }

  const git = gitFor(cwd, gitBin)
  const diffOrEmpty = args => git.diff(args).catch(() => '')

  switch (scope) {
    case 'branch': {
      const base = await branchBase(git)

      return base ? diffOrEmpty([`${base}...HEAD`, '--', filePath]) : ''
    }

    case 'lastTurn':
      return baseRef ? diffOrEmpty([baseRef, '--', filePath]) : ''

    default:
      break
  }

  if (staged) {
    return diffOrEmpty(['--cached', '--', filePath])
  }

  const worktree = await diffOrEmpty(['--', filePath])

  if (worktree.trim()) {
    return worktree
  }

  // No worktree diff exists for an untracked file, so build an all-add diff
  // with --no-index. That mode exits non-zero by design when the files differ,
  // so it is run through a raw execFile whose error is ignored rather than
  // through simple-git.
  return new Promise(resolve => {
    const spawnOptions = { cwd, windowsHide: true, timeout: 30_000, maxBuffer: 32 * 1024 * 1024 }

    execFile(gitBin || 'git', ['diff', '--no-index', '--', '/dev/null', filePath], spawnOptions, (_err, stdout) => {
      resolve(String(stdout || ''))
    })
  })
}
|
||||
|
||||
// Diff of ONE file between the working tree and HEAD: the "what changed since
// the last commit" view used by the file preview. Unlike reviewDiff, a clean
// tracked file yields '' (no synthesized full-add); an all-add diff is only
// produced for a file that git reports as untracked. A read: failures give ''.
async function fileDiffVsHead(repoPath, filePath, gitBin) {
  let cwd

  try {
    cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'File diff' })
  } catch {
    return ''
  }

  const git = gitFor(cwd, gitBin)
  const vsHead = await git.diff(['HEAD', '--', filePath]).catch(() => '')

  if (vsHead.trim()) {
    return vsHead
  }

  // Nothing differs from HEAD. Porcelain status marks untracked paths with
  // '??'; anything else is a clean tracked file and must return empty.
  const porcelain = await git.raw(['status', '--porcelain', '--', filePath]).catch(() => '')
  const untracked = porcelain.trim().startsWith('??')

  if (!untracked) {
    return ''
  }

  return new Promise(resolve => {
    const spawnOptions = { cwd, windowsHide: true, timeout: 30_000, maxBuffer: 32 * 1024 * 1024 }

    execFile(gitBin || 'git', ['diff', '--no-index', '--', '/dev/null', filePath], spawnOptions, (_err, stdout) => {
      resolve(String(stdout || ''))
    })
  })
}
|
||||
|
||||
// Stage one file, or everything (`git add -A`) when `filePath` is empty.
// A mutation: path-resolution and git failures reject.
async function reviewStage(repoPath, filePath, gitBin) {
  const cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Review stage' })
  const args = filePath ? ['add', '--', filePath] : ['add', '-A']

  await gitFor(cwd, gitBin).raw(args)

  return { ok: true }
}
|
||||
|
||||
// Unstage one file, or the whole index when `filePath` is empty, by resetting
// against HEAD. A mutation: path-resolution and git failures reject.
async function reviewUnstage(repoPath, filePath, gitBin) {
  const cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Review unstage' })
  const args = filePath ? ['reset', '-q', 'HEAD', '--', filePath] : ['reset', '-q', 'HEAD']

  await gitFor(cwd, gitBin).raw(args)

  return { ok: true }
}
|
||||
|
||||
// Discard changes back to the committed state, for one file or (when
// `filePath` is empty) the whole tree. Destructive; the renderer asks for
// confirmation first. Tracked files are restored with `checkout HEAD` and
// untracked ones removed with `clean -fd`. Each step is best-effort: a git
// failure in either is ignored and `{ ok: true }` is still returned.
async function reviewRevert(repoPath, filePath, gitBin) {
  const cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Review revert' })
  const git = gitFor(cwd, gitBin)
  const ignoreFailure = () => undefined

  const checkoutArgs = ['checkout', 'HEAD', '--', filePath ? filePath : '.']
  const cleanArgs = filePath ? ['clean', '-fd', '--', filePath] : ['clean', '-fd']

  await git.raw(checkoutArgs).catch(ignoreFailure)
  await git.raw(cleanArgs).catch(ignoreFailure)

  return { ok: true }
}
|
||||
|
||||
// Resolve `ref` (default HEAD) to the value `git rev-parse` prints for it; used
// to capture the turn baseline for "Last turn". A read: returns null when the
// path cannot be resolved, git fails, or the output is empty.
async function reviewRevParse(repoPath, ref, gitBin) {
  try {
    const cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Review rev-parse' })
    const resolved = (await gitFor(cwd, gitBin).revparse([ref || 'HEAD'])).trim()

    return resolved || null
  } catch {
    return null
  }
}
|
||||
|
||||
// Commit with `message`. Mirrors VS Code's "commit all": when nothing is
// staged, everything is staged first. With `push` set, the branch is pushed
// afterwards - a plain push when it already tracks an upstream, otherwise
// `push -u origin <branch>` to set one. A mutation: failures reject.
async function reviewCommit(repoPath, message, push, gitBin) {
  const cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Review commit' })
  const git = gitFor(cwd, gitBin)

  const before = await git.status()
  const nothingStaged = before.staged.length === 0

  if (nothingStaged) {
    await git.raw(['add', '-A'])
  }

  await git.commit(message)

  if (!push) {
    return { ok: true }
  }

  // Re-read status after the commit to pick the push form.
  const { tracking, current } = await git.status()

  if (tracking) {
    await git.push()
  } else if (current) {
    await git.raw(['push', '-u', 'origin', current])
  }

  return { ok: true }
}
|
||||
|
||||
// Collect what a model needs to draft a commit message:
//   diff   - the change that WILL be committed: the staged diff when anything
//            is staged, otherwise everything vs HEAD (the same "stage all when
//            nothing is staged" rule reviewCommit applies), capped in size and
//            followed by a list of untracked file names, which have no diff.
//   recent - the last ten commit subjects, for style.
// Read-only. An unresolvable path or a failing status yields empty strings.
async function reviewCommitContext(repoPath, gitBin) {
  const blank = () => ({ diff: '', recent: '' })

  let cwd

  try {
    cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Review commit context' })
  } catch {
    return blank()
  }

  const git = gitFor(cwd, gitBin)

  let status

  try {
    status = await git.status()
  } catch {
    return blank()
  }

  // What will land: staged changes if any, otherwise all tracked changes vs HEAD.
  const diffArgs = status.staged.length > 0 ? ['--cached'] : ['HEAD']
  const rawDiff = await git.diff(diffArgs).catch(() => '')
  let diff = capText(rawDiff, COMMIT_CONTEXT_DIFF_MAX_CHARS, 'diff truncated for commit-message generation')

  // Untracked files have no diff; name them so new files are not invisible.
  const untracked = status.not_added || []

  if (untracked.length > 0) {
    const shown = untracked.slice(0, COMMIT_CONTEXT_UNTRACKED_MAX)
    const hidden = untracked.length - shown.length
    const note =
      `\n# New (untracked) files:\n${shown.map(p => `# ${p}`).join('\n')}\n` +
      (hidden > 0 ? `# ... ${hidden} more omitted\n` : '')

    diff = `${diff}${note}`
  }

  const recent = await git.raw(['log', '-n', '10', '--pretty=format:%s']).catch(() => '')

  return { diff: diff || '', recent: String(recent || '').trim() }
}
|
||||
|
||||
// Push the current branch: a plain push when it tracks an upstream, otherwise
// `push -u origin <branch>` to create one. Does nothing when there is neither
// an upstream nor a current branch name. A mutation: failures reject.
async function reviewPush(repoPath, gitBin) {
  const cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Review push' })
  const git = gitFor(cwd, gitBin)
  const { tracking, current } = await git.status()

  if (tracking) {
    await git.push()
  } else if (current) {
    await git.raw(['push', '-u', 'origin', current])
  }

  return { ok: true }
}
|
||||
|
||||
// Report whether `gh` is usable and whether the current branch already has a
// PR. Read-only; drives the PR button's enabled/label state. `ghReady` is false
// when gh is missing OR unauthenticated, since the PR action cannot run either
// way. `pr` is `{ url, state, number }` or null.
async function reviewShipInfo(repoPath, ghBin) {
  const unavailable = { ghReady: false, pr: null }
  const noPr = { ghReady: true, pr: null }

  let cwd

  try {
    cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Review ship info' })
  } catch {
    return unavailable
  }

  const auth = await runGh(['auth', 'status'], cwd, ghBin)

  if (!auth.ok) {
    return unavailable
  }

  const view = await runGh(['pr', 'view', '--json', 'url,state,number'], cwd, ghBin)

  // gh exits non-zero when the branch has no PR; that is not an error here.
  if (!view.ok) {
    return noPr
  }

  try {
    const found = JSON.parse(view.stdout)
    const hasUrl = found && found.url

    return hasUrl ? { ghReady: true, pr: { url: found.url, state: found.state, number: found.number } } : noPr
  } catch {
    return noPr
  }
}
|
||||
|
||||
// Open a PR for the current branch. The branch is pushed first (best-effort) so
// gh has a remote ref, and gh fills the title/body from the commits (`--fill`).
// Resolves with `{ url }`, where url is the last non-empty line gh printed;
// rejects when `gh pr create` fails.
async function reviewCreatePr(repoPath, gitBin, ghBin) {
  const cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Review create PR' })

  // A failed push is ignored here; gh reports the failure if it matters.
  await reviewPush(repoPath, gitBin).catch(() => undefined)

  const created = await runGh(['pr', 'create', '--fill'], cwd, ghBin)

  if (!created.ok) {
    throw new Error('gh pr create failed (is gh installed and authenticated?)')
  }

  const printed = created.stdout.trim().split('\n').filter(Boolean)
  const url = printed[printed.length - 1] || ''

  return { url }
}
|
||||
|
||||
// Compact working-tree status for the composer coding rail: branch and default
// branch, ahead/behind, per-state change counts, +/- line totals, and a capped
// changed-file list. A read: resolves to null when the path cannot be
// resolved, is not an existing directory, or git status fails (not a repo /
// git unavailable / remote backend).
async function repoStatus(repoPath, gitBin) {
  let cwd
  let git
  let status

  // Every precondition below fails soft to null. Session cwds can point at a
  // deleted worktree (briefly, or forever in a stale row) and simple-git throws
  // at construction time on a missing baseDir, so the directory is checked
  // first and the coding rail is hidden instead of surfacing IPC errors.
  try {
    cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Repo status' })

    const info = await fs.stat(cwd)

    if (!info.isDirectory()) {
      return null
    }

    git = gitFor(cwd, gitBin)
    status = await git.status()
  } catch {
    return null
  }

  const detached = typeof status.detached === 'boolean' ? status.detached : !status.current

  const files = status.files.map(file => {
    const { index, working_dir: worktree } = file

    return {
      path: file.path,
      staged: isStaged(file),
      unstaged: Boolean(worktree && worktree !== ' ' && worktree !== '?'),
      untracked: index === '?' || worktree === '?',
      conflicted: index === 'U' || worktree === 'U'
    }
  })

  const countWhere = flag => files.filter(entry => entry[flag]).length

  const result = {
    branch: detached ? null : status.current || null,
    defaultBranch: await defaultBranchName(git),
    detached,
    ahead: status.ahead || 0,
    behind: status.behind || 0,
    staged: countWhere('staged'),
    unstaged: countWhere('unstaged'),
    untracked: status.not_added.length,
    conflicted: status.conflicted.length,
    changed: files.length,
    added: 0,
    removed: 0,
    files: files.slice(0, 200)
  }

  // +/- vs HEAD (staged + unstaged tracked changes). Without a HEAD commit the
  // summary fails and both totals stay 0.
  try {
    const summary = await git.diffSummary(['HEAD'])

    result.added = summary.insertions
    result.removed = summary.deletions
  } catch {
    // No commits yet.
  }

  // `git diff HEAD` ignores untracked files, so a turn that only creates new
  // files would show +0 in the rail while the review pane counts them. Their
  // line counts are folded into `added`. The work is bounded: only the first
  // 500 untracked paths are counted, in fixed-size batches, and
  // untrackedInsertions applies its own per-file size cap.
  try {
    const sample = status.not_added.slice(0, 500)
    let offset = 0

    while (offset < sample.length) {
      const chunk = sample.slice(offset, offset + UNTRACKED_LINE_COUNT_CONCURRENCY)
      const counts = await Promise.all(chunk.map(rel => untrackedInsertions(cwd, rel)))

      for (const lines of counts) {
        result.added += lines
      }

      offset += UNTRACKED_LINE_COUNT_CONCURRENCY
    }
  } catch {
    // Best-effort: a probe failure just leaves untracked lines uncounted.
  }

  return result
}
|
||||
|
||||
module.exports = {
|
||||
branchBase,
|
||||
fileDiffVsHead,
|
||||
repoStatus,
|
||||
resolveRenamePath,
|
||||
reviewCommit,
|
||||
reviewCommitContext,
|
||||
reviewCreatePr,
|
||||
reviewDiff,
|
||||
reviewList,
|
||||
reviewPush,
|
||||
reviewRevParse,
|
||||
reviewRevert,
|
||||
reviewShipInfo,
|
||||
reviewStage,
|
||||
reviewUnstage
|
||||
}
|
||||
22
apps/desktop/electron/git-review-ops.test.cjs
Normal file
22
apps/desktop/electron/git-review-ops.test.cjs
Normal file
@@ -0,0 +1,22 @@
|
||||
'use strict'
|
||||
|
||||
const assert = require('node:assert/strict')
|
||||
const test = require('node:test')
|
||||
|
||||
const { resolveRenamePath } = require('./git-review-ops.cjs')
|
||||
|
||||
// Table of [test label, git rename notation, expected resolved path]. Each row
// registers one test that feeds the notation to resolveRenamePath and checks the
// resolved path.
const renameCases = [
  ['plain path is unchanged', 'src/a.ts', 'src/a.ts'],
  ['simple rename resolves to the new path', 'old.ts => new.ts', 'new.ts'],
  ['brace rename resolves to the new path', 'src/{old => new}/file.ts', 'src/new/file.ts'],
  ['brace rename collapsing a segment', 'src/{lib => }/file.ts', 'src/file.ts']
]

for (const [label, notation, expected] of renameCases) {
  test(`resolveRenamePath: ${label}`, () => {
    assert.equal(resolveRenamePath(notation), expected)
  })
}
|
||||
339
apps/desktop/electron/git-worktree-ops.cjs
Normal file
339
apps/desktop/electron/git-worktree-ops.cjs
Normal file
@@ -0,0 +1,339 @@
|
||||
'use strict'
|
||||
|
||||
// Git-driven worktree operations for the desktop "Start work" flow: spin up a
|
||||
// fresh worktree the lightest way (`git worktree add -b`), list real worktrees,
|
||||
// and remove them. Git is the source of truth; the renderer just drives these.
|
||||
|
||||
const path = require('node:path')
|
||||
const fs = require('node:fs')
|
||||
const { execFile } = require('node:child_process')
|
||||
|
||||
const { resolveRequestedPathForIpc } = require('./hardening.cjs')
|
||||
|
||||
// Run `gitBin` with `args` in `cwd` and resolve with its stdout as a string.
// On failure the promise rejects with the error execFile reported, after its
// `stderr` property has been overwritten with the captured stderr text (always
// a string). execFile is given a 30 s timeout and an 8 MiB output buffer.
function runGit(gitBin, args, cwd) {
  const execOptions = { cwd, windowsHide: true, timeout: 30_000, maxBuffer: 8 * 1024 * 1024 }

  return new Promise((resolve, reject) => {
    const onExit = (failure, stdout, stderr) => {
      if (!failure) {
        resolve(String(stdout || ''))

        return
      }

      // Attach stderr so callers can pattern-match git's message.
      failure.stderr = String(stderr || '')
      reject(failure)
    }

    execFile(gitBin, args, execOptions, onExit)
  })
}
|
||||
|
||||
// Turn `git worktree list --porcelain` text into an array of records, in the
// order git printed them (the first record is the main worktree). Every
// `worktree <path>` line opens a new record; the attribute lines that follow
// (`branch`, `detached`, `bare`, `locked…`) are applied to the most recent one.
// Lines seen before any `worktree` line, and unrecognised lines, are ignored.
function parseWorktrees(out) {
  const records = []

  for (const row of out.split('\n')) {
    if (row.startsWith('worktree ')) {
      records.push({ path: row.slice(9).trim(), branch: null, detached: false, bare: false, locked: false })
      continue
    }

    const current = records[records.length - 1]

    if (!current) {
      continue
    }

    if (row.startsWith('branch ')) {
      // Report the short name: drop the `refs/heads/` prefix.
      current.branch = row.slice(7).trim().replace(/^refs\/heads\//, '')
    } else if (row === 'detached') {
      current.detached = true
    } else if (row === 'bare') {
      current.bare = true
    } else if (row.startsWith('locked')) {
      // `locked` may be followed by a reason; only the flag is kept.
      current.locked = true
    }
  }

  return records
}
|
||||
|
||||
// List the worktrees of the repo at `repoPath` as
// `{ path, branch, isMain, detached, locked }` objects; the first entry git
// reports is flagged `isMain`. Best-effort: a rejected path or a failing git
// call yields an empty list instead of an error.
async function listWorktrees(repoPath, gitBin) {
  try {
    const cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Worktree list' })
    const porcelain = await runGit(gitBin, ['worktree', 'list', '--porcelain'], cwd)

    return parseWorktrees(porcelain).map(({ path: treePath, branch, detached, locked }, position) => ({
      path: treePath,
      branch,
      isMain: position === 0,
      detached,
      locked
    }))
  } catch {
    return []
  }
}
|
||||
|
||||
// Reduce `name` to a conservative branch name, or "" when nothing usable is
// left. The rewrite rules run in order: whitespace runs become "-", anything
// outside word characters / "." / "/" / "-" is dropped, repeated "-", "/" and
// "." collapse to one, and leading/trailing "-", "." and "/" are trimmed.
// Per the original note this mirrors the renderer's `gitRef`, so a bad value
// can't reach `git` regardless of the caller.
function sanitizeBranch(name) {
  const rules = [
    [/\s+/g, '-'],
    [/[^\w./-]/g, ''],
    [/-{2,}/g, '-'],
    [/\/{2,}/g, '/'],
    [/\.{2,}/g, '.'],
    [/^[-./]+|[-./]+$/g, '']
  ]

  return rules.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), String(name || ''))
}
|
||||
|
||||
// Build a lowercase, dash-separated slug from `name`: every run of characters
// outside a-z / 0-9 becomes a single "-", edge dashes are stripped, and the
// result is clipped to 40 characters (re-stripping a dash the cut may leave at
// the end). Falls back to "work" when nothing remains.
function slugify(name) {
  const lowered = String(name || '')
    .trim()
    .toLowerCase()
  const dashed = lowered.replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '')
  const clipped = dashed.slice(0, 40).replace(/-+$/g, '')

  return clipped === '' ? 'work' : clipped
}
|
||||
|
||||
const TRUNK_BRANCHES = ['main', 'master']
|
||||
|
||||
// Run a git command and return its trimmed stdout, or '' when the command
// fails for any reason. Meant for single-value probes where "no answer" and
// "error" are handled the same way.
async function gitLine(gitBin, args, cwd) {
  let text = ''

  try {
    const raw = await runGit(gitBin, args, cwd)

    text = raw.trim()
  } catch {
    text = ''
  }

  return text
}
|
||||
|
||||
// Best-effort name of the repo's trunk branch, or '' when none can be found.
// Sources, in priority order:
//   1. the branch `origin/HEAD` points at (with the `origin/` prefix removed);
//   2. the `init.defaultBranch` setting — but only when a local branch of that
//      name actually exists. That setting only names the first branch of newly
//      created repos, so on an older repo it can name a branch that was never
//      created here; trusting it blindly would hide the real trunk below.
//   3. the first of TRUNK_BRANCHES that exists locally.
async function defaultBranch(gitBin, cwd) {
  const remoteHead = await gitLine(gitBin, ['symbolic-ref', '--quiet', '--short', 'refs/remotes/origin/HEAD'], cwd)
  const remote = remoteHead.replace(/^origin\//, '')

  if (remote) {
    return remote
  }

  // `show-ref --verify` prints the ref when it exists and fails otherwise, so a
  // non-empty line means the local branch is present.
  const hasLocalBranch = async name =>
    Boolean(await gitLine(gitBin, ['show-ref', '--verify', `refs/heads/${name}`], cwd))

  const configured = await gitLine(gitBin, ['config', '--get', 'init.defaultBranch'], cwd)

  if (configured && (await hasLocalBranch(configured))) {
    return configured
  }

  for (const branch of TRUNK_BRANCHES) {
    if (await hasLocalBranch(branch)) {
      return branch
    }
  }

  return ''
}
|
||||
|
||||
// Make sure `dir` is a git repo that has a commit to branch from, so that
// `git worktree add` can succeed on a brand-new project folder.
//   - `rev-parse --is-inside-work-tree` reports "true": no init; a seed commit
//     is only added when HEAD cannot be verified (repo without commits).
//   - the probe reports anything else, or fails: `git init`, then seed commit.
// The seed commit is created with `--allow-empty` and an inline identity so it
// lands even when no global git identity is configured. A repo that already has
// a commit is left untouched.
async function ensureGitRepo(gitBin, dir) {
  const init = () => runGit(gitBin, ['init'], dir)
  let hasHead = false

  try {
    const probe = (await runGit(gitBin, ['rev-parse', '--is-inside-work-tree'], dir)).trim()

    if (probe === 'true') {
      // Repo exists; a worktree still needs a HEAD to branch from.
      hasHead = await runGit(gitBin, ['rev-parse', '--verify', 'HEAD'], dir).then(
        () => true,
        () => false
      )
    } else {
      await init()
    }
  } catch {
    await init()
  }

  if (hasHead) {
    return
  }

  await runGit(
    gitBin,
    ['-c', 'user.email=hermes@localhost', '-c', 'user.name=Hermes', 'commit', '--allow-empty', '-m', 'Initial commit'],
    dir
  )
}
|
||||
|
||||
// Path of the repo's MAIN worktree, so `.worktrees/` nests under the primary
// checkout even when `cwd` is a linked worktree. Falls back to `cwd` itself
// when the worktree list has no main entry (e.g. the listing came back empty).
async function mainRoot(gitBin, cwd) {
  const trees = await listWorktrees(cwd, gitBin)

  for (const tree of trees) {
    if (tree.isMain) {
      return tree.path
    }
  }

  return cwd
}
|
||||
|
||||
// Return a path that does not exist yet: `base` itself when it is free,
// otherwise the first free one of `base-2`, `base-3`, …
function uniqueDir(base) {
  if (!fs.existsSync(base)) {
    return base
  }

  for (let suffix = 2; ; suffix += 1) {
    const candidate = `${base}-${suffix}`

    if (!fs.existsSync(candidate)) {
      return candidate
    }
  }
}
|
||||
|
||||
// Check out an already-existing branch for the "Start work" flow.
//   - The default branch is switched to in the main checkout (`root`) rather
//     than being given a worktree directory of its own.
//   - Any other branch gets a fresh directory under `<root>/.worktrees/`, named
//     after the branch slug.
// Resolves with `{ path, branch, repoRoot }`; throws when `name` sanitizes to
// nothing or when git rejects the command.
async function addExistingBranchWorktree(gitBin, root, name) {
  const branch = sanitizeBranch(name)

  if (!branch) {
    throw new Error('Branch name is required.')
  }

  const trunk = await defaultBranch(gitBin, root)
  const onTrunk = branch === trunk
  const target = onTrunk ? root : uniqueDir(path.join(root, '.worktrees', slugify(branch)))
  const command = onTrunk ? ['switch', branch] : ['worktree', 'add', target, branch]

  await runGit(gitBin, command, root)

  return { path: target, branch, repoRoot: root }
}
|
||||
|
||||
// Create a worktree for the repo at `repoPath` and resolve with
// `{ path, branch, repoRoot }`.
//
// `options` (all optional):
//   - existingBranch: check out this existing branch instead of creating one
//     (delegates to addExistingBranchWorktree).
//   - name:   human label; slugified into the worktree directory name.
//   - branch: branch to create; defaults to `hermes/<slug>`.
//   - base:   commit-ish the new branch starts from.
//
// Throws when the path is rejected, or when git fails for any reason other
// than the branch already existing.
async function addWorktree(repoPath, options, gitBin) {
  const resolved = resolveRequestedPathForIpc(repoPath, { purpose: 'Worktree add' })

  // A new project's folder may not be a git repo yet — init it (with a root
  // commit) so the worktree has something to branch from.
  await ensureGitRepo(gitBin, resolved)

  const root = await mainRoot(gitBin, resolved)
  const opts = options || {}

  if (opts.existingBranch) {
    return addExistingBranchWorktree(gitBin, root, opts.existingBranch)
  }

  const slug = slugify(opts.name || `work-${Date.now().toString(36)}`)
  const branch = sanitizeBranch(opts.branch) || `hermes/${slug}`
  const dir = uniqueDir(path.join(root, '.worktrees', slug))

  // `--` ends option parsing: `opts.base` comes from the caller unsanitized, and
  // without the separator a value starting with "-" would be read by git as an
  // option instead of a commit-ish.
  const args = ['worktree', 'add', '-b', branch, '--', dir]

  if (opts.base) {
    args.push(String(opts.base))
  }

  try {
    await runGit(gitBin, args, root)
  } catch (err) {
    // Branch name may already exist — retry checking out the existing branch
    // into a fresh worktree dir instead of failing the whole flow.
    if (/already exists/i.test(err.stderr || '')) {
      await runGit(gitBin, ['worktree', 'add', dir, branch], root)
    } else {
      throw err
    }
  }

  return { path: dir, branch, repoRoot: root }
}
|
||||
|
||||
// Remove the worktree at `worktreePath` via `git worktree remove`, run from the
// repo's main worktree. `options.force` adds `--force`. Resolves with
// `{ removed }` holding the resolved worktree path; path-validation and git
// errors propagate to the caller.
async function removeWorktree(repoPath, worktreePath, options, gitBin) {
  const repo = resolveRequestedPathForIpc(repoPath, { purpose: 'Worktree remove (repo)' })
  const tree = resolveRequestedPathForIpc(worktreePath, { purpose: 'Worktree remove (tree)' })
  const root = await mainRoot(gitBin, repo)
  const forceFlag = options && options.force ? ['--force'] : []

  await runGit(gitBin, ['worktree', 'remove', ...forceFlag, tree], root)

  return { removed: tree }
}
|
||||
|
||||
// Local branches for the "convert a branch into a worktree" picker, most
// recently committed first. Each entry is
// `{ name, checkedOut, isDefault, worktreePath }`: whether some worktree has the
// branch checked out (and where), and whether it is the repo's default branch.
// Best-effort: a rejected path or a failing git call yields [].
async function listBranches(repoPath, gitBin) {
  try {
    const cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Branch list' })
    const refs = await runGit(
      gitBin,
      ['for-each-ref', '--format=%(refname:short)', '--sort=-committerdate', 'refs/heads'],
      cwd
    )

    // branch name → path of the worktree that has it checked out
    const worktreeOf = new Map()

    for (const tree of await listWorktrees(cwd, gitBin)) {
      if (tree.branch) {
        worktreeOf.set(tree.branch, tree.path)
      }
    }

    const trunk = await defaultBranch(gitBin, cwd)
    const branches = []

    for (const raw of refs.split('\n')) {
      const name = raw.trim()

      if (!name) {
        continue
      }

      branches.push({
        name,
        checkedOut: worktreeOf.has(name),
        isDefault: Boolean(trunk && name === trunk),
        worktreePath: worktreeOf.get(name) || null
      })
    }

    return branches
  } catch {
    return []
  }
}
|
||||
|
||||
// `git switch` the checkout at `repoPath` to `branch` (sanitized first).
// Resolves with `{ branch }` carrying the sanitized name; throws when the name
// sanitizes to nothing, the path is rejected, or git fails.
async function switchBranch(repoPath, branch, gitBin) {
  const cwd = resolveRequestedPathForIpc(repoPath, { purpose: 'Branch switch' })
  const target = sanitizeBranch(branch)

  if (target.length === 0) {
    throw new Error('Branch name is required.')
  }

  await runGit(gitBin, ['switch', target], cwd)

  return { branch: target }
}
|
||||
|
||||
module.exports = {
|
||||
addWorktree,
|
||||
ensureGitRepo,
|
||||
listBranches,
|
||||
listWorktrees,
|
||||
parseWorktrees,
|
||||
removeWorktree,
|
||||
sanitizeBranch,
|
||||
switchBranch
|
||||
}
|
||||
214
apps/desktop/electron/git-worktree-ops.test.cjs
Normal file
214
apps/desktop/electron/git-worktree-ops.test.cjs
Normal file
@@ -0,0 +1,214 @@
|
||||
'use strict'
|
||||
|
||||
const assert = require('node:assert/strict')
|
||||
const { execFileSync } = require('node:child_process')
|
||||
const fs = require('node:fs')
|
||||
const os = require('node:os')
|
||||
const path = require('node:path')
|
||||
const test = require('node:test')
|
||||
|
||||
const {
|
||||
addWorktree,
|
||||
ensureGitRepo,
|
||||
listBranches,
|
||||
parseWorktrees,
|
||||
sanitizeBranch,
|
||||
switchBranch
|
||||
} = require('./git-worktree-ops.cjs')
|
||||
|
||||
// Raw input → expected sanitized branch name.
test('sanitizeBranch: spaces → hyphens, forbidden chars dropped, edges trimmed', () => {
  const expectations = [
    ['beach vibes', 'beach-vibes'],
    ['feat/cool thing', 'feat/cool-thing'],
    [' wip~^:? ', 'wip'],
    ['///', '']
  ]

  for (const [raw, clean] of expectations) {
    assert.equal(sanitizeBranch(raw), clean)
  }
})
|
||||
|
||||
// Two porcelain records: the main checkout followed by one linked worktree.
test('parseWorktrees: main checkout + linked worktree', () => {
  const mainRecord = 'worktree /repo\nHEAD abc123\nbranch refs/heads/main\n'
  const linkedRecord = 'worktree /repo/.worktrees/feat\nHEAD def456\nbranch refs/heads/hermes/feat\n'

  const parsed = parseWorktrees(`${mainRecord}\n${linkedRecord}`)

  assert.equal(parsed.length, 2)

  const [main, linked] = parsed

  assert.equal(main.path, '/repo')
  assert.equal(main.branch, 'main')
  assert.equal(linked.path, '/repo/.worktrees/feat')
  assert.equal(linked.branch, 'hermes/feat')
})
|
||||
|
||||
// A detached, locked worktree: both flags set and no branch recorded.
test('parseWorktrees: detached + locked flags', () => {
  const parsed = parseWorktrees('worktree /repo/wt\nHEAD abc\ndetached\nlocked reason\n')

  assert.equal(parsed.length, 1)

  const [only] = parsed

  assert.equal(only.detached, true)
  assert.equal(only.locked, true)
  assert.equal(only.branch, null)
})
|
||||
|
||||
// No porcelain text at all parses to no records.
test('parseWorktrees: empty input', () => {
  const parsed = parseWorktrees('')

  assert.deepEqual(parsed, [])
})
|
||||
|
||||
// A plain temp dir becomes a repo with exactly one commit, a worktree can be
// branched from it, and a second ensureGitRepo call adds no further commit.
test('ensureGitRepo: inits a plain dir with a root commit so worktrees branch', async () => {
  const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-wt-'))
  const linked = path.join(repo, '.worktrees', 'wt')
  const gitOut = argv => execFileSync('git', argv, { cwd: repo }).toString().trim()

  try {
    await ensureGitRepo('git', repo)
    assert.match(gitOut(['rev-parse', '--verify', 'HEAD']), /^[0-9a-f]{7,}$/)

    // The whole point: a worktree can now branch off the seeded root commit.
    gitOut(['worktree', 'add', '-b', 'wt', linked])
    assert.ok(fs.existsSync(linked))

    // Idempotent: an already-committed repo gets no extra commit.
    await ensureGitRepo('git', repo)
    assert.equal(gitOut(['rev-list', '--count', 'HEAD']), '1')
  } finally {
    fs.rmSync(repo, { recursive: true, force: true })
  }
})
|
||||
|
||||
// After switchBranch, git reports the requested branch as current.
test('switchBranch: switches a normal checkout branch', async () => {
  const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-switch-'))
  const gitOut = argv => execFileSync('git', argv, { cwd: repo }).toString().trim()

  try {
    await ensureGitRepo('git', repo)
    gitOut(['branch', 'feature'])

    await switchBranch(repo, 'feature', 'git')

    assert.equal(gitOut(['branch', '--show-current']), 'feature')
  } finally {
    fs.rmSync(repo, { recursive: true, force: true })
  }
})
|
||||
|
||||
// Both local branches are listed; the current one is checked out in the repo
// dir and flagged default, the spare one is neither.
test('listBranches: lists locals and flags the checked-out branch', async () => {
  const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-branches-'))
  const gitOut = argv => execFileSync('git', argv, { cwd: repo }).toString().trim()

  try {
    await ensureGitRepo('git', repo)

    const current = gitOut(['branch', '--show-current'])

    gitOut(['branch', 'feature'])

    const listed = await listBranches(repo, 'git')
    const byName = label => listed.find(entry => entry.name === label)

    assert.deepEqual(listed.map(entry => entry.name).sort(), [current, 'feature'].sort())

    // The repo's own checkout is flagged; the unused branch is convertible.
    assert.equal(byName(current).checkedOut, true)
    assert.equal(byName(current).isDefault, true)
    assert.equal(fs.realpathSync(byName(current).worktreePath), fs.realpathSync(repo))
    assert.equal(byName('feature').checkedOut, false)
    assert.equal(byName('feature').isDefault, false)
    assert.equal(byName('feature').worktreePath, null)
  } finally {
    fs.rmSync(repo, { recursive: true, force: true })
  }
})
|
||||
|
||||
// With the checkout moved to another branch, the trunk is still reported as
// default but no longer as checked out.
test('listBranches: flags a free default branch as default, not checked out', async () => {
  const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-branches-default-'))
  const gitOut = argv => execFileSync('git', argv, { cwd: repo }).toString().trim()

  try {
    await ensureGitRepo('git', repo)

    const trunk = gitOut(['branch', '--show-current'])

    gitOut(['switch', '-c', 'rawr'])

    const listed = await listBranches(repo, 'git')
    const trunkEntry = listed.find(entry => entry.name === trunk)

    assert.equal(trunkEntry.checkedOut, false)
    assert.equal(trunkEntry.isDefault, true)
    assert.equal(trunkEntry.worktreePath, null)
  } finally {
    fs.rmSync(repo, { recursive: true, force: true })
  }
})
|
||||
|
||||
// Once addWorktree has checked "feature" out into a worktree, listBranches
// reports that branch as checked out.
test('listBranches: a branch claimed by a worktree is flagged checked out', async () => {
  const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-branches-wt-'))

  try {
    await ensureGitRepo('git', repo)
    execFileSync('git', ['branch', 'feature'], { cwd: repo })

    // addWorktree converts the existing "feature" branch into a worktree.
    const created = await addWorktree(repo, { existingBranch: 'feature' }, 'git')

    assert.equal(created.branch, 'feature')
    assert.ok(fs.existsSync(created.path))

    const listed = await listBranches(repo, 'git')
    const featureEntry = listed.find(entry => entry.name === 'feature')

    assert.equal(featureEntry.checkedOut, true)
  } finally {
    fs.rmSync(repo, { recursive: true, force: true })
  }
})
|
||||
|
||||
// A directory that is not a git repo lists no branches (and does not throw).
test('listBranches: empty on a non-repo path', async () => {
  const plainDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-nonrepo-'))

  try {
    const listed = await listBranches(plainDir, 'git')

    assert.deepEqual(listed, [])
  } finally {
    fs.rmSync(plainDir, { recursive: true, force: true })
  }
})
|
||||
|
||||
// Converting an existing branch creates no new branch, places the worktree
// under `.worktrees/<branch slug>`, and checks that branch out there.
test('addWorktree: existingBranch checks the branch out without a new branch', async () => {
  const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-convert-'))
  const gitOut = (argv, cwd = repo) => execFileSync('git', argv, { cwd }).toString().trim()
  const branchCount = () => gitOut(['branch', '--list']).split('\n').length

  try {
    await ensureGitRepo('git', repo)
    gitOut(['branch', 'cool/feature'])

    const before = branchCount()
    const created = await addWorktree(repo, { existingBranch: 'cool/feature' }, 'git')

    // No new branch was created — only the existing one is checked out.
    assert.equal(branchCount(), before)
    assert.equal(created.branch, 'cool/feature')
    // Dir is named off the branch slug, nested under the main repo's .worktrees.
    assert.match(created.path, /[/\\]\.worktrees[/\\]cool-feature/)
    assert.equal(gitOut(['branch', '--show-current'], created.path), 'cool/feature')
  } finally {
    fs.rmSync(repo, { recursive: true, force: true })
  }
})
|
||||
|
||||
// Asking for the default branch switches the main checkout back to it instead
// of creating a `.worktrees/<trunk>` directory.
test('addWorktree: existing default branch switches the main checkout, not .worktrees/main', async () => {
  const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-convert-default-'))
  const gitOut = argv => execFileSync('git', argv, { cwd: repo }).toString().trim()

  try {
    await ensureGitRepo('git', repo)

    const trunk = gitOut(['branch', '--show-current'])

    gitOut(['switch', '-c', 'rawr'])

    const created = await addWorktree(repo, { existingBranch: trunk }, 'git')

    assert.equal(created.branch, trunk)
    assert.equal(fs.realpathSync(created.path), fs.realpathSync(repo))
    assert.equal(gitOut(['branch', '--show-current']), trunk)
    assert.equal(fs.existsSync(path.join(repo, '.worktrees', trunk)), false)
  } finally {
    fs.rmSync(repo, { recursive: true, force: true })
  }
})
|
||||
@@ -1,174 +0,0 @@
|
||||
'use strict'
|
||||
|
||||
// Resolve git-worktree relationships for a set of session cwds, reading git's
|
||||
// on-disk metadata directly (no `git` spawn per path):
|
||||
//
|
||||
// - A normal checkout has a `.git` DIRECTORY at its root → it's the main
|
||||
// worktree; its repo root IS that directory's parent.
|
||||
// - A linked worktree has a `.git` FILE: `gitdir: <repo>/.git/worktrees/<name>`.
|
||||
// That admin dir's `commondir` points back at the shared `<repo>/.git`, whose
|
||||
// parent is the main repo root.
|
||||
//
|
||||
// Grouping by repoRoot therefore clusters a repo's main checkout with all of its
|
||||
// linked worktrees, regardless of how the worktree directories are named. The
|
||||
// branch (read from the worktree's own HEAD) gives each worktree a meaningful
|
||||
// label.
|
||||
|
||||
const fs = require('node:fs')
|
||||
const path = require('node:path')
|
||||
const { resolveRequestedPathForIpc } = require('./hardening.cjs')
|
||||
|
||||
// Walk up from `start` to the nearest ancestor that carries a `.git` entry
|
||||
// (file for a linked worktree, dir for the main checkout). Capped so a stray
|
||||
// path can't loop forever.
|
||||
function findGitHost(start, fsImpl) {
|
||||
let dir = start
|
||||
|
||||
for (let i = 0; i < 64; i += 1) {
|
||||
const dotgit = path.join(dir, '.git')
|
||||
|
||||
try {
|
||||
if (fsImpl.existsSync(dotgit)) {
|
||||
return dir
|
||||
}
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
|
||||
const parent = path.dirname(dir)
|
||||
|
||||
if (parent === dir) {
|
||||
return null
|
||||
}
|
||||
|
||||
dir = parent
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
function readBranch(gitDir, fsImpl) {
|
||||
try {
|
||||
const head = fsImpl.readFileSync(path.join(gitDir, 'HEAD'), 'utf8').trim()
|
||||
const ref = head.match(/^ref:\s*refs\/heads\/(.+)$/)
|
||||
|
||||
if (ref) {
|
||||
return ref[1]
|
||||
}
|
||||
|
||||
// Detached HEAD: surface a short sha so the worktree still gets a label.
|
||||
return /^[0-9a-f]{7,40}$/i.test(head) ? head.slice(0, 8) : null
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
// Describe the worktree rooted at `host`, the directory that owns the `.git`
// entry. Returns `{ repoRoot, worktreeRoot, isMainWorktree, branch }`, or null
// when the entry can't be inspected or isn't a recognisable gitdir pointer.
//   - `.git` is a directory: `host` is the main worktree and the repo root.
//   - `.git` is a file (`gitdir: <admin dir>`): a linked worktree. The repo
//     root is the parent of the shared git dir named by the admin dir's
//     `commondir` file.
function resolveFromHost(host, fsImpl) {
  const dotgit = path.join(host, '.git')
  const describe = (repoRoot, isMainWorktree, headDir) => ({
    repoRoot,
    worktreeRoot: host,
    isMainWorktree,
    branch: readBranch(headDir, fsImpl)
  })

  let stat

  try {
    stat = fsImpl.statSync(dotgit)
  } catch {
    return null
  }

  if (stat.isDirectory()) {
    return describe(host, true, dotgit)
  }

  // Linked worktree: `.git` is a file pointing at the admin dir.
  let pointer

  try {
    pointer = fsImpl.readFileSync(dotgit, 'utf8').trim()
  } catch {
    return null
  }

  const gitdir = /^gitdir:\s*(.+)$/m.exec(pointer)

  if (!gitdir) {
    return null
  }

  const adminDir = path.resolve(host, gitdir[1].trim())

  // Default to two levels above `<repo>/.git/worktrees/<name>`; a readable
  // `commondir` file overrides it with the shared `<repo>/.git` it names.
  let commonDir = path.dirname(path.dirname(adminDir))

  try {
    const rel = fsImpl.readFileSync(path.join(adminDir, 'commondir'), 'utf8').trim()

    commonDir = path.resolve(adminDir, rel)
  } catch {
    // No usable `commondir`: keep the two-levels-up default.
  }

  return describe(path.dirname(commonDir), false, adminDir)
}
|
||||
|
||||
// Resolve the worktree identity for `startPath` (a directory, or a file whose
// parent directory is used). Returns the resolveFromHost description, or null
// when the path is rejected, can't be stat'ed, or no ancestor carries a `.git`
// entry. `fsImpl` defaults to node's `fs` and exists so callers can inject one.
function resolveWorktree(startPath, fsImpl = fs) {
  let start

  try {
    start = resolveRequestedPathForIpc(startPath, { purpose: 'Worktree lookup' })

    if (!fsImpl.statSync(start).isDirectory()) {
      start = path.dirname(start)
    }
  } catch {
    return null
  }

  const host = findGitHost(start, fsImpl)

  return host ? resolveFromHost(host, fsImpl) : null
}
|
||||
|
||||
// Batch entry point for the renderer: maps each requested cwd to its worktree
// info (or null when it isn't inside a git checkout / can't be read). Entries
// that aren't non-blank strings are skipped, and duplicates are resolved once
// so many sessions sharing a cwd cost one lookup. A non-array `cwds` yields {}.
//
// `options.fs` overrides the filesystem implementation (defaults to node `fs`).
//
// Seen cwds are tracked in a Map rather than with `cwd in obj`: `in` also
// matches inherited keys, so a cwd such as "constructor" or "toString" would be
// treated as already resolved and dropped. Object.fromEntries then defines
// every key — "__proto__" included — as an own property of the plain result.
async function worktreesForIpc(cwds, options = {}) {
  const fsImpl = options.fs || fs
  const list = Array.isArray(cwds) ? cwds : []
  const resolvedByCwd = new Map()

  for (const cwd of list) {
    if (typeof cwd !== 'string' || !cwd.trim() || resolvedByCwd.has(cwd)) {
      continue
    }

    resolvedByCwd.set(cwd, resolveWorktree(cwd, fsImpl))
  }

  return Object.fromEntries(resolvedByCwd)
}
|
||||
|
||||
module.exports = {
|
||||
resolveWorktree,
|
||||
worktreesForIpc
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,6 +7,32 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
|
||||
getGatewayWsUrl: profile => ipcRenderer.invoke('hermes:gateway:ws-url', profile),
|
||||
openSessionWindow: (sessionId, opts) => ipcRenderer.invoke('hermes:window:openSession', sessionId, opts),
|
||||
openNewSessionWindow: () => ipcRenderer.invoke('hermes:window:openNewSession'),
|
||||
petOverlay: {
|
||||
// Main renderer → main process: window lifecycle + drag. `request` is
|
||||
// `{ bounds, screen }`; resolves with the screen bounds it actually used.
|
||||
open: request => ipcRenderer.invoke('hermes:pet-overlay:open', request),
|
||||
close: () => ipcRenderer.invoke('hermes:pet-overlay:close'),
|
||||
setBounds: bounds => ipcRenderer.send('hermes:pet-overlay:set-bounds', bounds),
|
||||
setIgnoreMouse: ignore => ipcRenderer.send('hermes:pet-overlay:ignore-mouse', ignore),
|
||||
// Flip the overlay focusable (and focus it) while the composer needs keys.
|
||||
setFocusable: focusable => ipcRenderer.send('hermes:pet-overlay:set-focusable', focusable),
|
||||
// Main renderer → overlay (forwarded by main): push the latest pet state.
|
||||
pushState: payload => ipcRenderer.send('hermes:pet-overlay:state', payload),
|
||||
// Overlay → main renderer (forwarded by main): pop back in / composer submit.
|
||||
control: payload => ipcRenderer.send('hermes:pet-overlay:control', payload),
|
||||
// Overlay subscribes to state pushes.
|
||||
onState: callback => {
|
||||
const listener = (_event, payload) => callback(payload)
|
||||
ipcRenderer.on('hermes:pet-overlay:state', listener)
|
||||
return () => ipcRenderer.removeListener('hermes:pet-overlay:state', listener)
|
||||
},
|
||||
// Main renderer subscribes to overlay control messages.
|
||||
onControl: callback => {
|
||||
const listener = (_event, payload) => callback(payload)
|
||||
ipcRenderer.on('hermes:pet-overlay:control', listener)
|
||||
return () => ipcRenderer.removeListener('hermes:pet-overlay:control', listener)
|
||||
}
|
||||
},
|
||||
getBootProgress: () => ipcRenderer.invoke('hermes:boot-progress:get'),
|
||||
getConnectionConfig: profile => ipcRenderer.invoke('hermes:connection-config:get', profile),
|
||||
saveConnectionConfig: payload => ipcRenderer.invoke('hermes:connection-config:save', payload),
|
||||
@@ -44,6 +70,7 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
|
||||
setTranslucency: payload => ipcRenderer.send('hermes:translucency', payload),
|
||||
setPreviewShortcutActive: active => ipcRenderer.send('hermes:previewShortcutActive', Boolean(active)),
|
||||
openExternal: url => ipcRenderer.invoke('hermes:openExternal', url),
|
||||
openPreviewInBrowser: url => ipcRenderer.invoke('hermes:openPreviewInBrowser', url),
|
||||
fetchLinkTitle: url => ipcRenderer.invoke('hermes:fetchLinkTitle', url),
|
||||
sanitizeWorkspaceCwd: cwd => ipcRenderer.invoke('hermes:workspace:sanitize', cwd),
|
||||
settings: {
|
||||
@@ -55,7 +82,35 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
|
||||
getRecentLogs: () => ipcRenderer.invoke('hermes:logs:recent'),
|
||||
readDir: dirPath => ipcRenderer.invoke('hermes:fs:readDir', dirPath),
|
||||
gitRoot: startPath => ipcRenderer.invoke('hermes:fs:gitRoot', startPath),
|
||||
worktrees: cwds => ipcRenderer.invoke('hermes:fs:worktrees', cwds),
|
||||
revealPath: targetPath => ipcRenderer.invoke('hermes:fs:reveal', targetPath),
|
||||
renamePath: (targetPath, newName) => ipcRenderer.invoke('hermes:fs:rename', targetPath, newName),
|
||||
writeTextFile: (filePath, content) => ipcRenderer.invoke('hermes:fs:writeText', filePath, content),
|
||||
trashPath: targetPath => ipcRenderer.invoke('hermes:fs:trash', targetPath),
|
||||
git: {
|
||||
worktreeList: repoPath => ipcRenderer.invoke('hermes:git:worktreeList', repoPath),
|
||||
worktreeAdd: (repoPath, options) => ipcRenderer.invoke('hermes:git:worktreeAdd', repoPath, options),
|
||||
worktreeRemove: (repoPath, worktreePath, options) =>
|
||||
ipcRenderer.invoke('hermes:git:worktreeRemove', repoPath, worktreePath, options),
|
||||
branchSwitch: (repoPath, branch) => ipcRenderer.invoke('hermes:git:branchSwitch', repoPath, branch),
|
||||
branchList: repoPath => ipcRenderer.invoke('hermes:git:branchList', repoPath),
|
||||
repoStatus: repoPath => ipcRenderer.invoke('hermes:git:repoStatus', repoPath),
|
||||
fileDiff: (repoPath, filePath) => ipcRenderer.invoke('hermes:git:fileDiff', repoPath, filePath),
|
||||
scanRepos: (roots, options) => ipcRenderer.invoke('hermes:git:scanRepos', roots, options),
|
||||
review: {
|
||||
list: (repoPath, scope, baseRef) => ipcRenderer.invoke('hermes:git:review:list', repoPath, scope, baseRef),
|
||||
diff: (repoPath, filePath, scope, baseRef, staged) =>
|
||||
ipcRenderer.invoke('hermes:git:review:diff', repoPath, filePath, scope, baseRef, staged),
|
||||
stage: (repoPath, filePath) => ipcRenderer.invoke('hermes:git:review:stage', repoPath, filePath),
|
||||
unstage: (repoPath, filePath) => ipcRenderer.invoke('hermes:git:review:unstage', repoPath, filePath),
|
||||
revert: (repoPath, filePath) => ipcRenderer.invoke('hermes:git:review:revert', repoPath, filePath),
|
||||
revParse: (repoPath, ref) => ipcRenderer.invoke('hermes:git:review:revParse', repoPath, ref),
|
||||
commit: (repoPath, message, push) => ipcRenderer.invoke('hermes:git:review:commit', repoPath, message, push),
|
||||
commitContext: repoPath => ipcRenderer.invoke('hermes:git:review:commitContext', repoPath),
|
||||
push: repoPath => ipcRenderer.invoke('hermes:git:review:push', repoPath),
|
||||
shipInfo: repoPath => ipcRenderer.invoke('hermes:git:review:shipInfo', repoPath),
|
||||
createPr: repoPath => ipcRenderer.invoke('hermes:git:review:createPr', repoPath)
|
||||
}
|
||||
},
|
||||
terminal: {
|
||||
dispose: id => ipcRenderer.invoke('hermes:terminal:dispose', id),
|
||||
resize: (id, size) => ipcRenderer.invoke('hermes:terminal:resize', id, size),
|
||||
|
||||
28
apps/desktop/electron/update-count.cjs
Normal file
28
apps/desktop/electron/update-count.cjs
Normal file
@@ -0,0 +1,28 @@
|
||||
'use strict'
|
||||
|
||||
// Decides whether `git rev-list HEAD..origin/<branch> --count` is worth
// running. The count is only untrustworthy on a SHALLOW checkout (installer
// clones with --depth 1) that shares no merge-base with the freshly fetched
// origin tip: there rev-list walks the entire remote ancestry and reports a
// bogus huge number (e.g. 12104) — see #51922. resolveBehindCount ignores the
// count in that case and compares SHAs instead, so callers should SKIP the
// expensive rev-list rather than run it and discard the result.
//
// Returns true for every full clone, and for a shallow clone only when a
// merge-base exists.
function shouldCountCommits({ isShallow, hasMergeBase }) {
  // Full history: the count is always meaningful.
  if (!isShallow) return true

  // Shallow history: meaningful only when local and remote still connect.
  return Boolean(hasMergeBase)
}
|
||||
|
||||
// Number of commits the local checkout is behind origin, as shown by the
// desktop update indicator.
//
// - When the rev-list count is meaningful (see shouldCountCommits) the parsed
//   `countStr` is returned; anything that does not parse to a non-zero integer
//   yields 0.
// - When it is not (shallow + no merge-base) the count is ignored and the
//   result is a binary SHA comparison: 0 if both SHAs are present and equal,
//   otherwise 1 — "behind by an unknown amount", i.e. a generic "update
//   available". This mirrors the official-SSH path in checkUpdates() and the
//   CLI guard in hermes_cli/banner.py.
//
// Full clones (developers / Docker dev images) keep the exact count path.
function resolveBehindCount({ countStr, currentSha, targetSha, isShallow, hasMergeBase }) {
  if (shouldCountCommits({ isShallow, hasMergeBase })) {
    const parsed = Number.parseInt(countStr, 10)

    // NaN (empty / garbage output) collapses to 0.
    return parsed || 0
  }

  const sameCommit = Boolean(currentSha) && Boolean(targetSha) && currentSha === targetSha

  return sameCommit ? 0 : 1
}
|
||||
|
||||
module.exports = { resolveBehindCount, shouldCountCommits }
|
||||
79
apps/desktop/electron/update-count.test.cjs
Normal file
79
apps/desktop/electron/update-count.test.cjs
Normal file
@@ -0,0 +1,79 @@
|
||||
'use strict'
|
||||
const test = require('node:test')
|
||||
const assert = require('node:assert/strict')
|
||||
const { resolveBehindCount, shouldCountCommits } = require('./update-count.cjs')
|
||||
|
||||
// FAIL-BEFORE: pre-fix the function did `Number.parseInt(countStr) || 0`
|
||||
// unconditionally, so a shallow checkout with no merge-base surfaced the bogus
|
||||
// rev-list count (e.g. 12104). This asserts the new shallow/no-merge-base branch.
|
||||
test('shallow checkout with no merge-base does NOT trust the bogus rev-list count', () => {
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '12104', currentSha: 'aaa', targetSha: 'bbb',
|
||||
isShallow: true, hasMergeBase: false,
|
||||
}), 1)
|
||||
})
|
||||
|
||||
test('shallow checkout with no merge-base but identical SHA reports up-to-date', () => {
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '12104', currentSha: 'abc', targetSha: 'abc',
|
||||
isShallow: true, hasMergeBase: false,
|
||||
}), 0)
|
||||
})
|
||||
|
||||
test('shallow checkout WITH a merge-base keeps the exact count (reliable)', () => {
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '3', currentSha: 'aaa', targetSha: 'bbb',
|
||||
isShallow: true, hasMergeBase: true,
|
||||
}), 3)
|
||||
})
|
||||
|
||||
test('full (non-shallow) clone keeps the exact count path unchanged', () => {
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '7', currentSha: 'aaa', targetSha: 'bbb',
|
||||
isShallow: false, hasMergeBase: true,
|
||||
}), 7)
|
||||
})
|
||||
|
||||
test('up-to-date full clone reports 0', () => {
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '0', currentSha: 'x', targetSha: 'x',
|
||||
isShallow: false, hasMergeBase: true,
|
||||
}), 0)
|
||||
})
|
||||
|
||||
test('non-numeric count falls back to 0 (defensive, unchanged behaviour)', () => {
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '', currentSha: 'aaa', targetSha: 'bbb',
|
||||
isShallow: false, hasMergeBase: true,
|
||||
}), 0)
|
||||
})
|
||||
|
||||
// shouldCountCommits gates the expensive `rev-list --count` in checkUpdates().
|
||||
// FAIL-BEFORE: in the shallow + no-merge-base case the caller ran rev-list
|
||||
// unconditionally and discarded the bogus result; this predicate lets the
|
||||
// caller SKIP the whole-ancestry enumeration in exactly that case (#51922).
|
||||
test('shallow checkout with no merge-base SKIPS the rev-list count', () => {
|
||||
assert.equal(shouldCountCommits({ isShallow: true, hasMergeBase: false }), false)
|
||||
})
|
||||
|
||||
test('shallow checkout WITH a merge-base still runs the count', () => {
|
||||
assert.equal(shouldCountCommits({ isShallow: true, hasMergeBase: true }), true)
|
||||
})
|
||||
|
||||
test('full (non-shallow) clone always runs the count', () => {
|
||||
assert.equal(shouldCountCommits({ isShallow: false, hasMergeBase: true }), true)
|
||||
assert.equal(shouldCountCommits({ isShallow: false, hasMergeBase: false }), true)
|
||||
})
|
||||
|
||||
// The skip path produces an empty countStr; resolveBehindCount must NOT trust
|
||||
// it and must fall through to the SHA compare (mirrors the live call site).
|
||||
test('skipped-count path resolves via SHA compare, never via empty countStr', () => {
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '', currentSha: 'aaa', targetSha: 'bbb',
|
||||
isShallow: true, hasMergeBase: false,
|
||||
}), 1)
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '', currentSha: 'same', targetSha: 'same',
|
||||
isShallow: true, hasMergeBase: false,
|
||||
}), 0)
|
||||
})
|
||||
117
apps/desktop/electron/window-state.cjs
Normal file
117
apps/desktop/electron/window-state.cjs
Normal file
@@ -0,0 +1,117 @@
|
||||
/**
|
||||
* Pure geometry helpers for window-state.json — restoring the main window's
|
||||
* size, position, and maximized flag across launches. Side-effect-free so the
|
||||
* part that actually matters (rejecting garbage + off-screen bounds) is
|
||||
* unit-testable without booting Electron; main.cjs owns the file I/O and the
|
||||
* live `screen` displays.
|
||||
*/
|
||||
|
||||
// Defaults mirror the historical hardcoded BrowserWindow size; MIN_* mirror its
|
||||
// minWidth/minHeight so a restored size never undershoots what the live window
|
||||
// allows. A fresh install (no saved state) is byte-identical to before.
|
||||
const DEFAULT_WIDTH = 1220
|
||||
const DEFAULT_HEIGHT = 800
|
||||
const MIN_WIDTH = 400
|
||||
const MIN_HEIGHT = 620
|
||||
|
||||
// Keep at least this much of the window over a display work area before we trust
|
||||
// a saved position, so the title bar stays grabbable after a monitor unplugs.
|
||||
const MIN_VISIBLE = 48
|
||||
|
||||
// True only for real, finite numbers. Number.isFinite never coerces, so
// strings, null, undefined, NaN and ±Infinity are all rejected.
function finite(v) {
  return Number.isFinite(v)
}

// Constrain `v` to [lo, hi]. When the range is inverted (hi < lo) the lower
// bound wins, because it is applied last.
function clamp(v, lo, hi) {
  const upperBounded = Math.min(v, hi)

  return Math.max(lo, upperBounded)
}
|
||||
|
||||
// Turn a raw parsed window-state value into a clean state object, or null
// when it is unusable.
//
// - `raw` must be a non-null object with finite numeric width AND height;
//   anything else returns null.
// - width/height are rounded and raised to MIN_WIDTH / MIN_HEIGHT.
// - x/y are kept (rounded) only when BOTH are finite; a lone coordinate is
//   dropped.
// - isMaximized is true only for the literal boolean `true`.
function sanitizeWindowState(raw) {
  const isObject = Boolean(raw) && typeof raw === 'object'

  if (!isObject) return null
  if (!finite(raw.width) || !finite(raw.height)) return null

  const size = {
    width: Math.max(MIN_WIDTH, Math.round(raw.width)),
    height: Math.max(MIN_HEIGHT, Math.round(raw.height)),
    isMaximized: raw.isMaximized === true
  }

  const hasPosition = finite(raw.x) && finite(raw.y)

  if (!hasPosition) return size

  return { ...size, x: Math.round(raw.x), y: Math.round(raw.y) }
}
|
||||
|
||||
// True when `bounds` ({ x, y, width, height }) overlaps the work area of at
// least one display by ≥ MIN_VISIBLE pixels on both axes. `displays` is
// Electron's screen.getAllDisplays() shape (objects carrying `workArea`).
//
// Bad input never throws: a missing/non-object `bounds`, a non-array
// `displays`, and display entries that are null/undefined or lack a
// `workArea` all count as "not on screen". Non-finite coordinates yield NaN
// overlaps, which fail the comparison and are therefore rejected too.
//
// NOTE(review): the overlap is measured against the whole window rectangle,
// not just its top strip, so a window whose top edge sits above the work area
// can still pass as long as ≥ MIN_VISIBLE px of its lower part is visible —
// confirm that is acceptable for the "title bar stays grabbable" goal.
function onScreen(bounds, displays) {
  if (!bounds || typeof bounds !== 'object') return false
  if (!Array.isArray(displays)) return false

  return displays.some(display => {
    // Optional chaining tolerates null entries as well as undefined ones.
    const area = display?.workArea

    if (!area) return false

    // Length of the intersection on each axis; negative when disjoint.
    const overlapX = Math.min(bounds.x + bounds.width, area.x + area.width) - Math.max(bounds.x, area.x)
    const overlapY = Math.min(bounds.y + bounds.height, area.y + area.height) - Math.max(bounds.y, area.y)

    return overlapX >= MIN_VISIBLE && overlapY >= MIN_VISIBLE
  })
}
|
||||
|
||||
// Sanitized state (or null) → BrowserWindow size/position options.
//
// - width/height are always set: the saved values when finite, otherwise
//   DEFAULT_WIDTH / DEFAULT_HEIGHT.
// - The size is then capped so a size saved on a since-disconnected bigger
//   monitor cannot outgrow the current setup. The cap is the largest
//   work-area width and the largest work-area height found across `displays`
//   (each axis taken independently), and MIN_WIDTH / MIN_HEIGHT still win on
//   a sub-minimum display. With no usable display info nothing is capped.
// - x/y are set only when the saved position, combined with the final size,
//   still passes onScreen(); otherwise they are omitted so Electron centers
//   the window.
function computeWindowOptions(state, displays) {
  let width = finite(state?.width) ? state.width : DEFAULT_WIDTH
  let height = finite(state?.height) ? state.height : DEFAULT_HEIGHT

  // Largest work-area extent per axis among displays with finite sizes.
  let maxWidth = 0
  let maxHeight = 0
  const known = Array.isArray(displays) ? displays : []

  known.forEach(({ workArea: area } = {}) => {
    if (area && finite(area.width) && finite(area.height)) {
      maxWidth = Math.max(maxWidth, area.width)
      maxHeight = Math.max(maxHeight, area.height)
    }
  })

  if (maxWidth && maxHeight) {
    width = clamp(width, MIN_WIDTH, maxWidth)
    height = clamp(height, MIN_HEIGHT, maxHeight)
  }

  const options = { width, height }
  const hasSavedPosition = Boolean(state) && finite(state.x) && finite(state.y)

  if (hasSavedPosition && onScreen({ x: state.x, y: state.y, width, height }, displays)) {
    options.x = state.x
    options.y = state.y
  }

  return options
}
|
||||
|
||||
// Trailing debounce. Every call to the returned function restarts a
// `delayMs` timer; `fn` runs once (with no arguments) when the timer finally
// expires, so a burst of resize/move events (Linux fires many mid-drag)
// collapses into a single run after the last one.
//
// The returned function also carries `.flush()`, which cancels any pending
// timer and runs `fn` immediately — whether or not a call was pending. It is
// used on close, before the window is gone.
function debounce(fn, delayMs) {
  let pending = null

  function debounced() {
    clearTimeout(pending)
    pending = setTimeout(() => {
      pending = null
      fn()
    }, delayMs)
  }

  function flush() {
    clearTimeout(pending)
    pending = null
    fn()
  }

  return Object.assign(debounced, { flush })
}
|
||||
|
||||
module.exports = {
|
||||
DEFAULT_WIDTH,
|
||||
DEFAULT_HEIGHT,
|
||||
MIN_WIDTH,
|
||||
MIN_HEIGHT,
|
||||
MIN_VISIBLE,
|
||||
sanitizeWindowState,
|
||||
onScreen,
|
||||
computeWindowOptions,
|
||||
debounce
|
||||
}
|
||||
135
apps/desktop/electron/window-state.test.cjs
Normal file
135
apps/desktop/electron/window-state.test.cjs
Normal file
@@ -0,0 +1,135 @@
|
||||
/**
|
||||
* Unit tests for the pure window-state geometry helpers. These cover the logic
|
||||
* that protects the user: garbage rejection, off-screen fallback, oversized
|
||||
* clamping, and the debounce that collapses mid-drag write storms.
|
||||
*/
|
||||
|
||||
const test = require('node:test')
|
||||
const assert = require('node:assert/strict')
|
||||
|
||||
const {
|
||||
DEFAULT_WIDTH,
|
||||
DEFAULT_HEIGHT,
|
||||
MIN_WIDTH,
|
||||
MIN_HEIGHT,
|
||||
sanitizeWindowState,
|
||||
onScreen,
|
||||
computeWindowOptions,
|
||||
debounce
|
||||
} = require('./window-state.cjs')
|
||||
|
||||
// A single 1920×1080 monitor (work area trimmed for the taskbar).
|
||||
const PRIMARY = [{ workArea: { x: 0, y: 0, width: 1920, height: 1040 } }]
|
||||
// A laptop panel left behind after a bigger external monitor is unplugged.
|
||||
const LAPTOP = [{ workArea: { x: 0, y: 0, width: 1366, height: 728 } }]
|
||||
|
||||
// ─── sanitizeWindowState ───────────────────────────────────────────────────
|
||||
|
||||
test('sanitizeWindowState rejects missing/garbage input', () => {
|
||||
for (const bad of [null, undefined, 'nope', 42, {}, { width: 'x', height: 800 }, { width: NaN, height: 800 }, { width: 1000 }]) {
|
||||
assert.equal(sanitizeWindowState(bad), null)
|
||||
}
|
||||
})
|
||||
|
||||
test('sanitizeWindowState keeps a valid full state and rounds HiDPI fractions', () => {
|
||||
assert.deepEqual(sanitizeWindowState({ x: 100.6, y: 50.2, width: 1400.4, height: 900.7, isMaximized: true }), {
|
||||
x: 101,
|
||||
y: 50,
|
||||
width: 1400,
|
||||
height: 901,
|
||||
isMaximized: true
|
||||
})
|
||||
})
|
||||
|
||||
test('sanitizeWindowState floors size to the minimums', () => {
|
||||
const state = sanitizeWindowState({ width: 10, height: 10 })
|
||||
assert.equal(state.width, MIN_WIDTH)
|
||||
assert.equal(state.height, MIN_HEIGHT)
|
||||
})
|
||||
|
||||
test('sanitizeWindowState drops a partial position but keeps the size', () => {
|
||||
assert.deepEqual(sanitizeWindowState({ x: 100, width: 1400, height: 900 }), {
|
||||
width: 1400,
|
||||
height: 900,
|
||||
isMaximized: false
|
||||
})
|
||||
})
|
||||
|
||||
test('sanitizeWindowState treats isMaximized strictly', () => {
|
||||
assert.equal(sanitizeWindowState({ width: 1400, height: 900, isMaximized: 'yes' }).isMaximized, false)
|
||||
})
|
||||
|
||||
// ─── onScreen ──────────────────────────────────────────────────────────────
|
||||
|
||||
test('onScreen accepts a window on the primary or a secondary display', () => {
|
||||
const dual = [...PRIMARY, { workArea: { x: 1920, y: 0, width: 2560, height: 1400 } }]
|
||||
assert.equal(onScreen({ x: 100, y: 100, width: 1220, height: 800 }, PRIMARY), true)
|
||||
assert.equal(onScreen({ x: 2200, y: 200, width: 1220, height: 800 }, dual), true)
|
||||
})
|
||||
|
||||
test('onScreen rejects off-screen, slivers, and bad input', () => {
|
||||
assert.equal(onScreen({ x: 3000, y: 100, width: 1220, height: 800 }, PRIMARY), false) // past right edge
|
||||
assert.equal(onScreen({ x: 100, y: -900, width: 1220, height: 800 }, PRIMARY), false) // above top
|
||||
assert.equal(onScreen({ x: 1910, y: 100, width: 1220, height: 800 }, PRIMARY), false) // ~10px sliver
|
||||
assert.equal(onScreen({ x: 0, y: 0, width: 1220, height: 800 }, []), false)
|
||||
assert.equal(onScreen({ x: 0, y: 0, width: 1220, height: 800 }, null), false)
|
||||
})
|
||||
|
||||
// ─── computeWindowOptions ──────────────────────────────────────────────────
|
||||
|
||||
test('computeWindowOptions falls back to defaults with no saved state', () => {
|
||||
assert.deepEqual(computeWindowOptions(null, PRIMARY), { width: DEFAULT_WIDTH, height: DEFAULT_HEIGHT })
|
||||
})
|
||||
|
||||
test('computeWindowOptions restores an on-screen position', () => {
|
||||
const saved = sanitizeWindowState({ x: 200, y: 150, width: 1400, height: 900 })
|
||||
assert.deepEqual(computeWindowOptions(saved, PRIMARY), { width: 1400, height: 900, x: 200, y: 150 })
|
||||
})
|
||||
|
||||
test('computeWindowOptions keeps the size but drops an off-screen position', () => {
|
||||
const saved = sanitizeWindowState({ x: 5000, y: 150, width: 1400, height: 900 })
|
||||
assert.deepEqual(computeWindowOptions(saved, PRIMARY), { width: 1400, height: 900 })
|
||||
})
|
||||
|
||||
test('computeWindowOptions clamps a size larger than the only display', () => {
|
||||
const saved = sanitizeWindowState({ width: 2560, height: 1440 })
|
||||
assert.deepEqual(computeWindowOptions(saved, LAPTOP), { width: 1366, height: 728 })
|
||||
})
|
||||
|
||||
test('computeWindowOptions keeps the MIN floor on a sub-minimum display', () => {
|
||||
const tiny = [{ workArea: { x: 0, y: 0, width: 360, height: 480 } }]
|
||||
const saved = sanitizeWindowState({ width: 2000, height: 1500 })
|
||||
assert.deepEqual(computeWindowOptions(saved, tiny), { width: MIN_WIDTH, height: MIN_HEIGHT })
|
||||
})
|
||||
|
||||
test('computeWindowOptions does not clamp when displays are unknown', () => {
|
||||
const saved = sanitizeWindowState({ width: 2560, height: 1440 })
|
||||
assert.deepEqual(computeWindowOptions(saved, []), { width: 2560, height: 1440 })
|
||||
})
|
||||
|
||||
// ─── debounce ──────────────────────────────────────────────────────────────
|
||||
|
||||
test('debounce coalesces a burst into one trailing run', t => {
|
||||
t.mock.timers.enable({ apis: ['setTimeout'] })
|
||||
let calls = 0
|
||||
const d = debounce(() => { calls += 1 }, 250)
|
||||
|
||||
d(); d(); d()
|
||||
assert.equal(calls, 0)
|
||||
t.mock.timers.tick(249)
|
||||
assert.equal(calls, 0)
|
||||
t.mock.timers.tick(1)
|
||||
assert.equal(calls, 1)
|
||||
})
|
||||
|
||||
test('debounce.flush runs now and cancels the pending timer', t => {
|
||||
t.mock.timers.enable({ apis: ['setTimeout'] })
|
||||
let calls = 0
|
||||
const d = debounce(() => { calls += 1 }, 250)
|
||||
|
||||
d()
|
||||
d.flush()
|
||||
assert.equal(calls, 1)
|
||||
t.mock.timers.tick(1000)
|
||||
assert.equal(calls, 1)
|
||||
})
|
||||
@@ -12,7 +12,8 @@ function readElectronFile(name) {
|
||||
}
|
||||
|
||||
function requireHiddenChildOptions(source, needle) {
|
||||
const index = source.indexOf(needle)
|
||||
const match = needle instanceof RegExp ? needle.exec(source) : null
|
||||
const index = needle instanceof RegExp ? match?.index ?? -1 : source.indexOf(needle)
|
||||
assert.notEqual(index, -1, `missing call site: ${needle}`)
|
||||
const snippet = source.slice(index, index + 700)
|
||||
assert.match(
|
||||
@@ -28,14 +29,28 @@ test('desktop background child processes opt into hidden Windows consoles', () =
|
||||
assert.match(source, /function hiddenWindowsChildOptions\(options = \{\}\)/)
|
||||
|
||||
requireHiddenChildOptions(source, "execFileSync(\n 'reg'")
|
||||
requireHiddenChildOptions(source, 'execFileSync(pyExe')
|
||||
requireHiddenChildOptions(source, 'spawn(resolveGitBinary()')
|
||||
requireHiddenChildOptions(source, /execFileSync\(\s*pyExe/)
|
||||
requireHiddenChildOptions(source, /spawn\(\s*resolveGitBinary\(\)/)
|
||||
requireHiddenChildOptions(source, "execFileSync('taskkill'")
|
||||
requireHiddenChildOptions(source, 'spawn(command, args')
|
||||
requireHiddenChildOptions(source, /spawn\(\s*command,\s*args/)
|
||||
requireHiddenChildOptions(source, "spawn('curl'")
|
||||
requireHiddenChildOptions(source, 'spawn(backend.command, backend.args')
|
||||
requireHiddenChildOptions(source, 'hermesProcess = spawn(backend.command, backend.args')
|
||||
requireHiddenChildOptions(source, "spawn(py, ['-m', 'hermes_cli.main', 'uninstall', '--gui-summary']")
|
||||
requireHiddenChildOptions(source, /spawn\(\s*backend\.command,\s*backend\.args/)
|
||||
requireHiddenChildOptions(source, /hermesProcess = spawn\(\s*backend\.command,\s*backend\.args/)
|
||||
requireHiddenChildOptions(source, /spawn\(\s*py,\s*\['-m', 'hermes_cli\.main', 'uninstall', '--gui-summary'\]/)
|
||||
|
||||
assert.match(source, /function unwrapWindowsVenvHermesCommand\(command, dashboardArgs\)/)
|
||||
assert.match(source, /existing Hermes no-console Python at/)
|
||||
assert.match(source, /function getNoConsoleVenvPython\(venvRoot\)/)
|
||||
assert.match(source, /function toNoConsolePython\(pythonPath\)/)
|
||||
assert.match(source, /function applyWindowsNoConsoleSpawnHints\(backend\)/)
|
||||
assert.match(source, /function readVenvHome\(venvRoot\)/)
|
||||
assert.match(source, /path\.join\(venvRoot, 'Scripts', 'pythonw\.exe'\)/)
|
||||
assert.match(source, /backendStartFailure/)
|
||||
assert.match(source, /HERMES_DESKTOP_READY_FILE/)
|
||||
assert.match(source, /readyFile: true/)
|
||||
assert.match(source, /function getVenvSitePackagesEntries\(venvRoot\)/)
|
||||
assert.match(source, /path\.join\(venvRoot, 'Lib', 'site-packages'\)/)
|
||||
assert.match(source, /args: \['-m', 'hermes_cli\.main', \.\.\.dashboardArgs\]/)
|
||||
})
|
||||
|
||||
test('intentional or interactive desktop child processes stay documented', () => {
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
"profile:main": "wait-on http://127.0.0.1:5174 && cross-env XCURSOR_SIZE=24 HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron --inspect=9229 .",
|
||||
"profile:main:cpu": "wait-on http://127.0.0.1:5174 && cross-env XCURSOR_SIZE=24 NODE_OPTIONS=--cpu-prof HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron .",
|
||||
"start": "npm run build && electron .",
|
||||
"build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && npm run postbuild",
|
||||
"build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && node scripts/bundle-electron-main.mjs && npm run postbuild",
|
||||
"postbuild": "node scripts/assert-dist-built.cjs",
|
||||
"prebuilder": "node scripts/patch-electron-builder-mac-binary.cjs",
|
||||
"builder": "cross-env NODE_OPTIONS=--max-old-space-size=16384 node scripts/run-electron-builder.cjs",
|
||||
@@ -37,7 +37,7 @@
|
||||
"test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
|
||||
"test:desktop:existing": "node scripts/test-desktop.mjs existing",
|
||||
"test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
|
||||
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/backend-ready.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/link-title-window.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/update-rebuild.test.cjs electron/update-marker.test.cjs electron/update-relaunch.test.cjs electron/windows-user-env.test.cjs",
|
||||
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/backend-ready.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/link-title-window.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/git-worktree-ops.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/update-count.test.cjs electron/update-rebuild.test.cjs electron/update-marker.test.cjs electron/update-relaunch.test.cjs electron/windows-user-env.test.cjs electron/window-state.test.cjs",
|
||||
"typecheck": "tsc -p . --noEmit",
|
||||
"lint": "eslint src/ electron/",
|
||||
"lint:fix": "eslint src/ electron/ --fix",
|
||||
@@ -93,6 +93,7 @@
|
||||
"remark-math": "^6.0.0",
|
||||
"remend": "^1.3.0",
|
||||
"shiki": "^4.0.2",
|
||||
"simple-git": "^3.36.0",
|
||||
"streamdown": "^2.5.0",
|
||||
"tailwind-merge": "^3.5.0",
|
||||
"tailwindcss": "^4.2.4",
|
||||
|
||||
33
apps/desktop/scripts/bundle-electron-main.mjs
Normal file
33
apps/desktop/scripts/bundle-electron-main.mjs
Normal file
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env node
|
||||
// bundle-electron-main.mjs — bundles electron/main.cjs into a single
|
||||
// self-contained file so the nix build doesn't need to ship node_modules/.
|
||||
//
|
||||
// `electron` is provided by the runtime; `node-pty` is staged separately
|
||||
// via stage-native-deps.cjs. `preload.cjs` is NOT require()'d by main —
|
||||
// Electron loads it via path.join(__dirname, 'preload.cjs') — so it stays
|
||||
// as a separate file and doesn't need bundling.
|
||||
import { build } from 'esbuild'
|
||||
import { resolve, dirname } from 'node:path'
|
||||
import { fileURLToPath } from 'node:url'
|
||||
import { renameSync } from 'node:fs'
|
||||
|
||||
const scriptDir = dirname(fileURLToPath(import.meta.url))
const projectRoot = resolve(scriptDir, '..')
const mainEntry = resolve(projectRoot, 'electron/main.cjs')
const bundledTmp = resolve(projectRoot, 'electron/main.bundled.cjs')

// Bundle main.cjs and everything it require()s into one CommonJS file.
// `electron` and `node-pty` stay external and are resolved at runtime.
const buildOptions = {
  entryPoints: [mainEntry],
  bundle: true,
  platform: 'node',
  format: 'cjs',
  target: 'node20',
  outfile: bundledTmp,
  external: ['electron', 'node-pty'],
  logLevel: 'info'
}

await build(buildOptions)

// Replace the original entry with the bundle. NOTE: this overwrites
// electron/main.cjs in place, so the unbundled source is gone from the
// working tree after this step.
renameSync(bundledTmp, mainEntry)

console.log(`bundled ${mainEntry}`)
|
||||
@@ -4,14 +4,15 @@ import { type ReactNode, useEffect, useMemo, useState } from 'react'
|
||||
import { useElapsedSeconds } from '@/components/chat/activity-timer'
|
||||
import { ActivityTimerText } from '@/components/chat/activity-timer-text'
|
||||
import { FadeText } from '@/components/ui/fade-text'
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import { GlyphSpinner } from '@/components/ui/glyph-spinner'
|
||||
import { type Translations, useI18n } from '@/i18n'
|
||||
import { AlertCircle, CheckCircle2, Sparkles } from '@/lib/icons'
|
||||
import { AlertCircle, CheckCircle2 } from '@/lib/icons'
|
||||
import { useEnterAnimation } from '@/lib/use-enter-animation'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { $activeSessionId } from '@/store/session'
|
||||
import {
|
||||
$subagentsBySession,
|
||||
allSubagents,
|
||||
buildSubagentTree,
|
||||
type SubagentNode,
|
||||
type SubagentStatus,
|
||||
@@ -77,15 +78,12 @@ interface AgentsViewProps {
|
||||
|
||||
export function AgentsView({ onClose }: AgentsViewProps) {
|
||||
const { t } = useI18n()
|
||||
const activeSessionId = useStore($activeSessionId)
|
||||
const subagentsBySession = useStore($subagentsBySession)
|
||||
|
||||
const activeSubagents = useMemo(
|
||||
() => (activeSessionId ? (subagentsBySession[activeSessionId] ?? []) : []),
|
||||
[activeSessionId, subagentsBySession]
|
||||
)
|
||||
|
||||
const tree = useMemo(() => buildSubagentTree(activeSubagents), [activeSubagents])
|
||||
// Aggregate every session, matching the status-bar indicator — a subagent
|
||||
// running in a background session must still be visible here, or the two
|
||||
// desync ("Agents N running" vs an empty tree).
|
||||
const tree = useMemo(() => buildSubagentTree(allSubagents(subagentsBySession)), [subagentsBySession])
|
||||
|
||||
return (
|
||||
<OverlayView
|
||||
@@ -212,7 +210,7 @@ function SubagentTree({ tree }: { tree: SubagentNode[] }) {
|
||||
if (tree.length === 0) {
|
||||
return (
|
||||
<div className="grid place-items-center gap-3 py-12 text-center">
|
||||
<Sparkles className="size-6 text-muted-foreground/60" />
|
||||
<Codicon className="text-muted-foreground/60" name="hubot" size="1.5rem" />
|
||||
<p className="text-sm font-medium text-foreground/90">{t.agents.emptyTitle}</p>
|
||||
<p className="max-w-md text-xs leading-relaxed text-muted-foreground/75">{t.agents.emptyDesc}</p>
|
||||
</div>
|
||||
|
||||
106
apps/desktop/src/app/chat/composer/composer-text-guard.test.tsx
Normal file
106
apps/desktop/src/app/chat/composer/composer-text-guard.test.tsx
Normal file
@@ -0,0 +1,106 @@
|
||||
// @vitest-environment jsdom
|
||||
import { act, cleanup, render } from '@testing-library/react'
|
||||
import { useCallback, useRef } from 'react'
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
afterEach(cleanup)
|
||||
|
||||
// Regression repro for #49903: on desktop v0.17.0 the composer threw an
|
||||
// uncaught `Error: Composer is not available` at startup and the input went
|
||||
// unresponsive. The throw comes from @assistant-ui/core's composer-runtime —
|
||||
// every *mutator* (setText/send/…) does `if (!core) throw new Error("Composer
|
||||
// is not available")` when the thread's composer core isn't bound yet. Unlike
|
||||
// the read path (`s.composer.text`, which is null-safe: `runtime?.text ?? ""`),
|
||||
// the mutators have no graceful fallback. ChatBar's mount-time effects (draft
|
||||
// restore, clearDraft, external inserts) push text via `aui.composer().setText`
|
||||
// before the core binds, and the popout refactor (#49488) widened that window,
|
||||
// so the throw surfaced as an uncaught error that wedged the input.
|
||||
//
|
||||
// The fix wraps every `aui.composer().setText` call in a `setComposerText`
|
||||
// helper that swallows the unbound-core throw — the contentEditable DOM +
|
||||
// draftRef already hold the text and the draft⇄editor sync re-applies it once
|
||||
// the core attaches, so nothing is lost. This Harness mirrors that helper
|
||||
// faithfully (same try/catch shape) over a fake `aui` whose composer can be
|
||||
// toggled bound/unbound, the way the assistant-ui runtime behaves across mount.
|
||||
|
||||
interface FakeComposer {
|
||||
setText: (value: string) => void
|
||||
}
|
||||
|
||||
// Stand-in for the composer surface of index.tsx's `useAui()`. `composer()`
// always hands back the same fake runtime, whose `setText` behaves like
// @assistant-ui/core: it throws "Composer is not available" while
// `bound.current` is false, and otherwise records the value in `applied`.
function makeFakeAui(bound: { current: boolean }, applied: string[]) {
  const setText = (value: string): void => {
    if (bound.current) {
      applied.push(value)

      return
    }

    throw new Error('Composer is not available')
  }

  const composer: FakeComposer = { setText }

  return { composer: () => composer }
}
|
||||
|
||||
function Harness({
|
||||
bound,
|
||||
applied,
|
||||
onError
|
||||
}: {
|
||||
applied: string[]
|
||||
bound: { current: boolean }
|
||||
onError: (err: unknown) => void
|
||||
}) {
|
||||
const aui = useRef(makeFakeAui(bound, applied)).current
|
||||
|
||||
// Verbatim mirror of the production `setComposerText` helper in index.tsx.
|
||||
const setComposerText = useCallback(
|
||||
(value: string) => {
|
||||
try {
|
||||
aui.composer().setText(value)
|
||||
} catch {
|
||||
// Composer core not bound yet — swallow so the input stays usable.
|
||||
}
|
||||
},
|
||||
[aui]
|
||||
)
|
||||
|
||||
// A draft-restore-on-mount that fires while the core may still be unbound,
|
||||
// exactly like loadIntoComposer/clearDraft do on startup.
|
||||
try {
|
||||
setComposerText('restored draft')
|
||||
} catch (err) {
|
||||
onError(err)
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
describe('setComposerText guard (#49903)', () => {
|
||||
it('swallows the unbound-core throw at startup instead of crashing the renderer', () => {
|
||||
const applied: string[] = []
|
||||
const bound = { current: false }
|
||||
const onError = vi.fn()
|
||||
|
||||
expect(() => render(<Harness applied={applied} bound={bound} onError={onError} />)).not.toThrow()
|
||||
|
||||
// The guard absorbed the throw — nothing escaped to the renderer, and no
|
||||
// assistant-ui write landed (core was unbound).
|
||||
expect(onError).not.toHaveBeenCalled()
|
||||
expect(applied).toEqual([])
|
||||
})
|
||||
|
||||
it('writes through to the composer once the core is bound', () => {
|
||||
const applied: string[] = []
|
||||
const bound = { current: true }
|
||||
const onError = vi.fn()
|
||||
|
||||
act(() => {
|
||||
render(<Harness applied={applied} bound={bound} onError={onError} />)
|
||||
})
|
||||
|
||||
expect(onError).not.toHaveBeenCalled()
|
||||
expect(applied).toEqual(['restored draft'])
|
||||
})
|
||||
})
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
DropdownMenuTrigger
|
||||
} from '@/components/ui/dropdown-menu'
|
||||
import { Kbd } from '@/components/ui/kbd'
|
||||
import { Tip } from '@/components/ui/tooltip'
|
||||
import { useI18n } from '@/i18n'
|
||||
import { Clipboard, FileText, FolderOpen, type IconComponent, ImageIcon, Link, MessageSquareText } from '@/lib/icons'
|
||||
import { cn } from '@/lib/utils'
|
||||
@@ -42,22 +43,23 @@ export function ContextMenu({
|
||||
return (
|
||||
<>
|
||||
<DropdownMenu>
|
||||
<DropdownMenuTrigger asChild>
|
||||
<Button
|
||||
aria-label={state.tools.label}
|
||||
className={cn(
|
||||
GHOST_ICON_BTN,
|
||||
'data-[state=open]:bg-(--chrome-action-hover) data-[state=open]:text-foreground'
|
||||
)}
|
||||
disabled={!state.tools.enabled}
|
||||
size="icon"
|
||||
title={state.tools.label}
|
||||
type="button"
|
||||
variant="ghost"
|
||||
>
|
||||
<Codicon name="add" size="0.875rem" />
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
<Tip label={state.tools.label} side="top">
|
||||
<DropdownMenuTrigger asChild>
|
||||
<Button
|
||||
aria-label={state.tools.label}
|
||||
className={cn(
|
||||
GHOST_ICON_BTN,
|
||||
'data-[state=open]:bg-(--chrome-action-hover) data-[state=open]:text-foreground'
|
||||
)}
|
||||
disabled={!state.tools.enabled}
|
||||
size="icon"
|
||||
type="button"
|
||||
variant="ghost"
|
||||
>
|
||||
<Codicon name="add" size="0.875rem" />
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
</Tip>
|
||||
<DropdownMenuContent align="start" className={cn('w-60', composerPanelCard)} side="top" sideOffset={6}>
|
||||
<DropdownMenuLabel className="px-2 pb-0.5 pt-0.5 text-[0.625rem] font-semibold uppercase tracking-wider text-(--ui-text-tertiary)">
|
||||
{c.attachLabel}
|
||||
|
||||
@@ -10,8 +10,8 @@
|
||||
* steal focus from the composer effect.
|
||||
*/
|
||||
|
||||
import { RICH_INPUT_SLOT } from './rich-editor'
|
||||
import type { InlineRefInput } from './inline-refs'
|
||||
import { RICH_INPUT_SLOT } from './rich-editor'
|
||||
|
||||
export type ComposerTarget = 'edit' | 'main'
|
||||
export type ComposerInsertMode = 'block' | 'inline'
|
||||
@@ -34,6 +34,13 @@ interface InsertRefsDetail {
|
||||
const FOCUS_EVENT = 'hermes:composer-focus'
|
||||
const INSERT_EVENT = 'hermes:composer-insert'
|
||||
const INSERT_REFS_EVENT = 'hermes:composer-insert-refs'
|
||||
const SUBMIT_EVENT = 'hermes:composer-submit'
|
||||
const VOICE_TOGGLE_EVENT = 'hermes:composer-voice-toggle'
|
||||
|
||||
/** Payload of a composer submit event (`SUBMIT_EVENT`). */
interface SubmitDetail {
  /** Composer the submission is addressed to. */
  target: ComposerTarget
  /** Prompt text to send through that composer. */
  text: string
}
|
||||
|
||||
let activeTarget: ComposerTarget = 'main'
|
||||
|
||||
@@ -105,6 +112,30 @@ export const requestComposerInsertRefs = (
|
||||
/**
 * Register `handler` for insert-refs requests (`INSERT_REFS_EVENT`).
 * Returns whatever `subscribe` returns — presumably an unsubscribe function;
 * confirm against `subscribe`.
 */
export const onComposerInsertRefsRequest = (handler: (detail: InsertRefsDetail) => void) => {
  return subscribe<InsertRefsDetail>(INSERT_REFS_EVENT, handler)
}
|
||||
|
||||
/**
 * Ask a composer to send `text` exactly as if the user had typed and submitted
 * it. This is how external panels (for example the review pane's "let the agent
 * ship it" button) give the agent a task without routing through the input.
 *
 * The text is trimmed first; a blank or whitespace-only request dispatches
 * nothing. `target` defaults to `'active'` and is passed through `resolve`
 * before the event is dispatched.
 */
export const requestComposerSubmit = (
  text: string,
  { target = 'active' }: { target?: ComposerTarget | 'active' } = {}
) => {
  const prompt = text.trim()

  // Nothing worth submitting.
  if (!prompt) {
    return
  }

  const detail: SubmitDetail = { target: resolve(target), text: prompt }

  dispatch<SubmitDetail>(SUBMIT_EVENT, detail)
}
|
||||
|
||||
/**
 * Register `handler` for composer submit requests (`SUBMIT_EVENT`).
 * Returns whatever `subscribe` returns — presumably an unsubscribe function;
 * confirm against `subscribe`.
 */
export const onComposerSubmitRequest = (handler: (detail: SubmitDetail) => void) => {
  return subscribe<SubmitDetail>(SUBMIT_EVENT, handler)
}
|
||||
|
||||
/**
 * Request a toggle of the active composer's voice conversation. This is the
 * path the `composer.voice` hotkey (Ctrl+B) uses to reach the composer that
 * owns the voice state. The event payload is just the dispatch timestamp.
 */
export const requestVoiceToggle = () => {
  const detail = { at: Date.now() }

  return dispatch<{ at: number }>(VOICE_TOGGLE_EVENT, detail)
}
|
||||
|
||||
/**
 * Register `handler` for voice-toggle requests (`VOICE_TOGGLE_EVENT`). The
 * event's `{ at }` payload is dropped; `handler` is invoked with no arguments.
 */
export const onComposerVoiceToggleRequest = (handler: () => void) => {
  const relay = () => handler()

  return subscribe<{ at: number }>(VOICE_TOGGLE_EVENT, relay)
}
|
||||
|
||||
/**
|
||||
* Focus a composer input across React commit + browser focus restore.
|
||||
*
|
||||
|
||||
@@ -45,8 +45,8 @@ import {
|
||||
$composerPoppedOut,
|
||||
POPOUT_WIDTH_REM,
|
||||
readPopoutBounds,
|
||||
setComposerPoppedOut,
|
||||
setComposerPopoutPosition
|
||||
setComposerPopoutPosition,
|
||||
setComposerPoppedOut
|
||||
} from '@/store/composer-popout'
|
||||
import {
|
||||
$queuedPromptsBySession,
|
||||
@@ -61,6 +61,9 @@ import {
|
||||
} from '@/store/composer-queue'
|
||||
import { $statusItemsBySession } from '@/store/composer-status'
|
||||
import { notify } from '@/store/notifications'
|
||||
import { $previewStatusBySession } from '@/store/preview-status'
|
||||
import { listRepoBranches, requestStartWorkSession, startWorkInRepo, switchBranchInRepo } from '@/store/projects'
|
||||
import { toggleReview } from '@/store/review'
|
||||
import { $gatewayState, $messages, setSessionPickerOpen } from '@/store/session'
|
||||
import { $threadScrolledUp } from '@/store/thread-scroll'
|
||||
import { isSecondaryWindow } from '@/store/windows'
|
||||
@@ -78,7 +81,9 @@ import {
|
||||
markActiveComposer,
|
||||
onComposerFocusRequest,
|
||||
onComposerInsertRefsRequest,
|
||||
onComposerInsertRequest
|
||||
onComposerInsertRequest,
|
||||
onComposerSubmitRequest,
|
||||
onComposerVoiceToggleRequest
|
||||
} from './focus'
|
||||
import { HelpHint } from './help-hint'
|
||||
import { useAtCompletions } from './hooks/use-at-completions'
|
||||
@@ -106,6 +111,7 @@ import {
|
||||
slashChipElement
|
||||
} from './rich-editor'
|
||||
import { ComposerStatusStack } from './status-stack'
|
||||
import { CodingStatusRow } from './status-stack/coding-row'
|
||||
import { detectTrigger, extractClipboardImageBlobs, textBeforeCaret, type TriggerState } from './text-utils'
|
||||
import { ComposerTriggerPopover } from './trigger-popover'
|
||||
import type { ChatBarProps } from './types'
|
||||
@@ -192,9 +198,36 @@ export function ChatBar({
|
||||
}: ChatBarProps) {
|
||||
const aui = useAui()
|
||||
const draft = useAuiState(s => s.composer.text)
|
||||
|
||||
// Guarded write into assistant-ui's composer state.
//
// Reading `s.composer.text` is null-safe, but the composer *mutators*
// (setText/send/…) throw "Composer is not available" while the thread's
// composer core is still unbound, with no graceful fallback. This ChatBar's
// mount effects (draft restore, clearDraft, external inserts) can run inside
// that startup/thread-swap window. The popout refactor (#49488) widened the
// window by moving the composer out of the contain wrapper into a sibling of
// the thread, and the throw began surfacing as an uncaught error that wedged
// the desktop input (#49903).
//
// Every mutation therefore goes through this wrapper, which turns a not-ready
// core into a no-op. The draft is never lost: the contentEditable DOM and
// draftRef already hold the text, and the draft⇄editor sync reconciles composer
// state once the core attaches — only the premature state push is skipped.
const setComposerText = useCallback(
  (value: string) => {
    try {
      const composer = aui.composer()

      composer.setText(value)
    } catch {
      // Core not bound yet: the DOM/draftRef carry the text and the sync effect
      // re-applies it after bind. Swallowing keeps the input usable.
    }
  },
  [aui]
)
|
||||
|
||||
const attachments = useStore($composerAttachments)
|
||||
const queuedPromptsBySession = useStore($queuedPromptsBySession)
|
||||
const statusItemsBySession = useStore($statusItemsBySession)
|
||||
const previewStatusBySession = useStore($previewStatusBySession)
|
||||
const scrolledUp = useStore($threadScrolledUp)
|
||||
// Pop-out is a shared, persisted state — but secondary windows (the Ctrl+Shift+N
|
||||
// tiny window, subagent watch windows) always start docked and can't pop out:
|
||||
@@ -217,8 +250,12 @@ export function ChatBar({
|
||||
|
||||
const statusStackVisible = useMemo(
|
||||
() =>
|
||||
queuedPrompts.length > 0 || (statusSessionId ? (statusItemsBySession[statusSessionId]?.length ?? 0) > 0 : false),
|
||||
[queuedPrompts.length, statusItemsBySession, statusSessionId]
|
||||
queuedPrompts.length > 0 ||
|
||||
(statusSessionId
|
||||
? (statusItemsBySession[statusSessionId]?.length ?? 0) > 0 ||
|
||||
(previewStatusBySession[statusSessionId]?.length ?? 0) > 0
|
||||
: false),
|
||||
[previewStatusBySession, queuedPrompts.length, statusItemsBySession, statusSessionId]
|
||||
)
|
||||
|
||||
const composerRef = useRef<HTMLFormElement | null>(null)
|
||||
@@ -364,7 +401,7 @@ export function ChatBar({
|
||||
const next = `${base}${sep}${value}`
|
||||
|
||||
draftRef.current = next
|
||||
aui.composer().setText(next)
|
||||
setComposerText(next)
|
||||
|
||||
const editor = editorRef.current
|
||||
|
||||
@@ -375,7 +412,7 @@ export function ChatBar({
|
||||
|
||||
setFocusRequestId(id => id + 1)
|
||||
},
|
||||
[aui]
|
||||
[setComposerText]
|
||||
)
|
||||
|
||||
useEffect(() => {
|
||||
@@ -585,7 +622,7 @@ export function ChatBar({
|
||||
const nextDraft = `${currentDraft}${sep}${text}`
|
||||
|
||||
draftRef.current = nextDraft
|
||||
aui.composer().setText(nextDraft)
|
||||
setComposerText(nextDraft)
|
||||
|
||||
// Push the new text into the contentEditable editor directly. Setting the
|
||||
// assistant-ui composer state alone is not enough: the draft→editor sync
|
||||
@@ -618,7 +655,7 @@ export function ChatBar({
|
||||
}
|
||||
|
||||
draftRef.current = nextDraft
|
||||
aui.composer().setText(nextDraft)
|
||||
setComposerText(nextDraft)
|
||||
requestMainFocus()
|
||||
|
||||
return true
|
||||
@@ -704,7 +741,7 @@ export function ChatBar({
|
||||
|
||||
if (nextDraft !== draftRef.current) {
|
||||
draftRef.current = nextDraft
|
||||
aui.composer().setText(nextDraft)
|
||||
setComposerText(nextDraft)
|
||||
}
|
||||
|
||||
window.setTimeout(refreshTrigger, 0)
|
||||
@@ -830,7 +867,7 @@ export function ChatBar({
|
||||
renderComposerContents(editor, prefix)
|
||||
placeCaretEnd(editor)
|
||||
draftRef.current = composerPlainText(editor)
|
||||
aui.composer().setText(draftRef.current)
|
||||
setComposerText(draftRef.current)
|
||||
closeTrigger()
|
||||
runAction()
|
||||
requestMainFocus()
|
||||
@@ -858,7 +895,7 @@ export function ChatBar({
|
||||
|
||||
const finish = () => {
|
||||
draftRef.current = composerPlainText(editor)
|
||||
aui.composer().setText(draftRef.current)
|
||||
setComposerText(draftRef.current)
|
||||
requestMainFocus()
|
||||
keepTriggerOpen ? window.setTimeout(refreshTrigger, 0) : closeTrigger()
|
||||
}
|
||||
@@ -1310,17 +1347,91 @@ export function ChatBar({
|
||||
}
|
||||
|
||||
const clearDraft = useCallback(() => {
|
||||
aui.composer().setText('')
|
||||
setComposerText('')
|
||||
draftRef.current = ''
|
||||
|
||||
if (editorRef.current) {
|
||||
editorRef.current.replaceChildren()
|
||||
}
|
||||
}, [aui])
|
||||
}, [setComposerText])
|
||||
|
||||
// Worktree hand-off: start a fresh session anchored at `path`, with the current
// composer draft as its first turn. The draft is captured BEFORE clearing so it
// travels to the new session rather than being stashed under this one.
const openInWorktree = useCallback(
  (path: string) => {
    const carriedDraft = draftRef.current

    clearDraft()
    clearComposerAttachments()
    requestStartWorkSession(path, carriedDraft)
  },
  [clearDraft]
)
|
||||
|
||||
// Create a NEW worktree on a new branch (based on `base`, a branch name, or the
// current HEAD when omitted) and hand the draft off to it. If creation throws,
// the error propagates to the row (which toasts) before the draft is touched.
// With no cwd / a remote backend this is a no-op (the row hides the affordance).
const handleBranchOff = useCallback(
  async (branch: string, base?: string) => {
    const repoRoot = cwd?.trim()

    if (!repoRoot) {
      return
    }

    const created = await startWorkInRepo(repoRoot, { base, branch, name: branch })

    if (created) {
      openInWorktree(created.path)
    }
  },
  [cwd, openInWorktree]
)
|
||||
|
||||
// Turn an EXISTING branch into a worktree + session without creating a branch.
// Same hand-off as handleBranchOff, with three cases:
//   1. the branch already has a worktree path → open a session there;
//   2. it is the default branch → switch the repo checkout to it and open a
//      session at the repo path;
//   3. otherwise → create a worktree for the branch, then open a session there.
const handleConvertBranch = useCallback(
  async (branch: string, path?: null | string, isDefault?: boolean) => {
    // Case 1: a non-blank worktree path was supplied.
    if (path?.trim()) {
      openInWorktree(path)

      return
    }

    const repoRoot = cwd?.trim()

    // No repo to operate on: nothing to do.
    if (!repoRoot) {
      return
    }

    // Case 2: default branch — reuse the main checkout.
    if (isDefault) {
      await switchBranchInRepo(repoRoot, branch)
      openInWorktree(repoRoot)

      return
    }

    // Case 3: check the branch out into a fresh worktree.
    const created = await startWorkInRepo(repoRoot, { existingBranch: branch })

    if (created) {
      openInWorktree(created.path)
    }
  },
  [cwd, openInWorktree]
)
|
||||
|
||||
// List the branches of the repo at `cwd`; resolves to an empty list when there
// is no (non-blank) cwd.
const handleListBranches = useCallback(async () => {
  const repoRoot = cwd?.trim()

  if (!repoRoot) {
    return []
  }

  return listRepoBranches(repoRoot)
}, [cwd])
|
||||
|
||||
// Switch the repo at `cwd` to `branch`; a missing/blank cwd makes this a no-op.
const handleSwitchBranch = useCallback(
  async (branch: string) => {
    const repoRoot = cwd?.trim()

    if (!repoRoot) {
      return
    }

    await switchBranchInRepo(repoRoot, branch)
  },
  [cwd]
)
|
||||
|
||||
const loadIntoComposer = (text: string, attachments: ComposerAttachment[]) => {
|
||||
draftRef.current = text
|
||||
aui.composer().setText(text)
|
||||
setComposerText(text)
|
||||
$composerAttachments.set(cloneAttachments(attachments))
|
||||
|
||||
const editor = editorRef.current
|
||||
@@ -1641,6 +1752,41 @@ export function ChatBar({
|
||||
}
|
||||
}, [autoDrainNext, busy, queuedPrompts.length])
|
||||
|
||||
// Escape-to-cancel for the whole CHAT, not only the composer input (which has
// its own handler above): click into the transcript, press Esc, and the run
// stops, matching the Stop button. It only fires when clearly intended, so the
// handler bails when:
//   (a) the composer or another field already handled Esc (defaultPrevented),
//   (b) focus sits in an input/textarea/contenteditable (typing, not stopping),
//   (c) a dialog or popover is open — Esc must close that overlay and never
//       double as canceling the stream behind it.
// The latest handler lives in a ref so the window listener is registered once.
const escCancelRef = useRef<(event: globalThis.KeyboardEvent) => void>(() => {})

escCancelRef.current = (event: globalThis.KeyboardEvent) => {
  const unhandledEscape = event.key === 'Escape' && !event.defaultPrevented

  if (!unhandledEscape || !busy) {
    return
  }

  const focused = document.activeElement as HTMLElement | null

  const typing = focused
    ? focused.tagName === 'INPUT' || focused.tagName === 'TEXTAREA' || focused.isContentEditable
    : false

  if (typing) {
    return
  }

  const overlay = document.querySelector('[role="dialog"],[role="alertdialog"],[data-radix-popper-content-wrapper]')

  if (overlay) {
    return
  }

  event.preventDefault()
  triggerHaptic('cancel')
  void Promise.resolve(onCancel())
}

useEffect(() => {
  // Stable relay: always forwards to whatever handler the ref currently holds.
  const relay = (event: globalThis.KeyboardEvent) => escCancelRef.current(event)

  window.addEventListener('keydown', relay)

  return () => {
    window.removeEventListener('keydown', relay)
  }
}, [])
|
||||
|
||||
// Queue-edit cleanup: on session swap the scope effect already stashed the
|
||||
// edit snapshot; only restore into the composer when still on the same scope.
|
||||
useEffect(() => {
|
||||
@@ -1673,6 +1819,22 @@ export function ChatBar({
|
||||
.catch(restore)
|
||||
}
|
||||
|
||||
// Outside "submit this prompt" requests (e.g. the review pane's agent-ship
// button) go through the same send path as a typed message. The ref always
// points at the newest dispatchSubmit closure, so the subscription only needs
// to be rebuilt when `inputDisabled` changes.
const dispatchSubmitRef = useRef(dispatchSubmit)

dispatchSubmitRef.current = dispatchSubmit

useEffect(() => {
  const unsubscribe = onComposerSubmitRequest(({ target, text }) => {
    // Only the main composer answers, and only while its input is enabled.
    if (target !== 'main' || inputDisabled) {
      return
    }

    dispatchSubmitRef.current(text)
  })

  return unsubscribe
}, [inputDisabled])
|
||||
|
||||
const submitDraft = () => {
|
||||
if (disabled) {
|
||||
return
|
||||
@@ -1693,7 +1855,7 @@ export function ChatBar({
|
||||
|
||||
if (domText !== draftRef.current) {
|
||||
draftRef.current = domText
|
||||
aui.composer().setText(domText)
|
||||
setComposerText(domText)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1812,6 +1974,24 @@ export function ChatBar({
|
||||
pendingResponse
|
||||
})
|
||||
|
||||
// Handler for the `composer.voice` hotkey (Ctrl+B): flips the voice
// conversation on or off. Starting while STT is unconfigured is allowed on
// purpose — the conversation then surfaces its own "configure speech-to-text"
// notice instead of silently doing nothing. Ignored while the composer is
// disabled.
const toggleVoiceConversation = useCallback(() => {
  if (disabled) {
    return
  }

  if (!voiceConversationActive) {
    setVoiceConversationActive(true)

    return
  }

  setVoiceConversationActive(false)
  void conversation.end()
}, [conversation, disabled, voiceConversationActive])

useEffect(() => onComposerVoiceToggleRequest(toggleVoiceConversation), [toggleVoiceConversation])
|
||||
|
||||
const contextMenu = (
|
||||
<ContextMenu
|
||||
onInsertText={insertText}
|
||||
@@ -2048,7 +2228,7 @@ export function ChatBar({
|
||||
<div className="relative w-full rounded-[inherit]">
|
||||
<div
|
||||
className={cn(
|
||||
'group/composer-surface relative z-4 isolate rounded-[inherit] border border-[color-mix(in_srgb,var(--dt-composer-ring)_calc(18%*var(--composer-ring-strength)),var(--dt-input))] transition-[border-color] duration-200 ease-out focus-within:border-[color-mix(in_srgb,var(--dt-composer-ring)_calc(45%*var(--composer-ring-strength)),transparent)]',
|
||||
'group/composer-surface relative z-4 isolate grid grid-rows-[auto_1fr] overflow-hidden rounded-[inherit] border border-[color-mix(in_srgb,var(--dt-composer-ring)_calc(18%*var(--composer-ring-strength)),var(--dt-input))]',
|
||||
COMPOSER_DROP_FADE_CLASS,
|
||||
dragActive && COMPOSER_DROP_ACTIVE_CLASS
|
||||
)}
|
||||
@@ -2063,6 +2243,14 @@ export function ChatBar({
|
||||
composerSurfaceGlass
|
||||
)}
|
||||
/>
|
||||
<CodingStatusRow
|
||||
onBranchOff={handleBranchOff}
|
||||
onConvertBranch={handleConvertBranch}
|
||||
onListBranches={handleListBranches}
|
||||
onOpen={toggleReview}
|
||||
onOpenWorktree={openInWorktree}
|
||||
onSwitchBranch={handleSwitchBranch}
|
||||
/>
|
||||
<div
|
||||
className={cn(
|
||||
'relative z-1 flex min-h-0 w-full flex-col gap-(--composer-row-gap) overflow-hidden rounded-[inherit] px-(--composer-surface-pad-x) py-(--composer-surface-pad-y) transition-opacity duration-200 ease-out',
|
||||
|
||||
@@ -5,6 +5,7 @@ import { ModelMenuCloseContext } from '@/app/shell/model-menu-panel'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { DropdownMenu, DropdownMenuContent, DropdownMenuTrigger } from '@/components/ui/dropdown-menu'
|
||||
import { GlyphSpinner } from '@/components/ui/glyph-spinner'
|
||||
import { Tip } from '@/components/ui/tooltip'
|
||||
import { useI18n } from '@/i18n'
|
||||
import { ChevronDown } from '@/lib/icons'
|
||||
import { formatModelStatusLabel } from '@/lib/model-status-label'
|
||||
@@ -74,34 +75,36 @@ export function ModelPill({
|
||||
|
||||
if (!model.modelMenuContent) {
|
||||
return (
|
||||
<Button
|
||||
aria-label={copy.openModelPicker}
|
||||
className={pillClass}
|
||||
disabled={disabled}
|
||||
onClick={() => setModelPickerOpen(true)}
|
||||
title={copy.openModelPicker}
|
||||
type="button"
|
||||
variant="ghost"
|
||||
>
|
||||
{label}
|
||||
</Button>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<DropdownMenu onOpenChange={setOpen} open={open}>
|
||||
<DropdownMenuTrigger asChild>
|
||||
<Tip label={copy.openModelPicker} side="top">
|
||||
<Button
|
||||
aria-label={title}
|
||||
aria-label={copy.openModelPicker}
|
||||
className={pillClass}
|
||||
disabled={disabled}
|
||||
title={title}
|
||||
onClick={() => setModelPickerOpen(true)}
|
||||
type="button"
|
||||
variant="ghost"
|
||||
>
|
||||
{label}
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
</Tip>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<DropdownMenu onOpenChange={setOpen} open={open}>
|
||||
<Tip label={title} side="top">
|
||||
<DropdownMenuTrigger asChild>
|
||||
<Button
|
||||
aria-label={title}
|
||||
className={pillClass}
|
||||
disabled={disabled}
|
||||
type="button"
|
||||
variant="ghost"
|
||||
>
|
||||
{label}
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
</Tip>
|
||||
<DropdownMenuContent align="end" className="w-64 p-0" side="top" sideOffset={8}>
|
||||
<ModelMenuCloseContext.Provider value={() => setOpen(false)}>
|
||||
{model.modelMenuContent}
|
||||
|
||||
475
apps/desktop/src/app/chat/composer/status-stack/coding-row.tsx
Normal file
475
apps/desktop/src/app/chat/composer/status-stack/coding-row.tsx
Normal file
@@ -0,0 +1,475 @@
|
||||
import { useStore } from '@nanostores/react'
|
||||
import { memo, useCallback, useEffect, useRef, useState } from 'react'
|
||||
|
||||
import { StatusRow } from '@/components/chat/status-row'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import {
|
||||
Command,
|
||||
CommandEmpty,
|
||||
CommandGroup,
|
||||
CommandInput,
|
||||
CommandItem,
|
||||
CommandList
|
||||
} from '@/components/ui/command'
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogDescription,
|
||||
DialogFooter,
|
||||
DialogHeader,
|
||||
DialogTitle
|
||||
} from '@/components/ui/dialog'
|
||||
import { DiffCount } from '@/components/ui/diff-count'
|
||||
import {
|
||||
DropdownMenu,
|
||||
DropdownMenuContent,
|
||||
DropdownMenuItem,
|
||||
DropdownMenuLabel,
|
||||
DropdownMenuSeparator,
|
||||
DropdownMenuTrigger
|
||||
} from '@/components/ui/dropdown-menu'
|
||||
import { SanitizedInput } from '@/components/ui/sanitized-input'
|
||||
import type { HermesGitBranch } from '@/global'
|
||||
import { useI18n } from '@/i18n'
|
||||
import { gitRef } from '@/lib/sanitize'
|
||||
import { $repoStatus, $repoWorktrees } from '@/store/coding-status'
|
||||
import { notifyError } from '@/store/notifications'
|
||||
import { $newWorktreeRequest } from '@/store/projects'
|
||||
|
||||
// Tiny uppercase section header, matching the composer "+" menu's labels.
|
||||
const MENU_SECTION = 'text-[0.625rem] font-semibold uppercase tracking-wider text-(--ui-text-tertiary)'
|
||||
|
||||
/** The three label strings `branchActionLabel` chooses between. */
interface BranchActionCopy {
  /** Label used when the branch is neither checked out nor the default branch. */
  branchCreateWorktree: string
  /** Label used when the branch is already checked out. */
  branchOpenExisting: string
  /** Label used when the branch is the default branch and not checked out. */
  branchSwitchHome: string
}
|
||||
|
||||
/**
 * Pick the action label for a branch:
 * - already checked out → `copy.branchOpenExisting`
 * - default branch      → `copy.branchSwitchHome`
 * - anything else       → `copy.branchCreateWorktree`
 */
const branchActionLabel = (branch: HermesGitBranch, copy: BranchActionCopy) => {
  if (!branch.checkedOut) {
    return branch.isDefault ? copy.branchSwitchHome : copy.branchCreateWorktree
  }

  return copy.branchOpenExisting
}
|
||||
|
||||
/** Optional callbacks wired into the coding-status row. */
interface CodingStatusRowProps {
  /**
   * Move the current draft into a brand-new worktree + session on a new branch.
   * `base` is the branch name to start from; leaving it out means the current
   * HEAD. The composer owns the draft and therefore the orchestration — the row
   * only gathers the new branch name and its base. When this is omitted (e.g.
   * on a remote backend) the affordance is hidden.
   */
  onBranchOff?: (branch: string, base?: string) => Promise<void>
  /**
   * Check an already-existing branch out into a fresh worktree + session,
   * without creating a branch. Backs the dialog's "convert a branch" picker.
   */
  onConvertBranch?: (branch: string, path?: null | string, isDefault?: boolean) => Promise<void>
  /** Fetch the repo's local branches for the "convert a branch" picker. */
  onListBranches?: () => Promise<HermesGitBranch[]>
  /** Open the review pane (changed files + diffs). */
  onOpen?: () => void
  /** Jump to an existing worktree by opening a fresh session anchored there. */
  onOpenWorktree?: (path: string) => void
  /** Switch the current repo checkout over to another branch. */
  onSwitchBranch?: (branch: string) => Promise<void>
}
|
||||
|
||||
/**
|
||||
* The always-on coding-context row, the BASE of the composer status stack:
|
||||
* current branch, dirty summary (+/-), and ahead/behind. A touch more prominent
|
||||
* than the per-turn rows above it (larger branch label, accent glyph), and the
|
||||
* entry point to the review pane. Hidden when the active session isn't in a
|
||||
* local git repo (the probe returns null).
|
||||
*/
|
||||
export const CodingStatusRow = memo(function CodingStatusRow({
|
||||
onBranchOff,
|
||||
onConvertBranch,
|
||||
onListBranches,
|
||||
onOpen,
|
||||
onOpenWorktree,
|
||||
onSwitchBranch
|
||||
}: CodingStatusRowProps) {
|
||||
const { t } = useI18n()
|
||||
const s = t.statusStack.coding
|
||||
const p = t.sidebar.projects
|
||||
const status = useStore($repoStatus)
|
||||
const worktrees = useStore($repoWorktrees)
|
||||
|
||||
const [branchOpen, setBranchOpen] = useState(false)
|
||||
const [branchName, setBranchName] = useState('')
|
||||
const [branchBase, setBranchBase] = useState<string | undefined>(undefined)
|
||||
const [branchPending, setBranchPending] = useState(false)
|
||||
const [convertMode, setConvertMode] = useState(false)
|
||||
const [branches, setBranches] = useState<HermesGitBranch[]>([])
|
||||
const [branchesLoading, setBranchesLoading] = useState(false)
|
||||
|
||||
const loadBranches = useCallback(async () => {
|
||||
if (!onListBranches) {
|
||||
return
|
||||
}
|
||||
|
||||
setBranchesLoading(true)
|
||||
|
||||
try {
|
||||
setBranches(await onListBranches())
|
||||
} catch {
|
||||
setBranches([])
|
||||
} finally {
|
||||
setBranchesLoading(false)
|
||||
}
|
||||
}, [onListBranches])
|
||||
|
||||
// Open the name dialog for a chosen base. Deferred so the dropdown finishes
|
||||
// closing before the dialog grabs focus (Radix focus-trap handoff races
|
||||
// otherwise).
|
||||
const startBranch = (base: string | undefined) => {
|
||||
setBranchBase(base)
|
||||
setBranchName('')
|
||||
setConvertMode(false)
|
||||
setTimeout(() => setBranchOpen(true), 0)
|
||||
}
|
||||
|
||||
const startConvert = () => {
|
||||
setBranchBase(undefined)
|
||||
setBranchName('')
|
||||
setConvertMode(true)
|
||||
void loadBranches()
|
||||
setTimeout(() => setBranchOpen(true), 0)
|
||||
}
|
||||
|
||||
const enterConvert = () => {
|
||||
setConvertMode(true)
|
||||
void loadBranches()
|
||||
}
|
||||
|
||||
const convertBranch = async (branch: HermesGitBranch) => {
|
||||
if (branchPending || !branch || !onConvertBranch) {
|
||||
return
|
||||
}
|
||||
|
||||
setBranchPending(true)
|
||||
|
||||
try {
|
||||
await onConvertBranch(branch.name, branch.worktreePath, branch.isDefault)
|
||||
setBranchOpen(false)
|
||||
} catch (err) {
|
||||
notifyError(err, p.startWorkFailed)
|
||||
} finally {
|
||||
setBranchPending(false)
|
||||
}
|
||||
}
|
||||
|
||||
// Global ⌘⇧B (workspace.newWorktree): open the name dialog for a worktree off
|
||||
// current HEAD. The rail only renders inside a repo, so the hotkey naturally
|
||||
// no-ops elsewhere. Guarded by a token ref so it fires on the keypress, not on
|
||||
// mount or unrelated re-renders.
|
||||
const worktreeReq = useStore($newWorktreeRequest)
|
||||
const lastWorktreeReqRef = useRef(worktreeReq)
|
||||
|
||||
useEffect(() => {
|
||||
if (worktreeReq === lastWorktreeReqRef.current) {
|
||||
return
|
||||
}
|
||||
|
||||
lastWorktreeReqRef.current = worktreeReq
|
||||
|
||||
if (!onBranchOff) {
|
||||
return
|
||||
}
|
||||
|
||||
setBranchBase(undefined)
|
||||
setBranchName('')
|
||||
setConvertMode(false)
|
||||
setBranchOpen(true)
|
||||
}, [onBranchOff, worktreeReq])
|
||||
|
||||
const submitBranch = async () => {
|
||||
const branch = branchName.trim()
|
||||
|
||||
if (branchPending || !branch || !onBranchOff) {
|
||||
return
|
||||
}
|
||||
|
||||
setBranchPending(true)
|
||||
|
||||
try {
|
||||
await onBranchOff(branch, branchBase)
|
||||
setBranchOpen(false)
|
||||
setBranchName('')
|
||||
} catch (err) {
|
||||
notifyError(err, p.startWorkFailed)
|
||||
} finally {
|
||||
setBranchPending(false)
|
||||
}
|
||||
}
|
||||
|
||||
const switchToBranch = async (branch: string) => {
|
||||
if (!onSwitchBranch) {
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
await onSwitchBranch(branch)
|
||||
} catch (err) {
|
||||
notifyError(err, s.switchFailed(branch))
|
||||
}
|
||||
}
|
||||
|
||||
if (!status) {
|
||||
return null
|
||||
}
|
||||
|
||||
const branchLabel = status.detached ? s.detached : status.branch || s.noBranch
|
||||
// The kebab offers branching off the trunk and/or the current branch. The
|
||||
// worktree-add bases the new branch on `base` (a branch name; undefined =
|
||||
// current HEAD). We dedupe so "on main" shows a single trunk entry, and fall
|
||||
// back to a plain off-HEAD branch when no trunk is detected.
|
||||
const current = status.detached ? null : status.branch
|
||||
const branchTargets: { base: string | undefined; label: string }[] = []
|
||||
|
||||
// Current branch first (the 99% "branch off where I am"), then the trunk just
|
||||
// below it ("New branch from main"), deduped when they're the same.
|
||||
if (current) {
|
||||
branchTargets.push({ base: current, label: s.branchOffFrom(current) })
|
||||
}
|
||||
|
||||
if (status.defaultBranch && status.defaultBranch !== current) {
|
||||
branchTargets.push({ base: status.defaultBranch, label: s.branchOffFrom(status.defaultBranch) })
|
||||
}
|
||||
|
||||
if (branchTargets.length === 0) {
|
||||
branchTargets.push({ base: undefined, label: s.newBranch })
|
||||
}
|
||||
|
||||
const switchTarget = onSwitchBranch && current && status.defaultBranch && status.defaultBranch !== current ? status.defaultBranch : null
|
||||
|
||||
// Other worktrees to jump into — everything except the one we're already in
|
||||
// (matched by its checked-out branch) and the bare/main placeholder entry.
|
||||
const otherWorktrees = onOpenWorktree
|
||||
? worktrees.filter(w => w.path && !w.detached && w.branch && w.branch !== current)
|
||||
: []
|
||||
|
||||
const hasLineDelta = status.added > 0 || status.removed > 0
|
||||
// Untracked files carry no line delta vs HEAD, so surface them as a count when
|
||||
// they're the only change (otherwise +/- tells the story).
|
||||
const untrackedOnly = !hasLineDelta && status.untracked > 0
|
||||
|
||||
return (
|
||||
<>
|
||||
<StatusRow
|
||||
// The base "where am I working" strip is part of the composer surface
|
||||
// itself, so it inherits the composer's width and clipped top radius.
|
||||
className="coding-status-bar min-h-7 rounded-t-[inherit] rounded-b-none border-b border-(--ui-stroke-tertiary) px-3.5 py-1.5 hover:bg-transparent"
|
||||
// Static branch glyph — never the loading spinner. This row only renders
|
||||
// once `status` exists, so a spinner here only ever fired on *refreshes*
|
||||
// of an already-loaded repo (window focus, turn settle), reading as an
|
||||
// annoying icon "blip" with no first-load value. Refreshes are silent.
|
||||
leading={<Codicon className="text-(--ui-green)" name="git-branch" size="0.8rem" />}
|
||||
onActivate={onOpen}
|
||||
>
|
||||
<div className="flex min-w-0 flex-1 items-center gap-1">
|
||||
<span
|
||||
className="min-w-0 truncate text-xs font-normal text-muted-foreground/92 transition-colors group-hover/status-row:text-foreground/90"
|
||||
title={branchLabel}
|
||||
>
|
||||
{branchLabel}
|
||||
</span>
|
||||
|
||||
{/* Branch actions kebab — same pattern as the session/worktree rows.
|
||||
ALWAYS laid out; only its opacity flips on hover/focus/open, so
|
||||
revealing it never reflows the row (no layout shift). pointer-events
|
||||
follow opacity so the invisible trigger isn't clickable at rest. */}
|
||||
{onBranchOff && (
|
||||
<DropdownMenu>
|
||||
<DropdownMenuTrigger asChild>
|
||||
<Button
|
||||
aria-label={s.newBranch}
|
||||
className="pointer-events-none size-4 shrink-0 text-muted-foreground/60 opacity-0 transition hover:text-foreground group-hover/status-row:pointer-events-auto group-hover/status-row:opacity-100 group-focus-within/status-row:pointer-events-auto group-focus-within/status-row:opacity-100 data-[state=open]:pointer-events-auto data-[state=open]:opacity-100"
|
||||
onClick={event => event.stopPropagation()}
|
||||
onKeyDown={event => {
|
||||
// The row's onActivate also fires on Enter/Space; keep it from
|
||||
// opening the review pane when the kebab is the focus target.
|
||||
if (event.key === 'Enter' || event.key === ' ') {
|
||||
event.stopPropagation()
|
||||
}
|
||||
}}
|
||||
size="icon-xs"
|
||||
variant="ghost"
|
||||
>
|
||||
<Codicon name="kebab-vertical" size="0.8rem" />
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
{/* The row sits at the bottom of the screen (above the composer),
|
||||
so the menu opens upward. */}
|
||||
<DropdownMenuContent align="end" className="w-60" side="top" sideOffset={6}>
|
||||
<DropdownMenuLabel className={MENU_SECTION}>{s.newBranch}</DropdownMenuLabel>
|
||||
{branchTargets.map(target => (
|
||||
<DropdownMenuItem key={target.base ?? '__head__'} onSelect={() => startBranch(target.base)}>
|
||||
<span className="truncate">{target.label}</span>
|
||||
</DropdownMenuItem>
|
||||
))}
|
||||
|
||||
{switchTarget && (
|
||||
<DropdownMenuItem onSelect={() => void switchToBranch(switchTarget)}>
|
||||
<span className="truncate">{s.switchTo(switchTarget)}</span>
|
||||
</DropdownMenuItem>
|
||||
)}
|
||||
|
||||
<DropdownMenuSeparator />
|
||||
<DropdownMenuLabel className={MENU_SECTION}>{s.worktrees}</DropdownMenuLabel>
|
||||
{otherWorktrees.map(worktree => (
|
||||
<DropdownMenuItem key={worktree.path} onSelect={() => onOpenWorktree?.(worktree.path)}>
|
||||
<span className="truncate">{worktree.branch}</span>
|
||||
</DropdownMenuItem>
|
||||
))}
|
||||
{/* Create a fresh worktree off the current HEAD (the generic
|
||||
"spin up a worktree here", mirroring the sidebar's + button). */}
|
||||
<DropdownMenuItem onSelect={() => startBranch(undefined)}>
|
||||
<span className="truncate">{p.startWork}</span>
|
||||
</DropdownMenuItem>
|
||||
{/* Check an EXISTING branch out into a worktree (no new branch). */}
|
||||
{onConvertBranch && (
|
||||
<DropdownMenuItem onSelect={() => startConvert()}>
|
||||
<span className="truncate">{p.convertBranch}</span>
|
||||
</DropdownMenuItem>
|
||||
)}
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{(status.ahead > 0 || status.behind > 0) && (
|
||||
<span className="ml-auto flex shrink-0 items-center gap-1.5 text-[0.68rem] leading-4 text-muted-foreground/75 tabular-nums">
|
||||
{status.ahead > 0 && (
|
||||
<span className="flex items-center gap-0.5" title={s.ahead(status.ahead)}>
|
||||
<span aria-hidden>↑</span>
|
||||
{status.ahead}
|
||||
</span>
|
||||
)}
|
||||
{status.behind > 0 && (
|
||||
<span className="flex items-center gap-0.5" title={s.behind(status.behind)}>
|
||||
<span aria-hidden>↓</span>
|
||||
{status.behind}
|
||||
</span>
|
||||
)}
|
||||
</span>
|
||||
)}
|
||||
|
||||
{hasLineDelta ? (
|
||||
<DiffCount
|
||||
added={status.added}
|
||||
className={`text-[0.72rem] leading-4 ${status.ahead === 0 && status.behind === 0 ? 'ml-auto' : ''}`}
|
||||
removed={status.removed}
|
||||
/>
|
||||
) : untrackedOnly ? (
|
||||
<span
|
||||
className={`shrink-0 text-[0.72rem] leading-4 text-amber-500/90 ${status.ahead === 0 && status.behind === 0 ? 'ml-auto' : ''}`}
|
||||
>
|
||||
{s.changed(status.untracked)}
|
||||
</span>
|
||||
) : null}
|
||||
</StatusRow>
|
||||
|
||||
<Dialog onOpenChange={open => !branchPending && setBranchOpen(open)} open={branchOpen}>
|
||||
<DialogContent className="max-w-md">
|
||||
<DialogHeader>
|
||||
<DialogTitle>{convertMode ? p.convertBranchTitle : p.newWorktreeTitle}</DialogTitle>
|
||||
<DialogDescription>
|
||||
{convertMode ? p.convertBranchDesc : p.newWorktreeDesc}
|
||||
{!convertMode && branchBase && (
|
||||
<span className="mt-1 block text-(--ui-text-secondary)">{s.branchOffFrom(branchBase)}</span>
|
||||
)}
|
||||
</DialogDescription>
|
||||
</DialogHeader>
|
||||
|
||||
{convertMode ? (
|
||||
<Command
|
||||
className="rounded-md border border-(--ui-stroke-tertiary)"
|
||||
// The branch name is the authoritative key; filter on it directly.
|
||||
filter={(value, search) => (value.toLowerCase().includes(search.toLowerCase()) ? 1 : 0)}
|
||||
>
|
||||
<CommandInput autoFocus disabled={branchPending} placeholder={p.convertBranchPlaceholder} />
|
||||
<CommandList className="max-h-64">
|
||||
<CommandEmpty>{branchesLoading ? p.branchesLoading : p.noBranches}</CommandEmpty>
|
||||
<CommandGroup>
|
||||
{branches.map(branch => (
|
||||
<CommandItem
|
||||
disabled={branchPending}
|
||||
key={branch.name}
|
||||
onSelect={() => void convertBranch(branch)}
|
||||
value={branch.name}
|
||||
>
|
||||
<Codicon className="shrink-0 text-(--ui-text-tertiary)" name="git-branch" size="0.8rem" />
|
||||
<span className="truncate">{branch.name}</span>
|
||||
<span className="ml-auto shrink-0 text-[0.625rem] text-(--ui-text-tertiary)">
|
||||
{branchActionLabel(branch, p)}
|
||||
</span>
|
||||
</CommandItem>
|
||||
))}
|
||||
</CommandGroup>
|
||||
</CommandList>
|
||||
</Command>
|
||||
) : (
|
||||
<SanitizedInput
|
||||
autoFocus
|
||||
disabled={branchPending}
|
||||
onKeyDown={event => {
|
||||
if (event.key === 'Enter') {
|
||||
event.preventDefault()
|
||||
void submitBranch()
|
||||
} else if (event.key === 'Escape') {
|
||||
setBranchOpen(false)
|
||||
}
|
||||
}}
|
||||
onValueChange={setBranchName}
|
||||
placeholder={p.branchPlaceholder}
|
||||
sanitize={gitRef}
|
||||
value={branchName}
|
||||
/>
|
||||
)}
|
||||
|
||||
{convertMode ? (
|
||||
<DialogFooter className="sm:justify-start">
|
||||
<Button
|
||||
className="px-0 text-(--ui-text-secondary) hover:text-foreground"
|
||||
disabled={branchPending}
|
||||
onClick={() => setConvertMode(false)}
|
||||
type="button"
|
||||
variant="link"
|
||||
>
|
||||
{t.common.cancel}
|
||||
</Button>
|
||||
</DialogFooter>
|
||||
) : (
|
||||
<DialogFooter className="sm:justify-between">
|
||||
{onConvertBranch ? (
|
||||
<Button
|
||||
className="px-0 text-(--ui-text-secondary) hover:text-foreground"
|
||||
disabled={branchPending}
|
||||
onClick={enterConvert}
|
||||
type="button"
|
||||
variant="link"
|
||||
>
|
||||
{p.convertBranchInstead}
|
||||
</Button>
|
||||
) : (
|
||||
<span />
|
||||
)}
|
||||
<div className="flex items-center gap-2">
|
||||
<Button disabled={branchPending} onClick={() => setBranchOpen(false)} type="button" variant="ghost">
|
||||
{t.common.cancel}
|
||||
</Button>
|
||||
<Button
|
||||
disabled={branchPending || !branchName.trim()}
|
||||
onClick={() => void submitBranch()}
|
||||
type="button"
|
||||
>
|
||||
{p.startWork}
|
||||
</Button>
|
||||
</div>
|
||||
</DialogFooter>
|
||||
)}
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
</>
|
||||
)
|
||||
})
|
||||
@@ -19,15 +19,30 @@ import {
|
||||
type StatusGroup,
|
||||
stopBackgroundProcess
|
||||
} from '@/store/composer-status'
|
||||
import { $previewStatusBySession, dismissPreviewArtifact } from '@/store/preview-status'
|
||||
import { $threadScrolledUp } from '@/store/thread-scroll'
|
||||
import { openSessionInNewWindow } from '@/store/windows'
|
||||
|
||||
import { PreviewStatusRow } from './preview-row'
|
||||
import { StatusItemRow } from './status-row'
|
||||
|
||||
// Slow safety-net poll for silent exits (processes without notify_on_complete
|
||||
// emit no event when they die). Only armed while a running row is on screen.
|
||||
const BACKGROUND_POLL_MS = 5_000
|
||||
|
||||
// A localhost/loopback preview is only meaningful while its dev server is up, so
|
||||
// we tie it to a live background process rather than persisting dismissals or
|
||||
// letting dead URLs pile up. File previews (a real on-disk artifact) stand alone.
|
||||
// Matches a target whose HOST is loopback: an optional http(s)/ws(s) scheme and
// userinfo, then `localhost` (or a `*.localhost` name), any 127.x.x.x address,
// `0.0.0.0` or `[::1]`, an optional port, and finally a path/query/fragment
// delimiter or the end of the string.
const LOOPBACK_PREVIEW_TARGET =
  /^(?:(?:https?|wss?):\/\/)?(?:[^\s\/?#@]*@)?(?:(?:[a-z0-9-]+\.)*localhost|127(?:\.\d{1,3}){3}|0\.0\.0\.0|\[::1\])(?::\d+)?(?:[\/?#]|$)/i

// True only when `target` points at a loopback dev server. The match is anchored
// on the host position, so an on-disk path that merely contains the word
// "localhost" (e.g. `/Users/me/localhost/index.html`, `file://localhost/...`) or
// a remote URL that mentions it in its query is NOT treated as a dev-server
// preview and is not dropped when no background process is running.
const isLocalhostPreview = (target: string): boolean => LOOPBACK_PREVIEW_TARGET.test(target.trim())
|
||||
|
||||
// Real codicons per group (no sparkles): a checklist for todos, a bot for
|
||||
// subagents, a background process glyph for background tasks.
|
||||
const GROUP_ICON: Record<StatusGroup['type'], string> = {
|
||||
todo: 'checklist',
|
||||
subagent: 'hubot',
|
||||
background: 'server-process'
|
||||
}
|
||||
|
||||
const groupLabel = (group: StatusGroup, s: Translations['statusStack']) => {
|
||||
if (group.type === 'todo') {
|
||||
return s.todos(group.items.filter(i => i.todoStatus === 'completed').length, group.items.length)
|
||||
@@ -52,6 +67,7 @@ export function ComposerStatusStack({ queue, sessionId }: ComposerStatusStackPro
|
||||
const { t } = useI18n()
|
||||
const navigate = useNavigate()
|
||||
const itemsBySession = useStore($statusItemsBySession)
|
||||
const previewsBySession = useStore($previewStatusBySession)
|
||||
const scrolledUp = useStore($threadScrolledUp)
|
||||
|
||||
const groups = useMemo(
|
||||
@@ -59,6 +75,8 @@ export function ComposerStatusStack({ queue, sessionId }: ComposerStatusStackPro
|
||||
[itemsBySession, sessionId]
|
||||
)
|
||||
|
||||
const previews = sessionId ? (previewsBySession[sessionId] ?? []) : []
|
||||
|
||||
// Seed from the registry on session open; event-driven refreshes (terminal /
|
||||
// process tool completions) live in use-message-stream.
|
||||
useEffect(() => {
|
||||
@@ -69,6 +87,10 @@ export function ComposerStatusStack({ queue, sessionId }: ComposerStatusStackPro
|
||||
|
||||
const hasRunningBackground = groups.some(g => g.type === 'background' && g.items.some(i => i.state === 'running'))
|
||||
|
||||
// Drop localhost previews once no dev server is left running — that's what made
|
||||
// dead `localhost:5174` chips stick around. On-disk file previews are kept.
|
||||
const visiblePreviews = previews.filter(item => hasRunningBackground || !isLocalhostPreview(item.target))
|
||||
|
||||
useEffect(() => {
|
||||
if (!sessionId || !hasRunningBackground) {
|
||||
return
|
||||
@@ -84,6 +106,18 @@ export function ComposerStatusStack({ queue, sessionId }: ComposerStatusStackPro
|
||||
const openSubagent = (item: ComposerStatusItem) =>
|
||||
item.sessionId ? void openSessionInNewWindow(item.sessionId, { watch: true }) : openAgents()
|
||||
|
||||
// Preview links live as child rows of the background group — a localhost dev
|
||||
// server and its preview are the same thing — so they no longer float as an
|
||||
// odd, differently-indented standalone block under the stack.
|
||||
const previewRows =
|
||||
visiblePreviews.length > 0 && sessionId
|
||||
? visiblePreviews.map(item => (
|
||||
<PreviewStatusRow item={item} key={item.id} onDismiss={id => dismissPreviewArtifact(sessionId, id)} />
|
||||
))
|
||||
: []
|
||||
|
||||
const hasBackgroundGroup = groups.some(g => g.type === 'background')
|
||||
|
||||
const sections: { key: string; node: ReactNode }[] = groups.map(group => ({
|
||||
key: group.type,
|
||||
node: (
|
||||
@@ -102,11 +136,7 @@ export function ComposerStatusStack({ queue, sessionId }: ComposerStatusStackPro
|
||||
) : undefined
|
||||
}
|
||||
defaultCollapsed={group.type !== 'todo'}
|
||||
icon={
|
||||
group.type === 'todo' ? (
|
||||
<Codicon className="text-muted-foreground/70" name="checklist" size="0.8rem" />
|
||||
) : undefined
|
||||
}
|
||||
icon={<Codicon className="text-muted-foreground/70" name={GROUP_ICON[group.type]} size="0.8rem" />}
|
||||
label={groupLabel(group, t.statusStack)}
|
||||
>
|
||||
{group.items.map(item => (
|
||||
@@ -115,13 +145,23 @@ export function ComposerStatusStack({ queue, sessionId }: ComposerStatusStackPro
|
||||
key={item.id}
|
||||
onDismiss={sessionId ? id => dismissBackgroundProcess(sessionId, id) : undefined}
|
||||
onOpen={() => openSubagent(item)}
|
||||
onStop={sessionId ? id => stopBackgroundProcess(sessionId, id) : undefined}
|
||||
onStop={sessionId ? id => void stopBackgroundProcess(sessionId, id) : undefined}
|
||||
/>
|
||||
))}
|
||||
{group.type === 'background' && previewRows}
|
||||
</StatusSection>
|
||||
)
|
||||
}))
|
||||
|
||||
// No background group to host them (e.g. a standalone on-disk file preview):
|
||||
// keep the previews as their own row block so they don't disappear.
|
||||
if (previewRows.length > 0 && !hasBackgroundGroup) {
|
||||
sections.push({
|
||||
key: 'preview',
|
||||
node: <div className="px-1 py-0.5">{previewRows}</div>
|
||||
})
|
||||
}
|
||||
|
||||
if (queue) {
|
||||
sections.push({ key: 'queue', node: queue })
|
||||
}
|
||||
@@ -170,12 +210,10 @@ export function ComposerStatusStack({ queue, sessionId }: ComposerStatusStackPro
|
||||
|
||||
return (
|
||||
<div
|
||||
// Sits above the composer (bottom-full), nudged down by the shell's 0.5rem
|
||||
// top pad (pt-2 on composer-root) plus 1px so its bottom edge overlaps the
|
||||
// composer surface's top border. z BELOW the surface (z-4) so the surface's
|
||||
// top border paints over our transparent bottom border — one seam, no
|
||||
// double line.
|
||||
className="absolute inset-x-0 bottom-full z-3 max-h-[40vh] translate-y-[calc(0.5rem+1px)] overflow-y-auto"
|
||||
// Sits in the overlay lane above the composer. The composer root has pt-2
|
||||
// before the actual surface; translate by that amount so the stack returns
|
||||
// to its original attachment point without intruding into the repo strip.
|
||||
className="absolute inset-x-0 bottom-full z-3 max-h-[40vh] translate-y-2 overflow-y-auto"
|
||||
onPointerDownCapture={() => blurComposerInput()}
|
||||
ref={stackRef}
|
||||
>
|
||||
@@ -185,17 +223,19 @@ export function ComposerStatusStack({ queue, sessionId }: ComposerStatusStackPro
|
||||
Rounded top, square bottom; the bottom border is TRANSPARENT — the
|
||||
composer surface's visible top border (which sits at a higher z) is the
|
||||
single shared seam, so the two read as one fused capsule. */}
|
||||
<div className={cn(composerDockCard('top'), 'mx-2 rounded-b-none border-b border-b-transparent pt-0.5 pb-1')}>
|
||||
<div
|
||||
className={cn(
|
||||
'transition-opacity duration-200 ease-out',
|
||||
scrolledUp ? 'opacity-30 group-hover/composer:opacity-100' : 'opacity-100'
|
||||
)}
|
||||
>
|
||||
{sections.map(section => (
|
||||
<div key={section.key}>{section.node}</div>
|
||||
))}
|
||||
</div>
|
||||
<div
|
||||
className={cn(
|
||||
composerDockCard('top'),
|
||||
// Inset (mx-2) so the stack reads slightly narrower than the composer
|
||||
// surface below it — the original look.
|
||||
'mx-2 overflow-hidden rounded-b-none border-b border-b-transparent pt-0.5',
|
||||
'transition-opacity duration-200 ease-out',
|
||||
scrolledUp ? 'opacity-30 group-hover/composer:opacity-100' : 'opacity-100'
|
||||
)}
|
||||
>
|
||||
{sections.map(section => (
|
||||
<div key={section.key}>{section.node}</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
|
||||
121
apps/desktop/src/app/chat/composer/status-stack/preview-row.tsx
Normal file
121
apps/desktop/src/app/chat/composer/status-stack/preview-row.tsx
Normal file
@@ -0,0 +1,121 @@
|
||||
import { useStore } from '@nanostores/react'
|
||||
import { memo, useState } from 'react'
|
||||
|
||||
import { StatusRow } from '@/components/chat/status-row'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import { Tip } from '@/components/ui/tooltip'
|
||||
import { useI18n } from '@/i18n'
|
||||
import { normalizeOrLocalPreviewTarget } from '@/lib/local-preview'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { PREVIEW_PANE_ID } from '@/store/layout'
|
||||
import { notifyError } from '@/store/notifications'
|
||||
import { $paneOpen } from '@/store/panes'
|
||||
import { $previewTarget, dismissPreviewTarget, setCurrentSessionPreviewTarget } from '@/store/preview'
|
||||
import { type PreviewArtifact } from '@/store/preview-status'
|
||||
|
||||
interface PreviewStatusRowProps {
|
||||
item: PreviewArtifact
|
||||
onDismiss: (id: string) => void
|
||||
}
|
||||
|
||||
/** One detected artifact, single line, always visible: filename + open + close. */
|
||||
export const PreviewStatusRow = memo(function PreviewStatusRow({ item, onDismiss }: PreviewStatusRowProps) {
|
||||
const { t } = useI18n()
|
||||
const activePreview = useStore($previewTarget)
|
||||
const previewPaneOpen = useStore($paneOpen(PREVIEW_PANE_ID))
|
||||
const [opening, setOpening] = useState(false)
|
||||
const isOpen = activePreview?.source === item.target && previewPaneOpen
|
||||
|
||||
// Turns this row's raw `item.target` into a preview target via
// normalizeOrLocalPreviewTarget. Rejects with an Error naming the raw target
// when that helper yields nothing, so each caller reports the failure from its
// own catch block.
const resolveTarget = async () => {
|
||||
// A falsy cwd (e.g. an empty string) is passed on as `undefined`.
const target = await normalizeOrLocalPreviewTarget(item.target, item.cwd || undefined)
|
||||
|
||||
if (!target) {
|
||||
throw new Error(`Could not open preview target: ${item.target}`)
|
||||
}
|
||||
|
||||
return target
|
||||
}
|
||||
|
||||
// Toggles the in-app preview for this row. When this row's target is the
// active preview and the preview pane is open (`isOpen`), it calls
// dismissPreviewTarget(); otherwise it resolves the target and sets it as the
// current session's preview target. Failures are reported through notifyError
// with the `t.preview.unavailable` message.
const togglePreview = async () => {
|
||||
// Ignore activations while an earlier open is still resolving.
if (opening) {
|
||||
return
|
||||
}
|
||||
|
||||
if (isOpen) {
|
||||
dismissPreviewTarget()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// `opening` also drives the pulse animation on the row's leading icon.
setOpening(true)
|
||||
|
||||
try {
|
||||
// The raw `item.target` is passed as the third argument; `isOpen` compares
// the active preview's `source` against that same raw value.
setCurrentSessionPreviewTarget(await resolveTarget(), 'tool-result', item.target)
|
||||
} catch (error) {
|
||||
notifyError(error, t.preview.unavailable)
|
||||
} finally {
|
||||
// Always clear the flag, whether the open succeeded or failed.
setOpening(false)
|
||||
}
|
||||
}
|
||||
|
||||
// Opens this row's resolved target URL through the desktop bridge's
// `openPreviewInBrowser`. A missing bridge or an unresolvable target is
// reported through notifyError with the `t.preview.unavailable` message
// rather than thrown to the caller.
const openInBrowser = async () => {
|
||||
try {
|
||||
// `window.hermesDesktop` may be absent, hence the optional chain.
const bridge = window.hermesDesktop?.openPreviewInBrowser
|
||||
|
||||
if (!bridge) {
|
||||
throw new Error('Desktop preview browser bridge is unavailable')
|
||||
}
|
||||
|
||||
// Only the resolved target's `url` is handed to the bridge.
await bridge((await resolveTarget()).url)
|
||||
} catch (error) {
|
||||
notifyError(error, t.preview.unavailable)
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<StatusRow
|
||||
leading={
|
||||
<Codicon aria-hidden className={cn('text-muted-foreground/70', opening && 'animate-pulse')} name="globe" size="0.8rem" />
|
||||
}
|
||||
// Plain click opens the link in the browser; ⌘/Ctrl-click opens it in the
|
||||
// in-app preview pane instead. (isOpen still toggles the pane closed.)
|
||||
onActivate={event => {
|
||||
if (event.metaKey || event.ctrlKey) {
|
||||
void togglePreview()
|
||||
} else {
|
||||
void openInBrowser()
|
||||
}
|
||||
}}
|
||||
trailing={
|
||||
<Tip label={t.statusStack.dismiss}>
|
||||
<Button
|
||||
aria-label={t.statusStack.dismiss}
|
||||
className="-my-1 size-4 rounded-md text-muted-foreground/60 hover:text-foreground/90"
|
||||
onClick={event => {
|
||||
event.stopPropagation()
|
||||
onDismiss(item.id)
|
||||
}}
|
||||
size="icon-xs"
|
||||
type="button"
|
||||
variant="ghost"
|
||||
>
|
||||
<Codicon name="close" size="0.75rem" />
|
||||
</Button>
|
||||
</Tip>
|
||||
}
|
||||
trailingVisible
|
||||
>
|
||||
<Tip
|
||||
label={
|
||||
<span className="flex flex-col gap-0.5">
|
||||
<span>{item.target}</span>
|
||||
<span className="opacity-70">{t.preview.linkHint}</span>
|
||||
</span>
|
||||
}
|
||||
>
|
||||
<span className="min-w-0 max-w-[18rem] truncate text-[0.73rem] leading-4 text-foreground/92">{item.label}</span>
|
||||
</Tip>
|
||||
</StatusRow>
|
||||
)
|
||||
})
|
||||
@@ -8,7 +8,6 @@ import { DisclosureCaret } from '@/components/ui/disclosure-caret'
|
||||
import { GlyphSpinner } from '@/components/ui/glyph-spinner'
|
||||
import { Tip } from '@/components/ui/tooltip'
|
||||
import { type Translations, useI18n } from '@/i18n'
|
||||
import { ArrowUpRight, X } from '@/lib/icons'
|
||||
import type { TodoStatus } from '@/lib/todos'
|
||||
import { cn } from '@/lib/utils'
|
||||
import type { ComposerStatusItem } from '@/store/composer-status'
|
||||
@@ -50,7 +49,7 @@ function leadingGlyph(item: ComposerStatusItem, s: Translations['statusStack']):
|
||||
return (
|
||||
<GlyphSpinner
|
||||
ariaLabel={s.running}
|
||||
className="text-[0.9rem] leading-none text-muted-foreground/80"
|
||||
className="text-[0.85rem] leading-none text-muted-foreground/80"
|
||||
spinner="braille"
|
||||
/>
|
||||
)
|
||||
@@ -117,11 +116,11 @@ export const StatusItemRow = memo(function StatusItemRow({ item, onDismiss, onOp
|
||||
type="button"
|
||||
variant="ghost"
|
||||
>
|
||||
<X size={12} />
|
||||
<Codicon name="close" size="0.75rem" />
|
||||
</Button>
|
||||
</Tip>
|
||||
) : canOpen ? (
|
||||
<ArrowUpRight aria-hidden className="size-3.5 text-muted-foreground/55" />
|
||||
<Codicon aria-hidden className="text-muted-foreground/55" name="link-external" size="0.85rem" />
|
||||
) : undefined
|
||||
}
|
||||
>
|
||||
|
||||
@@ -88,7 +88,10 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
|
||||
onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
|
||||
onEdit: (message: AppendMessage) => Promise<void>
|
||||
onReload: (parentId: string | null) => Promise<void>
|
||||
onRestoreToMessage?: (messageId: string) => Promise<void>
|
||||
onRestoreToMessage?: (
|
||||
messageId: string,
|
||||
target?: { text?: string; userOrdinal?: number | null }
|
||||
) => Promise<void>
|
||||
onRetryResume: (sessionId: string) => void
|
||||
onTranscribeAudio?: (audio: Blob) => Promise<string>
|
||||
onDismissError?: (messageId: string) => void
|
||||
|
||||
@@ -6,7 +6,7 @@ import type {
|
||||
MouseEvent as ReactMouseEvent,
|
||||
ReactNode
|
||||
} from 'react'
|
||||
import { useEffect, useMemo, useState } from 'react'
|
||||
import { Fragment, useEffect, useMemo, useState } from 'react'
|
||||
import ShikiHighlighter from 'react-shiki'
|
||||
import { Streamdown } from 'streamdown'
|
||||
|
||||
@@ -14,15 +14,21 @@ import { requestComposerFocus, requestComposerInsertRefs } from '@/app/chat/comp
|
||||
import { droppedFileInlineRef } from '@/app/chat/composer/inline-refs'
|
||||
import { HERMES_PATHS_MIME } from '@/app/chat/hooks/use-composer-actions'
|
||||
import { isAddSelectionShortcut } from '@/app/right-sidebar/terminal/selection'
|
||||
import { FileDiffPanel } from '@/components/chat/diff-lines'
|
||||
import { chunkTextLines, useFixedRowWindow } from '@/components/chat/fixed-row-window'
|
||||
import { PageLoader } from '@/components/page-loader'
|
||||
import { translateNow, useI18n } from '@/i18n'
|
||||
import { readDesktopFileDataUrl, readDesktopFileText } from '@/lib/desktop-fs'
|
||||
import { desktopFileDiff, desktopGitRoot, readDesktopFileDataUrl, readDesktopFileText } from '@/lib/desktop-fs'
|
||||
import { shikiLanguageForFilename } from '@/lib/markdown-code'
|
||||
import { cn } from '@/lib/utils'
|
||||
import type { PreviewTarget } from '@/store/preview'
|
||||
import { $currentCwd } from '@/store/session'
|
||||
|
||||
const SHIKI_THEME = { dark: 'github-dark-default', light: 'github-light-default' } as const
|
||||
const TEXT_PREVIEW_MAX_BYTES = 512 * 1024
|
||||
const SOURCE_CHUNK_LINES = 200
|
||||
const SOURCE_LINE_PX = 20
|
||||
const SOURCE_OVERSCAN_LINES = 400
|
||||
|
||||
type EmptyStateTone = 'neutral' | 'warning'
|
||||
|
||||
@@ -126,6 +132,8 @@ interface LocalPreviewState {
|
||||
binary?: boolean
|
||||
byteSize?: number
|
||||
dataUrl?: string
|
||||
/** Working-tree-vs-HEAD unified diff, when the file has uncommitted changes. */
|
||||
diff?: string
|
||||
error?: string
|
||||
language?: string
|
||||
loading: boolean
|
||||
@@ -299,28 +307,44 @@ function MarkdownPreview({ text }: { text: string }) {
|
||||
)
|
||||
}
|
||||
|
||||
function PreviewToggle({ asSource, onToggle }: { asSource: boolean; onToggle: () => void }) {
|
||||
function PreviewModeSwitcher({
|
||||
active,
|
||||
modes,
|
||||
onSelect
|
||||
}: {
|
||||
active: PreviewViewMode
|
||||
modes: PreviewViewMode[]
|
||||
onSelect: (mode: PreviewViewMode) => void
|
||||
}) {
|
||||
const { t } = useI18n()
|
||||
|
||||
const label: Record<PreviewViewMode, string> = {
|
||||
diff: t.preview.diff,
|
||||
rendered: t.preview.renderedPreview,
|
||||
source: t.preview.source
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="sticky top-0 z-10 flex justify-end border-b border-border/40 bg-transparent px-3 py-1 backdrop-blur">
|
||||
<button
|
||||
className="text-[0.625rem] font-bold text-muted-foreground underline decoration-current/20 underline-offset-4 transition-colors hover:text-foreground"
|
||||
onClick={onToggle}
|
||||
type="button"
|
||||
>
|
||||
{asSource ? t.preview.renderedPreview : t.preview.source}
|
||||
</button>
|
||||
<div className="flex shrink-0 justify-end gap-3 border-b border-border/40 px-3 py-1">
|
||||
{modes.map(mode => (
|
||||
<button
|
||||
className={cn(
|
||||
'text-[0.625rem] font-bold underline-offset-4 transition-colors',
|
||||
mode === active
|
||||
? 'text-foreground underline decoration-current/30'
|
||||
: 'text-muted-foreground hover:text-foreground'
|
||||
)}
|
||||
key={mode}
|
||||
onClick={() => onSelect(mode)}
|
||||
type="button"
|
||||
>
|
||||
{label[mode]}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// Gutter and Shiki output share `font-mono text-xs leading-relaxed py-3` so
|
||||
// each line aligns vertically. The selection overlay relies on the same
|
||||
// `text-xs * leading-relaxed = 1.21875rem` line-height to position itself.
|
||||
const SOURCE_LINE_HEIGHT_REM = 1.21875
|
||||
const SOURCE_PAD_Y_REM = 0.75
|
||||
|
||||
interface LineSelection {
|
||||
end: number
|
||||
start: number
|
||||
@@ -337,7 +361,18 @@ function startLineDrag(event: ReactDragEvent<HTMLElement>, filePath: string, { e
|
||||
|
||||
function SourceView({ filePath, language, text }: { filePath: string; language: string; text: string }) {
|
||||
const { t } = useI18n()
|
||||
const lineCount = useMemo(() => Math.max(1, text.split('\n').length), [text])
|
||||
const chunks = useMemo(() => chunkTextLines(text, SOURCE_CHUNK_LINES), [text])
|
||||
const lastChunk = chunks.at(-1)
|
||||
const totalLines = lastChunk ? lastChunk.start + lastChunk.lines.length : 0
|
||||
|
||||
const { afterRows, beforeRows, endChunk, onScroll, scrollerRef, startChunk } = useFixedRowWindow({
|
||||
overscanRows: SOURCE_OVERSCAN_LINES,
|
||||
rowPx: SOURCE_LINE_PX,
|
||||
rowsPerChunk: SOURCE_CHUNK_LINES,
|
||||
totalRows: totalLines
|
||||
})
|
||||
|
||||
const visibleChunks = chunks.slice(startChunk, endChunk + 1)
|
||||
const [selection, setSelection] = useState<LineSelection | null>(null)
|
||||
const inSelection = (line: number) => selection != null && line >= selection.start && line <= selection.end
|
||||
|
||||
@@ -394,69 +429,76 @@ function SourceView({ filePath, language, text }: { filePath: string; language:
|
||||
}, [filePath, selection])
|
||||
|
||||
return (
|
||||
<div className="grid min-w-max grid-cols-[auto_minmax(0,1fr)] font-mono text-xs leading-relaxed">
|
||||
<div className="select-none py-3 text-right text-muted-foreground/55">
|
||||
{Array.from({ length: lineCount }, (_, index) => {
|
||||
const line = index + 1
|
||||
const selected = inSelection(line)
|
||||
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
'cursor-pointer px-3 tabular-nums transition-colors',
|
||||
selected
|
||||
? 'bg-amber-200/45 text-amber-900 dark:bg-amber-300/20 dark:text-amber-100'
|
||||
: 'hover:text-foreground'
|
||||
)}
|
||||
draggable
|
||||
key={line}
|
||||
onClick={event => handleLineClick(event, line)}
|
||||
onDragStart={event => handleDragStart(event, line)}
|
||||
title={t.preview.sourceLineTitle}
|
||||
>
|
||||
{line}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
<div
|
||||
className="relative [&_pre]:m-0 [&_pre]:px-3 [&_pre]:py-3 [&_pre]:bg-transparent!"
|
||||
data-selectable-text="true"
|
||||
>
|
||||
{selection && (
|
||||
<div
|
||||
aria-hidden
|
||||
className="pointer-events-none absolute inset-x-0 bg-amber-200/35 dark:bg-amber-300/10"
|
||||
style={{
|
||||
top: `calc(${SOURCE_PAD_Y_REM}rem + ${selection.start - 1} * ${SOURCE_LINE_HEIGHT_REM}rem)`,
|
||||
height: `calc(${selection.end - selection.start + 1} * ${SOURCE_LINE_HEIGHT_REM}rem)`
|
||||
}}
|
||||
/>
|
||||
<div className="h-full overflow-auto" onScroll={onScroll} ref={scrollerRef}>
|
||||
<div className="grid min-w-max grid-cols-[auto_minmax(0,1fr)] font-mono text-[0.7rem] leading-relaxed">
|
||||
{beforeRows > 0 && (
|
||||
<div aria-hidden className="col-span-2" style={{ height: beforeRows * SOURCE_LINE_PX }} />
|
||||
)}
|
||||
{visibleChunks.map(chunk => (
|
||||
<Fragment key={chunk.start}>
|
||||
<div className="select-none text-right text-muted-foreground/55">
|
||||
{chunk.lines.map((_lineText, offset) => {
|
||||
const line = chunk.start + offset + 1
|
||||
const selected = inSelection(line)
|
||||
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
'h-5 w-9 cursor-pointer pr-2 leading-5 tabular-nums transition-colors',
|
||||
selected
|
||||
? 'bg-amber-200/45 text-amber-900 dark:bg-amber-300/20 dark:text-amber-100'
|
||||
: 'hover:text-foreground'
|
||||
)}
|
||||
draggable
|
||||
key={line}
|
||||
onClick={event => handleLineClick(event, line)}
|
||||
onDragStart={event => handleDragStart(event, line)}
|
||||
title={t.preview.sourceLineTitle}
|
||||
>
|
||||
{line}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
<div className="preview-source-code min-w-0 [&_pre]:m-0" data-selectable-text="true">
|
||||
<ShikiHighlighter
|
||||
addDefaultStyles={false}
|
||||
as="div"
|
||||
defaultColor="light-dark()"
|
||||
delay={80}
|
||||
language={language || 'text'}
|
||||
showLanguage={false}
|
||||
theme={SHIKI_THEME}
|
||||
>
|
||||
{chunk.text}
|
||||
</ShikiHighlighter>
|
||||
</div>
|
||||
</Fragment>
|
||||
))}
|
||||
{afterRows > 0 && (
|
||||
<div aria-hidden className="col-span-2" style={{ height: afterRows * SOURCE_LINE_PX }} />
|
||||
)}
|
||||
<ShikiHighlighter
|
||||
addDefaultStyles={false}
|
||||
as="div"
|
||||
defaultColor="light-dark()"
|
||||
delay={80}
|
||||
language={language || 'text'}
|
||||
showLanguage={false}
|
||||
theme={SHIKI_THEME}
|
||||
>
|
||||
{text}
|
||||
</ShikiHighlighter>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
type PreviewViewMode = 'diff' | 'rendered' | 'source'
|
||||
|
||||
export function LocalFilePreview({ reloadKey, target }: { reloadKey: number; target: PreviewTarget }) {
|
||||
const { t } = useI18n()
|
||||
const [state, setState] = useState<LocalPreviewState>({ loading: true })
|
||||
const [forcePreview, setForcePreview] = useState(false)
|
||||
const [renderMarkdownAsSource, setRenderMarkdownAsSource] = useState(false)
|
||||
// User-picked view; null = auto (diff when changed, else rendered markdown,
|
||||
// else source). Reset when the previewed file changes.
|
||||
const [userMode, setUserMode] = useState<null | PreviewViewMode>(null)
|
||||
const filePath = filePathForTarget(target)
|
||||
const isImage = target.previewKind === 'image'
|
||||
|
||||
useEffect(() => {
|
||||
setUserMode(null)
|
||||
}, [filePath, reloadKey])
|
||||
|
||||
// HTML files are rendered as source code, not in a webview - so they take
|
||||
// the same path as plain text files. `previewKind === 'binary'` arrives
|
||||
// when the file is forcibly previewed past the binary refusal screen.
|
||||
@@ -508,6 +550,22 @@ export function LocalFilePreview({ reloadKey, target }: { reloadKey: number; tar
|
||||
text: shouldBlock ? undefined : result.text,
|
||||
truncated: result.truncated
|
||||
})
|
||||
|
||||
// Best-effort: fetch the file's working-tree-vs-HEAD diff so the
|
||||
// preview can offer a DIFF view when there are uncommitted changes.
|
||||
// Empty (clean file / not a repo / remote) just hides the option.
|
||||
if (!shouldBlock) {
|
||||
try {
|
||||
const root = await desktopGitRoot(filePath)
|
||||
const diff = root ? await desktopFileDiff(root, filePath) : ''
|
||||
|
||||
if (active && diff.trim()) {
|
||||
setState(prev => (prev.text === result.text ? { ...prev, diff } : prev))
|
||||
}
|
||||
} catch {
|
||||
// No diff available; the preview just shows source.
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (active) {
|
||||
@@ -571,21 +629,50 @@ export function LocalFilePreview({ reloadKey, target }: { reloadKey: number; tar
|
||||
|
||||
if (isText && state.text !== undefined) {
|
||||
const isMarkdown = (state.language || target.language) === 'markdown'
|
||||
const showRendered = isMarkdown && !renderMarkdownAsSource
|
||||
const hasDiff = Boolean(state.diff && state.diff.trim())
|
||||
// Order the toggle reads left→right; default lands on the most useful view.
|
||||
const modes: PreviewViewMode[] = []
|
||||
|
||||
if (isMarkdown) {
|
||||
modes.push('rendered')
|
||||
}
|
||||
|
||||
modes.push('source')
|
||||
|
||||
if (hasDiff) {
|
||||
modes.push('diff')
|
||||
}
|
||||
|
||||
const autoMode: PreviewViewMode = hasDiff ? 'diff' : isMarkdown ? 'rendered' : 'source'
|
||||
const mode = userMode && modes.includes(userMode) ? userMode : autoMode
|
||||
|
||||
return (
|
||||
<div className="h-full overflow-auto bg-transparent">
|
||||
<div className="flex h-full flex-col overflow-hidden bg-transparent">
|
||||
{state.truncated && (
|
||||
<div className="border-b border-border/60 bg-muted/35 px-3 py-1.5 text-[0.68rem] text-muted-foreground">
|
||||
{t.preview.truncated}
|
||||
</div>
|
||||
)}
|
||||
{isMarkdown && <PreviewToggle asSource={!showRendered} onToggle={() => setRenderMarkdownAsSource(s => !s)} />}
|
||||
{showRendered ? (
|
||||
<MarkdownPreview text={state.text} />
|
||||
) : (
|
||||
<SourceView filePath={filePath} language={state.language || 'text'} text={state.text} />
|
||||
)}
|
||||
{modes.length > 1 && <PreviewModeSwitcher active={mode} modes={modes} onSelect={setUserMode} />}
|
||||
<div className="min-h-0 flex-1 overflow-auto">
|
||||
{mode === 'rendered' ? (
|
||||
<MarkdownPreview text={state.text} />
|
||||
) : mode === 'diff' ? (
|
||||
<FileDiffPanel
|
||||
className="mx-0 mb-0 h-full max-h-none"
|
||||
diff={state.diff ?? ''}
|
||||
fullText={state.text}
|
||||
path={filePath}
|
||||
showLineNumbers
|
||||
/>
|
||||
) : (
|
||||
<SourceView
|
||||
filePath={filePath}
|
||||
language={shikiLanguageForFilename(filePath) || state.language || 'text'}
|
||||
text={state.text}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -3,10 +3,19 @@ import { useEffect, useMemo } from 'react'
|
||||
|
||||
import type { SetTitlebarToolGroup } from '@/app/shell/titlebar-controls'
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import {
|
||||
ContextMenu,
|
||||
ContextMenuContent,
|
||||
ContextMenuItem,
|
||||
ContextMenuSeparator,
|
||||
ContextMenuTrigger
|
||||
} from '@/components/ui/context-menu'
|
||||
import { Tip } from '@/components/ui/tooltip'
|
||||
import { translateNow, useI18n } from '@/i18n'
|
||||
import { formatCombo } from '@/lib/keybinds/combo'
|
||||
import { cn } from '@/lib/utils'
|
||||
import {
|
||||
$panesFlipped,
|
||||
$rightRailActiveTabId,
|
||||
RIGHT_RAIL_PREVIEW_TAB_ID,
|
||||
type RightRailTabId,
|
||||
@@ -16,8 +25,10 @@ import {
|
||||
$filePreviewTabs,
|
||||
$previewReloadRequest,
|
||||
$previewTarget,
|
||||
closeOtherRightRailTabs,
|
||||
closeRightRail,
|
||||
closeRightRailTab,
|
||||
closeRightRailTabsToRight,
|
||||
type PreviewTarget
|
||||
} from '@/store/preview'
|
||||
|
||||
@@ -56,6 +67,7 @@ export function ChatPreviewRail({ onRestartServer, setTitlebarToolGroup }: ChatP
|
||||
const { t } = useI18n()
|
||||
const previewReloadRequest = useStore($previewReloadRequest)
|
||||
const activeTabId = useStore($rightRailActiveTabId)
|
||||
const panesFlipped = useStore($panesFlipped)
|
||||
const filePreviewTabs = useStore($filePreviewTabs)
|
||||
const previewTarget = useStore($previewTarget)
|
||||
|
||||
@@ -82,68 +94,92 @@ export function ChatPreviewRail({ onRestartServer, setTitlebarToolGroup }: ChatP
|
||||
const isPreview = activeTab.id === RIGHT_RAIL_PREVIEW_TAB_ID
|
||||
|
||||
return (
|
||||
<aside className="relative flex h-full w-full min-w-0 flex-col overflow-hidden border-l border-(--ui-stroke-tertiary) bg-(--ui-editor-surface-background) text-(--ui-text-tertiary)">
|
||||
<aside
|
||||
className={cn(
|
||||
'relative flex h-full w-full min-w-0 flex-col overflow-hidden border-(--ui-stroke-tertiary) bg-(--ui-editor-surface-background) text-(--ui-text-tertiary)',
|
||||
panesFlipped ? 'border-r' : 'border-l'
|
||||
)}
|
||||
>
|
||||
<div className="group/rail-tabs flex h-(--titlebar-height) shrink-0 border-b border-(--ui-stroke-tertiary) bg-(--ui-sidebar-surface-background)">
|
||||
<div
|
||||
className="flex min-w-0 flex-1 overflow-x-auto overflow-y-hidden overscroll-x-contain [-ms-overflow-style:none] [scrollbar-width:none] [&::-webkit-scrollbar]:hidden"
|
||||
role="tablist"
|
||||
>
|
||||
{tabs.map(tab => {
|
||||
{tabs.map((tab, index) => {
|
||||
const active = tab.id === activeTab.id
|
||||
const hasOthers = tabs.length > 1
|
||||
const hasTabsToRight = index < tabs.length - 1
|
||||
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
'group/tab relative flex h-full min-w-0 max-w-48 shrink-0 items-center text-[0.6875rem] font-medium [-webkit-app-region:no-drag] last:border-r last:border-(--ui-stroke-quaternary)',
|
||||
active
|
||||
? 'bg-(--ui-editor-surface-background) text-foreground [--tab-bg:var(--ui-editor-surface-background)]'
|
||||
: 'border-r border-(--ui-stroke-quaternary) text-(--ui-text-tertiary) [--tab-bg:var(--ui-sidebar-surface-background)] hover:bg-(--chrome-action-hover) hover:text-foreground'
|
||||
)}
|
||||
key={tab.id}
|
||||
// Middle-click closes the tab, matching browser/IDE muscle
|
||||
// memory. `onMouseDown` swallows the middle-button press so
|
||||
// Chromium doesn't switch into autoscroll mode.
|
||||
onAuxClick={event => {
|
||||
if (event.button !== 1) {
|
||||
return
|
||||
}
|
||||
<ContextMenu key={tab.id}>
|
||||
<ContextMenuTrigger asChild>
|
||||
<div
|
||||
className={cn(
|
||||
'group/tab relative flex h-full min-w-0 max-w-48 shrink-0 items-center text-[0.6875rem] font-medium [-webkit-app-region:no-drag] last:border-r last:border-(--ui-stroke-quaternary)',
|
||||
active
|
||||
? 'bg-(--ui-editor-surface-background) text-foreground [--tab-bg:var(--ui-editor-surface-background)]'
|
||||
: 'border-r border-(--ui-stroke-quaternary) text-(--ui-text-tertiary) [--tab-bg:var(--ui-sidebar-surface-background)] hover:bg-(--chrome-action-hover) hover:text-foreground'
|
||||
)}
|
||||
// Middle-click closes the tab, matching browser/IDE muscle
|
||||
// memory. `onMouseDown` swallows the middle-button press so
|
||||
// Chromium doesn't switch into autoscroll mode.
|
||||
onAuxClick={event => {
|
||||
if (event.button !== 1) {
|
||||
return
|
||||
}
|
||||
|
||||
event.preventDefault()
|
||||
closeRightRailTab(tab.id)
|
||||
}}
|
||||
onMouseDown={event => {
|
||||
if (event.button === 1) {
|
||||
event.preventDefault()
|
||||
}
|
||||
}}
|
||||
>
|
||||
{active && (
|
||||
<span aria-hidden="true" className="absolute inset-x-0 top-0 h-px bg-(--ui-stroke-primary)" />
|
||||
)}
|
||||
<Tip label={tab.label}>
|
||||
<button
|
||||
aria-selected={active}
|
||||
className="flex h-full min-w-0 max-w-full items-center overflow-hidden pl-3 pr-2 text-left outline-none"
|
||||
onClick={() => selectRightRailTab(tab.id)}
|
||||
role="tab"
|
||||
type="button"
|
||||
event.preventDefault()
|
||||
closeRightRailTab(tab.id)
|
||||
}}
|
||||
onMouseDown={event => {
|
||||
if (event.button === 1) {
|
||||
event.preventDefault()
|
||||
}
|
||||
}}
|
||||
>
|
||||
<span className="block min-w-0 truncate">{tab.label}</span>
|
||||
</button>
|
||||
</Tip>
|
||||
<span
|
||||
aria-hidden="true"
|
||||
className="pointer-events-none absolute inset-y-0 right-0 w-9 bg-[linear-gradient(to_right,transparent,var(--tab-bg)_55%)] opacity-0 transition-opacity group-hover/tab:opacity-100 group-focus-within/tab:opacity-100"
|
||||
/>
|
||||
<button
|
||||
aria-label={t.preview.closeTab(tab.label)}
|
||||
className="pointer-events-none absolute right-1.5 top-1/2 grid size-4 -translate-y-1/2 place-items-center rounded-sm text-(--ui-text-tertiary) opacity-0 transition-[background-color,color,opacity] hover:bg-(--ui-bg-secondary) hover:text-foreground focus-visible:pointer-events-auto focus-visible:opacity-100 group-hover/tab:pointer-events-auto group-hover/tab:opacity-100 group-focus-within/tab:pointer-events-auto group-focus-within/tab:opacity-100"
|
||||
onClick={() => closeRightRailTab(tab.id)}
|
||||
type="button"
|
||||
>
|
||||
<Codicon name="close" size="0.75rem" />
|
||||
</button>
|
||||
</div>
|
||||
{active && (
|
||||
<span aria-hidden="true" className="absolute inset-x-0 top-0 h-px bg-(--ui-stroke-primary)" />
|
||||
)}
|
||||
<Tip label={tab.target.path || tab.target.url || tab.label}>
|
||||
<button
|
||||
aria-selected={active}
|
||||
className="flex h-full min-w-0 max-w-full items-center overflow-hidden pl-3 pr-2 text-left outline-none"
|
||||
onClick={() => selectRightRailTab(tab.id)}
|
||||
role="tab"
|
||||
type="button"
|
||||
>
|
||||
<span className="block min-w-0 truncate">{tab.label}</span>
|
||||
</button>
|
||||
</Tip>
|
||||
<span
|
||||
aria-hidden="true"
|
||||
className="pointer-events-none absolute inset-y-0 right-0 w-9 bg-[linear-gradient(to_right,transparent,var(--tab-bg)_55%)] opacity-0 transition-opacity group-hover/tab:opacity-100 group-focus-within/tab:opacity-100"
|
||||
/>
|
||||
<button
|
||||
aria-label={t.preview.closeTab(tab.label)}
|
||||
className="pointer-events-none absolute right-1.5 top-1/2 grid size-4 -translate-y-1/2 place-items-center rounded-sm text-(--ui-text-tertiary) opacity-0 transition-[background-color,color,opacity] hover:bg-(--ui-bg-secondary) hover:text-foreground focus-visible:pointer-events-auto focus-visible:opacity-100 group-hover/tab:pointer-events-auto group-hover/tab:opacity-100 group-focus-within/tab:pointer-events-auto group-focus-within/tab:opacity-100"
|
||||
onClick={() => closeRightRailTab(tab.id)}
|
||||
type="button"
|
||||
>
|
||||
<Codicon name="close" size="0.75rem" />
|
||||
</button>
|
||||
</div>
|
||||
</ContextMenuTrigger>
|
||||
<ContextMenuContent>
|
||||
<ContextMenuItem onSelect={() => closeRightRailTab(tab.id)}>
|
||||
{t.common.close}
|
||||
<span className="ml-auto pl-4 text-(--ui-text-tertiary)">{formatCombo('mod+w')}</span>
|
||||
</ContextMenuItem>
|
||||
<ContextMenuItem disabled={!hasOthers} onSelect={() => closeOtherRightRailTabs(tab.id)}>
|
||||
{t.preview.closeOthers}
|
||||
</ContextMenuItem>
|
||||
<ContextMenuItem disabled={!hasTabsToRight} onSelect={() => closeRightRailTabsToRight(tab.id)}>
|
||||
{t.preview.closeToRight}
|
||||
</ContextMenuItem>
|
||||
<ContextMenuSeparator />
|
||||
<ContextMenuItem onSelect={closeRightRail}>{t.preview.closeAll}</ContextMenuItem>
|
||||
</ContextMenuContent>
|
||||
</ContextMenu>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
|
||||
158
apps/desktop/src/app/chat/sidebar/chrome.tsx
Normal file
158
apps/desktop/src/app/chat/sidebar/chrome.tsx
Normal file
@@ -0,0 +1,158 @@
|
||||
import type * as React from 'react'
|
||||
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
// Shared, content-agnostic sidebar chrome — used by both the flat session
|
||||
// sections and the project/workspace tree, so it lives outside either to keep
|
||||
// imports one-directional (no index <-> projects cycle).
|
||||
|
||||
/**
 * Formats the count shown next to a section label.
 *
 * @returns `loaded/total` when `total` is greater than `loaded`, otherwise
 * just `loaded` as a string.
 */
export const countLabel = (loaded: number, total: number): string => {
  if (total > loaded) {
    return `${loaded}/${total}`
  }

  return String(loaded)
}
|
||||
|
||||
/** Small muted chip that renders its children beside a section/workspace label. */
export function SidebarCount({ children }: { children: React.ReactNode }) {
  const chipClass = 'text-[0.6875rem] font-medium text-(--ui-text-quaternary)'

  return <span className={chipClass}>{children}</span>
}
|
||||
|
||||
// ── Row geometry (session row is canonical — everything composes these) ─────
|
||||
//
|
||||
// Height lives ONLY on SidebarRowShell (min-h-[1.625rem]). Inset children
|
||||
// stretch to fill the cell and center content internally — never items-center
|
||||
// on the shell grid, or short clusters (projects) float 1–2px off sessions.
|
||||
|
||||
const rowMinH = 'min-h-[1.625rem]' // row min-height; applied by SidebarRowShell
|
||||
const rowPadX = 'pl-2 pr-1' // horizontal padding; folded into rowInset below
|
||||
const rowGap = 'gap-1.5' // gap between inset children; folded into rowInset below
|
||||
const rowLead = 'grid size-3.5 shrink-0 place-items-center' // leading column box; used by SidebarRowLead
|
||||
// Shared inset layout: padding + gap + a full-height flex row. Used by SidebarRowCluster and SidebarRowBody.
const rowInset = cn(rowPadX, rowGap, 'flex h-full min-w-0 items-center self-stretch py-0.5')
|
||||
const rowLabel = 'min-w-0 truncate text-[0.8125rem] leading-none text-(--ui-text-secondary)' // label typography; used by SidebarRowLink and SidebarRowLabel
|
||||
|
||||
/** Codicon size in sidebar row leads — matches the file tree (`tree.tsx`). */
|
||||
export const SIDEBAR_LEAD_ICON_SIZE = '0.875rem' as const
|
||||
|
||||
/** Single-column grid that stacks rows vertically with a `gap-px` gap. */
export function SidebarRowStack({ className, ...rest }: React.ComponentProps<'div'>) {
  const stackClass = cn('grid grid-cols-[minmax(0,1fr)] gap-px', className)

  return <div className={stackClass} {...rest} />
}
|
||||
|
||||
/** A SidebarRowStack with left/bottom padding, for nested rows (session previews, worktree bodies). */
export function SidebarRowNest({ className, ...rest }: React.ComponentProps<'div'>) {
  const nestClass = cn('pb-1 pl-4', className)

  return <SidebarRowStack className={nestClass} {...rest} />
}
|
||||
|
||||
/**
 * Outer two-column row grid; applies the row min-height (`rowMinH`).
 *
 * `children` are rendered first; `actions`, when truthy, are wrapped in a
 * vertically centered flex container and rendered after them.
 */
export function SidebarRowShell({
  actions,
  children,
  className,
  ...rest
}: React.ComponentProps<'div'> & { actions?: React.ReactNode }) {
  const shellClass = cn(rowMinH, 'grid grid-cols-[minmax(0,1fr)_auto] items-stretch rounded-md', className)
  const actionSlot = actions ? <div className="flex shrink-0 items-center self-center">{actions}</div> : null

  return (
    <div className={shellClass} {...rest}>
      {children}
      {actionSlot}
    </div>
  )
}
|
||||
|
||||
/** Left-hand container for rows that hold several controls (project rows); uses the shared inset layout. */
export function SidebarRowCluster({ className, ...rest }: React.ComponentProps<'div'>) {
  const clusterClass = cn(rowInset, className)

  return <div className={clusterClass} {...rest} />
}
|
||||
|
||||
/**
 * Main tap target of a session row, rendered as a `<button>`.
 * `type="button"` is set before the prop spread, so a caller-supplied `type` still wins.
 */
export function SidebarRowBody({ className, ...rest }: React.ComponentProps<'button'>) {
  const bodyClass = cn(rowInset, 'bg-transparent text-left', className)

  return <button className={bodyClass} type="button" {...rest} />
}
|
||||
|
||||
/**
 * Tappable row label. The button carries only layout classes; the label
 * typography (including truncation) is applied to the inner span, which also
 * receives `labelClassName`.
 */
export function SidebarRowLink({
  className,
  labelClassName,
  children,
  ...rest
}: React.ComponentProps<'button'> & { labelClassName?: string }) {
  const buttonClass = cn('min-w-0 shrink bg-transparent p-0 text-left', className)
  const textClass = cn(rowLabel, labelClassName)

  return (
    <button className={buttonClass} type="button" {...rest}>
      <span className={textClass}>{children}</span>
    </button>
  )
}
|
||||
|
||||
/** Fixed-size leading slot of a row (dot, icon, drag handle). */
export function SidebarRowLead({ className, ...rest }: React.ComponentProps<'span'>) {
  const leadClass = cn(rowLead, className)

  return <span className={leadClass} {...rest} />
}
|
||||
|
||||
/** Span carrying the standard row label typography (`rowLabel`). */
export function SidebarRowLabel({ className, ...rest }: React.ComponentProps<'span'>) {
  const labelClass = cn(rowLabel, className)

  return <span className={labelClass} {...rest} />
}
|
||||
|
||||
/**
 * Leading slot for reorderable rows: shows `children` at rest and cross-fades
 * to a grabber icon on hover/focus-within (or while `dragging` is true).
 *
 * `dragHandleProps` are spread first, so the `aria-label`, `className`,
 * `data-reorder-handle` and `onClick` set here take precedence over them.
 */
export function SidebarRowGrab({
  ariaLabel,
  children,
  className,
  dragging = false,
  dragHandleProps,
  leadClassName
}: {
  ariaLabel: string
  children: React.ReactNode
  className?: string
  dragging?: boolean
  dragHandleProps?: React.HTMLAttributes<HTMLElement>
  leadClassName?: string
}) {
  const handleClass = cn(
    'group/handle relative cursor-grab touch-none overflow-hidden active:cursor-grabbing',
    leadClassName,
    className
  )

  const grabberClass = cn(
    'absolute text-(--ui-text-quaternary) opacity-0 transition-opacity group-hover/handle:opacity-80 group-focus-within/handle:opacity-80 hover:text-(--ui-text-secondary)',
    dragging && 'text-(--ui-text-secondary) opacity-100'
  )

  return (
    <SidebarRowLead
      {...dragHandleProps}
      aria-label={ariaLabel}
      className={handleClass}
      data-reorder-handle
      // Stop clicks on the handle from reaching ancestor click handlers.
      onClick={event => event.stopPropagation()}
    >
      <span className="grid size-full place-items-center transition-opacity group-hover/handle:opacity-0 group-focus-within/handle:opacity-0">
        {children}
      </span>
      <Codicon className={grabberClass} name="grabber" size="0.75rem" />
    </SidebarRowLead>
  )
}
|
||||
|
||||
/** Centered, full-size wrapper for the icon or dot placed inside a SidebarRowLead. */
export function SidebarRowLeadGlyph({
  children,
  className,
  style
}: {
  children: React.ReactNode
  className?: string
  style?: React.CSSProperties
}) {
  const glyphClass = cn(
    'grid size-full place-items-center text-(--ui-text-tertiary) [&_.codicon]:leading-none',
    className
  )

  return (
    <span className={glyphClass} style={style}>
      {children}
    </span>
  )
}
|
||||
@@ -3,6 +3,7 @@ import { useEffect, useMemo, useState } from 'react'
|
||||
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import { DisclosureCaret } from '@/components/ui/disclosure-caret'
|
||||
import { GlyphSpinner } from '@/components/ui/glyph-spinner'
|
||||
import { SidebarGroup, SidebarGroupContent } from '@/components/ui/sidebar'
|
||||
import { Tip } from '@/components/ui/tooltip'
|
||||
import { getCronJobRuns, type SessionInfo } from '@/hermes'
|
||||
@@ -328,7 +329,7 @@ function CronJobSidebarRuns({ jobId, onOpenRun }: { jobId: string; onOpenRun: (s
|
||||
<div className="mb-1 ml-[1.375rem] flex flex-col gap-px">
|
||||
{runs === null ? (
|
||||
<div className="flex items-center gap-1.5 py-1 pl-1 text-[0.6875rem] text-(--ui-text-tertiary)">
|
||||
<Codicon name="loading" size="0.75rem" spinning />
|
||||
<GlyphSpinner ariaLabel={c.loading} className="text-[0.75rem]" />
|
||||
</div>
|
||||
) : runs.length === 0 ? (
|
||||
<div className="py-1 pl-1 text-[0.6875rem] text-(--ui-text-tertiary)">{c.noRuns}</div>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,5 @@
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import { GlyphSpinner } from '@/components/ui/glyph-spinner'
|
||||
import { useI18n } from '@/i18n'
|
||||
|
||||
interface SidebarLoadMoreRowProps {
|
||||
@@ -7,24 +8,22 @@ interface SidebarLoadMoreRowProps {
|
||||
loading?: boolean
|
||||
}
|
||||
|
||||
// "Load N more" affordance shared by the recents, messaging, and cron sections.
|
||||
// The chevron sits in the same w-3.5 column the rows use for their dot, so it
|
||||
// lines up with the list above.
|
||||
// Compact "load more" affordance shared by recents, messaging, and cron. Kept
|
||||
// intentionally identical to workspace "show more" controls (ellipsis button)
|
||||
// so pagination reads as one interaction everywhere.
|
||||
export function SidebarLoadMoreRow({ step, onClick, loading = false }: SidebarLoadMoreRowProps) {
|
||||
const { t } = useI18n()
|
||||
const label = loading ? t.sidebar.loading : step > 0 ? t.sidebar.loadCount(step) : t.sidebar.loadMore
|
||||
|
||||
return (
|
||||
<button
|
||||
className="flex min-h-5 items-center gap-1.5 self-start bg-transparent pl-2 text-left text-[0.6875rem] text-(--ui-text-tertiary) transition-colors duration-100 ease-out hover:text-foreground hover:transition-none disabled:cursor-default disabled:opacity-60 disabled:hover:text-(--ui-text-tertiary)"
|
||||
aria-label={label}
|
||||
className="ml-auto grid size-5 place-items-center rounded-sm bg-transparent text-(--ui-text-tertiary) transition-colors hover:bg-(--ui-control-hover-background) hover:text-foreground disabled:cursor-default disabled:opacity-60 disabled:hover:bg-transparent disabled:hover:text-(--ui-text-tertiary)"
|
||||
disabled={loading}
|
||||
onClick={onClick}
|
||||
type="button"
|
||||
>
|
||||
<span className="grid w-3.5 shrink-0 place-items-center">
|
||||
<Codicon className="opacity-70" name={loading ? 'loading' : 'chevron-down'} size="0.75rem" spinning={loading} />
|
||||
</span>
|
||||
<span>{label}</span>
|
||||
{loading ? <GlyphSpinner ariaLabel={label} className="text-[0.75rem]" /> : <Codicon name="ellipsis" size="0.75rem" />}
|
||||
</button>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
/**
 * Merges the live id list with a persisted order.
 *
 * Ids in `currentIds` that the persisted order does not mention come first
 * (in `currentIds` order), followed by the persisted ids that are still
 * current (in `orderIds` order). Ids present only in `orderIds` are dropped.
 * An id repeated in either input is emitted once, at its first position, so
 * the result never contains the same id twice.
 *
 * @param currentIds ids that currently exist; not mutated.
 * @param orderIds previously persisted order; not mutated.
 * @returns a new array: fresh ids, then retained ids.
 */
export function reconcileFreshFirst(currentIds: readonly string[], orderIds: readonly string[]): string[] {
  const current = new Set(currentIds)
  // A Set iterates in first-insertion order, so building one de-duplicates while keeping position.
  const retained = new Set(orderIds.filter(id => current.has(id)))
  const fresh = new Set(currentIds.filter(id => !retained.has(id)))

  return [...fresh, ...retained]
}
|
||||
|
||||
export function resolveManualSessionOrderIds(currentIds: string[], orderIds: string[], manual: boolean): string[] {
|
||||
if (!manual || !currentIds.length || !orderIds.length) {
|
||||
return []
|
||||
@@ -10,8 +19,5 @@ export function resolveManualSessionOrderIds(currentIds: string[], orderIds: str
|
||||
return []
|
||||
}
|
||||
|
||||
const retainedSet = new Set(retained)
|
||||
const fresh = currentIds.filter(id => !retainedSet.has(id))
|
||||
|
||||
return [...fresh, ...retained]
|
||||
return reconcileFreshFirst(currentIds, orderIds)
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user