Compare commits
1 Commits
bb/gui
...
bb/base-gu
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
648da6a8d1 |
@@ -5,15 +5,7 @@
|
||||
|
||||
# Dependencies
|
||||
node_modules
|
||||
**/node_modules
|
||||
.venv
|
||||
**/.venv
|
||||
|
||||
# Built artifacts that are regenerated inside the image. Excluded so local
|
||||
# rebuilds on the developer's machine don't invalidate the npm-install layer
|
||||
# that now depends on the full ui-tui/packages/hermes-ink/ tree being present.
|
||||
ui-tui/dist/
|
||||
ui-tui/packages/hermes-ink/dist/
|
||||
|
||||
# CI/CD
|
||||
.github
|
||||
@@ -25,7 +17,3 @@ ui-tui/packages/hermes-ink/dist/
|
||||
|
||||
# Runtime data (bind-mounted at /opt/data; must not leak into build context)
|
||||
data/
|
||||
|
||||
# Compose/profile runtime state (bind-mounted; avoid ownership/secret issues)
|
||||
hermes-config/
|
||||
runtime/
|
||||
|
||||
33
.env.example
@@ -244,15 +244,6 @@ BROWSERBASE_PROXIES=true
|
||||
# Uses custom Chromium build to avoid bot detection altogether
|
||||
BROWSERBASE_ADVANCED_STEALTH=false
|
||||
|
||||
# Browser engine for local mode (default: auto = Chrome)
|
||||
# "auto" — use Chrome (don't pass --engine flag)
|
||||
# "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots)
|
||||
# "chrome" — explicitly request Chrome
|
||||
# Requires agent-browser v0.25.3+. Lightpanda commands that fail or return
|
||||
# empty results are automatically retried with Chrome.
|
||||
# Also configurable via browser.engine in config.yaml.
|
||||
# AGENT_BROWSER_ENGINE=auto
|
||||
|
||||
# Browser session timeout in seconds (default: 300)
|
||||
# Sessions are cleaned up after this duration of inactivity
|
||||
BROWSER_SESSION_TIMEOUT=300
|
||||
@@ -393,9 +384,9 @@ IMAGE_TOOLS_DEBUG=false
|
||||
# Default STT provider is "local" (faster-whisper) — runs on your machine, no API key needed.
|
||||
# Install with: pip install faster-whisper
|
||||
# Model downloads automatically on first use (~150 MB for "base").
|
||||
# To use cloud providers instead, set GROQ_API_KEY, VOICE_TOOLS_OPENAI_KEY, or ELEVENLABS_API_KEY above.
|
||||
# Provider priority: local > groq > openai > mistral > xai > elevenlabs
|
||||
# Configure in config.yaml: stt.provider: local | groq | openai | mistral | xai | elevenlabs
|
||||
# To use cloud providers instead, set GROQ_API_KEY or VOICE_TOOLS_OPENAI_KEY above.
|
||||
# Provider priority: local > groq > openai
|
||||
# Configure in config.yaml: stt.provider: local | groq | openai
|
||||
|
||||
# =============================================================================
|
||||
# STT ADVANCED OVERRIDES (optional)
|
||||
@@ -403,25 +394,7 @@ IMAGE_TOOLS_DEBUG=false
|
||||
# Override default STT models per provider (normally set via stt.model in config.yaml)
|
||||
# STT_GROQ_MODEL=whisper-large-v3-turbo
|
||||
# STT_OPENAI_MODEL=whisper-1
|
||||
# STT_ELEVENLABS_MODEL=scribe_v2
|
||||
|
||||
# Override STT provider endpoints (for proxies or self-hosted instances)
|
||||
# GROQ_BASE_URL=https://api.groq.com/openai/v1
|
||||
# STT_OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
# ELEVENLABS_STT_BASE_URL=https://api.elevenlabs.io/v1
|
||||
|
||||
# =============================================================================
|
||||
# MICROSOFT TEAMS INTEGRATION
|
||||
# =============================================================================
|
||||
# Register a Bot in Azure: https://dev.botframework.com/ → "Register a bot"
|
||||
# Or use Azure Portal: Azure Active Directory → App registrations → New registration
|
||||
# Then add the bot to Teams via the Bot Framework or App Studio.
|
||||
#
|
||||
# TEAMS_CLIENT_ID= # Azure AD App (client) ID
|
||||
# TEAMS_CLIENT_SECRET= # Azure AD client secret value
|
||||
# TEAMS_TENANT_ID= # Azure AD tenant ID (or "common" for multi-tenant)
|
||||
# TEAMS_ALLOWED_USERS= # Comma-separated AAD object IDs or UPNs
|
||||
# TEAMS_ALLOW_ALL_USERS=false # Set true to skip the allowlist
|
||||
# TEAMS_HOME_CHANNEL= # Default channel/chat ID for cron delivery
|
||||
# TEAMS_HOME_CHANNEL_NAME= # Display name for the home channel
|
||||
# TEAMS_PORT=3978 # Webhook listen port (Bot Framework default)
|
||||
|
||||
14
.github/actions/nix-setup/action.yml
vendored
@@ -1,18 +1,8 @@
|
||||
name: 'Setup Nix'
|
||||
description: 'Install Nix and configure Cachix binary cache'
|
||||
|
||||
inputs:
|
||||
cachix-auth-token:
|
||||
description: 'Cachix auth token (enables push). Omit for read-only.'
|
||||
required: false
|
||||
default: ''
|
||||
description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
|
||||
- uses: cachix/cachix-action@1eb2ef646ac0255473d23a5907ad7b04ce94065c # v17
|
||||
with:
|
||||
name: hermes-agent
|
||||
authToken: ${{ inputs.cachix-auth-token }}
|
||||
continue-on-error: true
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
|
||||
|
||||
44
.github/dependabot.yml
vendored
@@ -1,44 +0,0 @@
|
||||
# Dependabot configuration for hermes-agent.
|
||||
#
|
||||
# Deliberately scoped to github-actions only.
|
||||
#
|
||||
# We do NOT enable Dependabot for pip / npm / any source-dependency ecosystem
|
||||
# because we pin source dependencies exactly (uv.lock, package-lock.json) as
|
||||
# part of our supply-chain posture. Automatic version-bump PRs against those
|
||||
# pins would undermine the strategy — pins are moved deliberately, after
|
||||
# review, not on a schedule.
|
||||
#
|
||||
# github-actions is the exception: action pins (we use full commit SHAs per
|
||||
# supply-chain policy) must be updated when upstream actions publish
|
||||
# patches — usually themselves security fixes. Dependabot opens a PR with
|
||||
# the new SHA and release notes; we review and merge like any other PR.
|
||||
#
|
||||
# Security-update PRs for source dependencies (opened ONLY when a CVE is
|
||||
# published affecting a currently-pinned version) are enabled separately
|
||||
# via the repo's Dependabot security updates setting
|
||||
# (Settings → Code security → Dependabot → Dependabot security updates).
|
||||
# Those are CVE-only, not schedule-driven, and do not conflict with our
|
||||
# pinning strategy — they fire when a pinned version becomes known-bad,
|
||||
# which is exactly when we want to move the pin.
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
open-pull-requests-limit: 5
|
||||
labels:
|
||||
- "dependencies"
|
||||
- "github-actions"
|
||||
commit-message:
|
||||
prefix: "chore(actions)"
|
||||
include: "scope"
|
||||
groups:
|
||||
# Batch routine action bumps into one PR per week to reduce noise.
|
||||
# Security updates still open individually and bypass grouping.
|
||||
actions-minor-patch:
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
10
.github/workflows/deploy-site.yml
vendored
@@ -76,16 +76,6 @@ jobs:
|
||||
run: |
|
||||
mkdir -p _site/docs
|
||||
cp -r website/build/* _site/docs/
|
||||
# llms.txt / llms-full.txt are also published at the site root
|
||||
# (https://hermes-agent.nousresearch.com/llms.txt) because some
|
||||
# agents and IDE plugins probe the classic root-level path rather
|
||||
# than /docs/llms.txt. Same file, two URLs, one source of truth.
|
||||
if [ -f website/build/llms.txt ]; then
|
||||
cp website/build/llms.txt _site/llms.txt
|
||||
fi
|
||||
if [ -f website/build/llms-full.txt ]; then
|
||||
cp website/build/llms-full.txt _site/llms-full.txt
|
||||
fi
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3
|
||||
|
||||
341
.github/workflows/desktop-release.yml
vendored
@@ -1,341 +0,0 @@
|
||||
name: Desktop Release
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
release:
|
||||
types: [published]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
channel:
|
||||
description: Release channel to build
|
||||
required: true
|
||||
default: nightly
|
||||
type: choice
|
||||
options:
|
||||
- nightly
|
||||
- stable
|
||||
release_tag:
|
||||
description: "Required when channel=stable (example: v2026.5.5)"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
concurrency:
|
||||
group: desktop-release-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
prepare:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
channel: ${{ steps.meta.outputs.channel }}
|
||||
release_name: ${{ steps.meta.outputs.release_name }}
|
||||
release_tag: ${{ steps.meta.outputs.release_tag }}
|
||||
version: ${{ steps.meta.outputs.version }}
|
||||
is_stable: ${{ steps.meta.outputs.is_stable }}
|
||||
steps:
|
||||
- id: meta
|
||||
env:
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
INPUT_CHANNEL: ${{ github.event.inputs.channel }}
|
||||
INPUT_RELEASE_TAG: ${{ github.event.inputs.release_tag }}
|
||||
RELEASE_TAG_FROM_EVENT: ${{ github.event.release.tag_name }}
|
||||
GITHUB_SHA: ${{ github.sha }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
channel="nightly"
|
||||
release_tag="desktop-nightly"
|
||||
is_stable="false"
|
||||
|
||||
if [[ "$EVENT_NAME" == "release" ]]; then
|
||||
channel="stable"
|
||||
release_tag="$RELEASE_TAG_FROM_EVENT"
|
||||
is_stable="true"
|
||||
elif [[ "$EVENT_NAME" == "workflow_dispatch" && "$INPUT_CHANNEL" == "stable" ]]; then
|
||||
channel="stable"
|
||||
release_tag="$INPUT_RELEASE_TAG"
|
||||
is_stable="true"
|
||||
fi
|
||||
|
||||
if [[ "$channel" == "stable" ]]; then
|
||||
if [[ -z "$release_tag" ]]; then
|
||||
echo "Stable desktop releases require a release tag." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
version="${release_tag#v}"
|
||||
release_name="Hermes Desktop ${release_tag}"
|
||||
else
|
||||
stamp="$(date -u +%Y%m%d)"
|
||||
short_sha="${GITHUB_SHA::7}"
|
||||
version="0.0.0-nightly.${stamp}.${short_sha}"
|
||||
release_name="Hermes Desktop Nightly ${stamp}-${short_sha}"
|
||||
fi
|
||||
|
||||
{
|
||||
echo "channel=$channel"
|
||||
echo "release_name=$release_name"
|
||||
echo "release_tag=$release_tag"
|
||||
echo "version=$version"
|
||||
echo "is_stable=$is_stable"
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
|
||||
build:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
needs: prepare
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- platform: mac
|
||||
runner: macos-latest
|
||||
build_args: --mac dmg zip
|
||||
- platform: win
|
||||
runner: windows-latest
|
||||
build_args: --win nsis msi
|
||||
runs-on: ${{ matrix.runner }}
|
||||
env:
|
||||
DESKTOP_CHANNEL: ${{ needs.prepare.outputs.channel }}
|
||||
DESKTOP_VERSION: ${{ needs.prepare.outputs.version }}
|
||||
MAC_CSC_LINK: ${{ secrets.CSC_LINK }}
|
||||
MAC_CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
|
||||
APPLE_API_KEY: ${{ secrets.APPLE_API_KEY }}
|
||||
APPLE_API_KEY_ID: ${{ secrets.APPLE_API_KEY_ID }}
|
||||
APPLE_API_ISSUER: ${{ secrets.APPLE_API_ISSUER }}
|
||||
WIN_CSC_LINK: ${{ secrets.WIN_CSC_LINK }}
|
||||
WIN_CSC_KEY_PASSWORD: ${{ secrets.WIN_CSC_KEY_PASSWORD }}
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: npm
|
||||
cache-dependency-path: package-lock.json
|
||||
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Enforce signing gates for stable releases
|
||||
if: needs.prepare.outputs.is_stable == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
missing=()
|
||||
|
||||
if [[ "${{ matrix.platform }}" == "mac" ]]; then
|
||||
[[ -z "${MAC_CSC_LINK:-}" ]] && missing+=("CSC_LINK")
|
||||
[[ -z "${MAC_CSC_KEY_PASSWORD:-}" ]] && missing+=("CSC_KEY_PASSWORD")
|
||||
[[ -z "${APPLE_API_KEY:-}" ]] && missing+=("APPLE_API_KEY")
|
||||
[[ -z "${APPLE_API_KEY_ID:-}" ]] && missing+=("APPLE_API_KEY_ID")
|
||||
[[ -z "${APPLE_API_ISSUER:-}" ]] && missing+=("APPLE_API_ISSUER")
|
||||
else
|
||||
[[ -z "${WIN_CSC_LINK:-}" ]] && missing+=("WIN_CSC_LINK")
|
||||
[[ -z "${WIN_CSC_KEY_PASSWORD:-}" ]] && missing+=("WIN_CSC_KEY_PASSWORD")
|
||||
fi
|
||||
|
||||
if (( ${#missing[@]} > 0 )); then
|
||||
echo "::error::Stable desktop release missing required secrets: ${missing[*]}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install workspace dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Build bundled TUI payload
|
||||
run: npm --prefix ui-tui run build
|
||||
|
||||
- name: Build desktop renderer
|
||||
run: npm --prefix apps/desktop run build
|
||||
|
||||
- name: Stage Hermes payload
|
||||
run: npm --prefix apps/desktop run stage:hermes
|
||||
|
||||
- name: Map macOS signing credentials
|
||||
if: matrix.platform == 'mac'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
has_link=0
|
||||
has_pass=0
|
||||
[[ -n "${MAC_CSC_LINK:-}" ]] && has_link=1
|
||||
[[ -n "${MAC_CSC_KEY_PASSWORD:-}" ]] && has_pass=1
|
||||
|
||||
if [[ $has_link -eq 1 && $has_pass -eq 1 ]]; then
|
||||
echo "CSC_LINK=${MAC_CSC_LINK}" >> "$GITHUB_ENV"
|
||||
echo "CSC_KEY_PASSWORD=${MAC_CSC_KEY_PASSWORD}" >> "$GITHUB_ENV"
|
||||
elif [[ $has_link -eq 1 || $has_pass -eq 1 ]]; then
|
||||
echo "::error::macOS signing secrets are partially configured. Set both CSC_LINK and CSC_KEY_PASSWORD."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Map Windows signing credentials
|
||||
if: matrix.platform == 'win'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
has_link=0
|
||||
has_pass=0
|
||||
[[ -n "${WIN_CSC_LINK:-}" ]] && has_link=1
|
||||
[[ -n "${WIN_CSC_KEY_PASSWORD:-}" ]] && has_pass=1
|
||||
|
||||
if [[ $has_link -eq 1 && $has_pass -eq 1 ]]; then
|
||||
echo "CSC_LINK=${WIN_CSC_LINK}" >> "$GITHUB_ENV"
|
||||
echo "CSC_KEY_PASSWORD=${WIN_CSC_KEY_PASSWORD}" >> "$GITHUB_ENV"
|
||||
echo "CSC_FOR_PULL_REQUEST=true" >> "$GITHUB_ENV"
|
||||
elif [[ $has_link -eq 1 || $has_pass -eq 1 ]]; then
|
||||
echo "::error::Windows signing secrets are partially configured. Set both WIN_CSC_LINK and WIN_CSC_KEY_PASSWORD."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Build desktop installers
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
npm --prefix apps/desktop exec electron-builder -- \
|
||||
${{ matrix.build_args }} \
|
||||
--publish never \
|
||||
--config.extraMetadata.version="${DESKTOP_VERSION}" \
|
||||
--config.extraMetadata.desktopChannel="${DESKTOP_CHANNEL}" \
|
||||
'--config.artifactName=Hermes-${version}-${env.DESKTOP_CHANNEL}-${os}-${arch}.${ext}'
|
||||
|
||||
- name: Notarize and staple macOS DMG
|
||||
if: matrix.platform == 'mac' && needs.prepare.outputs.is_stable == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
dmg_path="$(ls apps/desktop/release/*.dmg | head -n 1)"
|
||||
node apps/desktop/scripts/notarize-artifact.cjs "$dmg_path"
|
||||
|
||||
- name: Validate macOS notarization and Gatekeeper trust
|
||||
if: matrix.platform == 'mac' && needs.prepare.outputs.is_stable == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
app_path="$(ls -d apps/desktop/release/mac*/Hermes.app | head -n 1)"
|
||||
dmg_path="$(ls apps/desktop/release/*.dmg | head -n 1)"
|
||||
xcrun stapler validate "$app_path"
|
||||
xcrun stapler validate "$dmg_path"
|
||||
spctl --assess --type execute --verbose=4 "$app_path"
|
||||
|
||||
- name: Generate desktop checksums
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
node <<'EOF'
|
||||
const crypto = require('node:crypto')
|
||||
const fs = require('node:fs')
|
||||
const path = require('node:path')
|
||||
|
||||
const releaseDir = path.resolve('apps/desktop/release')
|
||||
const platform = process.env.PLATFORM
|
||||
const extensions = platform === 'mac' ? ['.dmg', '.zip'] : ['.exe', '.msi']
|
||||
const files = fs
|
||||
.readdirSync(releaseDir)
|
||||
.filter(name => extensions.some(ext => name.endsWith(ext)))
|
||||
.sort()
|
||||
|
||||
if (!files.length) {
|
||||
throw new Error(`No release artifacts were produced for ${platform}`)
|
||||
}
|
||||
|
||||
const lines = files.map(name => {
|
||||
const full = path.join(releaseDir, name)
|
||||
const hash = crypto.createHash('sha256').update(fs.readFileSync(full)).digest('hex')
|
||||
return `${hash} ${name}`
|
||||
})
|
||||
fs.writeFileSync(path.join(releaseDir, `SHA256SUMS-${platform}.txt`), `${lines.join('\n')}\n`)
|
||||
EOF
|
||||
env:
|
||||
PLATFORM: ${{ matrix.platform }}
|
||||
|
||||
- name: Upload packaged desktop artifacts
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: desktop-${{ matrix.platform }}
|
||||
path: |
|
||||
apps/desktop/release/*.dmg
|
||||
apps/desktop/release/*.zip
|
||||
apps/desktop/release/*.exe
|
||||
apps/desktop/release/*.msi
|
||||
apps/desktop/release/SHA256SUMS-${{ matrix.platform }}.txt
|
||||
if-no-files-found: error
|
||||
|
||||
publish:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
needs: [prepare, build]
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
CHANNEL: ${{ needs.prepare.outputs.channel }}
|
||||
RELEASE_NAME: ${{ needs.prepare.outputs.release_name }}
|
||||
RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }}
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
pattern: desktop-*
|
||||
merge-multiple: true
|
||||
path: dist/desktop
|
||||
|
||||
- name: Publish desktop assets to GitHub release
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
shopt -s globstar nullglob
|
||||
|
||||
files=(
|
||||
dist/desktop/**/*.dmg
|
||||
dist/desktop/**/*.zip
|
||||
dist/desktop/**/*.exe
|
||||
dist/desktop/**/*.msi
|
||||
dist/desktop/**/SHA256SUMS-*.txt
|
||||
)
|
||||
|
||||
if (( ${#files[@]} == 0 )); then
|
||||
echo "No desktop artifacts were downloaded for publishing." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "$CHANNEL" == "nightly" ]]; then
|
||||
git tag -f "$RELEASE_TAG" "$GITHUB_SHA"
|
||||
git push origin "refs/tags/$RELEASE_TAG" --force
|
||||
|
||||
notes="Automated nightly desktop build from main. This prerelease is replaced on each new run."
|
||||
|
||||
if gh release view "$RELEASE_TAG" >/dev/null 2>&1; then
|
||||
while IFS= read -r asset_name; do
|
||||
gh release delete-asset "$RELEASE_TAG" "$asset_name" --yes
|
||||
done < <(gh release view "$RELEASE_TAG" --json assets -q '.assets[].name')
|
||||
|
||||
gh release edit "$RELEASE_TAG" \
|
||||
--title "$RELEASE_NAME" \
|
||||
--prerelease \
|
||||
--notes "$notes"
|
||||
else
|
||||
gh release create "$RELEASE_TAG" \
|
||||
--target "$GITHUB_SHA" \
|
||||
--title "$RELEASE_NAME" \
|
||||
--notes "$notes" \
|
||||
--prerelease
|
||||
fi
|
||||
else
|
||||
if ! gh release view "$RELEASE_TAG" >/dev/null 2>&1; then
|
||||
notes="Automated desktop artifacts attached by desktop-release workflow."
|
||||
gh release create "$RELEASE_TAG" \
|
||||
--target "$GITHUB_SHA" \
|
||||
--title "$RELEASE_NAME" \
|
||||
--notes "$notes"
|
||||
fi
|
||||
fi
|
||||
|
||||
gh release upload "$RELEASE_TAG" "${files[@]}" --clobber
|
||||
151
.github/workflows/lint.yml
vendored
@@ -1,151 +0,0 @@
|
||||
name: Lint (ruff + ty)
|
||||
|
||||
# Surface ruff and ty diagnostics as a diff vs the target branch.
|
||||
# This check is advisory only for now: it always exits zero and never blocks merge.
|
||||
# It posts a Markdown summary to the workflow run and, for pull requests,
|
||||
# comments the same summary on the PR.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
- "website/**"
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
- "website/**"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write # needed to post/update PR comments
|
||||
|
||||
concurrency:
|
||||
group: lint-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
lint-diff:
|
||||
name: ruff + ty diff
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
fetch-depth: 0 # need full history for merge-base + worktree
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff + ty
|
||||
run: |
|
||||
uv tool install ruff
|
||||
uv tool install ty
|
||||
|
||||
- name: Determine base ref
|
||||
id: base
|
||||
run: |
|
||||
# For PRs, diff against the merge base with the target branch.
|
||||
# For pushes to main, diff against the previous commit on main.
|
||||
if [ "${{ github.event_name }}" = "pull_request" ]; then
|
||||
BASE_SHA=$(git merge-base "origin/${{ github.base_ref }}" HEAD)
|
||||
BASE_REF="origin/${{ github.base_ref }}"
|
||||
else
|
||||
BASE_SHA=$(git rev-parse HEAD~1 2>/dev/null || git rev-parse HEAD)
|
||||
BASE_REF="HEAD~1"
|
||||
fi
|
||||
echo "sha=${BASE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "ref=${BASE_REF}" >> "$GITHUB_OUTPUT"
|
||||
echo "Base SHA: ${BASE_SHA}"
|
||||
echo "Base ref: ${BASE_REF}"
|
||||
|
||||
- name: Run ruff + ty on HEAD
|
||||
run: |
|
||||
mkdir -p .lint-reports/head
|
||||
ruff check --output-format json --exit-zero \
|
||||
> .lint-reports/head/ruff.json || true
|
||||
ty check --output-format gitlab --exit-zero \
|
||||
> .lint-reports/head/ty.json || true
|
||||
echo "HEAD ruff: $(wc -c < .lint-reports/head/ruff.json) bytes"
|
||||
echo "HEAD ty: $(wc -c < .lint-reports/head/ty.json) bytes"
|
||||
|
||||
- name: Run ruff + ty on base (via git worktree)
|
||||
run: |
|
||||
mkdir -p .lint-reports/base
|
||||
# Use a worktree so we don't clobber the main checkout. If the base
|
||||
# SHA is identical to HEAD (e.g. first commit), skip and leave the
|
||||
# base reports empty — the diff script handles missing files.
|
||||
HEAD_SHA=$(git rev-parse HEAD)
|
||||
BASE_SHA="${{ steps.base.outputs.sha }}"
|
||||
if [ "$BASE_SHA" = "$HEAD_SHA" ]; then
|
||||
echo "Base SHA == HEAD SHA, skipping base scan."
|
||||
echo '[]' > .lint-reports/base/ruff.json
|
||||
echo '[]' > .lint-reports/base/ty.json
|
||||
else
|
||||
git worktree add --detach /tmp/lint-base "$BASE_SHA"
|
||||
(
|
||||
cd /tmp/lint-base
|
||||
ruff check --output-format json --exit-zero \
|
||||
> "$GITHUB_WORKSPACE/.lint-reports/base/ruff.json" || true
|
||||
ty check --output-format gitlab --exit-zero \
|
||||
> "$GITHUB_WORKSPACE/.lint-reports/base/ty.json" || true
|
||||
)
|
||||
git worktree remove --force /tmp/lint-base
|
||||
fi
|
||||
echo "base ruff: $(wc -c < .lint-reports/base/ruff.json) bytes"
|
||||
echo "base ty: $(wc -c < .lint-reports/base/ty.json) bytes"
|
||||
|
||||
- name: Generate diff summary
|
||||
run: |
|
||||
python scripts/lint_diff.py \
|
||||
--base-ruff .lint-reports/base/ruff.json \
|
||||
--head-ruff .lint-reports/head/ruff.json \
|
||||
--base-ty .lint-reports/base/ty.json \
|
||||
--head-ty .lint-reports/head/ty.json \
|
||||
--base-ref "${{ steps.base.outputs.ref }}" \
|
||||
--head-ref "${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}" \
|
||||
--output .lint-reports/summary.md
|
||||
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
- name: Upload reports as artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: lint-reports
|
||||
path: .lint-reports/
|
||||
retention-days: 14
|
||||
|
||||
- name: Post / update PR comment
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const body = fs.readFileSync('.lint-reports/summary.md', 'utf8');
|
||||
const marker = '<!-- lint-diff-summary -->';
|
||||
const fullBody = marker + '\n' + body;
|
||||
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
});
|
||||
const existing = comments.find(c => c.body && c.body.includes(marker));
|
||||
if (existing) {
|
||||
await github.rest.issues.updateComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
comment_id: existing.id,
|
||||
body: fullBody,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: fullBody,
|
||||
});
|
||||
}
|
||||
68
.github/workflows/nix-lockfile-check.yml
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
name: Nix Lockfile Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-lockfile-check-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
|
||||
- name: Resolve head SHA
|
||||
id: sha
|
||||
shell: bash
|
||||
run: |
|
||||
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
echo "full=$FULL" >> "$GITHUB_OUTPUT"
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check lockfile hashes
|
||||
id: check
|
||||
continue-on-error: true
|
||||
env:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
- name: Post sticky PR comment (stale)
|
||||
if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
message: |
|
||||
### ⚠️ npm lockfile hash out of date
|
||||
|
||||
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
|
||||
|
||||
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
|
||||
|
||||
${{ steps.check.outputs.report }}
|
||||
|
||||
#### Apply the fix
|
||||
|
||||
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
|
||||
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
|
||||
- Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
|
||||
|
||||
- name: Clear sticky PR comment (resolved)
|
||||
if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
delete: true
|
||||
|
||||
- name: Fail if stale
|
||||
if: steps.check.outputs.stale == 'true'
|
||||
run: exit 1
|
||||
111
.github/workflows/nix-lockfile-fix.yml
vendored
@@ -1,13 +1,6 @@
|
||||
name: Nix Lockfile Fix
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'apps/dashboard/package-lock.json'
|
||||
- 'apps/dashboard/package.json'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
pr_number:
|
||||
@@ -26,105 +19,9 @@ concurrency:
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
# ── Auto-fix on main ───────────────────────────────────────────────
|
||||
# Fires when a push to main touches package.json or package-lock.json
|
||||
# in ui-tui/ or apps/dashboard/. Runs fix-lockfiles and pushes the hash
|
||||
# update commit directly to main so Nix builds never stay broken.
|
||||
#
|
||||
# Safety invariants:
|
||||
# 1. The fix commit only touches nix/*.nix files, which are NOT in
|
||||
# the paths filter above, so this cannot re-trigger itself.
|
||||
# 2. An explicit file-whitelist check before commit aborts if
|
||||
# fix-lockfiles ever modifies unexpected files.
|
||||
# 3. Job-level concurrency with cancel-in-progress: true ensures
|
||||
# back-to-back pushes collapse to the newest; ref: main checkout
|
||||
# always operates on the latest branch state.
|
||||
# 4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
|
||||
# triggers downstream nix.yml verification.
|
||||
auto-fix-main:
|
||||
if: github.event_name == 'push'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
concurrency:
|
||||
group: auto-fix-main
|
||||
cancel-in-progress: true
|
||||
steps:
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00 # v1.9.3
|
||||
with:
|
||||
app-id: ${{ secrets.APP_ID }}
|
||||
private-key: ${{ secrets.APP_PRIVATE_KEY }}
|
||||
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
ref: main
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure only nix files were modified — prevents accidental
|
||||
# self-triggering if fix-lockfiles ever touches package files.
|
||||
unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
|
||||
if [ -n "$unexpected" ]; then
|
||||
echo "::error::Unexpected modified files: $unexpected"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Record the base SHA before committing — used to detect package
|
||||
# file changes if we need to rebase after a non-fast-forward push.
|
||||
BASE_SHA="$(git rev-parse HEAD)"
|
||||
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
||||
git add nix/tui.nix nix/web.nix
|
||||
git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
|
||||
-m "Source: $GITHUB_SHA" \
|
||||
-m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
|
||||
|
||||
# Retry push with rebase in case main advanced with an unrelated
|
||||
# commit during the nix build. Without this, a non-fast-forward
|
||||
# rejection silently loses the fix. If package files changed during
|
||||
# the rebase, abort — a fresh auto-fix run will handle the new state.
|
||||
for attempt in 1 2 3; do
|
||||
if git push origin HEAD:main; then
|
||||
exit 0
|
||||
fi
|
||||
echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
|
||||
git fetch origin main
|
||||
|
||||
# If package files changed between our base and the new main,
|
||||
# our computed hashes are stale. Abort and let the next triggered
|
||||
# run recompute from the correct package-lock state.
|
||||
pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
|
||||
'ui-tui/package-lock.json' 'ui-tui/package.json' \
|
||||
'apps/dashboard/package-lock.json' 'apps/dashboard/package.json' || true)"
|
||||
if [ -n "$pkg_changed" ]; then
|
||||
echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
git rebase origin/main
|
||||
done
|
||||
echo "::error::Failed to push after 3 rebase attempts"
|
||||
exit 1
|
||||
|
||||
# ── PR fix (manual / checkbox) ─────────────────────────────────────
|
||||
# Existing behavior: run on manual dispatch OR when a task-list
|
||||
# checkbox in the sticky lockfile-check comment flips from [ ] to [x].
|
||||
fix:
|
||||
# Run on manual dispatch OR when a task-list checkbox in the sticky
|
||||
# lockfile-check comment flips from `[ ]` to `[x]`.
|
||||
if: |
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'issue_comment'
|
||||
@@ -202,12 +99,10 @@ jobs:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
|
||||
84
.github/workflows/nix.yml
vendored
@@ -7,7 +7,6 @@ on:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-${{ github.ref }}
|
||||
@@ -23,95 +22,12 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Resolve head SHA
|
||||
if: github.event_name == 'pull_request'
|
||||
id: sha
|
||||
shell: bash
|
||||
run: |
|
||||
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
echo "full=$FULL" >> "$GITHUB_OUTPUT"
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check flake
|
||||
id: flake
|
||||
if: runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
run: nix flake check --print-build-logs
|
||||
|
||||
- name: Build package
|
||||
id: build
|
||||
if: runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
run: nix build --print-build-logs
|
||||
|
||||
# When the real Nix build fails, run a targeted diagnostic to see if
|
||||
# the failure is specifically a stale npm lockfile hash in one of the
|
||||
# known npm subpackages (tui / web). This avoids surfacing a generic
|
||||
# "build failed" message when the fix is a single known command.
|
||||
- name: Diagnose npm lockfile hashes
|
||||
id: hash_check
|
||||
if: (steps.flake.outcome == 'failure' || steps.build.outcome == 'failure') && runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
env:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
# If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
|
||||
# etc.) it won't set stale=true/false. Treat that as a distinct failure
|
||||
# mode rather than silently ignoring it.
|
||||
- name: Fail if hash check crashed without reporting
|
||||
if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
|
||||
run: |
|
||||
echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
|
||||
exit 1
|
||||
|
||||
- name: Post sticky PR comment (stale hashes)
|
||||
if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
message: |
|
||||
### ⚠️ npm lockfile hash out of date
|
||||
|
||||
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
|
||||
|
||||
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
|
||||
|
||||
${{ steps.hash_check.outputs.report }}
|
||||
|
||||
#### Apply the fix
|
||||
|
||||
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
|
||||
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
|
||||
- Or locally: `nix run .#fix-lockfiles` and commit the diff
|
||||
|
||||
# Clear the sticky comment when either the build passed outright (no
|
||||
# hash check needed) or the hash check explicitly returned stale=false
|
||||
# (build failed for a non-hash reason).
|
||||
- name: Clear sticky PR comment (resolved)
|
||||
if: |
|
||||
github.event_name == 'pull_request' &&
|
||||
runner.os == 'Linux' &&
|
||||
(steps.hash_check.outputs.stale == 'false' ||
|
||||
(steps.flake.outcome == 'success' && steps.build.outcome == 'success'))
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
delete: true
|
||||
|
||||
- name: Final fail if build or flake failed
|
||||
if: steps.flake.outcome == 'failure' || steps.build.outcome == 'failure'
|
||||
run: |
|
||||
if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
|
||||
echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
|
||||
else
|
||||
echo "::error::Nix build/flake check failed. See logs above."
|
||||
fi
|
||||
exit 1
|
||||
|
||||
- name: Evaluate flake (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
run: nix flake show --json > /dev/null
|
||||
|
||||
67
.github/workflows/osv-scanner.yml
vendored
@@ -1,67 +0,0 @@
|
||||
name: OSV-Scanner
|
||||
|
||||
# Scans lockfiles (uv.lock, package-lock.json) against the OSV vulnerability
|
||||
# database. Runs on PRs and pushes to main that touch a lockfile, and on a weekly schedule
|
||||
# against main.
|
||||
#
|
||||
# This is detection-only — OSV-Scanner does NOT open PRs or modify pins.
|
||||
# It reports known CVEs in currently-pinned dependency versions so we can
|
||||
# decide when and how to patch on our own schedule. Our pinning strategy
|
||||
# (full SHA / exact version) is preserved; only the notification signal
|
||||
# is added.
|
||||
#
|
||||
# Complements the existing supply-chain-audit.yml workflow (which scans
|
||||
# for malicious code patterns in PR diffs) by covering the orthogonal
|
||||
# "currently-pinned dep became known-vulnerable" case.
|
||||
#
|
||||
# Uses Google's officially-recommended reusable workflow, pinned by SHA.
|
||||
# Findings land in the repo's Security tab (Code Scanning > OSV-Scanner).
|
||||
# fail-on-vuln is disabled so the job does not block merges on pre-existing
|
||||
# vulnerabilities in pinned deps that we may need to patch deliberately.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- 'package.json'
|
||||
- 'package-lock.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'website/package.json'
|
||||
- 'website/package-lock.json'
|
||||
- '.github/workflows/osv-scanner.yml'
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- 'package.json'
|
||||
- 'package-lock.json'
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'website/package-lock.json'
|
||||
schedule:
|
||||
# Weekly scan against main — catches CVEs published after merge for
|
||||
# deps that haven't changed since.
|
||||
- cron: '0 9 * * 1'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
# Required by the reusable workflow to upload SARIF to the Security tab.
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
name: Scan lockfiles
|
||||
uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@c51854704019a247608d928f370c98740469d4b5 # v2.3.5
|
||||
with:
|
||||
# Scan explicit lockfiles rather than recursing, so we only look at
|
||||
# the three sources of truth and skip vendored / test / worktree dirs.
|
||||
scan-args: |-
|
||||
--lockfile=uv.lock
|
||||
--lockfile=ui-tui/package-lock.json
|
||||
--lockfile=website/package-lock.json
|
||||
fail-on-vuln: false
|
||||
14
.gitignore
vendored
@@ -54,10 +54,6 @@ environments/benchmarks/evals/
|
||||
|
||||
# Web UI build output
|
||||
hermes_cli/web_dist/
|
||||
apps/desktop/build/
|
||||
apps/desktop/dist/
|
||||
apps/desktop/release/
|
||||
apps/desktop/*.tsbuildinfo
|
||||
|
||||
# Web UI assets — synced from @nous-research/ui at build time via
|
||||
# `npm run sync-assets` (see web/package.json).
|
||||
@@ -73,13 +69,3 @@ mini-swe-agent/
|
||||
.nix-stamps/
|
||||
result
|
||||
website/static/api/skills-index.json
|
||||
models-dev-upstream/
|
||||
|
||||
# Local editor / agent tooling (machine-specific; keep in global config, not the repo)
|
||||
.codex/
|
||||
.cursor/
|
||||
.gemini/
|
||||
.zed/
|
||||
.mcp.json
|
||||
opencode.json
|
||||
config/mcporter.json
|
||||
|
||||
270
AGENTS.md
@@ -2,8 +2,6 @@
|
||||
|
||||
Instructions for AI coding assistants and developers working on the hermes-agent codebase.
|
||||
|
||||
**Never give up on the right solution.**
|
||||
|
||||
## Development Environment
|
||||
|
||||
```bash
|
||||
@@ -39,18 +37,12 @@ hermes-agent/
|
||||
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
|
||||
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
|
||||
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
|
||||
│ │ # yuanbao, webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Extension point for always-registered gateway hooks (none shipped)
|
||||
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Always-registered gateway hooks (boot-md, ...)
|
||||
├── plugins/ # Plugin system (see "Plugins" section below)
|
||||
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
|
||||
│ ├── context_engine/ # Context-engine plugins
|
||||
│ ├── model-providers/ # Inference backend plugins (openrouter, anthropic, gmi, ...)
|
||||
│ ├── kanban/ # Multi-agent board dispatcher + worker plugin
|
||||
│ ├── hermes-achievements/ # Gamified achievement tracking
|
||||
│ ├── observability/ # Metrics / traces / logs plugin
|
||||
│ ├── image_gen/ # Image-generation providers
|
||||
│ └── <others>/ # disk-cleanup, example-dashboard, google_meet, platforms,
|
||||
│ # spotify, strike-freedom-cockpit, ...
|
||||
│ └── <others>/ # Dashboard, image-gen, disk-cleanup, examples, ...
|
||||
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
|
||||
├── skills/ # Built-in skills bundled with the repo
|
||||
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
|
||||
@@ -61,7 +53,7 @@ hermes-agent/
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
|
||||
├── website/ # Docusaurus docs site
|
||||
└── tests/ # Pytest suite (~17k tests across ~900 files as of May 2026)
|
||||
└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026)
|
||||
```
|
||||
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
|
||||
@@ -69,29 +61,6 @@ hermes-agent/
|
||||
`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
|
||||
Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
|
||||
|
||||
## TypeScript Style
|
||||
|
||||
Applies to TypeScript across Hermes: desktop, TUI, website, and future TS packages.
|
||||
|
||||
- Prefer small nanostores over component state when state is shared, reused, or read by distant UI.
|
||||
- Let each feature own its atoms. Chat state belongs near chat, shell state near shell, shared state in `src/store`.
|
||||
- Components that render from an atom should use `useStore`. Non-rendering actions should read with `$atom.get()`.
|
||||
- Do not pass state through three components when the leaf can subscribe to the atom.
|
||||
- Keep persistence beside the atom that owns it.
|
||||
- Keep route roots thin. They compose routes and shell; they should not become controllers.
|
||||
- No monolithic hooks. A hook should own one narrow job.
|
||||
- Prefer colocated action modules over hidden god hooks.
|
||||
- If a callback is pure side effect, use the terse void form:
|
||||
`onState={st => void setGatewayState(st)}`.
|
||||
- Async UI handlers should make intent explicit:
|
||||
`onClick={() => void save()}`.
|
||||
- Prefer interfaces for public props and shared object shapes. Avoid `type X = { ... }` for object props.
|
||||
- Extend React primitives for props: `React.ComponentProps<'button'>`, `React.ComponentProps<typeof Dialog>`, `Omit<...>`, `Pick<...>`.
|
||||
- Table-driven beats condition ladders when mapping ids, routes, or views.
|
||||
- `src/app` owns routes, pages, and page-specific components.
|
||||
- `src/store` owns shared atoms.
|
||||
- `src/lib` owns shared pure helpers.
|
||||
|
||||
## File Dependency Chain
|
||||
|
||||
```
|
||||
@@ -275,7 +244,7 @@ npm test # vitest
|
||||
|
||||
The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
|
||||
|
||||
- Browser loads `apps/dashboard/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
|
||||
- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
|
||||
- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
|
||||
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
|
||||
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
|
||||
@@ -288,16 +257,7 @@ The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes
|
||||
|
||||
## Adding New Tools
|
||||
|
||||
For most custom or local-only tools, do **not** edit Hermes core. Use the plugin
|
||||
route instead: create `~/.hermes/plugins/<name>/plugin.yaml` and
|
||||
`~/.hermes/plugins/<name>/__init__.py`, then register tools with
|
||||
`ctx.register_tool(...)`. Plugin toolsets are discovered automatically and can be
|
||||
enabled or disabled without touching `tools/` or `toolsets.py`.
|
||||
|
||||
Use the built-in route below only when the user is explicitly contributing a new
|
||||
core Hermes tool that should ship in the base system.
|
||||
|
||||
Built-in/core tools require changes in **2 files**:
|
||||
Requires changes in **2 files**:
|
||||
|
||||
**1. Create `tools/your_tool.py`:**
|
||||
```python
|
||||
@@ -320,9 +280,9 @@ registry.register(
|
||||
)
|
||||
```
|
||||
|
||||
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset. **This step is required:** auto-discovery imports the tool and registers its schema, but the tool is only *exposed to an agent* if its name appears in a toolset. `_HERMES_CORE_TOOLS` is not dead code — it's the default bundle every platform's base toolset inherits from.
|
||||
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
|
||||
|
||||
Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain. Wiring into a toolset is still a deliberate, manual step.
|
||||
Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.
|
||||
|
||||
The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.
|
||||
|
||||
@@ -344,22 +304,6 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
section is handled automatically by the deep-merge and does NOT require
|
||||
a version bump.
|
||||
|
||||
### Top-level `config.yaml` sections (non-exhaustive):
|
||||
|
||||
`model`, `agent`, `terminal`, `compression`, `display`, `stt`, `tts`,
|
||||
`memory`, `security`, `delegation`, `smart_model_routing`, `checkpoints`,
|
||||
`auxiliary`, `curator`, `skills`, `gateway`, `logging`, `cron`, `profiles`,
|
||||
`plugins`, `honcho`.
|
||||
|
||||
`auxiliary` holds per-task overrides for side-LLM work (curator, vision,
|
||||
embedding, title generation, session_search, etc.) — each task can pin
|
||||
its own provider/model/base_url/max_tokens/reasoning_effort. See
|
||||
`agent/auxiliary_client.py::_resolve_auto` for resolution order.
|
||||
|
||||
`curator` holds the background skill-maintenance config —
|
||||
`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
|
||||
`archive_after_days`, `backup` (nested).
|
||||
|
||||
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
|
||||
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
|
||||
```python
|
||||
@@ -538,31 +482,6 @@ generic plugin surface (new hook, new ctx method) — never hardcode
|
||||
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
|
||||
honcho argparse from `main.py` for exactly this reason.
|
||||
|
||||
### Model-provider plugins (`plugins/model-providers/<name>/`)
|
||||
|
||||
Every inference backend (openrouter, anthropic, gmi, deepseek, nvidia, …)
|
||||
ships as a plugin here. Each plugin's `__init__.py` calls
|
||||
`providers.register_provider(ProviderProfile(...))` at module load.
|
||||
`providers/__init__.py._discover_providers()` is a **lazy, separate
|
||||
discovery system** — scanned on first `get_provider_profile()` or
|
||||
`list_providers()` call, NOT by the general PluginManager.
|
||||
|
||||
Scan order:
|
||||
1. Bundled: `<repo>/plugins/model-providers/<name>/`
|
||||
2. User: `$HERMES_HOME/plugins/model-providers/<name>/`
|
||||
3. Legacy: `<repo>/providers/<name>.py` (back-compat)
|
||||
|
||||
User plugins of the same name override bundled ones — `register_provider()`
|
||||
is last-writer-wins. This lets third parties swap out any built-in
|
||||
profile without a repo patch.
|
||||
|
||||
The general PluginManager records `kind: model-provider` manifests but does
|
||||
NOT import them (would double-instantiate `ProviderProfile`). Plugins
|
||||
without an explicit `kind:` get auto-coerced via a source-text heuristic
|
||||
(`register_provider` + `ProviderProfile` in `__init__.py`).
|
||||
|
||||
Full authoring guide: `website/docs/developer-guide/model-provider-plugin.md`.
|
||||
|
||||
### Dashboard / context-engine / image-gen plugin directories
|
||||
|
||||
`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
|
||||
@@ -591,176 +510,11 @@ niche skills belong in `optional-skills/`.
|
||||
|
||||
### SKILL.md frontmatter
|
||||
|
||||
Standard fields: `name`, `description`, `version`, `author`, `license`,
|
||||
`platforms` (OS-gating list: `[macos]`, `[linux, macos]`, ...),
|
||||
Standard fields: `name`, `description`, `version`, `platforms`
|
||||
(OS-gating list: `[macos]`, `[linux, macos]`, ...),
|
||||
`metadata.hermes.tags`, `metadata.hermes.category`,
|
||||
`metadata.hermes.related_skills`, `metadata.hermes.config` (config.yaml
|
||||
settings the skill needs — stored under `skills.config.<key>`, prompted
|
||||
during setup, injected at load time).
|
||||
|
||||
Top-level `tags:` and `category:` are also accepted and mirrored from
|
||||
`metadata.hermes.*` by the loader.
|
||||
|
||||
---
|
||||
|
||||
## Toolsets
|
||||
|
||||
All toolsets are defined in `toolsets.py` as a single `TOOLSETS` dict.
|
||||
Each platform's adapter picks a base toolset (e.g. Telegram uses
|
||||
`"messaging"`); `_HERMES_CORE_TOOLS` is the default bundle most
|
||||
platforms inherit from.
|
||||
|
||||
Current toolset keys: `browser`, `clarify`, `code_execution`, `cronjob`,
|
||||
`debugging`, `delegation`, `discord`, `discord_admin`, `feishu_doc`,
|
||||
`feishu_drive`, `file`, `homeassistant`, `image_gen`, `kanban`, `memory`,
|
||||
`messaging`, `moa`, `rl`, `safe`, `search`, `session_search`, `skills`,
|
||||
`spotify`, `terminal`, `todo`, `tts`, `video`, `vision`, `web`, `yuanbao`.
|
||||
|
||||
Enable/disable per platform via `hermes tools` (the curses UI) or the
|
||||
`tools.<platform>.enabled` / `tools.<platform>.disabled` lists in
|
||||
`config.yaml`.
|
||||
|
||||
---
|
||||
|
||||
## Delegation (`delegate_task`)
|
||||
|
||||
`tools/delegate_tool.py` spawns a subagent with an isolated
|
||||
context + terminal session. Synchronous: the parent waits for the
|
||||
child's summary before continuing its own loop — if the parent is
|
||||
interrupted, the child is cancelled.
|
||||
|
||||
Two shapes:
|
||||
|
||||
- **Single:** pass `goal` (+ optional `context`, `toolsets`).
|
||||
- **Batch (parallel):** pass `tasks: [...]` — each gets its own subagent
|
||||
running concurrently. Concurrency is capped by
|
||||
`delegation.max_concurrent_children` (default 3).
|
||||
|
||||
Roles:
|
||||
|
||||
- `role="leaf"` (default) — focused worker. Cannot call `delegate_task`,
|
||||
`clarify`, `memory`, `send_message`, `execute_code`.
|
||||
- `role="orchestrator"` — retains `delegate_task` so it can spawn its
|
||||
own workers. Gated by `delegation.orchestrator_enabled` (default true)
|
||||
and bounded by `delegation.max_spawn_depth` (default 2).
|
||||
|
||||
Key config knobs (under `delegation:` in `config.yaml`):
|
||||
`max_concurrent_children`, `max_spawn_depth`, `child_timeout_seconds`,
|
||||
`orchestrator_enabled`, `subagent_auto_approve`, `inherit_mcp_toolsets`,
|
||||
`max_iterations`.
|
||||
|
||||
Synchronicity rule: delegate_task is **not** durable. For long-running
|
||||
work that must outlive the current turn, use `cronjob` or
|
||||
`terminal(background=True, notify_on_complete=True)` instead.
|
||||
|
||||
---
|
||||
|
||||
## Curator (skill lifecycle)
|
||||
|
||||
Background skill-maintenance system that tracks usage on agent-created
|
||||
skills and auto-archives stale ones. Users never lose skills; archives
|
||||
go to `~/.hermes/skills/.archive/` and are restorable.
|
||||
|
||||
- **Core:** `agent/curator.py` (review loop, auto-transitions, LLM review
|
||||
prompt) + `agent/curator_backup.py` (pre-run tar.gz snapshots).
|
||||
- **CLI:** `hermes_cli/curator.py` wires `hermes curator <verb>` where
|
||||
verbs are: `status`, `run`, `pause`, `resume`, `pin`, `unpin`,
|
||||
`archive`, `restore`, `prune`, `backup`, `rollback`.
|
||||
- **Telemetry:** `tools/skill_usage.py` owns the sidecar
|
||||
`~/.hermes/skills/.usage.json` — per-skill `use_count`, `view_count`,
|
||||
`patch_count`, `last_activity_at`, `state` (active / stale /
|
||||
archived), `pinned`.
|
||||
|
||||
Invariants:
|
||||
- Curator only touches skills with `created_by: "agent"` provenance —
|
||||
bundled + hub-installed skills are off-limits.
|
||||
- Never deletes; max destructive action is archive.
|
||||
- Pinned skills are exempt from every auto-transition and from the
|
||||
LLM review pass.
|
||||
- `skill_manage(action="delete")` refuses pinned skills; patch/edit/
|
||||
write_file/remove_file go through so the agent can keep improving
|
||||
pinned skills.
|
||||
|
||||
Config section (`curator:` in `config.yaml`):
|
||||
`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
|
||||
`archive_after_days`, `backup.*`.
|
||||
|
||||
Full user-facing docs: `website/docs/user-guide/features/curator.md`.
|
||||
|
||||
---
|
||||
|
||||
## Cron (scheduled jobs)
|
||||
|
||||
`cron/jobs.py` (job store) + `cron/scheduler.py` (tick loop). Agents
|
||||
schedule jobs via the `cronjob` tool; users via `hermes cron <verb>`
|
||||
(`list`, `add`, `edit`, `pause`, `resume`, `run`, `remove`) or the
|
||||
`/cron` slash command.
|
||||
|
||||
Supported schedule formats:
|
||||
- Duration: `"30m"`, `"2h"`, `"1d"`
|
||||
- "every" phrase: `"every 2h"`, `"every monday 9am"`
|
||||
- 5-field cron expression: `"0 9 * * *"`
|
||||
- ISO timestamp (one-shot): `"2026-06-01T09:00:00Z"`
|
||||
|
||||
Per-job fields include `skills` (load specific skills), `model` /
|
||||
`provider` overrides, `script` (pre-run data-collection script whose
|
||||
stdout is injected into the prompt; `no_agent=True` turns the script
|
||||
into the entire job), `context_from` (chain job A's last output into
|
||||
job B's prompt), `workdir` (run in a specific directory with its
|
||||
`AGENTS.md`/`CLAUDE.md` loaded), and multi-platform delivery.
|
||||
|
||||
Hardening invariants:
|
||||
- **3-minute hard interrupt** on cron sessions — runaway agent loops
|
||||
cannot monopolize the scheduler.
|
||||
- Catchup window: half the job's period, clamped to 120s–2h.
|
||||
- Grace window: 120s for one-shot jobs whose fire time was missed.
|
||||
- File lock at `~/.hermes/cron/.tick.lock` prevents duplicate ticks
|
||||
across processes.
|
||||
- Cron sessions pass `skip_memory=True` by default; memory providers
|
||||
intentionally do not run during cron.
|
||||
|
||||
Cron deliveries are **not** mirrored into the target gateway session —
|
||||
they land in their own cron session with a header/footer frame so the
|
||||
main conversation's message-role alternation stays intact.
|
||||
|
||||
---
|
||||
|
||||
## Kanban (multi-agent work queue)
|
||||
|
||||
Durable SQLite-backed board that lets multiple profiles / workers
|
||||
collaborate on shared tasks. Users drive it via `hermes kanban <verb>`;
|
||||
workers spawned by the dispatcher drive it via a dedicated `kanban_*`
|
||||
toolset so their schema footprint is zero when they're not inside a
|
||||
kanban task.
|
||||
|
||||
- **CLI:** `hermes_cli/kanban.py` wires `hermes kanban` with verbs
|
||||
`init`, `create`, `list` (alias `ls`), `show`, `assign`, `link`,
|
||||
`unlink`, `comment`, `complete`, `block`, `unblock`, `archive`,
|
||||
`tail`, plus less-commonly-used `watch`, `stats`, `runs`, `log`,
|
||||
`assignees`, `heartbeat`, `notify-*`, `dispatch`, `daemon`, `gc`.
|
||||
- **Worker toolset:** `tools/kanban_tools.py` exposes `kanban_show`,
|
||||
`kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`,
|
||||
`kanban_create`, `kanban_link` — gated by `HERMES_KANBAN_TASK` so
|
||||
the schema only appears for processes actually running as a worker.
|
||||
- **Dispatcher:** long-lived loop that (default every 60s) reclaims
|
||||
stale claims, promotes ready tasks, atomically claims them, and spawns
|
||||
assigned profiles. Runs **inside the gateway** by default via
|
||||
`kanban.dispatch_in_gateway: true`.
|
||||
- **Plugin assets:** `plugins/kanban/dashboard/` (web UI) +
|
||||
`plugins/kanban/systemd/` (`hermes-kanban-dispatcher.service` for
|
||||
standalone dispatcher deployment).
|
||||
|
||||
Isolation model:
|
||||
- **Board** is the hard boundary — workers are spawned with
|
||||
`HERMES_KANBAN_BOARD` pinned in their env so they can't see other
|
||||
boards.
|
||||
- **Tenant** is a soft namespace *within* a board — one specialist
|
||||
fleet can serve multiple businesses with workspace-path + memory-key
|
||||
isolation.
|
||||
- After ~5 consecutive spawn failures on the same task the dispatcher
|
||||
auto-blocks it to prevent spin loops.
|
||||
|
||||
Full user-facing docs: `website/docs/user-guide/features/kanban.md`.
|
||||
`metadata.hermes.config` (config.yaml settings the skill needs — stored
|
||||
under `skills.config.<key>`, prompted during setup, injected at load time).
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -494,7 +494,7 @@ branding:
|
||||
agent_name: "My Agent"
|
||||
welcome: "Welcome message"
|
||||
response_label: " ⚔ Agent "
|
||||
prompt_symbol: "⚔"
|
||||
prompt_symbol: "⚔ ❯ "
|
||||
|
||||
tool_prefix: "╎" # Tool output line prefix
|
||||
```
|
||||
|
||||
26
Dockerfile
@@ -14,7 +14,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential curl nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
@@ -28,40 +28,20 @@ WORKDIR /opt/hermes
|
||||
# ---------- Layer-cached dependency install ----------
|
||||
# Copy only package manifests first so npm install + Playwright are cached
|
||||
# unless the lockfiles themselves change.
|
||||
#
|
||||
# ui-tui/packages/hermes-ink/ is copied IN FULL (not just its manifests)
|
||||
# because it is referenced as a `file:` workspace dependency from
|
||||
# ui-tui/package.json. Copying the tree up front lets npm resolve the
|
||||
# workspace to real content instead of stopping at a bare package.json.
|
||||
COPY package.json package-lock.json ./
|
||||
COPY web/package.json web/package-lock.json web/
|
||||
COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
|
||||
COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/
|
||||
|
||||
# `npm_config_install_links=false` forces npm to install `file:` deps as
|
||||
# symlinks (the npm 10+ default) even on Debian's older bundled npm 9.x,
|
||||
# which defaults to `install-links=true` and installs file deps as *copies*.
|
||||
# The host-side package-lock.json is generated with a newer npm that uses
|
||||
# symlinks, so an install-as-copy produces a hidden node_modules/.package-lock.json
|
||||
# that permanently disagrees with the root lock on the @hermes/ink entry.
|
||||
# That disagreement trips the TUI launcher's `_tui_need_npm_install()`
|
||||
# check on every startup and triggers a runtime `npm install` that then
|
||||
# fails with EACCES (node_modules/ is root-owned from build time).
|
||||
ENV npm_config_install_links=false
|
||||
|
||||
RUN npm install --prefer-offline --no-audit && \
|
||||
npx playwright install --with-deps chromium --only-shell && \
|
||||
(cd web && npm install --prefer-offline --no-audit) && \
|
||||
(cd ui-tui && npm install --prefer-offline --no-audit) && \
|
||||
npm cache clean --force
|
||||
|
||||
# ---------- Source code ----------
|
||||
# .dockerignore excludes node_modules, so the installs above survive.
|
||||
COPY --chown=hermes:hermes . .
|
||||
|
||||
# Build browser dashboard and terminal UI assets.
|
||||
RUN cd web && npm run build && \
|
||||
cd ../ui-tui && npm run build
|
||||
# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
|
||||
RUN cd web && npm run build
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
|
||||
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
|
||||
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
|
||||
<a href="README.zh-CN.md"><img src="https://img.shields.io/badge/Lang-中文-red?style=for-the-badge" alt="中文"></a>
|
||||
</p>
|
||||
|
||||
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
|
||||
@@ -22,7 +21,7 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open
|
||||
<tr><td><b>A closed learning loop</b></td><td>Agent-curated memory with periodic nudges. Autonomous skill creation after complex tasks. Skills self-improve during use. FTS5 session search with LLM summarization for cross-session recall. <a href="https://github.com/plastic-labs/honcho">Honcho</a> dialectic user modeling. Compatible with the <a href="https://agentskills.io">agentskills.io</a> open standard.</td></tr>
|
||||
<tr><td><b>Scheduled automations</b></td><td>Built-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended.</td></tr>
|
||||
<tr><td><b>Delegates and parallelizes</b></td><td>Spawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns.</td></tr>
|
||||
<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Seven terminal backends — local, Docker, SSH, Singularity, Modal, Daytona, and Vercel Sandbox. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
|
||||
<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Six terminal backends — local, Docker, SSH, Daytona, Singularity, and Modal. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
|
||||
<tr><td><b>Research-ready</b></td><td>Batch trajectory generation, Atropos RL environments, trajectory compression for training the next generation of tool-calling models.</td></tr>
|
||||
</table>
|
||||
|
||||
|
||||
186
README.zh-CN.md
@@ -1,186 +0,0 @@
|
||||
<p align="center">
|
||||
<img src="assets/banner.png" alt="Hermes Agent" width="100%">
|
||||
</p>
|
||||
|
||||
# Hermes Agent ☤
|
||||
|
||||
<p align="center">
|
||||
<a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
|
||||
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
|
||||
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
|
||||
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
|
||||
<a href="README.md"><img src="https://img.shields.io/badge/Lang-English-lightgrey?style=for-the-badge" alt="English"></a>
|
||||
</p>
|
||||
|
||||
**由 [Nous Research](https://nousresearch.com) 构建的自进化 AI 代理。** 它是唯一内置学习闭环的智能代理——从经验中创建技能,在使用中改进技能,主动持久化知识,搜索过往对话,并在跨会话中逐步构建对你的深度理解。可以在 $5 的 VPS 上运行,也可以在 GPU 集群上运行,或者使用几乎零成本的 Serverless 基础设施。它不绑定你的笔记本——你可以在 Telegram 上与它对话,而它在云端 VM 上工作。
|
||||
|
||||
支持任意模型——[Nous Portal](https://portal.nousresearch.com)、[OpenRouter](https://openrouter.ai)(200+ 模型)、[NVIDIA NIM](https://build.nvidia.com)(Nemotron)、[小米 MiMo](https://platform.xiaomimimo.com)、[z.ai/GLM](https://z.ai)、[Kimi/Moonshot](https://platform.moonshot.ai)、[MiniMax](https://www.minimax.io)、[Hugging Face](https://huggingface.co)、OpenAI,或自定义端点。使用 `hermes model` 即可切换——无需改代码,无锁定。
|
||||
|
||||
<table>
|
||||
<tr><td><b>真正的终端界面</b></td><td>完整的 TUI,支持多行编辑、斜杠命令自动补全、对话历史、中断重定向和流式工具输出。</td></tr>
|
||||
<tr><td><b>随你所在</b></td><td>Telegram、Discord、Slack、WhatsApp、Signal 和 CLI——全部从单个网关进程运行。语音备忘录转写、跨平台对话连续性。</td></tr>
|
||||
<tr><td><b>闭环学习</b></td><td>代理管理记忆并定期自我提醒。复杂任务后自动创建技能。技能在使用中自我改进。FTS5 会话搜索配合 LLM 摘要实现跨会话回溯。<a href="https://github.com/plastic-labs/honcho">Honcho</a> 辩证式用户建模。兼容 <a href="https://agentskills.io">agentskills.io</a> 开放标准。</td></tr>
|
||||
<tr><td><b>定时自动化</b></td><td>内置 cron 调度器,支持向任何平台投递。日报、夜间备份、周审计——全部用自然语言描述,无人值守运行。</td></tr>
|
||||
<tr><td><b>委派与并行</b></td><td>生成隔离子代理处理并行工作流。编写 Python 脚本通过 RPC 调用工具,将多步管道压缩为零上下文开销的轮次。</td></tr>
|
||||
<tr><td><b>随处运行</b></td><td>六种终端后端——本地、Docker、SSH、Daytona、Singularity 和 Modal。Daytona 和 Modal 提供 Serverless 持久化——代理环境空闲时休眠、按需唤醒,空闲期间几乎零成本。$5 VPS 或 GPU 集群都能跑。</td></tr>
|
||||
<tr><td><b>研究就绪</b></td><td>批量轨迹生成、Atropos RL 环境、轨迹压缩——用于训练下一代工具调用模型。</td></tr>
|
||||
</table>
|
||||
|
||||
---
|
||||
|
||||
## 快速安装
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
|
||||
```
|
||||
|
||||
支持 Linux、macOS、WSL2 和 Android (Termux)。安装程序会自动处理平台特定的配置。
|
||||
|
||||
> **Android / Termux:** 已测试的手动安装路径请参考 [Termux 指南](https://hermes-agent.nousresearch.com/docs/getting-started/termux)。在 Termux 上,Hermes 会安装精选的 `.[termux]` 扩展,因为完整的 `.[all]` 扩展会拉取 Android 不兼容的语音依赖。
|
||||
>
|
||||
> **Windows:** 原生 Windows 不受支持。请安装 [WSL2](https://learn.microsoft.com/zh-cn/windows/wsl/install) 并运行上述命令。
|
||||
|
||||
安装后:
|
||||
|
||||
```bash
|
||||
source ~/.bashrc # 重新加载 shell(或: source ~/.zshrc)
|
||||
hermes # 开始对话!
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 快速入门
|
||||
|
||||
```bash
|
||||
hermes # 交互式 CLI — 开始对话
|
||||
hermes model # 选择 LLM 提供商和模型
|
||||
hermes tools # 配置启用的工具
|
||||
hermes config set # 设置单个配置项
|
||||
hermes gateway # 启动消息网关(Telegram、Discord 等)
|
||||
hermes setup # 运行完整设置向导(一次性配置所有内容)
|
||||
hermes claw migrate # 从 OpenClaw 迁移(如果来自 OpenClaw)
|
||||
hermes update # 更新到最新版本
|
||||
hermes doctor # 诊断问题
|
||||
```
|
||||
|
||||
📖 **[完整文档 →](https://hermes-agent.nousresearch.com/docs/)**
|
||||
|
||||
## CLI 与消息平台 快速对照
|
||||
|
||||
Hermes 有两种入口:用 `hermes` 启动终端 UI,或运行网关从 Telegram、Discord、Slack、WhatsApp、Signal 或 Email 与之对话。进入对话后,许多斜杠命令在两种界面中通用。
|
||||
|
||||
| 操作 | CLI | 消息平台 |
|
||||
|------|-----|----------|
|
||||
| 开始对话 | `hermes` | 运行 `hermes gateway setup` + `hermes gateway start`,然后给机器人发消息 |
|
||||
| 开始新对话 | `/new` 或 `/reset` | `/new` 或 `/reset` |
|
||||
| 更换模型 | `/model [provider:model]` | `/model [provider:model]` |
|
||||
| 设置人格 | `/personality [name]` | `/personality [name]` |
|
||||
| 重试或撤销上一轮 | `/retry`、`/undo` | `/retry`、`/undo` |
|
||||
| 压缩上下文 / 查看用量 | `/compress`、`/usage`、`/insights [--days N]` | `/compress`、`/usage`、`/insights [days]` |
|
||||
| 浏览技能 | `/skills` 或 `/<skill-name>` | `/skills` 或 `/<skill-name>` |
|
||||
| 中断当前工作 | `Ctrl+C` 或发送新消息 | `/stop` 或发送新消息 |
|
||||
| 平台特定状态 | `/platforms` | `/status`、`/sethome` |
|
||||
|
||||
完整命令列表请参阅 [CLI 指南](https://hermes-agent.nousresearch.com/docs/user-guide/cli) 和 [消息网关指南](https://hermes-agent.nousresearch.com/docs/user-guide/messaging)。
|
||||
|
||||
---
|
||||
|
||||
## 文档
|
||||
|
||||
所有文档位于 **[hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)**:
|
||||
|
||||
| 章节 | 内容 |
|
||||
|------|------|
|
||||
| [快速开始](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart) | 安装 → 设置 → 2 分钟内开始首次对话 |
|
||||
| [CLI 使用](https://hermes-agent.nousresearch.com/docs/user-guide/cli) | 命令、快捷键、人格、会话 |
|
||||
| [配置](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | 配置文件、提供商、模型、所有选项 |
|
||||
| [消息网关](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram、Discord、Slack、WhatsApp、Signal、Home Assistant |
|
||||
| [安全](https://hermes-agent.nousresearch.com/docs/user-guide/security) | 命令审批、DM 配对、容器隔离 |
|
||||
| [工具与工具集](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) | 40+ 工具、工具集系统、终端后端 |
|
||||
| [技能系统](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills) | 过程记忆、技能中心、创建技能 |
|
||||
| [记忆](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory) | 持久记忆、用户画像、最佳实践 |
|
||||
| [MCP 集成](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) | 连接任意 MCP 服务器扩展能力 |
|
||||
| [定时调度](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron) | 定时任务与平台投递 |
|
||||
| [上下文文件](https://hermes-agent.nousresearch.com/docs/user-guide/features/context-files) | 影响每次对话的项目上下文 |
|
||||
| [架构](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture) | 项目结构、代理循环、关键类 |
|
||||
| [贡献](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) | 开发设置、PR 流程、代码风格 |
|
||||
| [CLI 参考](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) | 所有命令和标志 |
|
||||
| [环境变量](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) | 完整环境变量参考 |
|
||||
|
||||
---
|
||||
|
||||
## 从 OpenClaw 迁移
|
||||
|
||||
如果你来自 OpenClaw,Hermes 可以自动导入你的设置、记忆、技能和 API 密钥。
|
||||
|
||||
**首次安装时:** 安装向导(`hermes setup`)会自动检测 `~/.openclaw` 并在配置开始前提供迁移选项。
|
||||
|
||||
**安装后任意时间:**
|
||||
|
||||
```bash
|
||||
hermes claw migrate # 交互式迁移(完整预设)
|
||||
hermes claw migrate --dry-run # 预览将要迁移的内容
|
||||
hermes claw migrate --preset user-data # 仅迁移用户数据,不含密钥
|
||||
hermes claw migrate --overwrite # 覆盖已有冲突
|
||||
```
|
||||
|
||||
导入内容:
|
||||
- **SOUL.md** — 人格文件
|
||||
- **记忆** — MEMORY.md 和 USER.md 条目
|
||||
- **技能** — 用户创建的技能 → `~/.hermes/skills/openclaw-imports/`
|
||||
- **命令白名单** — 审批模式
|
||||
- **消息设置** — 平台配置、允许用户、工作目录
|
||||
- **API 密钥** — 白名单中的密钥(Telegram、OpenRouter、OpenAI、Anthropic、ElevenLabs)
|
||||
- **TTS 资产** — 工作区音频文件
|
||||
- **工作区指令** — AGENTS.md(使用 `--workspace-target`)
|
||||
|
||||
使用 `hermes claw migrate --help` 查看所有选项,或使用 `openclaw-migration` 技能进行交互式代理引导迁移(含 dry-run 预览)。
|
||||
|
||||
---
|
||||
|
||||
## 贡献
|
||||
|
||||
欢迎贡献!请参阅 [贡献指南](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) 了解开发设置、代码风格和 PR 流程。
|
||||
|
||||
贡献者快速开始——克隆并使用 `setup-hermes.sh`:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/NousResearch/hermes-agent.git
|
||||
cd hermes-agent
|
||||
./setup-hermes.sh # 安装 uv、创建 venv、安装 .[all]、创建符号链接 ~/.local/bin/hermes
|
||||
./hermes # 自动检测 venv,无需先 source
|
||||
```
|
||||
|
||||
手动安装(等效于上述命令):
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
> **RL 训练(可选):** 如需参与 RL/Tinker-Atropos 集成开发:
|
||||
> ```bash
|
||||
> git submodule update --init tinker-atropos
|
||||
> uv pip install -e "./tinker-atropos"
|
||||
> ```
|
||||
|
||||
---
|
||||
|
||||
## 社区
|
||||
|
||||
- 💬 [Discord](https://discord.gg/NousResearch)
|
||||
- 📚 [技能中心](https://agentskills.io)
|
||||
- 🐛 [问题反馈](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- 💡 [讨论区](https://github.com/NousResearch/hermes-agent/discussions)
|
||||
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — 社区微信桥接:在同一微信账号上运行 Hermes Agent 和 OpenClaw。
|
||||
|
||||
---
|
||||
|
||||
## 许可证
|
||||
|
||||
MIT — 详见 [LICENSE](LICENSE)。
|
||||
|
||||
由 [Nous Research](https://nousresearch.com) 构建。
|
||||
@@ -1,505 +0,0 @@
|
||||
# Hermes Agent v0.12.0 (v2026.4.30)
|
||||
|
||||
**Release Date:** April 30, 2026
|
||||
**Since v0.11.0:** 1,096 commits · 550 merged PRs · 1,270 files changed · 217,776 insertions · 213 community contributors (including co-authors)
|
||||
|
||||
> The Curator release — Hermes Agent now maintains itself. An autonomous background Curator grades, prunes, and consolidates your skill library on its own schedule. The self-improvement loop that reviews what to save got a substantial upgrade. Four new inference providers, an 18th messaging platform, a 19th via Teams plugin, native Spotify + Google Meet integrations, ComfyUI and TouchDesigner-MCP moved from optional to bundled-by-default, and a ~57% cut to visible TUI cold start.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Autonomous Curator** — `hermes curator` runs as a background agent on the gateway's cron ticker (7-day cycle default). It grades your skill library, consolidates related skills, prunes dead ones, and writes per-run reports to `logs/curator/run.json` + `REPORT.md`. Archived skills are classified consolidated-vs-pruned via model + heuristic. Defense-in-depth gates protect bundled/hub skills from mutation. Unified under `auxiliary.curator` — pick the curator's model in `hermes model`, manage it from the dashboard. `hermes curator status` ranks skills by usage (most-used / least-used). ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277), [#17307](https://github.com/NousResearch/hermes-agent/pull/17307), [#17941](https://github.com/NousResearch/hermes-agent/pull/17941), [#17868](https://github.com/NousResearch/hermes-agent/pull/17868), [#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
|
||||
|
||||
- **Self-improvement loop — substantially upgraded** — The background review fork (the core of Hermes' self-improvement: after each turn it decides what memories/skills to save or update) is now class-first (rubric-based rather than free-form), active-update biased (prefers the skill the agent just loaded), handles `references/`/`templates/` sub-files, and properly inherits the parent's live runtime (provider, model, credentials actually propagate). Restricted to memory + skills toolsets so it can't sprawl. Memory providers shut down cleanly. Prior-turn tool messages excluded from the summary so the fork sees a clean context. ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026), [#17213](https://github.com/NousResearch/hermes-agent/pull/17213), [#16099](https://github.com/NousResearch/hermes-agent/pull/16099), [#16569](https://github.com/NousResearch/hermes-agent/pull/16569), [#16204](https://github.com/NousResearch/hermes-agent/pull/16204), [#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
|
||||
|
||||
- **Skill integrations — major expansion** — **ComfyUI v5** with official CLI + REST + hardware-gated local install, moved from optional to **built-in by default** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734)). **TouchDesigner-MCP** bundled by default, expanded with GLSL, post-FX, audio, geometry, and 9 new reference docs ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753), [#16624](https://github.com/NousResearch/hermes-agent/pull/16624), [#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @kshitijk4poor + @SHL0MS). **Humanizer** skill ports a text-cleaner that strips AI-isms ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787)). **claude-design** HTML artifact skill + design-md (Google DESIGN.md spec) + airtable salvage + `skill_manage` edits in `external_dirs` + direct-URL skill install + `/reload-skills` slash command. ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358), [#14876](https://github.com/NousResearch/hermes-agent/pull/14876), [#16291](https://github.com/NousResearch/hermes-agent/pull/16291), [#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#16323](https://github.com/NousResearch/hermes-agent/pull/16323), [#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
|
||||
- **LM Studio — first-class provider** — upgraded from a custom-endpoint alias to a full-blown native provider: dedicated auth, `hermes doctor` checks, reasoning transport, live `/models` listing. (Salvage of @kshitijk4poor's #17061.) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
|
||||
|
||||
- **Four more inference providers** — **GMI Cloud** (first-class, salvage of #11955 — @isaachuangGMICLOUD), **Azure AI Foundry** with auto-detection, **MiniMax OAuth** with PKCE browser flow (salvage #15203), **Tencent Tokenhub** (salvage of #16860). ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663), [#15845](https://github.com/NousResearch/hermes-agent/pull/15845), [#17524](https://github.com/NousResearch/hermes-agent/pull/17524), [#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
|
||||
|
||||
- **Pluggable gateway platforms + Microsoft Teams** — the gateway is now a plugin host. Drop-in messaging adapters live outside the core, and Microsoft Teams is the first plugin-shipped platform. (Salvage of #17664.) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751), [#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
|
||||
- **Tencent 元宝 (Yuanbao) — 18th messaging platform** — native gateway adapter with text + media delivery. ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424))
|
||||
|
||||
- **Spotify — native tools + bundled skill + wizard** — 7 tools (play, search, queue, playlists, devices) behind PKCE OAuth, interactive setup wizard, bundled skill, surfacing in `hermes tools`, cron usage documented. ([#15121](https://github.com/NousResearch/hermes-agent/pull/15121), [#15130](https://github.com/NousResearch/hermes-agent/pull/15130), [#15154](https://github.com/NousResearch/hermes-agent/pull/15154), [#15180](https://github.com/NousResearch/hermes-agent/pull/15180))
|
||||
|
||||
- **Google Meet plugin** — join calls, transcribe, speak, follow up. Realtime OpenAI transport + Node bot server, full pipeline bundled as a plugin. ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364))
|
||||
|
||||
- **`hermes -z` one-shot mode + `hermes update --check`** — non-interactive `hermes -z <prompt>` with `--model`/`--provider`/`HERMES_INFERENCE_MODEL`. `hermes update --check` preflight. Opt-in pre-update HERMES_HOME backup. ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702), [#15704](https://github.com/NousResearch/hermes-agent/pull/15704), [#15841](https://github.com/NousResearch/hermes-agent/pull/15841), [#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
|
||||
|
||||
- **Models dashboard tab + in-browser model config** — rich per-model analytics, switch main + auxiliary models from the dashboard. ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745), [#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
|
||||
|
||||
- **Remote model catalog manifest** — OpenRouter + Nous Portal model catalogs are now pulled from a remote manifest so new models show up without a release. ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
|
||||
|
||||
- **Native multimodal image routing** — images now route based on the model's actual vision capability rather than provider defaults. ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
|
||||
|
||||
- **Gateway media parity** — native multi-image sending across Telegram, Discord, Slack, Mattermost, Email, and Signal; centralized audio routing with FLAC support + Telegram document fallback. ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909), [#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
|
||||
- **TUI catches up to (and past) the classic CLI** — LaTeX rendering (@austinpickett), `/reload` .env hot-reload, pluggable busy-indicator styles (@OutThisLife, #13610), opt-in auto-resume of last session, expanded light-terminal auto-detection, session delete from `/resume` picker with `d`, modified mouse-wheel line scroll, and a `/mouse` toggle that kills ConPTY's phantom mouse injection (@kevin-ho). ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175), [#17286](https://github.com/NousResearch/hermes-agent/pull/17286), [#17150](https://github.com/NousResearch/hermes-agent/pull/17150), [#17130](https://github.com/NousResearch/hermes-agent/pull/17130), [#17113](https://github.com/NousResearch/hermes-agent/pull/17113), [#17668](https://github.com/NousResearch/hermes-agent/pull/17668), [#17669](https://github.com/NousResearch/hermes-agent/pull/17669), [#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
|
||||
|
||||
- **Observability + achievements plugins** — bundled Langfuse observability plugin (salvage #16845) + bundled hermes-achievements plugin that scans full session history. ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917), [#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
|
||||
|
||||
- **TTS provider registry + Piper local TTS** — pluggable `tts.providers.<name>` registry; Piper ships as a native local TTS provider. (Closes #8508.) ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843), [#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
|
||||
|
||||
- **Vercel Sandbox backend** — Vercel sandboxes as an execute_code/terminal backend (@kshitijk4poor). ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
|
||||
|
||||
- **Secret redaction off by default** — default flipped to off. Prevents the long-standing patch-corruption incidents where fake secret-shaped substrings mangled tool outputs. Opt in via `redaction.enabled: true` when you need it. ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
|
||||
|
||||
- **Cold-start performance** — visible TUI cold start cut **~57%** via lazy agent init (@OutThisLife), lazy imports of OpenAI / Anthropic / Firecrawl / account_usage, mtime-cached `load_config()`, memoized `get_tool_definitions()` with TTL-cached `check_fn` results, precompiled dangerous-command patterns. ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190), [#17046](https://github.com/NousResearch/hermes-agent/pull/17046), [#17041](https://github.com/NousResearch/hermes-agent/pull/17041), [#17098](https://github.com/NousResearch/hermes-agent/pull/17098), [#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
|
||||
- **Configurable prompt cache TTL** — `prompt_caching.cache_ttl` (5m default, 1h opt-in — cost savings for bursty sessions that keep cache warm). Salvage of #12659. ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
|
||||
|
||||
---
|
||||
|
||||
## 🧠 Autonomous Curator & Self-Improvement Loop
|
||||
|
||||
### Curator — autonomous skill maintenance
|
||||
- **`hermes curator` as a background agent** — runs on the gateway's cron ticker, 7-day cycle by default, umbrella-first prompt, inherits parent config, unbounded iterations ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277) — issue #7816)
|
||||
- **Per-run reports** — `logs/curator/run.json` + `REPORT.md` per cycle ([#17307](https://github.com/NousResearch/hermes-agent/pull/17307))
|
||||
- **Consolidated vs pruned classification** — archived skills split with model + heuristic ([#17941](https://github.com/NousResearch/hermes-agent/pull/17941))
|
||||
- **`hermes curator status`** — ranks skills by usage, shows most-used and least-used ([#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
|
||||
- **Unified under `auxiliary.curator`** — pick the model in `hermes model`, configure from the dashboard ([#17868](https://github.com/NousResearch/hermes-agent/pull/17868))
|
||||
- **Documentation** — dedicated curator feature page on the docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
|
||||
- Fix: seed defaults on update, create `logs/curator/` directory, defer fire import ([#17927](https://github.com/NousResearch/hermes-agent/pull/17927))
|
||||
- Fix: scan nested archive subdirs in `restore_skill` (@0xDevNinja) ([#17951](https://github.com/NousResearch/hermes-agent/pull/17951))
|
||||
- Fix: use actual skill activity in curator status (@y0shua1ee) ([#17953](https://github.com/NousResearch/hermes-agent/pull/17953))
|
||||
- Fix: `skill_manage` refuses writes on pinned skills; pinning now blocks curator writes ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562), [#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
|
||||
- Fix: `bump_use()` wired into skill invocation + preload + skill_view (salvage #17782) ([#17932](https://github.com/NousResearch/hermes-agent/pull/17932))
|
||||
|
||||
### Self-improvement loop (background review fork)
|
||||
- **Class-first skill-review prompt** — rubric-based grading rather than free-form "should this update" ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026))
|
||||
- **Active-update bias** — prefers updating skills the agent just loaded, handles `references/` + `templates/` sub-files ([#17213](https://github.com/NousResearch/hermes-agent/pull/17213))
|
||||
- **Fork inherits parent's live runtime** — provider, model, credentials actually propagate now ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
|
||||
- **Scoped toolsets** — review fork restricted to memory + skills (no shell, no web) ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
|
||||
- **Clean shutdown** — background review memory providers exit properly (salvage #15289) ([#16204](https://github.com/NousResearch/hermes-agent/pull/16204))
|
||||
- **Clean context** — prior-history tool messages excluded from review summary (salvage #14967) ([#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill integrations — newly bundled or promoted
|
||||
- **ComfyUI v5** — official CLI + REST + hardware-gated local install; **moved from optional to built-in** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734), [#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
|
||||
- **TouchDesigner-MCP** — **bundled by default** ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753) — @kshitijk4poor), expanded with GLSL, post-FX, audio, geometry references ([#16624](https://github.com/NousResearch/hermes-agent/pull/16624)), 9 new reference docs ([#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @SHL0MS)
|
||||
- **Humanizer** — strips AI-isms from text ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787))
|
||||
- **claude-design** — HTML artifact skill with disambiguation from other design skills ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358))
|
||||
- **design-md** — Google's DESIGN.md spec skill ([#14876](https://github.com/NousResearch/hermes-agent/pull/14876))
|
||||
- **airtable** — salvaged skill + skill API keys wired into `.env` (#15838) ([#16291](https://github.com/NousResearch/hermes-agent/pull/16291))
|
||||
- **pretext** — creative browser demos with @chenglou/pretext ([#17259](https://github.com/NousResearch/hermes-agent/pull/17259))
|
||||
- **spike** + **sketch** — throwaway experiments + HTML mockups, adapted from gsd-build ([#17421](https://github.com/NousResearch/hermes-agent/pull/17421))
|
||||
|
||||
### Skills UX
|
||||
- **Install skills from a direct HTTP(S) URL** — `hermes skills install <url>` ([#16323](https://github.com/NousResearch/hermes-agent/pull/16323))
|
||||
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
- **`hermes skills list`** shows enabled/disabled status ([#16129](https://github.com/NousResearch/hermes-agent/pull/16129))
|
||||
- **`skill_manage` refuses writes on pinned skills** ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562))
|
||||
- **`skill_manage` edits external_dirs skills in place** (salvage #9966) ([#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#17289](https://github.com/NousResearch/hermes-agent/pull/17289))
|
||||
- Fix: inline-shell rendering in `skill_view` ([#15376](https://github.com/NousResearch/hermes-agent/pull/15376))
|
||||
- Fix: exclude `.archive/` from skill index walk (salvage #17639) ([#17931](https://github.com/NousResearch/hermes-agent/pull/17931))
|
||||
- Fix: dedicated docs page per bundled + optional skill ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929))
|
||||
- Fix: `google-workspace` shared HERMES_HOME helper + ship deps as optional extra ([#15405](https://github.com/NousResearch/hermes-agent/pull/15405))
|
||||
- Fix: auto-wrap ASCII-art code blocks in generated skill pages ([#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
|
||||
- Point agent at `hermes-agent` skill + docs site for Hermes questions ([#16535](https://github.com/NousResearch/hermes-agent/pull/16535))
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Provider & Model Support
|
||||
|
||||
#### New providers
|
||||
- **GMI Cloud** — first-class API-key provider on par with Arcee/Kilocode/Xiaomi (salvage of #11955 — @isaachuangGMICLOUD) ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663))
|
||||
- **Azure AI Foundry** — auto-detection, full wiring ([#15845](https://github.com/NousResearch/hermes-agent/pull/15845))
|
||||
- **LM Studio** — upgraded from custom-endpoint alias to first-class provider: dedicated auth, doctor checks, reasoning transport, live `/models` (salvage of #17061 — @kshitijk4poor) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
|
||||
- **MiniMax OAuth** — PKCE browser flow with full OAuth integration (salvage #15203) ([#17524](https://github.com/NousResearch/hermes-agent/pull/17524))
|
||||
- **Tencent Tokenhub** — new provider (salvage of #16860) ([#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
|
||||
|
||||
#### Model catalog
|
||||
- **Remote model catalog manifest** — OpenRouter + Nous Portal catalogs pulled from remote manifest so new models show up without a release ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
|
||||
- `openai/gpt-5.5` and `gpt-5.5-pro` added to OpenRouter + Nous Portal ([#15343](https://github.com/NousResearch/hermes-agent/pull/15343))
|
||||
- `deepseek-v4-pro` and `deepseek-v4-flash` added ([#14934](https://github.com/NousResearch/hermes-agent/pull/14934))
|
||||
- `qwen3.6-plus` added to Alibaba-supported models ([#16896](https://github.com/NousResearch/hermes-agent/pull/16896))
|
||||
- Gemini free-tier keys blocked at setup with 429 guidance surfacing ([#15100](https://github.com/NousResearch/hermes-agent/pull/15100))
|
||||
|
||||
#### Model configuration
|
||||
- **Configurable `prompt_caching.cache_ttl`** — 5m default, 1h opt-in (salvage #12659) ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
|
||||
- `/fast` whitelist broadened to all OpenAI + Anthropic models ([#16883](https://github.com/NousResearch/hermes-agent/pull/16883))
|
||||
- `auxiliary.extra_body.reasoning` translates into Codex Responses API ([#17004](https://github.com/NousResearch/hermes-agent/pull/17004))
|
||||
- `hermes fallback` command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **Native multimodal image routing** — based on model vision capability, not provider defaults ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
|
||||
- **Delegate `child_timeout_seconds` default bumped to 600s** ([#14809](https://github.com/NousResearch/hermes-agent/pull/14809))
|
||||
- **Diagnostic dump when subagent times out with 0 API calls** ([#15105](https://github.com/NousResearch/hermes-agent/pull/15105))
|
||||
- **Gateway busts cached agent on compression/context_length config edits** ([#17008](https://github.com/NousResearch/hermes-agent/pull/17008))
|
||||
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
|
||||
- `/reload-mcp` awareness — rebuild cached agents + prompt-cache cost confirmation ([#17729](https://github.com/NousResearch/hermes-agent/pull/17729))
|
||||
- Fix: repair CamelCase + `_tool` suffix tool-call emissions ([#15124](https://github.com/NousResearch/hermes-agent/pull/15124))
|
||||
- Fix: retry on `json.JSONDecodeError` instead of treating as local validation error ([#15107](https://github.com/NousResearch/hermes-agent/pull/15107))
|
||||
- Fix: handle unescaped control chars in `tool_call.arguments` ([#15356](https://github.com/NousResearch/hermes-agent/pull/15356))
|
||||
- Fix: ordering fix in `_copy_reasoning_content_for_api` — cross-provider reasoning isolation (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749))
|
||||
- Fix: inject empty `reasoning_content` for DeepSeek/Kimi `tool_calls` unconditionally (@Zjianru) ([#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
|
||||
- Fix: persist streamed `reasoning_content` on assistant turns (#16844) ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
|
||||
- Fix: cancel coroutine on timeout so worker thread exits; full traceback on tool failure ([#17428](https://github.com/NousResearch/hermes-agent/pull/17428))
|
||||
- Fix: isolate `get_tool_definitions` quiet_mode cache + dedup LCM injection (#17335) ([#17889](https://github.com/NousResearch/hermes-agent/pull/17889))
|
||||
- Fix: serialize concurrent `hermes_tools` RPC calls from `execute_code` (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- Fix: rename `[SYSTEM:` → `[IMPORTANT:` in all user-injected markers (dodges Azure content filter) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
|
||||
### Compression
|
||||
- **Retry summary on main model for unknown errors before giving up** ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774))
|
||||
- **Notify users when configured aux model fails even if main-model fallback recovers** ([#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
|
||||
- `/compress` wrapped in `_busy_command` to block input during compression ([#15388](https://github.com/NousResearch/hermes-agent/pull/15388))
|
||||
- Fix: reserve system + tools headroom when aux binds threshold ([#15631](https://github.com/NousResearch/hermes-agent/pull/15631))
|
||||
- Fix: use text-char sum for multimodal token estimation in `_find_tail_cut_by_tokens` ([#16369](https://github.com/NousResearch/hermes-agent/pull/16369))
|
||||
|
||||
### Session, Memory & State
|
||||
- **Trigram FTS5 index for CJK search, replace LIKE fallback** (@alt-glitch) ([#16651](https://github.com/NousResearch/hermes-agent/pull/16651))
|
||||
- **Index `tool_name` + `tool_calls` in FTS5, with repair + migration** (salvages #16866) ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
|
||||
- **Checkpoints: auto-prune orphan and stale shadow repos at startup** ([#16303](https://github.com/NousResearch/hermes-agent/pull/16303))
|
||||
- **Memory providers notified on mid-process session_id rotation** (#6672) ([#17409](https://github.com/NousResearch/hermes-agent/pull/17409))
|
||||
- Fix: quote underscored terms in FTS5 query sanitization ([#16915](https://github.com/NousResearch/hermes-agent/pull/16915))
|
||||
- Fix: resolve viking_read 500/412 on file URIs + pseudo-summary URIs (salvage #5886) ([#17869](https://github.com/NousResearch/hermes-agent/pull/17869))
|
||||
- Fix: skip external-provider sync on interrupted turns ([#15395](https://github.com/NousResearch/hermes-agent/pull/15395))
|
||||
- Fix: close embedded Hindsight async client cleanly (salvage #14605) ([#16209](https://github.com/NousResearch/hermes-agent/pull/16209))
|
||||
- Fix: pass session transcript to `shutdown_memory_provider` on gateway + CLI (#15165) ([#16571](https://github.com/NousResearch/hermes-agent/pull/16571))
|
||||
- Fix: write-origin metadata seam ([#15346](https://github.com/NousResearch/hermes-agent/pull/15346))
|
||||
- Fix: preserve symlinks during atomic file writes ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
|
||||
- Refactor: remove `flush_memories` entirely ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
|
||||
|
||||
### Auxiliary models
|
||||
- Fix: surface auxiliary failures in UI (previously silent) ([#15324](https://github.com/NousResearch/hermes-agent/pull/15324))
|
||||
- Fix: surface title-gen auxiliary failures instead of silently dropping ([#16371](https://github.com/NousResearch/hermes-agent/pull/16371))
|
||||
- Fix: generalize unsupported-parameter detector and harden `max_tokens` retry ([#15633](https://github.com/NousResearch/hermes-agent/pull/15633))
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **Microsoft Teams (19th platform)** — shipped as a plugin, plus an xdist collision guard ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- **Yuanbao (Tencent 元宝, 18th platform)** — native adapter with text + media delivery ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424), [#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
|
||||
|
||||
### Pluggable Gateway Platforms
|
||||
- **Drop-in messaging adapters** — the gateway is now a plugin host for platforms (salvage of #17664) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
|
||||
|
||||
### Telegram
|
||||
- **Chat allowlists for groups and forums** (@web3blind) ([#15027](https://github.com/NousResearch/hermes-agent/pull/15027))
|
||||
- **Send fresh finals for stale preview streams** (port openclaw#72038) ([#16261](https://github.com/NousResearch/hermes-agent/pull/16261))
|
||||
- **Render markdown tables as row-group bullets + prompt hint** ([#16997](https://github.com/NousResearch/hermes-agent/pull/16997))
|
||||
- Telegram document fallback in centralized audio routing ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Discord
|
||||
- **Opt-in toolsets + ID injection + tool split + Feishu wiring** (salvage #15457, #15458) ([#15610](https://github.com/NousResearch/hermes-agent/pull/15610), [#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
|
||||
- Fix: coerce `limit` parameter to int before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
|
||||
|
||||
### Slack
|
||||
- **Register every gateway command as a native slash (Discord/Telegram parity)** ([#16164](https://github.com/NousResearch/hermes-agent/pull/16164))
|
||||
- **`strict_mention` config** — prevents thread auto-engagement ([#16193](https://github.com/NousResearch/hermes-agent/pull/16193))
|
||||
- **`channel_skill_bindings`** — bind specific skills to specific Slack channels ([#16283](https://github.com/NousResearch/hermes-agent/pull/16283))
|
||||
|
||||
### Signal
|
||||
- **Native formatting** — markdown → bodyRanges, reply quotes, reactions ([#17417](https://github.com/NousResearch/hermes-agent/pull/17417))
|
||||
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Feishu / Mattermost / Email / Signal
|
||||
- All participate in **native multi-image sending** ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Gateway Core
|
||||
- **Centralized audio routing + FLAC support + Telegram doc fallback** ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
- **Native multi-image sending** across Telegram, Discord, Slack, Mattermost, Email, Signal ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
- **Make hygiene hard message limit configurable** ([#17000](https://github.com/NousResearch/hermes-agent/pull/17000))
|
||||
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
|
||||
- **`pre_gateway_dispatch` hook** — plugins can intercept before dispatch ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
|
||||
- **`pre_approval_request` / `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
|
||||
- Fix: timeouts — guard `load_config()` call against runtime exceptions ([#16318](https://github.com/NousResearch/hermes-agent/pull/16318))
|
||||
- Fix: support passing handler tools via registry ([#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin-first architecture
|
||||
- **Pluggable gateway platforms** — platforms can ship as plugins ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
|
||||
- **Microsoft Teams as first plugin-shipped platform** ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- **`pre_gateway_dispatch` hook** ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
|
||||
- **`pre_approval_request` + `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
|
||||
- **`duration_ms` on `post_tool_call`** (inspired by Claude Code 2.1.119) ([#15429](https://github.com/NousResearch/hermes-agent/pull/15429))
|
||||
- **Bundled plugins**: Spotify ([#15174](https://github.com/NousResearch/hermes-agent/pull/15174)), Google Meet ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364)), Langfuse observability ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917)), hermes-achievements ([#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
|
||||
- **Page-scoped plugin slots for built-in dashboard pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
|
||||
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
|
||||
|
||||
### Browser
|
||||
- **CDP supervisor** — dialog detection + response + cross-origin iframe eval ([#14540](https://github.com/NousResearch/hermes-agent/pull/14540))
|
||||
- **Auto-spawn local Chromium for LAN/localhost URLs** when cloud provider is configured ([#16136](https://github.com/NousResearch/hermes-agent/pull/16136))
|
||||
|
||||
### Execute code / Terminal
|
||||
- **Vercel Sandbox backend** for `execute_code` / terminal (@kshitijk4poor) ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
|
||||
- **Collapse subagent `task_id`s to shared container** ([#16177](https://github.com/NousResearch/hermes-agent/pull/16177))
|
||||
- **Docker: run container as host user** to avoid root-owned bind mounts (@benbarclay) ([#17305](https://github.com/NousResearch/hermes-agent/pull/17305))
|
||||
- Fix: safely quote `~/` subpaths in wrapped `cd` commands ([#15394](https://github.com/NousResearch/hermes-agent/pull/15394))
|
||||
- Fix: close file descriptor in `LocalEnvironment._update_cwd` ([#17300](https://github.com/NousResearch/hermes-agent/pull/17300))
|
||||
- Fix: SSH — prevent tar from overwriting remote home dir permissions ([#17898](https://github.com/NousResearch/hermes-agent/pull/17898), [#17867](https://github.com/NousResearch/hermes-agent/pull/17867))
|
||||
|
||||
### Image generation
|
||||
- See Provider section for updates; no new image providers this window.
|
||||
|
||||
### TTS / Voice
|
||||
- **Pluggable TTS provider registry** under `tts.providers.<name>` ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843))
|
||||
- **Piper** as native local TTS provider (closes #8508) ([#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
|
||||
- **Voice mode CLI parity in the TUI** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
|
||||
- Fix: vision — use HERMES_HOME-based cache dir instead of cwd ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
|
||||
|
||||
### Cron
|
||||
- **Honor `hermes tools` config for the cron platform** ([#14798](https://github.com/NousResearch/hermes-agent/pull/14798))
|
||||
- **Per-job `workdir`** — project-aware cron runs ([#15110](https://github.com/NousResearch/hermes-agent/pull/15110))
|
||||
- **`context_from` field** — chain cron job outputs ([#15606](https://github.com/NousResearch/hermes-agent/pull/15606))
|
||||
- Fix: promote `croniter` to a core dependency ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
|
||||
|
||||
### Web search
|
||||
- **Expose `limit` for `web_search`** ([#16934](https://github.com/NousResearch/hermes-agent/pull/16934))
|
||||
|
||||
### Maps
|
||||
- Fix: include seconds in timezone UTC offset output ([#16300](https://github.com/NousResearch/hermes-agent/pull/16300))
|
||||
|
||||
### Approvals
|
||||
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
|
||||
- Perf: precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
|
||||
### ACP
|
||||
- **Advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
|
||||
|
||||
### API Server
|
||||
- **POST `/v1/runs/{run_id}/stop`** (salvage of #15656) ([#15842](https://github.com/NousResearch/hermes-agent/pull/15842))
|
||||
- **Expose run status for external UIs** (#17085) ([#17458](https://github.com/NousResearch/hermes-agent/pull/17458))
|
||||
|
||||
### Nix
|
||||
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
|
||||
- Fix: use `--rebuild` in fix-lockfiles to bypass cached FOD store paths ([#15444](https://github.com/NousResearch/hermes-agent/pull/15444))
|
||||
- Fix: `extraPackages` now actually works via per-user profile ([#17047](https://github.com/NousResearch/hermes-agent/pull/17047))
|
||||
- Fix: refresh web/ npm-deps hash to unblock main builds ([#17174](https://github.com/NousResearch/hermes-agent/pull/17174))
|
||||
- Fix: replace magic-nix-cache with Cachix ([#17928](https://github.com/NousResearch/hermes-agent/pull/17928))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ TUI
|
||||
|
||||
### New features
|
||||
- **LaTeX rendering** (@austinpickett) ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175))
|
||||
- **`/reload` .env hot-reload** — ported from the classic CLI ([#17286](https://github.com/NousResearch/hermes-agent/pull/17286))
|
||||
- **Pluggable busy-indicator styles** (@OutThisLife, #13610) ([#17150](https://github.com/NousResearch/hermes-agent/pull/17150))
|
||||
- **Opt-in auto-resume of the most recent session** (@OutThisLife) ([#17130](https://github.com/NousResearch/hermes-agent/pull/17130))
|
||||
- **Expanded light-terminal auto-detection** — `HERMES_TUI_THEME` + background hex (@OutThisLife) ([#17113](https://github.com/NousResearch/hermes-agent/pull/17113))
|
||||
- **Delete sessions from `/resume` picker with `d`** (@OutThisLife) ([#17668](https://github.com/NousResearch/hermes-agent/pull/17668))
|
||||
- **Line-by-line scroll on modified mouse wheel** (@OutThisLife) ([#17669](https://github.com/NousResearch/hermes-agent/pull/17669))
|
||||
- **Delete queued message while editing with ctrl-x / cancel with esc** (@OutThisLife) ([#16707](https://github.com/NousResearch/hermes-agent/pull/16707))
|
||||
- **Per-section visibility for the details accordion** (@OutThisLife) ([#14968](https://github.com/NousResearch/hermes-agent/pull/14968))
|
||||
- **Voice mode CLI parity** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
|
||||
- **Contextual first-touch hints ported to TUI** — `/busy`, `/verbose` ([#16054](https://github.com/NousResearch/hermes-agent/pull/16054))
|
||||
- **Mini help menu on `?` in the input field** (@ethernet8023) ([#18043](https://github.com/NousResearch/hermes-agent/pull/18043))
|
||||
|
||||
### Fixes
|
||||
- Fix: proactive mouse disable on ConPTY + `/mouse` toggle command (@kevin-ho, WSL2 ghost-mouse fix) ([#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
|
||||
- Fix: restore skills search RPC ([#15870](https://github.com/NousResearch/hermes-agent/pull/15870))
|
||||
- Perf: cache text measurements across yoga flex re-passes ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
|
||||
- Perf: stabilize long-session scrolling ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
|
||||
- Perf: lazily seed virtual history heights ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
|
||||
- Perf: cut visible cold start ~57% with lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
### New commands
|
||||
- **`hermes -z <prompt>`** — non-interactive one-shot mode ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702))
|
||||
- **`hermes -z` with `--model` / `--provider` / `HERMES_INFERENCE_MODEL`** ([#15704](https://github.com/NousResearch/hermes-agent/pull/15704))
|
||||
- **`hermes update --check`** preflight flag ([#15841](https://github.com/NousResearch/hermes-agent/pull/15841))
|
||||
- **`hermes fallback`** command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
|
||||
- **`/busy`** slash command for busy input mode ([#15382](https://github.com/NousResearch/hermes-agent/pull/15382))
|
||||
- **`/busy` input mode 'steer'** as a third option ([#16279](https://github.com/NousResearch/hermes-agent/pull/16279))
|
||||
- **`/btw` as alias for `/background`** ([#16053](https://github.com/NousResearch/hermes-agent/pull/16053))
|
||||
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
- **Surface `/queue`, `/bg`, `/steer` in agent-running placeholder** ([#16118](https://github.com/NousResearch/hermes-agent/pull/16118))
|
||||
|
||||
### Setup / onboarding
|
||||
- **Auto-reconfigure on existing installs** ([#15879](https://github.com/NousResearch/hermes-agent/pull/15879))
|
||||
- **Contextual first-touch hints for `/busy` and `/verbose`** ([#16046](https://github.com/NousResearch/hermes-agent/pull/16046))
|
||||
- **Cost-saving tips from the April 30 tip-of-the-day** ([#17841](https://github.com/NousResearch/hermes-agent/pull/17841))
|
||||
- **Hyperlink startup banner title to the latest GitHub Release** ([#14945](https://github.com/NousResearch/hermes-agent/pull/14945))
|
||||
|
||||
### Update / backup
|
||||
- **Snapshot pairing data before `git pull`** ([#16383](https://github.com/NousResearch/hermes-agent/pull/16383))
|
||||
- **Auto-backup HERMES_HOME before `hermes update`** (opt-in, off by default) ([#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
|
||||
- **Exclude `checkpoints/` from backups** ([#16572](https://github.com/NousResearch/hermes-agent/pull/16572))
|
||||
- **Exclude SQLite WAL/SHM/journal sidecars from backups** ([#16576](https://github.com/NousResearch/hermes-agent/pull/16576))
|
||||
- **Installer FHS layout for root installs on Linux** ([#15608](https://github.com/NousResearch/hermes-agent/pull/15608))
|
||||
- Fix: kill stale dashboards instead of warning ([#17832](https://github.com/NousResearch/hermes-agent/pull/17832))
|
||||
- Fix: show correct update status on nix-built hermes ([#17550](https://github.com/NousResearch/hermes-agent/pull/17550))
|
||||
|
||||
### Slash-command housekeeping
|
||||
- Refactor: drop `/provider`, `/plan` handler, and clean up slash registry ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
|
||||
- Refactor: drop `persist_session` plumbing + fix broken `/btw` mid-turn bypass ([#16075](https://github.com/NousResearch/hermes-agent/pull/16075))
|
||||
|
||||
### OpenClaw migration (for folks coming from OpenClaw)
|
||||
- **Hardened OpenClaw import** — plan-first apply, redaction, pre-migration backup ([#16911](https://github.com/NousResearch/hermes-agent/pull/16911))
|
||||
- Fix: case-preserving brand rewrite + one-time `~/.openclaw` residue banner ([#16327](https://github.com/NousResearch/hermes-agent/pull/16327))
|
||||
- Fix: resolve `openclaw` workspace files from `agents.defaults.workspace` ([#16879](https://github.com/NousResearch/hermes-agent/pull/16879))
|
||||
- Fix: resolve model aliases against real OpenClaw catalog schema (salvage #16778) ([#16977](https://github.com/NousResearch/hermes-agent/pull/16977))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **Models tab** — rich per-model analytics ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745))
|
||||
- **Configure main + auxiliary models from the Models page** ([#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
|
||||
- **Dashboard Chat tab — xterm.js + JSON-RPC sidecar** (supersedes #12710 + #13379, @OutThisLife) ([#14890](https://github.com/NousResearch/hermes-agent/pull/14890))
|
||||
- **Dashboard layout refresh** (@austinpickett) ([#14899](https://github.com/NousResearch/hermes-agent/pull/14899))
|
||||
- **`--stop` and `--status` flags** on the dashboard CLI ([#17840](https://github.com/NousResearch/hermes-agent/pull/17840))
|
||||
- **Page-scoped plugin slots for built-in pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
|
||||
- Fix: replace all buttons with design system buttons ([#17007](https://github.com/NousResearch/hermes-agent/pull/17007))
|
||||
|
||||
---
|
||||
|
||||
## ⚡ Performance
|
||||
|
||||
- **TUI visible cold start cut ~57%** via lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
|
||||
- **Lazy-import OpenAI, Anthropic, Firecrawl, account_usage** ([#17046](https://github.com/NousResearch/hermes-agent/pull/17046))
|
||||
- **mtime-cache `load_config()` and `read_raw_config()`** ([#17041](https://github.com/NousResearch/hermes-agent/pull/17041))
|
||||
- **Memoize `get_tool_definitions()` + TTL-cache `check_fn` results** ([#17098](https://github.com/NousResearch/hermes-agent/pull/17098))
|
||||
- **Precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS** ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
- **Cache Ink text measurements across yoga flex re-passes** ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
|
||||
- **Stabilize long-session scrolling** ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
|
||||
- **Lazily seed virtual history heights** ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Secret redaction off by default** — stops corrupting patches / API payloads with fake-key substitutions. Opt in via `redaction.enabled: true` ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
|
||||
- **`[SYSTEM:` → `[IMPORTANT:`** in all user-injected markers (Azure content filter dodge) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
|
||||
- **Canonical `mask_secret` helper; fix status.py DIM drift** ([#17207](https://github.com/NousResearch/hermes-agent/pull/17207))
|
||||
- **Sweep expired paste.rs uploads on a real timer** ([#16431](https://github.com/NousResearch/hermes-agent/pull/16431))
|
||||
- **Preserve symlinks during atomic file writes** ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
|
||||
- **Probe `/dev/tty` by opening it, not bare existence** ([#17024](https://github.com/NousResearch/hermes-agent/pull/17024))
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
This window includes 360 `fix:` PRs. Selected highlights from across the stack:
|
||||
|
||||
- **Background review fork inherits parent's live runtime** — provider/model/creds now propagate correctly ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
|
||||
- **Hindsight configurable `HINDSIGHT_TIMEOUT` env var** ([#15077](https://github.com/NousResearch/hermes-agent/pull/15077))
|
||||
- **Tools: normalize numeric entries + clear stale `no_mcp` in `_save_platform_tools`** ([#15607](https://github.com/NousResearch/hermes-agent/pull/15607))
|
||||
- **MCP: rewrite `definitions` refs to `$defs` in input schemas** — closes provider-side 400s
|
||||
- **Azure content filter compatibility** — renamed `[SYSTEM:` markers so Azure's content filter stops flagging them ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
- **Vision cache uses HERMES_HOME instead of cwd** ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
|
||||
- **FTS5 search** — tool_name + tool_calls indexing with repair + migration ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
|
||||
- **Streaming reasoning persists on assistant turns** ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
|
||||
- **execute_code concurrent RPC serialization** (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- **Background reviewer scoped to memory + skills toolsets** — no more accidental web/shell escapes ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
|
||||
- **Compression recovery** — retry on main before giving up; notify user when aux fails ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774), [#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
|
||||
- **`croniter` promoted to a core dependency** ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
|
||||
- **Discord tool `limit` parameter coerced to int** before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
|
||||
- **Yuanbao messaging platform entrance fix** ([#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
|
||||
- **ACP advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
|
||||
- **DeepSeek / Kimi reasoning content isolation** across cross-provider histories (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749), [#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
|
||||
- **Preserve reasoning_content replay on DeepSeek v4 + Kimi/Moonshot thinking** ([#18045](https://github.com/NousResearch/hermes-agent/pull/18045))
|
||||
|
||||
The vast majority of the 360 fixes landed in the streaming/compression/tool-calling paths across all providers — DeepSeek, Kimi, Moonshot, GLM, Qwen, MiniMax, Gemini, Anthropic, OpenAI — alongside TUI polish (resize, scroll, sticky-prompt) and gateway platform-specific edge cases.
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- **Microsoft Teams xdist collision guard** — prevents worker collisions when Teams platform tests run in parallel ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- Chore: remove unused imports and dead locals (ruff F401, F841) ([#17010](https://github.com/NousResearch/hermes-agent/pull/17010))
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- **Curator feature page** added to docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
|
||||
- **Document that a pin also blocks `skill_manage` writes** ([#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
|
||||
- **Direct-URL skill install documented** across features, reference, guide, and `hermes-agent` skill ([#16355](https://github.com/NousResearch/hermes-agent/pull/16355))
|
||||
- **Hooks tutorial — build a BOOT.md startup checklist** (replaces the removed built-in hook) ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202))
|
||||
- **ComfyUI docs: ask local vs cloud FIRST before hardware check** ([#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
|
||||
- **Obliteratus skill: link YouTube video guide in SKILL.md** ([#15808](https://github.com/NousResearch/hermes-agent/pull/15808))
|
||||
- Per-skill docs pages generated for bundled + optional skills; ASCII art code blocks auto-wrapped ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929), [#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
|
||||
|
||||
---
|
||||
|
||||
## ⚖️ Removed / Reverted
|
||||
|
||||
- **Kanban multi-profile collaboration board** — landed in #16081, reverted in [#16098](https://github.com/NousResearch/hermes-agent/pull/16098) while the design is reworked
|
||||
- **computer-use cua-driver** — 3 preparatory PRs landed, then were reverted in [#16927](https://github.com/NousResearch/hermes-agent/pull/16927)
|
||||
- **BOOT.md built-in hook** removed ([#17093](https://github.com/NousResearch/hermes-agent/pull/17093)); the hooks tutorial ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202)) shows how to build the same workflow yourself with a shell hook
|
||||
- **`/provider` + `/plan` slash commands dropped** ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
|
||||
- **`flush_memories` removed entirely** ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count since v0.11.0)
|
||||
|
||||
- **@OutThisLife** (Brooklyn) — 52 PRs · TUI — light-terminal detection + pluggable busy styles + auto-resume + session-delete from /resume + mouse-wheel scrolling + xterm.js dashboard Chat tab + cold-start cut + accordion polish
|
||||
- **@kshitijk4poor** — 12 PRs · LM Studio first-class provider (salvage), Vercel Sandbox backend, GMI Cloud salvage, bundled-by-default touchdesigner-mcp, many tool-call / reasoning fixes
|
||||
- **@helix4u** — 10 PRs · MCP schema robustness, assorted stability fixes
|
||||
- **@alt-glitch** — 8 PRs · trigram FTS5 CJK search, declarative Nix plugin install, matrix/feishu hints and fixes
|
||||
- **@ethernet8023** — 4 PRs
|
||||
- **@austinpickett** — 4 PRs · LaTeX rendering in TUI, dashboard layout refresh
|
||||
- **@benbarclay** — 3 PRs · Docker run-as-host-user so bind mounts don't get root-owned
|
||||
- **@vominh1919** — 2 PRs
|
||||
- **@stephenschoettler** — 2 PRs
|
||||
- **@kevin-ho** — ConPTY mouse-injection fix (#15488)
|
||||
- **@Zjianru** — cross-provider reasoning_content isolation + DeepSeek/Kimi empty-reasoning injection (#15749, #15762)
|
||||
- **@web3blind** — Telegram chat allowlists for groups and forums (#15027)
|
||||
- **@SHL0MS** — 9 new TouchDesigner-MCP reference docs (#16768)
|
||||
- **@0xDevNinja** — curator `restore_skill` nested-archive fix (#17951)
|
||||
- **@y0shua1ee** — curator `use` activity fix (#17953)
|
||||
|
||||
### Also contributing
|
||||
Salvaged or co-authored work from **@isaachuangGMICLOUD** (GMI Cloud), earlier upstream PRs from the original author of each salvage chain, and a long tail of one-shot fixes, documentation nudges, and skill contributions from the community.
|
||||
|
||||
### All Contributors (alphabetical, excluding @teknium1)
|
||||
|
||||
@0xbyt4, @0xharryriddle, @0xDevNinja, @0z1-ghb, @5park1e, @A-FdL-Prog, @aj-nt, @akhater, @alblez, @alexg0bot,
|
||||
@alexzhu0, @AllardQuek, @alt-glitch, @amanning3390, @amanuel2, @AndreKurait, @andrewhosf, @Andy283, @andyylin,
|
||||
@angel12, @AntAISecurityLab, @ash, @austinpickett, @badgerbees, @BadTechBandit, @Bartok9, @beenherebefore,
|
||||
@beesrsj2500, @BeliefanX, @benbarclay, @benjaminsehl, @BlackishGreen33, @bloodcarter, @BlueBirdBack,
|
||||
@briandevans, @brooklynnicholson, @bsgdigital, @buray, @bwjoke, @camaragon, @cdanis, @cgarwood82,
|
||||
@charles-brooks, @chen1749144759, @chengoak, @ching-kaching, @Contentment003111, @crayfish-ai, @CruxExperts,
|
||||
@cyclingwithelephants, @dandaka, @danklynn, @ddupont808, @dhabibi, @difujia, @dimitrovi, @dlkakbs,
|
||||
@dontcallmejames, @EKKOLearnAI, @emozilla, @ericnicolaides, @Erosika, @ethernet8023, @exiao, @Feranmi10,
|
||||
@flobo3, @foxion37, @georgeglessner, @georgex8001, @ghostmfr, @H-Ali13381, @HangGlidersRule, @harryplusplus,
|
||||
@haru398801, @heathley, @hejuntt1014, @hekaru-agent, @helix4u, @Heltman, @HenkDz, @heyitsaamir, @hharry11,
|
||||
@hhhonzik, @hhuang91, @HiddenPuppy, @htsh, @iamagenius00, @in-liberty420, @innocarpe, @irispillars, @iRonin,
|
||||
@isaachuangGMICLOUD, @Ito-69, @j3ffffff, @jackjin1997, @jakubkrcmar, @Jason2031, @JayGwod, @jerome-benoit,
|
||||
@johnncenae, @Kailigithub, @keiravoss94, @kevin-ho, @knockyai, @konsisumer, @kshitijk4poor, @kunlabs, @l0hde,
|
||||
@Leihb, @leoneparise, @LeonSGP43, @liizfq, @liuhao1024, @loongzhao, @lsdsjy, @luyao618, @ma-pony, @Magaav,
|
||||
@MagicRay1217, @math0r-be, @MattMaximo, @maxims-oss, @MaxyMoos, @maymuneth, @mcndjxlefnd, @memosr,
|
||||
@MestreY0d4-Uninter, @mewwts, @Mirac1eSky, @MorAlekss, @mrhwick, @mrunmayee17, @mssteuer, @Nanako0129,
|
||||
@nazirulhafiy, @Nerijusas, @Nicecsh, @nicoloboschi, @nightq, @ningfangbin, @octo-patch, @Octopus,
|
||||
@OutThisLife, @Paperclip, @pein892, @perlowja, @prasadus92, @qike-ms, @qiyin-code, @Readon, @ReginaldasR,
|
||||
@revaraver, @rfilgueiras, @rmoen, @romanornr, @rugvedS07, @rylena, @samrusani, @Sanjays2402, @sasha-id,
|
||||
@Satoshi-agi, @scheidti, @scotttrinh, @season179, @SeeYangZhi, @sgaofen, @shamork, @shannonsands, @SHL0MS,
|
||||
@simbam99, @Societus, @socrates1024, @Sonoyunchu, @sprmn24, @stephenschoettler, @tangyuanjc, @TechPrototyper,
|
||||
@tekgnosis-net, @ThomassJonax, @tmimmanuel, @tochukwuada, @Tosko4, @Tranquil-Flow, @twozle, @txbxxx,
|
||||
@UgwujaGeorge, @Versun, @vlwkaos, @voidborne-d, @vominh1919, @Wang-tianhao, @Wangshengyang2004, @web3blind,
|
||||
@westers, @Wysie, @xandersbell, @xiahu88988, @XieNBi, @xinbenlv, @xnbi, @y0shua1ee, @yatesjalex, @yes999zc,
|
||||
@yeyitech, @Yoimex, @YueLich, @Yukipukii1, @zhiyanliu, @zicochaos, @Zjianru, @zkl2333, @zons-zhaozhy,
|
||||
@ztexydt-cqh.
|
||||
|
||||
Also: @Siddharth Balyan, @YuShu.
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.23...v2026.4.30](https://github.com/NousResearch/hermes-agent/compare/v2026.4.23...v2026.4.30)
|
||||
@@ -112,17 +112,6 @@ def main() -> None:
|
||||
import acp
|
||||
from .server import HermesACPAgent
|
||||
|
||||
# MCP tool discovery from config.yaml — run before asyncio.run() so
|
||||
# it's safe to use blocking waits. (ACP also registers per-session
|
||||
# MCP servers dynamically via asyncio.to_thread inside the event
|
||||
# loop; that path is unaffected.) Moved from model_tools.py module
|
||||
# scope to avoid freezing the gateway's loop on lazy import (#16856).
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
|
||||
|
||||
agent = HermesACPAgent()
|
||||
try:
|
||||
asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
|
||||
|
||||
@@ -3,8 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict, deque
|
||||
@@ -14,7 +12,6 @@ from typing import Any, Deque, Optional
|
||||
import acp
|
||||
from acp.schema import (
|
||||
AgentCapabilities,
|
||||
AgentMessageChunk,
|
||||
AuthenticateResponse,
|
||||
AvailableCommand,
|
||||
AvailableCommandsUpdate,
|
||||
@@ -32,7 +29,6 @@ from acp.schema import (
|
||||
McpServerStdio,
|
||||
ModelInfo,
|
||||
NewSessionResponse,
|
||||
PromptCapabilities,
|
||||
PromptResponse,
|
||||
ResumeSessionResponse,
|
||||
SetSessionConfigOptionResponse,
|
||||
@@ -48,8 +44,6 @@ from acp.schema import (
|
||||
TextContentBlock,
|
||||
UnstructuredCommandInput,
|
||||
Usage,
|
||||
UsageUpdate,
|
||||
UserMessageChunk,
|
||||
)
|
||||
|
||||
# AuthMethodAgent was renamed from AuthMethod in agent-client-protocol 0.9.0
|
||||
@@ -67,7 +61,6 @@ from acp_adapter.events import (
|
||||
)
|
||||
from acp_adapter.permissions import make_approval_callback
|
||||
from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
|
||||
from acp_adapter.tools import build_tool_complete, build_tool_start
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -94,69 +87,17 @@ def _extract_text(
|
||||
| EmbeddedResourceContentBlock
|
||||
],
|
||||
) -> str:
|
||||
"""Extract plain text from ACP content blocks for display/commands."""
|
||||
"""Extract plain text from ACP content blocks."""
|
||||
parts: list[str] = []
|
||||
for block in prompt:
|
||||
if isinstance(block, TextContentBlock):
|
||||
parts.append(block.text)
|
||||
elif hasattr(block, "text"):
|
||||
parts.append(str(block.text))
|
||||
# Non-text blocks are ignored for now.
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _image_block_to_openai_part(block: ImageContentBlock) -> dict[str, Any] | None:
|
||||
"""Convert an ACP image content block to OpenAI-style multimodal content."""
|
||||
data = str(getattr(block, "data", "") or "").strip()
|
||||
uri = str(getattr(block, "uri", "") or "").strip()
|
||||
mime_type = str(getattr(block, "mime_type", "") or "image/png").strip() or "image/png"
|
||||
|
||||
if data:
|
||||
url = data if data.startswith("data:") else f"data:{mime_type};base64,{data}"
|
||||
elif uri:
|
||||
url = uri
|
||||
else:
|
||||
return None
|
||||
|
||||
return {"type": "image_url", "image_url": {"url": url}}
|
||||
|
||||
|
||||
def _content_blocks_to_openai_user_content(
|
||||
prompt: list[
|
||||
TextContentBlock
|
||||
| ImageContentBlock
|
||||
| AudioContentBlock
|
||||
| ResourceContentBlock
|
||||
| EmbeddedResourceContentBlock
|
||||
],
|
||||
) -> str | list[dict[str, Any]]:
|
||||
"""Convert ACP prompt blocks into a Hermes/OpenAI-compatible user content payload."""
|
||||
parts: list[dict[str, Any]] = []
|
||||
text_parts: list[str] = []
|
||||
|
||||
for block in prompt:
|
||||
if isinstance(block, TextContentBlock):
|
||||
if block.text:
|
||||
parts.append({"type": "text", "text": block.text})
|
||||
text_parts.append(block.text)
|
||||
continue
|
||||
if isinstance(block, ImageContentBlock):
|
||||
image_part = _image_block_to_openai_part(block)
|
||||
if image_part is not None:
|
||||
parts.append(image_part)
|
||||
continue
|
||||
|
||||
if not parts:
|
||||
return _extract_text(prompt)
|
||||
|
||||
# Keep pure text prompts as strings so slash-command handling and text-only
|
||||
# providers keep the exact legacy path. Switch to structured content only
|
||||
# when an actual non-text block is present.
|
||||
if all(part.get("type") == "text" for part in parts):
|
||||
return "\n".join(text_parts)
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
class HermesACPAgent(acp.Agent):
|
||||
"""ACP Agent implementation wrapping Hermes AIAgent."""
|
||||
|
||||
@@ -167,8 +108,6 @@ class HermesACPAgent(acp.Agent):
|
||||
"context": "Show conversation context info",
|
||||
"reset": "Clear conversation history",
|
||||
"compact": "Compress conversation context",
|
||||
"steer": "Inject guidance into the currently running agent turn",
|
||||
"queue": "Queue a prompt to run after the current turn finishes",
|
||||
"version": "Show Hermes version",
|
||||
}
|
||||
|
||||
@@ -198,16 +137,6 @@ class HermesACPAgent(acp.Agent):
|
||||
"name": "compact",
|
||||
"description": "Compress conversation context",
|
||||
},
|
||||
{
|
||||
"name": "steer",
|
||||
"description": "Inject guidance into the currently running agent turn",
|
||||
"input_hint": "guidance for the active turn",
|
||||
},
|
||||
{
|
||||
"name": "queue",
|
||||
"description": "Queue a prompt to run after the current turn finishes",
|
||||
"input_hint": "prompt to run next",
|
||||
},
|
||||
{
|
||||
"name": "version",
|
||||
"description": "Show Hermes version",
|
||||
@@ -318,66 +247,6 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
return target_provider, new_model
|
||||
|
||||
@staticmethod
|
||||
def _build_usage_update(state: SessionState) -> UsageUpdate | None:
|
||||
"""Build ACP native context-usage data for clients like Zed.
|
||||
|
||||
Zed's circular context indicator is driven by ACP ``usage_update``
|
||||
session updates: ``size`` is the model context window and ``used`` is
|
||||
the current request pressure. Hermes estimates ``used`` from the same
|
||||
buckets it sends to providers: system prompt, conversation history, and
|
||||
tool schemas.
|
||||
"""
|
||||
agent = state.agent
|
||||
compressor = getattr(agent, "context_compressor", None)
|
||||
size = int(getattr(compressor, "context_length", 0) or 0)
|
||||
if size <= 0:
|
||||
return None
|
||||
|
||||
try:
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
|
||||
used = estimate_request_tokens_rough(
|
||||
state.history,
|
||||
system_prompt=getattr(agent, "_cached_system_prompt", "") or "",
|
||||
tools=getattr(agent, "tools", None) or None,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Could not estimate ACP native context usage", exc_info=True)
|
||||
used = int(getattr(compressor, "last_prompt_tokens", 0) or 0)
|
||||
|
||||
return UsageUpdate(
|
||||
session_update="usage_update",
|
||||
size=max(size, 0),
|
||||
used=max(used, 0),
|
||||
)
|
||||
|
||||
async def _send_usage_update(self, state: SessionState) -> None:
|
||||
"""Send ACP native context usage to the connected client."""
|
||||
if not self._conn:
|
||||
return
|
||||
update = self._build_usage_update(state)
|
||||
if update is None:
|
||||
return
|
||||
try:
|
||||
await self._conn.session_update(
|
||||
session_id=state.session_id,
|
||||
update=update,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to send ACP usage update for session %s",
|
||||
state.session_id,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
def _schedule_usage_update(self, state: SessionState) -> None:
|
||||
"""Schedule native context indicator refresh after ACP responses."""
|
||||
if not self._conn:
|
||||
return
|
||||
loop = asyncio.get_running_loop()
|
||||
loop.call_soon(asyncio.create_task, self._send_usage_update(state))
|
||||
|
||||
async def _register_session_mcp_servers(
|
||||
self,
|
||||
state: SessionState,
|
||||
@@ -482,7 +351,6 @@ class HermesACPAgent(acp.Agent):
|
||||
agent_info=Implementation(name="hermes-agent", version=HERMES_VERSION),
|
||||
agent_capabilities=AgentCapabilities(
|
||||
load_session=True,
|
||||
prompt_capabilities=PromptCapabilities(image=True),
|
||||
session_capabilities=SessionCapabilities(
|
||||
fork=SessionForkCapabilities(),
|
||||
list=SessionListCapabilities(),
|
||||
@@ -508,140 +376,6 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
# ---- Session management -------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _history_message_text(message: dict[str, Any]) -> str:
|
||||
"""Extract displayable text from a persisted OpenAI-style message."""
|
||||
content = message.get("content")
|
||||
if isinstance(content, str):
|
||||
return content.strip()
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
for item in content:
|
||||
if isinstance(item, dict):
|
||||
text = item.get("text")
|
||||
if isinstance(text, str):
|
||||
parts.append(text)
|
||||
elif item.get("type") == "text" and isinstance(item.get("content"), str):
|
||||
parts.append(item["content"])
|
||||
elif isinstance(item, str):
|
||||
parts.append(item)
|
||||
return "\n".join(part.strip() for part in parts if part and part.strip()).strip()
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _history_message_update(
|
||||
*,
|
||||
role: str,
|
||||
text: str,
|
||||
) -> UserMessageChunk | AgentMessageChunk | None:
|
||||
"""Build an ACP history replay update for a user/assistant message."""
|
||||
block = TextContentBlock(type="text", text=text)
|
||||
if role == "user":
|
||||
return UserMessageChunk(
|
||||
session_update="user_message_chunk",
|
||||
content=block,
|
||||
)
|
||||
if role == "assistant":
|
||||
return AgentMessageChunk(
|
||||
session_update="agent_message_chunk",
|
||||
content=block,
|
||||
)
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _history_tool_call_name_args(tool_call: dict[str, Any]) -> tuple[str, dict[str, Any]]:
|
||||
"""Extract function name/arguments from an OpenAI-style tool_call."""
|
||||
function = tool_call.get("function") if isinstance(tool_call.get("function"), dict) else {}
|
||||
name = str(function.get("name") or tool_call.get("name") or "unknown_tool")
|
||||
raw_args = function.get("arguments") or tool_call.get("arguments") or tool_call.get("args") or {}
|
||||
if isinstance(raw_args, str):
|
||||
try:
|
||||
parsed = json.loads(raw_args)
|
||||
except Exception:
|
||||
parsed = {"raw": raw_args}
|
||||
raw_args = parsed
|
||||
if not isinstance(raw_args, dict):
|
||||
raw_args = {}
|
||||
return name, raw_args
|
||||
|
||||
@staticmethod
|
||||
def _history_tool_call_id(tool_call: dict[str, Any]) -> str:
|
||||
"""Return the stable provider tool call id for ACP history replay."""
|
||||
return str(
|
||||
tool_call.get("id")
|
||||
or tool_call.get("call_id")
|
||||
or tool_call.get("tool_call_id")
|
||||
or ""
|
||||
).strip()
|
||||
|
||||
async def _replay_session_history(self, state: SessionState) -> None:
|
||||
"""Send persisted user/assistant history to clients during session/load.
|
||||
|
||||
Zed's ACP history UI calls ``session/load`` after the user picks an item
|
||||
from the Agents sidebar. The agent must then replay the full conversation
|
||||
as user/assistant chunks plus reconstructed tool-call start/completion
|
||||
notifications; merely restoring server-side state makes Hermes remember
|
||||
context, but leaves the editor looking like a clean thread.
|
||||
"""
|
||||
if not self._conn or not state.history:
|
||||
return
|
||||
|
||||
active_tool_calls: dict[str, tuple[str, dict[str, Any]]] = {}
|
||||
|
||||
async def _send(update: Any) -> bool:
|
||||
try:
|
||||
await self._conn.session_update(session_id=state.session_id, update=update)
|
||||
return True
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to replay ACP history for session %s",
|
||||
state.session_id,
|
||||
exc_info=True,
|
||||
)
|
||||
return False
|
||||
|
||||
for message in state.history:
|
||||
role = str(message.get("role") or "")
|
||||
|
||||
if role in {"user", "assistant"}:
|
||||
text = self._history_message_text(message)
|
||||
if text:
|
||||
update = self._history_message_update(role=role, text=text)
|
||||
if update is not None and not await _send(update):
|
||||
return
|
||||
|
||||
if role == "assistant" and isinstance(message.get("tool_calls"), list):
|
||||
for tool_call in message["tool_calls"]:
|
||||
if not isinstance(tool_call, dict):
|
||||
continue
|
||||
tool_call_id = self._history_tool_call_id(tool_call)
|
||||
if not tool_call_id:
|
||||
continue
|
||||
tool_name, args = self._history_tool_call_name_args(tool_call)
|
||||
active_tool_calls[tool_call_id] = (tool_name, args)
|
||||
if not await _send(build_tool_start(tool_call_id, tool_name, args)):
|
||||
return
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
tool_call_id = str(message.get("tool_call_id") or "").strip()
|
||||
tool_name = str(message.get("tool_name") or "").strip()
|
||||
function_args: dict[str, Any] | None = None
|
||||
if tool_call_id in active_tool_calls:
|
||||
tool_name, function_args = active_tool_calls.pop(tool_call_id)
|
||||
if not tool_call_id or not tool_name:
|
||||
continue
|
||||
result = message.get("content")
|
||||
if not await _send(
|
||||
build_tool_complete(
|
||||
tool_call_id,
|
||||
tool_name,
|
||||
result=result if isinstance(result, str) else None,
|
||||
function_args=function_args,
|
||||
)
|
||||
):
|
||||
return
|
||||
|
||||
async def new_session(
|
||||
self,
|
||||
cwd: str,
|
||||
@@ -652,24 +386,11 @@ class HermesACPAgent(acp.Agent):
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("New session %s (cwd=%s)", state.session_id, cwd)
|
||||
self._schedule_available_commands_update(state.session_id)
|
||||
self._schedule_usage_update(state)
|
||||
return NewSessionResponse(
|
||||
session_id=state.session_id,
|
||||
models=self._build_model_state(state),
|
||||
)
|
||||
|
||||
def _schedule_history_replay(self, state: SessionState) -> None:
|
||||
"""Replay persisted history after session/load or session/resume returns.
|
||||
|
||||
Zed only attaches streamed transcript/tool updates once the load/resume
|
||||
response has completed. Sending replay notifications while the request is
|
||||
still in-flight can make the server look correct in logs while the editor
|
||||
drops or fails to attach the tool-call history.
|
||||
"""
|
||||
loop = asyncio.get_running_loop()
|
||||
replay_coro = self._replay_session_history(state)
|
||||
loop.call_soon(asyncio.create_task, replay_coro)
|
||||
|
||||
async def load_session(
|
||||
self,
|
||||
cwd: str,
|
||||
@@ -683,9 +404,7 @@ class HermesACPAgent(acp.Agent):
|
||||
return None
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("Loaded session %s", session_id)
|
||||
self._schedule_history_replay(state)
|
||||
self._schedule_available_commands_update(session_id)
|
||||
self._schedule_usage_update(state)
|
||||
return LoadSessionResponse(models=self._build_model_state(state))
|
||||
|
||||
async def resume_session(
|
||||
@@ -701,17 +420,12 @@ class HermesACPAgent(acp.Agent):
|
||||
state = self.session_manager.create_session(cwd=cwd)
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("Resumed session %s", state.session_id)
|
||||
self._schedule_history_replay(state)
|
||||
self._schedule_available_commands_update(state.session_id)
|
||||
self._schedule_usage_update(state)
|
||||
return ResumeSessionResponse(models=self._build_model_state(state))
|
||||
|
||||
async def cancel(self, session_id: str, **kwargs: Any) -> None:
|
||||
state = self.session_manager.get_session(session_id)
|
||||
if state and state.cancel_event:
|
||||
with state.runtime_lock:
|
||||
if state.is_running and state.current_prompt_text:
|
||||
state.interrupted_prompt_text = state.current_prompt_text
|
||||
state.cancel_event.set()
|
||||
try:
|
||||
if getattr(state, "agent", None) and hasattr(state.agent, "interrupt"):
|
||||
@@ -802,77 +516,18 @@ class HermesACPAgent(acp.Agent):
|
||||
return PromptResponse(stop_reason="refusal")
|
||||
|
||||
user_text = _extract_text(prompt).strip()
|
||||
user_content = _content_blocks_to_openai_user_content(prompt)
|
||||
has_content = bool(user_text) or (
|
||||
isinstance(user_content, list) and bool(user_content)
|
||||
)
|
||||
if not has_content:
|
||||
if not user_text:
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
|
||||
# /steer on an idle session has no in-flight tool call to inject into.
|
||||
# Rewrite it so the payload runs as a normal user prompt, matching the
|
||||
# gateway's behavior (gateway/run.py ~L4898). Two sub-cases:
|
||||
# 1. Zed-interrupt salvage — a prior prompt was cancelled by the
|
||||
# client right before /steer arrived; replay it with the steer
|
||||
# text attached as explicit correction/guidance so the user's
|
||||
# in-flight work isn't lost.
|
||||
# 2. Plain idle — no prior work to salvage; just run the steer
|
||||
# payload as a regular prompt. Without this, _cmd_steer would
|
||||
# silently append to state.queued_prompts and respond with
|
||||
# "No active turn — queued for the next turn", which looks like
|
||||
# /queue even though the user never typed /queue.
|
||||
if isinstance(user_content, str) and user_text.startswith("/steer"):
|
||||
steer_text = user_text.split(maxsplit=1)[1].strip() if len(user_text.split(maxsplit=1)) > 1 else ""
|
||||
interrupted_prompt = ""
|
||||
rewrite_idle = False
|
||||
with state.runtime_lock:
|
||||
if not state.is_running and steer_text:
|
||||
if state.interrupted_prompt_text:
|
||||
interrupted_prompt = state.interrupted_prompt_text
|
||||
state.interrupted_prompt_text = ""
|
||||
else:
|
||||
rewrite_idle = True
|
||||
if interrupted_prompt:
|
||||
user_text = (
|
||||
f"{interrupted_prompt}\n\n"
|
||||
f"User correction/guidance after interrupt: {steer_text}"
|
||||
)
|
||||
user_content = user_text
|
||||
elif rewrite_idle:
|
||||
user_text = steer_text
|
||||
user_content = steer_text
|
||||
|
||||
# Intercept slash commands — handle locally without calling the LLM.
|
||||
# Slash commands are text-only; if the client included images/resources,
|
||||
# send the whole multimodal prompt to the agent instead of treating it as
|
||||
# an ACP command.
|
||||
if isinstance(user_content, str) and user_text.startswith("/"):
|
||||
# Intercept slash commands — handle locally without calling the LLM
|
||||
if user_text.startswith("/"):
|
||||
response_text = self._handle_slash_command(user_text, state)
|
||||
if response_text is not None:
|
||||
if self._conn:
|
||||
update = acp.update_agent_message_text(response_text)
|
||||
await self._conn.session_update(session_id, update)
|
||||
await self._send_usage_update(state)
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
|
||||
# If Zed sends another regular prompt while the same ACP session is
|
||||
# still running, queue it instead of racing two AIAgent loops against
|
||||
# the same state.history. /steer and /queue are handled above and can
|
||||
# land immediately.
|
||||
with state.runtime_lock:
|
||||
if state.is_running:
|
||||
queued_text = user_text or "[Image attachment]"
|
||||
state.queued_prompts.append(queued_text)
|
||||
depth = len(state.queued_prompts)
|
||||
if self._conn:
|
||||
update = acp.update_agent_message_text(
|
||||
f"Queued for the next turn. ({depth} queued)"
|
||||
)
|
||||
await self._conn.session_update(session_id, update)
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
state.is_running = True
|
||||
state.current_prompt_text = user_text or "[Image attachment]"
|
||||
|
||||
logger.info("Prompt on session %s: %s", session_id, user_text[:100])
|
||||
|
||||
conn = self._conn
|
||||
@@ -885,37 +540,24 @@ class HermesACPAgent(acp.Agent):
|
||||
tool_call_meta: dict[str, dict[str, Any]] = {}
|
||||
previous_approval_cb = None
|
||||
|
||||
streamed_message = False
|
||||
|
||||
if conn:
|
||||
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
|
||||
reasoning_cb = make_thinking_cb(conn, session_id, loop)
|
||||
thinking_cb = make_thinking_cb(conn, session_id, loop)
|
||||
step_cb = make_step_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
|
||||
message_cb = make_message_cb(conn, session_id, loop)
|
||||
|
||||
def stream_delta_cb(text: str) -> None:
|
||||
nonlocal streamed_message
|
||||
if text:
|
||||
streamed_message = True
|
||||
message_cb(text)
|
||||
|
||||
approval_cb = make_approval_callback(conn.request_permission, loop, session_id)
|
||||
else:
|
||||
tool_progress_cb = None
|
||||
reasoning_cb = None
|
||||
thinking_cb = None
|
||||
step_cb = None
|
||||
stream_delta_cb = None
|
||||
message_cb = None
|
||||
approval_cb = None
|
||||
|
||||
agent = state.agent
|
||||
agent.tool_progress_callback = tool_progress_cb
|
||||
# ACP thought panes should not receive Hermes' local kawaii waiting/status
|
||||
# updates. Route provider/model reasoning deltas instead; if the provider
|
||||
# emits no reasoning, Zed should not get a fake "thinking" accordion.
|
||||
agent.thinking_callback = None
|
||||
agent.reasoning_callback = reasoning_cb
|
||||
agent.thinking_callback = thinking_cb
|
||||
agent.step_callback = step_cb
|
||||
agent.stream_delta_callback = stream_delta_cb
|
||||
agent.message_callback = message_cb
|
||||
|
||||
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
|
||||
# Set it INSIDE _run_agent so the TLS write happens in the executor
|
||||
@@ -932,22 +574,6 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
def _run_agent() -> dict:
|
||||
nonlocal previous_approval_cb, previous_interactive
|
||||
# Bind HERMES_SESSION_KEY for this session so per-session caches
|
||||
# (e.g. the interactive sudo password cache in tools.terminal_tool)
|
||||
# scope to the ACP session rather than leaking across sessions
|
||||
# that land on the same reused executor thread. This call runs
|
||||
# inside a contextvars.copy_context() below, so the ContextVar
|
||||
# write is isolated from other concurrent ACP sessions.
|
||||
try:
|
||||
from gateway.session_context import (
|
||||
clear_session_vars,
|
||||
set_session_vars,
|
||||
)
|
||||
session_tokens = set_session_vars(session_key=session_id)
|
||||
except Exception:
|
||||
session_tokens = None
|
||||
clear_session_vars = None # type: ignore[assignment]
|
||||
logger.debug("Could not set ACP session context", exc_info=True)
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
@@ -961,10 +587,9 @@ class HermesACPAgent(acp.Agent):
|
||||
os.environ["HERMES_INTERACTIVE"] = "1"
|
||||
try:
|
||||
result = agent.run_conversation(
|
||||
user_message=user_content,
|
||||
user_message=user_text,
|
||||
conversation_history=state.history,
|
||||
task_id=session_id,
|
||||
persist_user_message=user_text or "[Image attachment]",
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
@@ -982,24 +607,11 @@ class HermesACPAgent(acp.Agent):
|
||||
_terminal_tool.set_approval_callback(previous_approval_cb)
|
||||
except Exception:
|
||||
logger.debug("Could not restore approval callback", exc_info=True)
|
||||
if session_tokens is not None and clear_session_vars is not None:
|
||||
try:
|
||||
clear_session_vars(session_tokens)
|
||||
except Exception:
|
||||
logger.debug("Could not clear ACP session context", exc_info=True)
|
||||
|
||||
try:
|
||||
# Wrap the executor call in a fresh copy of the current context so
|
||||
# concurrent ACP sessions on the shared ThreadPoolExecutor don't
|
||||
# stomp on each other's ContextVar writes (HERMES_SESSION_KEY in
|
||||
# particular — used by the interactive sudo password cache scope).
|
||||
ctx = contextvars.copy_context()
|
||||
result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
|
||||
result = await loop.run_in_executor(_executor, _run_agent)
|
||||
except Exception:
|
||||
logger.exception("Executor error for session %s", session_id)
|
||||
with state.runtime_lock:
|
||||
state.is_running = False
|
||||
state.current_prompt_text = ""
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
|
||||
if result.get("messages"):
|
||||
@@ -1021,32 +633,10 @@ class HermesACPAgent(acp.Agent):
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
|
||||
if final_response and conn and not streamed_message:
|
||||
if final_response and conn:
|
||||
update = acp.update_agent_message_text(final_response)
|
||||
await conn.session_update(session_id, update)
|
||||
|
||||
# Mark this turn idle before draining queued work so recursive prompt()
|
||||
# calls can acquire the session. Queued turns are intentionally run as
|
||||
# normal follow-up user prompts, preserving role alternation and history.
|
||||
with state.runtime_lock:
|
||||
state.is_running = False
|
||||
state.current_prompt_text = ""
|
||||
|
||||
while True:
|
||||
with state.runtime_lock:
|
||||
if not state.queued_prompts:
|
||||
break
|
||||
next_prompt = state.queued_prompts.pop(0)
|
||||
if conn:
|
||||
await conn.session_update(
|
||||
session_id,
|
||||
acp.update_user_message_text(next_prompt),
|
||||
)
|
||||
await self.prompt(
|
||||
prompt=[TextContentBlock(type="text", text=next_prompt)],
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
usage = None
|
||||
if any(result.get(key) is not None for key in ("prompt_tokens", "completion_tokens", "total_tokens")):
|
||||
usage = Usage(
|
||||
@@ -1057,8 +647,6 @@ class HermesACPAgent(acp.Agent):
|
||||
cached_read_tokens=result.get("cache_read_tokens"),
|
||||
)
|
||||
|
||||
await self._send_usage_update(state)
|
||||
|
||||
stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn"
|
||||
return PromptResponse(stop_reason=stop_reason, usage=usage)
|
||||
|
||||
@@ -1126,8 +714,6 @@ class HermesACPAgent(acp.Agent):
|
||||
"context": self._cmd_context,
|
||||
"reset": self._cmd_reset,
|
||||
"compact": self._cmd_compact,
|
||||
"steer": self._cmd_steer,
|
||||
"queue": self._cmd_queue,
|
||||
"version": self._cmd_version,
|
||||
}.get(cmd)
|
||||
|
||||
@@ -1191,84 +777,22 @@ class HermesACPAgent(acp.Agent):
|
||||
return f"Could not list tools: {e}"
|
||||
|
||||
def _cmd_context(self, args: str, state: SessionState) -> str:
|
||||
"""Show ACP session context pressure and compression guidance."""
|
||||
n_messages = len(state.history)
|
||||
|
||||
# Count by role.
|
||||
if n_messages == 0:
|
||||
return "Conversation is empty (no messages yet)."
|
||||
# Count by role
|
||||
roles: dict[str, int] = {}
|
||||
for msg in state.history:
|
||||
role = msg.get("role", "unknown")
|
||||
roles[role] = roles.get(role, 0) + 1
|
||||
|
||||
agent = state.agent
|
||||
model = state.model or getattr(agent, "model", "")
|
||||
provider = getattr(agent, "provider", None) or "auto"
|
||||
compressor = getattr(agent, "context_compressor", None)
|
||||
context_length = int(getattr(compressor, "context_length", 0) or 0)
|
||||
threshold_tokens = int(getattr(compressor, "threshold_tokens", 0) or 0)
|
||||
|
||||
try:
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
|
||||
system_prompt = getattr(agent, "_cached_system_prompt", "") or ""
|
||||
tools = getattr(agent, "tools", None) or None
|
||||
approx_tokens = estimate_request_tokens_rough(
|
||||
state.history,
|
||||
system_prompt=system_prompt,
|
||||
tools=tools,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Could not estimate ACP context usage", exc_info=True)
|
||||
approx_tokens = 0
|
||||
|
||||
if threshold_tokens <= 0 and context_length > 0:
|
||||
threshold_tokens = int(context_length * 0.80)
|
||||
|
||||
lines = [
|
||||
f"Conversation: {n_messages} messages"
|
||||
if n_messages
|
||||
else "Conversation is empty (no messages yet).",
|
||||
f"Conversation: {n_messages} messages",
|
||||
f" user: {roles.get('user', 0)}, assistant: {roles.get('assistant', 0)}, "
|
||||
f"tool: {roles.get('tool', 0)}, system: {roles.get('system', 0)}",
|
||||
]
|
||||
model = state.model or getattr(state.agent, "model", "")
|
||||
if model:
|
||||
lines.append(f"Model: {model}")
|
||||
lines.append(f"Provider: {provider}")
|
||||
|
||||
if approx_tokens > 0:
|
||||
if context_length > 0:
|
||||
usage_pct = (approx_tokens / context_length) * 100
|
||||
lines.append(
|
||||
f"Context usage: ~{approx_tokens:,} / {context_length:,} tokens ({usage_pct:.1f}%)"
|
||||
)
|
||||
else:
|
||||
lines.append(f"Context usage: ~{approx_tokens:,} tokens")
|
||||
|
||||
if threshold_tokens > 0:
|
||||
if approx_tokens > 0:
|
||||
threshold_pct = (threshold_tokens / context_length) * 100 if context_length > 0 else 0
|
||||
remaining = max(threshold_tokens - approx_tokens, 0)
|
||||
if approx_tokens >= threshold_tokens:
|
||||
lines.append(
|
||||
f"Compression: due now (threshold ~{threshold_tokens:,}"
|
||||
+ (f", {threshold_pct:.0f}%" if threshold_pct else "")
|
||||
+ "). Run /compact."
|
||||
)
|
||||
else:
|
||||
lines.append(
|
||||
f"Compression: ~{remaining:,} tokens until threshold "
|
||||
f"(~{threshold_tokens:,}"
|
||||
+ (f", {threshold_pct:.0f}%" if threshold_pct else "")
|
||||
+ ")."
|
||||
)
|
||||
else:
|
||||
lines.append(f"Compression threshold: ~{threshold_tokens:,} tokens")
|
||||
|
||||
if getattr(agent, "compression_enabled", True) is False:
|
||||
lines.append("Compression is disabled for this agent.")
|
||||
else:
|
||||
lines.append("Tip: run /compact to compress manually before the threshold.")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def _cmd_reset(self, args: str, state: SessionState) -> str:
|
||||
@@ -1286,16 +810,10 @@ class HermesACPAgent(acp.Agent):
|
||||
if not hasattr(agent, "_compress_context"):
|
||||
return "Context compression not available for this agent."
|
||||
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
|
||||
original_count = len(state.history)
|
||||
# Include system prompt + tool schemas so the figure reflects real
|
||||
# request pressure, not a transcript-only underestimate (#6217).
|
||||
_sys_prompt = getattr(agent, "_cached_system_prompt", "") or ""
|
||||
_tools = getattr(agent, "tools", None) or None
|
||||
approx_tokens = estimate_request_tokens_rough(
|
||||
state.history, system_prompt=_sys_prompt, tools=_tools
|
||||
)
|
||||
approx_tokens = estimate_messages_tokens_rough(state.history)
|
||||
original_session_db = getattr(agent, "_session_db", None)
|
||||
|
||||
try:
|
||||
@@ -1315,13 +833,7 @@ class HermesACPAgent(acp.Agent):
|
||||
self.session_manager.save_session(state.session_id)
|
||||
|
||||
new_count = len(state.history)
|
||||
_sys_prompt_after = getattr(agent, "_cached_system_prompt", "") or _sys_prompt
|
||||
_tools_after = getattr(agent, "tools", None) or _tools
|
||||
new_tokens = estimate_request_tokens_rough(
|
||||
state.history,
|
||||
system_prompt=_sys_prompt_after,
|
||||
tools=_tools_after,
|
||||
)
|
||||
new_tokens = estimate_messages_tokens_rough(state.history)
|
||||
return (
|
||||
f"Context compressed: {original_count} -> {new_count} messages\n"
|
||||
f"~{approx_tokens:,} -> ~{new_tokens:,} tokens"
|
||||
@@ -1329,34 +841,6 @@ class HermesACPAgent(acp.Agent):
|
||||
except Exception as e:
|
||||
return f"Compression failed: {e}"
|
||||
|
||||
def _cmd_steer(self, args: str, state: SessionState) -> str:
|
||||
steer_text = args.strip()
|
||||
if not steer_text:
|
||||
return "Usage: /steer <guidance>"
|
||||
|
||||
if state.is_running and hasattr(state.agent, "steer"):
|
||||
try:
|
||||
if state.agent.steer(steer_text):
|
||||
preview = steer_text[:80] + ("..." if len(steer_text) > 80 else "")
|
||||
return f"⏩ Steer queued for the active turn: {preview}"
|
||||
except Exception as exc:
|
||||
logger.warning("ACP steer failed for session %s: %s", state.session_id, exc)
|
||||
return f"⚠️ Steer failed: {exc}"
|
||||
|
||||
with state.runtime_lock:
|
||||
state.queued_prompts.append(steer_text)
|
||||
depth = len(state.queued_prompts)
|
||||
return f"No active turn — queued for the next turn. ({depth} queued)"
|
||||
|
||||
def _cmd_queue(self, args: str, state: SessionState) -> str:
|
||||
queued_text = args.strip()
|
||||
if not queued_text:
|
||||
return "Usage: /queue <prompt>"
|
||||
with state.runtime_lock:
|
||||
state.queued_prompts.append(queued_text)
|
||||
depth = len(state.queued_prompts)
|
||||
return f"Queued for the next turn. ({depth} queued)"
|
||||
|
||||
def _cmd_version(self, args: str, state: SessionState) -> str:
|
||||
return f"Hermes Agent v{HERMES_VERSION}"
|
||||
|
||||
|
||||
@@ -26,33 +26,6 @@ from typing import Any, Dict, List, Optional
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _win_path_to_wsl(path: str) -> str | None:
|
||||
"""Convert a Windows drive path to its WSL /mnt/<drive>/... equivalent."""
|
||||
match = re.match(r"^([A-Za-z]):[\\/](.*)$", path)
|
||||
if not match:
|
||||
return None
|
||||
drive = match.group(1).lower()
|
||||
tail = match.group(2).replace("\\", "/")
|
||||
return f"/mnt/{drive}/{tail}"
|
||||
|
||||
|
||||
def _translate_acp_cwd(cwd: str) -> str:
|
||||
"""Translate Windows ACP cwd values when Hermes itself is running in WSL.
|
||||
|
||||
Windows ACP clients can launch ``hermes acp`` inside WSL while still sending
|
||||
editor workspaces as Windows drive paths such as ``E:\\Projects``. Store
|
||||
and execute against the WSL mount path so agents, tools, and persisted ACP
|
||||
sessions all agree on the usable workspace. Native Linux/macOS keeps the
|
||||
original cwd unchanged.
|
||||
"""
|
||||
from hermes_constants import is_wsl
|
||||
|
||||
if not is_wsl():
|
||||
return cwd
|
||||
translated = _win_path_to_wsl(str(cwd))
|
||||
return translated if translated is not None else cwd
|
||||
|
||||
|
||||
def _normalize_cwd_for_compare(cwd: str | None) -> str:
|
||||
raw = str(cwd or ".").strip()
|
||||
if not raw:
|
||||
@@ -61,9 +34,11 @@ def _normalize_cwd_for_compare(cwd: str | None) -> str:
|
||||
|
||||
# Normalize Windows drive paths into the equivalent WSL mount form so
|
||||
# ACP history filters match the same workspace across Windows and WSL.
|
||||
translated = _win_path_to_wsl(expanded)
|
||||
if translated is not None:
|
||||
expanded = translated
|
||||
match = re.match(r"^([A-Za-z]):[\\/](.*)$", expanded)
|
||||
if match:
|
||||
drive = match.group(1).lower()
|
||||
tail = match.group(2).replace("\\", "/")
|
||||
expanded = f"/mnt/{drive}/{tail}"
|
||||
elif re.match(r"^/mnt/[A-Za-z]/", expanded):
|
||||
expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}"
|
||||
|
||||
@@ -121,18 +96,12 @@ def _acp_stderr_print(*args, **kwargs) -> None:
|
||||
|
||||
|
||||
def _register_task_cwd(task_id: str, cwd: str) -> None:
|
||||
"""Bind a task/session id to the editor's working directory for tools.
|
||||
|
||||
Zed can launch Hermes from a Windows workspace while the ACP process runs
|
||||
inside WSL. In that case ACP sends cwd as e.g. ``E:\\Projects\\POTI``;
|
||||
local tools need the WSL mount equivalent or subprocess creation fails
|
||||
before the command can run.
|
||||
"""
|
||||
"""Bind a task/session id to the editor's working directory for tools."""
|
||||
if not task_id:
|
||||
return
|
||||
try:
|
||||
from tools.terminal_tool import register_task_env_overrides
|
||||
register_task_env_overrides(task_id, {"cwd": _translate_acp_cwd(cwd)})
|
||||
register_task_env_overrides(task_id, {"cwd": cwd})
|
||||
except Exception:
|
||||
logger.debug("Failed to register ACP task cwd override", exc_info=True)
|
||||
|
||||
@@ -176,11 +145,6 @@ class SessionState:
|
||||
model: str = ""
|
||||
history: List[Dict[str, Any]] = field(default_factory=list)
|
||||
cancel_event: Any = None # threading.Event
|
||||
is_running: bool = False
|
||||
queued_prompts: List[str] = field(default_factory=list)
|
||||
runtime_lock: Any = field(default_factory=Lock)
|
||||
current_prompt_text: str = ""
|
||||
interrupted_prompt_text: str = ""
|
||||
|
||||
|
||||
class SessionManager:
|
||||
@@ -211,7 +175,6 @@ class SessionManager:
|
||||
"""Create a new session with a unique ID and a fresh AIAgent."""
|
||||
import threading
|
||||
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
session_id = str(uuid.uuid4())
|
||||
agent = self._make_agent(session_id=session_id, cwd=cwd)
|
||||
state = SessionState(
|
||||
@@ -254,7 +217,6 @@ class SessionManager:
|
||||
"""Deep-copy a session's history into a new session."""
|
||||
import threading
|
||||
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
original = self.get_session(session_id) # checks DB too
|
||||
if original is None:
|
||||
return None
|
||||
@@ -356,7 +318,6 @@ class SessionManager:
|
||||
|
||||
def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:
|
||||
"""Update the working directory for a session and its tool overrides."""
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
state = self.get_session(session_id) # checks DB too
|
||||
if state is None:
|
||||
return None
|
||||
@@ -466,10 +427,17 @@ class SessionManager:
|
||||
except Exception:
|
||||
logger.debug("Failed to update ACP session metadata", exc_info=True)
|
||||
|
||||
# Replace stored messages with current history atomically so a
|
||||
# mid-rewrite failure rolls back and the previously persisted
|
||||
# conversation is preserved (salvaged from #13675).
|
||||
db.replace_messages(state.session_id, state.history)
|
||||
# Replace stored messages with current history.
|
||||
db.clear_messages(state.session_id)
|
||||
for msg in state.history:
|
||||
db.append_message(
|
||||
session_id=state.session_id,
|
||||
role=msg.get("role", "user"),
|
||||
content=msg.get("content"),
|
||||
tool_name=msg.get("tool_name") or msg.get("name"),
|
||||
tool_calls=msg.get("tool_calls"),
|
||||
tool_call_id=msg.get("tool_call_id"),
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to persist ACP session %s", state.session_id, exc_info=True)
|
||||
|
||||
|
||||
@@ -28,11 +28,6 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
|
||||
"terminal": "execute",
|
||||
"process": "execute",
|
||||
"execute_code": "execute",
|
||||
# Session/meta tools
|
||||
"todo": "other",
|
||||
"skill_view": "read",
|
||||
"skills_list": "read",
|
||||
"skill_manage": "edit",
|
||||
# Web / fetch
|
||||
"web_search": "fetch",
|
||||
"web_extract": "fetch",
|
||||
@@ -56,28 +51,6 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
|
||||
}
|
||||
|
||||
|
||||
_POLISHED_TOOLS = {
|
||||
# Core operator loop
|
||||
"todo", "memory", "session_search", "delegate_task",
|
||||
# Files / execution
|
||||
"read_file", "write_file", "patch", "search_files", "terminal", "process", "execute_code",
|
||||
# Skills / web / browser / media
|
||||
"skill_view", "skills_list", "skill_manage", "web_search", "web_extract",
|
||||
"browser_navigate", "browser_click", "browser_type", "browser_press", "browser_scroll",
|
||||
"browser_back", "browser_snapshot", "browser_console", "browser_get_images", "browser_vision",
|
||||
"vision_analyze", "image_generate", "text_to_speech",
|
||||
# Schedulers / platform integrations
|
||||
"cronjob", "send_message", "clarify", "discord", "discord_admin",
|
||||
"ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service",
|
||||
"feishu_doc_read", "feishu_drive_list_comments", "feishu_drive_list_comment_replies",
|
||||
"feishu_drive_reply_comment", "feishu_drive_add_comment",
|
||||
"kanban_create", "kanban_show", "kanban_comment", "kanban_complete",
|
||||
"kanban_block", "kanban_link", "kanban_heartbeat",
|
||||
"yb_query_group_info", "yb_query_group_members", "yb_search_sticker",
|
||||
"yb_send_dm", "yb_send_sticker", "mixture_of_agents",
|
||||
}
|
||||
|
||||
|
||||
def get_tool_kind(tool_name: str) -> ToolKind:
|
||||
"""Return the ACP ToolKind for a hermes tool, defaulting to 'other'."""
|
||||
return TOOL_KIND_MAP.get(tool_name, "other")
|
||||
@@ -112,645 +85,18 @@ def build_tool_title(tool_name: str, args: Dict[str, Any]) -> str:
|
||||
if urls:
|
||||
return f"extract: {urls[0]}" + (f" (+{len(urls)-1})" if len(urls) > 1 else "")
|
||||
return "web extract"
|
||||
if tool_name == "process":
|
||||
action = str(args.get("action") or "").strip() or "manage"
|
||||
sid = str(args.get("session_id") or "").strip()
|
||||
return f"process {action}: {sid}" if sid else f"process {action}"
|
||||
if tool_name == "delegate_task":
|
||||
tasks = args.get("tasks")
|
||||
if isinstance(tasks, list) and tasks:
|
||||
return f"delegate batch ({len(tasks)} tasks)"
|
||||
goal = args.get("goal", "")
|
||||
if goal and len(goal) > 60:
|
||||
goal = goal[:57] + "..."
|
||||
return f"delegate: {goal}" if goal else "delegate task"
|
||||
if tool_name == "session_search":
|
||||
query = str(args.get("query") or "").strip()
|
||||
return f"session search: {query}" if query else "recent sessions"
|
||||
if tool_name == "memory":
|
||||
action = str(args.get("action") or "manage").strip() or "manage"
|
||||
target = str(args.get("target") or "memory").strip() or "memory"
|
||||
return f"memory {action}: {target}"
|
||||
if tool_name == "execute_code":
|
||||
code = str(args.get("code") or "").strip()
|
||||
first_line = next((line.strip() for line in code.splitlines() if line.strip()), "")
|
||||
if first_line:
|
||||
if len(first_line) > 70:
|
||||
first_line = first_line[:67] + "..."
|
||||
return f"python: {first_line}"
|
||||
return "python code"
|
||||
if tool_name == "todo":
|
||||
items = args.get("todos")
|
||||
if isinstance(items, list):
|
||||
return f"todo ({len(items)} item{'s' if len(items) != 1 else ''})"
|
||||
return "todo"
|
||||
if tool_name == "skill_view":
|
||||
name = str(args.get("name") or "?").strip() or "?"
|
||||
file_path = str(args.get("file_path") or "").strip()
|
||||
suffix = f"/{file_path}" if file_path else ""
|
||||
return f"skill view ({name}{suffix})"
|
||||
if tool_name == "skills_list":
|
||||
category = str(args.get("category") or "").strip()
|
||||
return f"skills list ({category})" if category else "skills list"
|
||||
if tool_name == "skill_manage":
|
||||
action = str(args.get("action") or "manage").strip() or "manage"
|
||||
name = str(args.get("name") or "?").strip() or "?"
|
||||
file_path = str(args.get("file_path") or "").strip()
|
||||
target = f"{name}/{file_path}" if file_path else name
|
||||
if len(target) > 64:
|
||||
target = target[:61] + "..."
|
||||
return f"skill {action}: {target}"
|
||||
if tool_name == "browser_navigate":
|
||||
return f"navigate: {args.get('url', '?')}"
|
||||
if tool_name == "browser_snapshot":
|
||||
return "browser snapshot"
|
||||
if tool_name == "browser_vision":
|
||||
return f"browser vision: {str(args.get('question', '?'))[:50]}"
|
||||
if tool_name == "browser_get_images":
|
||||
return "browser images"
|
||||
return "execute code"
|
||||
if tool_name == "vision_analyze":
|
||||
return f"analyze image: {str(args.get('question', '?'))[:50]}"
|
||||
if tool_name == "image_generate":
|
||||
prompt = str(args.get("prompt") or args.get("description") or "").strip()
|
||||
return f"generate image: {prompt[:50]}" if prompt else "generate image"
|
||||
if tool_name == "cronjob":
|
||||
action = str(args.get("action") or "manage").strip() or "manage"
|
||||
job_id = str(args.get("job_id") or args.get("id") or "").strip()
|
||||
return f"cron {action}: {job_id}" if job_id else f"cron {action}"
|
||||
return f"analyze image: {args.get('question', '?')[:50]}"
|
||||
return tool_name
|
||||
|
||||
|
||||
def _text(content: str) -> Any:
|
||||
return acp.tool_content(acp.text_block(content))
|
||||
|
||||
|
||||
def _json_loads_maybe(value: Optional[str]) -> Any:
|
||||
if not isinstance(value, str):
|
||||
return value
|
||||
try:
|
||||
return json.loads(value)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Some Hermes tools append a human hint after a JSON payload, e.g.
|
||||
# ``{...}\n\n[Hint: Results truncated...]``. Keep the structured rendering path
|
||||
# by decoding the first JSON value instead of falling back to raw text.
|
||||
try:
|
||||
decoded, _ = json.JSONDecoder().raw_decode(value.lstrip())
|
||||
return decoded
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _truncate_text(text: str, limit: int = 5000) -> str:
|
||||
if len(text) <= limit:
|
||||
return text
|
||||
return text[: max(0, limit - 100)] + f"\n... ({len(text)} chars total, truncated)"
|
||||
|
||||
|
||||
def _fenced_text(text: str, language: str = "") -> str:
|
||||
"""Return a Markdown fence that cannot be broken by backticks in text."""
|
||||
longest = max((len(run) for run in text.split("`")[1::2]), default=0)
|
||||
fence = "`" * max(3, longest + 1)
|
||||
return f"{fence}{language}\n{text}\n{fence}"
|
||||
|
||||
|
||||
def _format_todo_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict) or not isinstance(data.get("todos"), list):
|
||||
return None
|
||||
summary = data.get("summary") if isinstance(data.get("summary"), dict) else {}
|
||||
icon = {
|
||||
"completed": "✅",
|
||||
"in_progress": "🔄",
|
||||
"pending": "⏳",
|
||||
"cancelled": "✗",
|
||||
}
|
||||
lines = ["**Todo list**", ""]
|
||||
for item in data["todos"]:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
status = str(item.get("status") or "pending")
|
||||
content = str(item.get("content") or item.get("id") or "").strip()
|
||||
if content:
|
||||
lines.append(f"- {icon.get(status, '•')} {content}")
|
||||
if summary:
|
||||
cancelled = summary.get("cancelled", 0)
|
||||
lines.extend([
|
||||
"",
|
||||
"**Progress:** "
|
||||
f"{summary.get('completed', 0)} completed, "
|
||||
f"{summary.get('in_progress', 0)} in progress, "
|
||||
f"{summary.get('pending', 0)} pending"
|
||||
+ (f", {cancelled} cancelled" if cancelled else ""),
|
||||
])
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_read_file_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if data.get("error") and not data.get("content"):
|
||||
return f"Read failed: {data.get('error')}"
|
||||
content = data.get("content")
|
||||
if not isinstance(content, str):
|
||||
return None
|
||||
path = str((args or {}).get("path") or data.get("path") or "file").strip()
|
||||
offset = (args or {}).get("offset")
|
||||
limit = (args or {}).get("limit")
|
||||
range_bits = []
|
||||
if offset:
|
||||
range_bits.append(f"from line {offset}")
|
||||
if limit:
|
||||
range_bits.append(f"limit {limit}")
|
||||
suffix = f" ({', '.join(range_bits)})" if range_bits else ""
|
||||
header = f"Read {path}{suffix}"
|
||||
if data.get("total_lines") is not None:
|
||||
header += f" — {data.get('total_lines')} total lines"
|
||||
# Hermes read_file output is line-numbered with `|`. If we send it as raw
|
||||
# Markdown, Zed can interpret pipes as tables and collapse the layout.
|
||||
# Fence the payload so file lines stay readable and literal.
|
||||
return _truncate_text(f"{header}\n\n{_fenced_text(content)}")
|
||||
|
||||
|
||||
def _format_search_files_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
matches = data.get("matches")
|
||||
if not isinstance(matches, list):
|
||||
return None
|
||||
|
||||
total = data.get("total_count", len(matches))
|
||||
shown = min(len(matches), 12)
|
||||
truncated = bool(data.get("truncated")) or len(matches) > shown
|
||||
lines = [
|
||||
"Search results",
|
||||
f"Found {total} match{'es' if total != 1 else ''}; showing {shown}.",
|
||||
"",
|
||||
]
|
||||
|
||||
for match in matches[:shown]:
|
||||
if not isinstance(match, dict):
|
||||
lines.append(f"- {match}")
|
||||
continue
|
||||
|
||||
path = str(match.get("path") or match.get("file") or match.get("filename") or "?")
|
||||
line = match.get("line") or match.get("line_number")
|
||||
content = str(match.get("content") or match.get("text") or "").strip()
|
||||
loc = f"{path}:{line}" if line else path
|
||||
lines.append(f"- {loc}")
|
||||
if content:
|
||||
snippet = _truncate_text(" ".join(content.split()), 300)
|
||||
lines.append(f" {snippet}")
|
||||
|
||||
if truncated:
|
||||
lines.extend([
|
||||
"",
|
||||
"Results truncated. Narrow the search, add file_glob, or use offset to page.",
|
||||
])
|
||||
return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _format_execute_code_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return result if isinstance(result, str) and result.strip() else None
|
||||
output = str(data.get("output") or "")
|
||||
error = str(data.get("error") or "")
|
||||
exit_code = data.get("exit_code")
|
||||
parts = [f"Exit code: {exit_code}" if exit_code is not None else "Execution complete"]
|
||||
if output:
|
||||
parts.extend(["", "Output:", output])
|
||||
if error:
|
||||
parts.extend(["", "Error:", error])
|
||||
return _truncate_text("\n".join(parts))
|
||||
|
||||
|
||||
def _extract_markdown_headings(content: str, limit: int = 8) -> list[str]:
|
||||
headings: list[str] = []
|
||||
for line in content.splitlines():
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("#"):
|
||||
heading = stripped.lstrip("#").strip()
|
||||
if heading:
|
||||
headings.append(heading)
|
||||
if len(headings) >= limit:
|
||||
break
|
||||
return headings
|
||||
|
||||
|
||||
def _format_skill_view_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if data.get("success") is False:
|
||||
return f"Skill view failed: {data.get('error', 'unknown error')}"
|
||||
name = str(data.get("name") or "skill")
|
||||
file_path = str(data.get("file") or data.get("path") or "SKILL.md")
|
||||
description = str(data.get("description") or "").strip()
|
||||
content = str(data.get("content") or "")
|
||||
linked = data.get("linked_files") if isinstance(data.get("linked_files"), dict) else None
|
||||
|
||||
lines = ["**Skill loaded**", "", f"- **Name:** `{name}`", f"- **File:** `{file_path}`"]
|
||||
if description:
|
||||
lines.append(f"- **Description:** {description}")
|
||||
if content:
|
||||
lines.append(f"- **Content:** {len(content):,} chars loaded into agent context")
|
||||
if linked:
|
||||
linked_count = sum(len(v) for v in linked.values() if isinstance(v, list))
|
||||
lines.append(f"- **Linked files:** {linked_count}")
|
||||
|
||||
headings = _extract_markdown_headings(content)
|
||||
if headings:
|
||||
lines.extend(["", "**Sections**"])
|
||||
lines.extend(f"- {heading}" for heading in headings)
|
||||
|
||||
lines.extend([
|
||||
"",
|
||||
"_Full skill content is available to the agent but hidden here to keep ACP readable._",
|
||||
])
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_skill_manage_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
|
||||
action = str((args or {}).get("action") or "manage").strip() or "manage"
|
||||
name = str((args or {}).get("name") or data.get("name") or "skill").strip() or "skill"
|
||||
file_path = str((args or {}).get("file_path") or data.get("file_path") or "SKILL.md").strip() or "SKILL.md"
|
||||
success = data.get("success")
|
||||
status = "✅ Skill updated" if success is not False else "✗ Skill update failed"
|
||||
|
||||
lines = [f"**{status}**", "", f"- **Action:** `{action}`", f"- **Skill:** `{name}`"]
|
||||
if action not in {"delete"}:
|
||||
lines.append(f"- **File:** `{file_path}`")
|
||||
|
||||
message = str(data.get("message") or data.get("error") or "").strip()
|
||||
if message:
|
||||
lines.append(f"- **Result:** {message}")
|
||||
|
||||
replacements = data.get("replacements") or data.get("replacement_count")
|
||||
if replacements is not None:
|
||||
lines.append(f"- **Replacements:** {replacements}")
|
||||
|
||||
path = str(data.get("path") or "").strip()
|
||||
if path:
|
||||
lines.append(f"- **Path:** `{path}`")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_web_search_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
web = data.get("data", {}).get("web") if isinstance(data.get("data"), dict) else data.get("web")
|
||||
if not isinstance(web, list):
|
||||
return None
|
||||
lines = [f"Web results: {len(web)}"]
|
||||
for item in web[:10]:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
title = str(item.get("title") or item.get("url") or "result").strip()
|
||||
url = str(item.get("url") or "").strip()
|
||||
desc = str(item.get("description") or "").strip()
|
||||
lines.append(f"• {title}" + (f" — {url}" if url else ""))
|
||||
if desc:
|
||||
lines.append(f" {desc}")
|
||||
return _truncate_text("\n".join(lines))
|
||||
|
||||
|
||||
def _format_web_extract_result(result: Optional[str]) -> Optional[str]:
|
||||
"""Return only web_extract errors for ACP; success stays compact via title."""
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if data.get("success") is False and data.get("error"):
|
||||
return f"Web extract failed: {data.get('error')}"
|
||||
results = data.get("results")
|
||||
if not isinstance(results, list):
|
||||
return None
|
||||
|
||||
failures: list[str] = []
|
||||
for item in results[:10]:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
error = str(item.get("error") or "").strip()
|
||||
if not error or error in {"None", "null"}:
|
||||
continue
|
||||
url = str(item.get("url") or "").strip()
|
||||
title = str(item.get("title") or url or "Untitled").strip()
|
||||
failures.append(
|
||||
f"- {title}" + (f" — {url}" if url and url != title else "") + f"\n Error: {_truncate_text(error, limit=500)}"
|
||||
)
|
||||
|
||||
if not failures:
|
||||
return None
|
||||
lines = [f"Web extract failed for {len(failures)} URL{'s' if len(failures) != 1 else ''}"]
|
||||
lines.extend(failures)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_process_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return result if isinstance(result, str) and result.strip() else None
|
||||
if data.get("success") is False and data.get("error"):
|
||||
return f"Process error: {data.get('error')}"
|
||||
action = str((args or {}).get("action") or "process").strip() or "process"
|
||||
if isinstance(data.get("processes"), list):
|
||||
processes = data["processes"]
|
||||
lines = [f"Processes: {len(processes)}"]
|
||||
for proc in processes[:20]:
|
||||
if not isinstance(proc, dict):
|
||||
lines.append(f"- {proc}")
|
||||
continue
|
||||
sid = str(proc.get("session_id") or proc.get("id") or "?")
|
||||
status = str(proc.get("status") or ("exited" if proc.get("exited") else "running"))
|
||||
cmd = str(proc.get("command") or "").strip()
|
||||
pid = proc.get("pid")
|
||||
code = proc.get("exit_code")
|
||||
bits = [status]
|
||||
if pid is not None:
|
||||
bits.append(f"pid {pid}")
|
||||
if code is not None:
|
||||
bits.append(f"exit {code}")
|
||||
lines.append(f"- `{sid}` — {', '.join(bits)}" + (f" — {cmd[:120]}" if cmd else ""))
|
||||
if len(processes) > 20:
|
||||
lines.append(f"... {len(processes) - 20} more process(es)")
|
||||
return "\n".join(lines)
|
||||
|
||||
status = str(data.get("status") or data.get("state") or action).strip()
|
||||
sid = str(data.get("session_id") or (args or {}).get("session_id") or "").strip()
|
||||
lines = [f"Process {action}: {status}" + (f" (`{sid}`)" if sid else "")]
|
||||
for key, label in (("command", "Command"), ("pid", "PID"), ("exit_code", "Exit code"), ("returncode", "Exit code"), ("lines", "Lines")):
|
||||
if data.get(key) is not None:
|
||||
lines.append(f"- **{label}:** {data.get(key)}")
|
||||
output = data.get("output") or data.get("new_output") or data.get("log") or data.get("stdout")
|
||||
error = data.get("error") or data.get("stderr")
|
||||
if output:
|
||||
lines.extend(["", "Output:", _truncate_text(str(output), limit=5000)])
|
||||
if error:
|
||||
lines.extend(["", "Error:", _truncate_text(str(error), limit=2000)])
|
||||
msg = data.get("message")
|
||||
if msg and not output and not error:
|
||||
lines.append(str(msg))
|
||||
return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _format_delegate_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if data.get("error") and not isinstance(data.get("results"), list):
|
||||
return f"Delegation failed: {data.get('error')}"
|
||||
results = data.get("results")
|
||||
if not isinstance(results, list):
|
||||
return None
|
||||
total = data.get("total_duration_seconds")
|
||||
lines = [f"Delegation results: {len(results)} task{'s' if len(results) != 1 else ''}" + (f" in {total}s" if total is not None else "")]
|
||||
icon = {"completed": "✅", "failed": "✗", "error": "✗", "timeout": "⏱", "interrupted": "⚠"}
|
||||
for item in results:
|
||||
if not isinstance(item, dict):
|
||||
lines.append(f"- {item}")
|
||||
continue
|
||||
idx = item.get("task_index")
|
||||
status = str(item.get("status") or "unknown")
|
||||
model = item.get("model")
|
||||
dur = item.get("duration_seconds")
|
||||
role = item.get("_child_role")
|
||||
header = f"{icon.get(status, '•')} Task {idx + 1 if isinstance(idx, int) else '?'}: {status}"
|
||||
bits = []
|
||||
if model:
|
||||
bits.append(str(model))
|
||||
if role:
|
||||
bits.append(f"role={role}")
|
||||
if dur is not None:
|
||||
bits.append(f"{dur}s")
|
||||
if bits:
|
||||
header += " (" + ", ".join(bits) + ")"
|
||||
lines.extend(["", header])
|
||||
summary = str(item.get("summary") or "").strip()
|
||||
error = str(item.get("error") or "").strip()
|
||||
if summary:
|
||||
lines.append(_truncate_text(summary, limit=1200))
|
||||
if error:
|
||||
lines.append("Error: " + _truncate_text(error, limit=800))
|
||||
trace = item.get("tool_trace")
|
||||
if isinstance(trace, list) and trace:
|
||||
names = [str(t.get("tool") or "?") for t in trace if isinstance(t, dict)]
|
||||
if names:
|
||||
lines.append("Tools: " + ", ".join(names[:12]) + (f" (+{len(names)-12})" if len(names) > 12 else ""))
|
||||
return _truncate_text("\n".join(lines), limit=8000)
|
||||
|
||||
|
||||
def _format_session_search_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if data.get("success") is False:
|
||||
return f"Session search failed: {data.get('error', 'unknown error')}"
|
||||
results = data.get("results")
|
||||
if not isinstance(results, list):
|
||||
return None
|
||||
mode = data.get("mode") or "search"
|
||||
query = data.get("query")
|
||||
lines = ["Recent sessions" if mode == "recent" else f"Session search results" + (f" for `{query}`" if query else "")]
|
||||
if not results:
|
||||
lines.append(str(data.get("message") or "No matching sessions found."))
|
||||
return "\n".join(lines)
|
||||
for item in results:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
sid = str(item.get("session_id") or "?")
|
||||
title = str(item.get("title") or item.get("when") or "Untitled session").strip()
|
||||
when = str(item.get("last_active") or item.get("started_at") or item.get("when") or "").strip()
|
||||
count = item.get("message_count")
|
||||
source = str(item.get("source") or "").strip()
|
||||
meta = ", ".join(str(x) for x in [when, source, f"{count} msgs" if count is not None else ""] if x)
|
||||
lines.append(f"- **{title}** (`{sid}`)" + (f" — {meta}" if meta else ""))
|
||||
summary = str(item.get("summary") or item.get("preview") or "").strip()
|
||||
if summary:
|
||||
lines.append(" " + _truncate_text(" ".join(summary.split()), limit=500))
|
||||
return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _format_memory_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
action = str((args or {}).get("action") or "memory").strip() or "memory"
|
||||
target = str(data.get("target") or (args or {}).get("target") or "memory")
|
||||
if data.get("success") is False:
|
||||
lines = [f"✗ Memory {action} failed ({target})", str(data.get("error") or "unknown error")]
|
||||
matches = data.get("matches")
|
||||
if isinstance(matches, list) and matches:
|
||||
lines.append("Matches:")
|
||||
lines.extend(f"- {_truncate_text(str(m), 160)}" for m in matches[:5])
|
||||
return "\n".join(lines)
|
||||
lines = [f"✅ Memory {action} saved ({target})"]
|
||||
if data.get("message"):
|
||||
lines.append(str(data.get("message")))
|
||||
if data.get("entry_count") is not None:
|
||||
lines.append(f"Entries: {data.get('entry_count')}")
|
||||
if data.get("usage"):
|
||||
lines.append(f"Usage: {data.get('usage')}")
|
||||
# Avoid dumping all memory entries into ACP UI; show only the explicit new value preview.
|
||||
preview = str((args or {}).get("content") or (args or {}).get("old_text") or "").strip()
|
||||
if preview:
|
||||
lines.append("Preview: " + _truncate_text(preview, limit=300))
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_edit_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
path = str((args or {}).get("path") or "file").strip()
|
||||
if isinstance(data, dict):
|
||||
if data.get("success") is False or data.get("error"):
|
||||
return f"{tool_name} failed for {path}: {data.get('error', 'unknown error')}"
|
||||
message = str(data.get("message") or "").strip()
|
||||
replacements = data.get("replacements") or data.get("replacement_count")
|
||||
lines = [f"✅ {tool_name} completed" + (f" for `{path}`" if path else "")]
|
||||
if message:
|
||||
lines.append(message)
|
||||
if replacements is not None:
|
||||
lines.append(f"Replacements: {replacements}")
|
||||
if data.get("files_modified"):
|
||||
files = data.get("files_modified")
|
||||
if isinstance(files, list):
|
||||
lines.append("Files: " + ", ".join(f"`{f}`" for f in files[:8]))
|
||||
return "\n".join(lines)
|
||||
if isinstance(result, str) and result.strip():
|
||||
return _truncate_text(result, limit=3000)
|
||||
return f"✅ {tool_name} completed" + (f" for `{path}`" if path else "")
|
||||
|
||||
|
||||
def _format_browser_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return result if isinstance(result, str) and result.strip() else None
|
||||
if data.get("success") is False or data.get("error"):
|
||||
return f"{tool_name} failed: {data.get('error', 'unknown error')}"
|
||||
if tool_name == "browser_get_images":
|
||||
images = data.get("images") or data.get("data")
|
||||
if isinstance(images, list):
|
||||
lines = [f"Images found: {len(images)}"]
|
||||
for img in images[:12]:
|
||||
if isinstance(img, dict):
|
||||
alt = str(img.get("alt") or "").strip()
|
||||
url = str(img.get("url") or img.get("src") or "").strip()
|
||||
lines.append(f"- {alt or 'image'}" + (f" — {url}" if url else ""))
|
||||
return _truncate_text("\n".join(lines), limit=5000)
|
||||
title = str(data.get("title") or data.get("url") or data.get("status") or tool_name)
|
||||
text = str(data.get("text") or data.get("content") or data.get("snapshot") or data.get("analysis") or data.get("message") or "").strip()
|
||||
lines = [title]
|
||||
if data.get("url") and data.get("url") != title:
|
||||
lines.append(str(data.get("url")))
|
||||
if text:
|
||||
lines.extend(["", _truncate_text(text, limit=5000)])
|
||||
return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _format_media_or_cron_result(tool_name: str, result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return result if isinstance(result, str) and result.strip() else None
|
||||
if data.get("success") is False or data.get("error"):
|
||||
return f"{tool_name} failed: {data.get('error', 'unknown error')}"
|
||||
lines = [f"✅ {tool_name} completed"]
|
||||
for key in ("file_path", "path", "url", "image_url", "job_id", "id", "status", "message", "next_run"):
|
||||
if data.get(key):
|
||||
lines.append(f"- **{key}:** {data.get(key)}")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_generic_structured_result(tool_name: str, result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, (dict, list)):
|
||||
return result if isinstance(result, str) and result.strip() else None
|
||||
if isinstance(data, list):
|
||||
lines = [f"{tool_name}: {len(data)} item{'s' if len(data) != 1 else ''}"]
|
||||
for item in data[:12]:
|
||||
lines.append(f"- {_truncate_text(str(item), limit=240)}")
|
||||
return _truncate_text("\n".join(lines), limit=5000)
|
||||
|
||||
if data.get("success") is False or data.get("error"):
|
||||
return f"{tool_name} failed: {data.get('error', 'unknown error')}"
|
||||
|
||||
lines = [f"✅ {tool_name} completed" if data.get("success") is True else f"{tool_name} result"]
|
||||
priority_keys = (
|
||||
"message", "status", "id", "task_id", "issue_id", "title", "name", "entity_id",
|
||||
"state", "service", "url", "path", "file_path", "count", "total", "next_run",
|
||||
)
|
||||
seen = set()
|
||||
for key in priority_keys:
|
||||
value = data.get(key)
|
||||
if value in (None, "", [], {}):
|
||||
continue
|
||||
seen.add(key)
|
||||
lines.append(f"- **{key}:** {_truncate_text(str(value), limit=500)}")
|
||||
|
||||
for key, value in data.items():
|
||||
if key in seen or key in {"success", "raw", "content", "entries"}:
|
||||
continue
|
||||
if value in (None, "", [], {}):
|
||||
continue
|
||||
if isinstance(value, (dict, list)):
|
||||
preview = json.dumps(value, ensure_ascii=False, default=str)
|
||||
else:
|
||||
preview = str(value)
|
||||
lines.append(f"- **{key}:** {_truncate_text(preview, limit=500)}")
|
||||
if len(lines) >= 14:
|
||||
break
|
||||
|
||||
content = data.get("content")
|
||||
if isinstance(content, str) and content.strip():
|
||||
lines.extend(["", _truncate_text(content.strip(), limit=1500)])
|
||||
return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _build_polished_completion_content(
|
||||
tool_name: str,
|
||||
result: Optional[str],
|
||||
function_args: Optional[Dict[str, Any]],
|
||||
) -> Optional[List[Any]]:
|
||||
formatter = {
|
||||
"todo": lambda: _format_todo_result(result),
|
||||
"read_file": lambda: _format_read_file_result(result, function_args),
|
||||
"write_file": lambda: _format_edit_result(tool_name, result, function_args),
|
||||
"patch": lambda: _format_edit_result(tool_name, result, function_args),
|
||||
"search_files": lambda: _format_search_files_result(result),
|
||||
"execute_code": lambda: _format_execute_code_result(result),
|
||||
"process": lambda: _format_process_result(result, function_args),
|
||||
"delegate_task": lambda: _format_delegate_result(result),
|
||||
"session_search": lambda: _format_session_search_result(result),
|
||||
"memory": lambda: _format_memory_result(result, function_args),
|
||||
"skill_view": lambda: _format_skill_view_result(result),
|
||||
"skill_manage": lambda: _format_skill_manage_result(result, function_args),
|
||||
"web_search": lambda: _format_web_search_result(result),
|
||||
"web_extract": lambda: _format_web_extract_result(result),
|
||||
"browser_navigate": lambda: _format_browser_result(tool_name, result, function_args),
|
||||
"browser_snapshot": lambda: _format_browser_result(tool_name, result, function_args),
|
||||
"browser_vision": lambda: _format_browser_result(tool_name, result, function_args),
|
||||
"browser_get_images": lambda: _format_browser_result(tool_name, result, function_args),
|
||||
"vision_analyze": lambda: _format_media_or_cron_result(tool_name, result),
|
||||
"image_generate": lambda: _format_media_or_cron_result(tool_name, result),
|
||||
"cronjob": lambda: _format_media_or_cron_result(tool_name, result),
|
||||
}.get(tool_name)
|
||||
if formatter is None and tool_name in _POLISHED_TOOLS:
|
||||
formatter = lambda: _format_generic_structured_result(tool_name, result)
|
||||
if formatter is None:
|
||||
return None
|
||||
text = formatter()
|
||||
if not text:
|
||||
return None
|
||||
return [_text(text)]
|
||||
|
||||
|
||||
def _build_patch_mode_content(patch_text: str) -> List[Any]:
|
||||
"""Parse V4A patch mode input into ACP diff blocks when possible."""
|
||||
if not patch_text:
|
||||
@@ -912,11 +258,7 @@ def _build_tool_complete_content(
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
polished_content = _build_polished_completion_content(tool_name, result, function_args)
|
||||
if polished_content:
|
||||
return polished_content
|
||||
|
||||
return [_text(display_result)]
|
||||
return [acp.tool_content(acp.text_block(display_result))]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -946,6 +288,7 @@ def build_tool_start(
|
||||
content = _build_patch_mode_content(patch_text)
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
if tool_name == "write_file":
|
||||
@@ -954,172 +297,32 @@ def build_tool_start(
|
||||
content = [acp.tool_diff_content(path=path, new_text=file_content)]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
if tool_name == "terminal":
|
||||
command = arguments.get("command", "")
|
||||
content = [_text(f"$ {command}")]
|
||||
content = [acp.tool_content(acp.text_block(f"$ {command}"))]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
if tool_name == "read_file":
|
||||
# The title and location already identify the file. Sending a synthetic
|
||||
# "Reading ..." content block makes Zed render an unhelpful Output
|
||||
# section before the real file contents arrive on completion.
|
||||
path = arguments.get("path", "")
|
||||
content = [acp.tool_content(acp.text_block(f"Reading {path}"))]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=None, locations=locations,
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
if tool_name == "search_files":
|
||||
pattern = arguments.get("pattern", "")
|
||||
target = arguments.get("target", "content")
|
||||
search_path = arguments.get("path")
|
||||
where = f" in {search_path}" if search_path else ""
|
||||
content = [_text(f"Searching for '{pattern}' ({target}){where}")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "todo":
|
||||
items = arguments.get("todos")
|
||||
if isinstance(items, list):
|
||||
preview_lines = ["Updating todo list", ""]
|
||||
for item in items[:8]:
|
||||
if isinstance(item, dict):
|
||||
preview_lines.append(f"- {item.get('status', 'pending')}: {item.get('content', item.get('id', ''))}")
|
||||
if len(items) > 8:
|
||||
preview_lines.append(f"... {len(items) - 8} more")
|
||||
content = [_text("\n".join(preview_lines))]
|
||||
else:
|
||||
content = [_text("Reading todo list")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "skill_view":
|
||||
name = str(arguments.get("name") or "?").strip() or "?"
|
||||
file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
|
||||
content = [_text(f"Loading skill '{name}' ({file_path})")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "skill_manage":
|
||||
action = str(arguments.get("action") or "manage").strip() or "manage"
|
||||
name = str(arguments.get("name") or "?").strip() or "?"
|
||||
file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
|
||||
path = f"skills/{name}/{file_path}" if file_path else f"skills/{name}"
|
||||
|
||||
if action == "patch":
|
||||
old = str(arguments.get("old_string") or "")
|
||||
new = str(arguments.get("new_string") or "")
|
||||
content = [acp.tool_diff_content(path=path, old_text=old or None, new_text=new)]
|
||||
elif action in {"edit", "create"}:
|
||||
content = [
|
||||
acp.tool_diff_content(
|
||||
path=path,
|
||||
new_text=str(arguments.get("content") or ""),
|
||||
)
|
||||
]
|
||||
elif action == "write_file":
|
||||
target = str(arguments.get("file_path") or "file")
|
||||
content = [
|
||||
acp.tool_diff_content(
|
||||
path=f"skills/{name}/{target}",
|
||||
new_text=str(arguments.get("file_content") or ""),
|
||||
)
|
||||
]
|
||||
elif action in {"delete", "remove_file"}:
|
||||
target = str(arguments.get("file_path") or file_path or name)
|
||||
content = [_text(f"Removing {target} from skill '{name}'")]
|
||||
else:
|
||||
content = [_text(f"Running skill_manage action '{action}' on skill '{name}' ({file_path})")]
|
||||
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "execute_code":
|
||||
code = str(arguments.get("code") or "").strip()
|
||||
preview = code[:1200] + (f"\n... ({len(code)} chars total, truncated)" if len(code) > 1200 else "")
|
||||
content = [_text(f"Running Python helper script:\n\n```python\n{preview}\n```" if preview else "Running Python helper script")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "web_search":
|
||||
query = str(arguments.get("query") or "").strip()
|
||||
content = [_text(f"Searching the web for: {query}" if query else "Searching the web")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "web_extract":
|
||||
# The title identifies the URL(s). Avoid a duplicate content block so
|
||||
# Zed renders this like read_file: compact start, concise completion.
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=None, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "process":
|
||||
action = str(arguments.get("action") or "").strip() or "manage"
|
||||
sid = str(arguments.get("session_id") or "").strip()
|
||||
data_preview = str(arguments.get("data") or "").strip()
|
||||
text = f"Process action: {action}" + (f"\nSession: {sid}" if sid else "")
|
||||
if data_preview:
|
||||
text += "\nInput: " + _truncate_text(data_preview, limit=500)
|
||||
content = [_text(text)]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "delegate_task":
|
||||
tasks = arguments.get("tasks")
|
||||
if isinstance(tasks, list) and tasks:
|
||||
lines = [f"Delegating {len(tasks)} tasks", ""]
|
||||
for i, task in enumerate(tasks[:8], 1):
|
||||
if isinstance(task, dict):
|
||||
goal = str(task.get("goal") or "").strip()
|
||||
role = str(task.get("role") or "").strip()
|
||||
lines.append(f"{i}. " + _truncate_text(goal, limit=160) + (f" ({role})" if role else ""))
|
||||
if len(tasks) > 8:
|
||||
lines.append(f"... {len(tasks) - 8} more")
|
||||
content = [_text("\n".join(lines))]
|
||||
else:
|
||||
goal = str(arguments.get("goal") or "").strip()
|
||||
content = [_text("Delegating task" + (f":\n{_truncate_text(goal, limit=800)}" if goal else ""))]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "session_search":
|
||||
query = str(arguments.get("query") or "").strip()
|
||||
content = [_text(f"Searching past sessions for: {query}" if query else "Loading recent sessions")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "memory":
|
||||
action = str(arguments.get("action") or "manage").strip() or "manage"
|
||||
target = str(arguments.get("target") or "memory").strip() or "memory"
|
||||
preview = str(arguments.get("content") or arguments.get("old_text") or "").strip()
|
||||
text = f"Memory {action} ({target})"
|
||||
if preview:
|
||||
text += "\nPreview: " + _truncate_text(preview, limit=500)
|
||||
content = [_text(text)]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name in _POLISHED_TOOLS:
|
||||
try:
|
||||
args_text = json.dumps(arguments, indent=2, default=str)
|
||||
except (TypeError, ValueError):
|
||||
args_text = str(arguments)
|
||||
content = [_text(_truncate_text(args_text, limit=1200))]
|
||||
content = [acp.tool_content(acp.text_block(f"Searching for '{pattern}' ({target})"))]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
# Generic fallback
|
||||
@@ -1131,7 +334,7 @@ def build_tool_start(
|
||||
content = [acp.tool_content(acp.text_block(args_text))]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=None if tool_name in _POLISHED_TOOLS else arguments,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
|
||||
@@ -1144,22 +347,18 @@ def build_tool_complete(
|
||||
) -> ToolCallProgress:
|
||||
"""Create a ToolCallUpdate (progress) event for a completed tool call."""
|
||||
kind = get_tool_kind(tool_name)
|
||||
if tool_name == "web_extract":
|
||||
error_text = _format_web_extract_result(result)
|
||||
content = [_text(error_text)] if error_text else None
|
||||
else:
|
||||
content = _build_tool_complete_content(
|
||||
tool_name,
|
||||
result,
|
||||
function_args=function_args,
|
||||
snapshot=snapshot,
|
||||
)
|
||||
content = _build_tool_complete_content(
|
||||
tool_name,
|
||||
result,
|
||||
function_args=function_args,
|
||||
snapshot=snapshot,
|
||||
)
|
||||
return acp.update_tool_call(
|
||||
tool_call_id,
|
||||
kind=kind,
|
||||
status="completed",
|
||||
content=content,
|
||||
raw_output=None if tool_name in _POLISHED_TOOLS else result,
|
||||
raw_output=result,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -20,27 +20,12 @@ from pathlib import Path
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from utils import base_url_host_matches, normalize_proxy_env_vars
|
||||
from utils import normalize_proxy_env_vars
|
||||
|
||||
# NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
|
||||
# ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
|
||||
# and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client,
|
||||
# read_claude_code_credentials_from_keychain) are all on cold user-triggered
|
||||
# paths. Access via the `_get_anthropic_sdk()` accessor below, which caches
|
||||
# the module after the first call and returns None on ImportError.
|
||||
_anthropic_sdk: Any = ... # sentinel — None means "tried and missing"
|
||||
|
||||
|
||||
def _get_anthropic_sdk():
|
||||
"""Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
|
||||
global _anthropic_sdk
|
||||
if _anthropic_sdk is ...:
|
||||
try:
|
||||
import anthropic as _sdk
|
||||
_anthropic_sdk = _sdk
|
||||
except ImportError:
|
||||
_anthropic_sdk = None
|
||||
return _anthropic_sdk
|
||||
try:
|
||||
import anthropic as _anthropic_sdk
|
||||
except ImportError:
|
||||
_anthropic_sdk = None # type: ignore[assignment]
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -76,7 +61,6 @@ _ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
|
||||
# Models where temperature/top_p/top_k return 400 if set to non-default values.
|
||||
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
|
||||
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
|
||||
_FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
|
||||
|
||||
# ── Max output token limits per Anthropic model ───────────────────────
|
||||
# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
|
||||
@@ -106,9 +90,6 @@ _ANTHROPIC_OUTPUT_LIMITS = {
|
||||
"claude-3-haiku": 4_096,
|
||||
# Third-party Anthropic-compatible providers
|
||||
"minimax": 131_072,
|
||||
# Qwen models via DashScope Anthropic-compatible endpoint
|
||||
# DashScope enforces max_tokens ∈ [1, 65536]
|
||||
"qwen3": 65_536,
|
||||
}
|
||||
|
||||
# For any model not in the table, assume the highest current limit.
|
||||
@@ -220,45 +201,20 @@ def _forbids_sampling_params(model: str) -> bool:
|
||||
return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
|
||||
|
||||
|
||||
def _supports_fast_mode(model: str) -> bool:
|
||||
"""Return True for models that support Anthropic Fast Mode (speed=fast).
|
||||
|
||||
Per Anthropic docs, fast mode is currently supported on Opus 4.6 only.
|
||||
Sending ``speed: "fast"`` to any other Claude model (including Opus 4.7)
|
||||
returns HTTP 400. This guard prevents silently 400'ing when stale config
|
||||
or older callers leave fast mode enabled across a model upgrade.
|
||||
"""
|
||||
return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS)
|
||||
|
||||
|
||||
# Beta headers for enhanced features (sent with ALL auth types).
|
||||
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
|
||||
# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
|
||||
# beta headers are still accepted (harmless no-op) but not required. Kept
|
||||
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
|
||||
# that still gate on the headers continue to get the enhanced features.
|
||||
#
|
||||
# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
|
||||
# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
|
||||
# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
|
||||
# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
|
||||
# at 200K even though model_metadata.py advertises 1M. The header is a harmless
|
||||
# no-op on endpoints where 1M is GA.
|
||||
#
|
||||
# Migration guide: remove these if you no longer support ≤4.5 models or once
|
||||
# Bedrock/Azure promote 1M to GA.
|
||||
# Migration guide: remove these if you no longer support ≤4.5 models.
|
||||
_COMMON_BETAS = [
|
||||
"interleaved-thinking-2025-05-14",
|
||||
"fine-grained-tool-streaming-2025-05-14",
|
||||
"context-1m-2025-08-07",
|
||||
]
|
||||
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
|
||||
# the fine-grained tool streaming beta is present. Omit it so tool calls
|
||||
# fall back to the provider's default response path.
|
||||
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
||||
# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
|
||||
# Bearer-auth (MiniMax) endpoints since they host their own models and
|
||||
# unknown Anthropic beta headers risk request rejection.
|
||||
_CONTEXT_1M_BETA = "context-1m-2025-08-07"
|
||||
|
||||
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
||||
# significantly higher output token throughput on Opus 4.6 (~2.5x).
|
||||
@@ -380,88 +336,6 @@ def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
|
||||
return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
|
||||
|
||||
|
||||
# Model-name prefixes that identify the Kimi / Moonshot family. Covers
|
||||
# - official slugs: ``kimi-k2.5``, ``kimi_thinking``, ``moonshot-v1-8k``
|
||||
# - common release lines: ``k1.5-...``, ``k2-thinking``, ``k25-...``, ``k2.5-...``
|
||||
# Matched case-insensitively against the post-``normalize_model_name`` form,
|
||||
# so a caller's ``provider/vendor/model`` slug is handled the same as a
|
||||
# bare name.
|
||||
_KIMI_FAMILY_MODEL_PREFIXES = (
|
||||
"kimi-", "kimi_",
|
||||
"moonshot-", "moonshot_",
|
||||
"k1.", "k1-",
|
||||
"k2.", "k2-",
|
||||
"k25", "k2.5",
|
||||
)
|
||||
|
||||
|
||||
def _model_name_is_kimi_family(model: str | None) -> bool:
|
||||
if not isinstance(model, str):
|
||||
return False
|
||||
m = model.strip().lower()
|
||||
if not m:
|
||||
return False
|
||||
# Strip vendor prefix (e.g. ``moonshotai/kimi-k2.5`` → ``kimi-k2.5``)
|
||||
if "/" in m:
|
||||
m = m.rsplit("/", 1)[-1]
|
||||
return m.startswith(_KIMI_FAMILY_MODEL_PREFIXES)
|
||||
|
||||
|
||||
def _is_kimi_family_endpoint(base_url: str | None, model: str | None = None) -> bool:
|
||||
"""Return True for any Kimi / Moonshot Anthropic-Messages-speaking endpoint.
|
||||
|
||||
Broader than ``_is_kimi_coding_endpoint`` — matches:
|
||||
|
||||
- Kimi's official ``/coding`` URL (legacy check, preserved)
|
||||
- Any ``api.kimi.com`` / ``moonshot.ai`` / ``moonshot.cn`` host
|
||||
- Custom or proxied endpoints whose *model* name is in the Kimi / Moonshot
|
||||
family (``kimi-*``, ``moonshot-*``, ``k1.*``, ``k2.*``, …). Users with
|
||||
``api_mode: anthropic_messages`` on a private gateway fronting Kimi
|
||||
fall into this branch — the upstream still enforces Kimi's thinking
|
||||
semantics (reasoning_content required on every replayed tool-call
|
||||
message) regardless of the gateway's hostname.
|
||||
|
||||
Used to decide whether to drop Anthropic's ``thinking`` kwarg and to
|
||||
preserve unsigned reasoning_content-derived thinking blocks on replay.
|
||||
See hermes-agent#13848, #17057.
|
||||
"""
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
return True
|
||||
for _domain in ("api.kimi.com", "moonshot.ai", "moonshot.cn"):
|
||||
if base_url_host_matches(base_url or "", _domain):
|
||||
return True
|
||||
if _model_name_is_kimi_family(model):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _is_deepseek_anthropic_endpoint(base_url: str | None) -> bool:
|
||||
"""Return True for DeepSeek's Anthropic-compatible endpoint.
|
||||
|
||||
DeepSeek's ``/anthropic`` route speaks the Anthropic Messages protocol
|
||||
but, when thinking mode is enabled, requires the ``thinking`` blocks
|
||||
from prior assistant turns to round-trip on subsequent requests — the
|
||||
generic third-party path strips them and triggers HTTP 400::
|
||||
|
||||
The content[].thinking in the thinking mode must be passed back
|
||||
to the API.
|
||||
|
||||
Per DeepSeek's published compatibility matrix the blocks are unsigned
|
||||
(no Anthropic-proprietary signature, no ``redacted_thinking`` support),
|
||||
so this endpoint is handled with the same strip-signed / keep-unsigned
|
||||
policy used for Kimi's ``/coding`` endpoint. The match is pinned to
|
||||
the ``/anthropic`` path so the OpenAI-compatible ``api.deepseek.com``
|
||||
base URL (which never reaches this adapter) is not misclassified.
|
||||
See hermes-agent#16748.
|
||||
"""
|
||||
if not base_url_host_matches(base_url or "", "api.deepseek.com"):
|
||||
return False
|
||||
normalized = _normalize_base_url_text(base_url)
|
||||
if not normalized:
|
||||
return False
|
||||
return "/anthropic" in normalized.rstrip("/").lower()
|
||||
|
||||
|
||||
def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||
"""Return True for Anthropic-compatible providers that require Bearer auth.
|
||||
|
||||
@@ -476,45 +350,20 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
|
||||
|
||||
|
||||
def _common_betas_for_base_url(
|
||||
base_url: str | None,
|
||||
*,
|
||||
drop_context_1m_beta: bool = False,
|
||||
) -> list[str]:
|
||||
def _common_betas_for_base_url(base_url: str | None) -> list[str]:
|
||||
"""Return the beta headers that are safe for the configured endpoint.
|
||||
|
||||
MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
|
||||
that include Anthropic's ``fine-grained-tool-streaming`` beta — every
|
||||
tool-use message triggers a connection error. Strip that beta for
|
||||
Bearer-auth endpoints while keeping all other betas intact.
|
||||
|
||||
The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
|
||||
endpoints — MiniMax hosts its own models, not Claude, so the header is
|
||||
irrelevant at best and risks request rejection at worst.
|
||||
|
||||
``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
|
||||
otherwise-unrelated endpoints. The OAuth retry path flips this flag after
|
||||
a subscription rejects the beta with
|
||||
"The long context beta is not yet available for this subscription" so
|
||||
subsequent requests in the same session don't repeat the probe. See the
|
||||
reactive recovery loop in ``run_agent.py`` and issue-comment history on
|
||||
PR #17680 for the full rationale.
|
||||
"""
|
||||
if _requires_bearer_auth(base_url):
|
||||
_stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
|
||||
return [b for b in _COMMON_BETAS if b not in _stripped]
|
||||
if drop_context_1m_beta:
|
||||
return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
|
||||
return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
|
||||
return _COMMON_BETAS
|
||||
|
||||
|
||||
def build_anthropic_client(
|
||||
api_key: str,
|
||||
base_url: str = None,
|
||||
timeout: float = None,
|
||||
*,
|
||||
drop_context_1m_beta: bool = False,
|
||||
):
|
||||
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
|
||||
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
|
||||
|
||||
If *timeout* is provided it overrides the default 900s read timeout. The
|
||||
@@ -523,15 +372,8 @@ def build_anthropic_client(
|
||||
Anthropic-compatible providers respect the same knob as OpenAI-wire
|
||||
providers.
|
||||
|
||||
``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the
|
||||
client-level ``anthropic-beta`` header. Used by the reactive OAuth retry
|
||||
path in ``run_agent.py`` when a subscription rejects the beta; leave at
|
||||
its default on fresh clients so 1M-capable subscriptions keep the
|
||||
capability.
|
||||
|
||||
Returns an anthropic.Anthropic instance.
|
||||
"""
|
||||
_anthropic_sdk = _get_anthropic_sdk()
|
||||
if _anthropic_sdk is None:
|
||||
raise ImportError(
|
||||
"The 'anthropic' package is required for the Anthropic provider. "
|
||||
@@ -548,20 +390,8 @@ def build_anthropic_client(
|
||||
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
|
||||
}
|
||||
if normalized_base_url:
|
||||
# Azure Anthropic endpoints require an ``api-version`` query parameter.
|
||||
# Pass it via default_query so the SDK appends it to every request URL
|
||||
# without corrupting the base_url (appending it directly produces
|
||||
# malformed paths like /anthropic?api-version=.../v1/messages).
|
||||
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
|
||||
if _is_azure_endpoint and "api-version" not in normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url.rstrip("/")
|
||||
kwargs["default_query"] = {"api-version": "2025-04-15"}
|
||||
else:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(
|
||||
normalized_base_url,
|
||||
drop_context_1m_beta=drop_context_1m_beta,
|
||||
)
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
# Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
|
||||
@@ -617,16 +447,8 @@ def build_anthropic_bedrock_client(region: str):
|
||||
Claude feature parity: prompt caching, thinking budgets, adaptive
|
||||
thinking, fast mode — features not available via the Converse API.
|
||||
|
||||
Attaches the common Anthropic beta headers as client-level defaults so
|
||||
that Bedrock-hosted Claude models get the same enhanced features as
|
||||
native Anthropic. The ``context-1m-2025-08-07`` beta in particular
|
||||
unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
|
||||
it, Bedrock caps these models at 200K even though the Anthropic API
|
||||
serves them with 1M natively.
|
||||
|
||||
Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
|
||||
"""
|
||||
_anthropic_sdk = _get_anthropic_sdk()
|
||||
if _anthropic_sdk is None:
|
||||
raise ImportError(
|
||||
"The 'anthropic' package is required for the Bedrock provider. "
|
||||
@@ -642,7 +464,6 @@ def build_anthropic_bedrock_client(region: str):
|
||||
return _anthropic_sdk.AnthropicBedrock(
|
||||
aws_region=region,
|
||||
timeout=Timeout(timeout=900.0, connect=10.0),
|
||||
default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
|
||||
)
|
||||
|
||||
|
||||
@@ -658,6 +479,9 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
import platform
|
||||
import subprocess
|
||||
|
||||
if platform.system() != "Darwin":
|
||||
return None
|
||||
|
||||
@@ -1202,12 +1026,9 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
# These must not be converted to hyphens. See issue #12295.
|
||||
if _is_bedrock_model_id(model):
|
||||
return model
|
||||
# Only convert dots to hyphens for Anthropic/Claude models.
|
||||
# Non-Anthropic models (gpt-5.4, gemini-2.5, etc.) use dots
|
||||
# as part of their canonical names. See issue #17171.
|
||||
_lower = model.lower()
|
||||
if _lower.startswith("claude-") or _lower.startswith("anthropic/"):
|
||||
model = model.replace(".", "-")
|
||||
# OpenRouter uses dots for version separators (claude-opus-4.6),
|
||||
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
|
||||
model = model.replace(".", "-")
|
||||
return model
|
||||
|
||||
|
||||
@@ -1224,74 +1045,17 @@ def _sanitize_tool_id(tool_id: str) -> str:
|
||||
return sanitized or "tool_0"
|
||||
|
||||
|
||||
def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
|
||||
"""Normalize tool schemas before sending them to Anthropic.
|
||||
|
||||
Anthropic's tool schema validator rejects nullable unions such as
|
||||
``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
|
||||
commonly emits for optional fields. Tool optionality is represented by
|
||||
the parent ``required`` array, so we delegate to the shared
|
||||
``strip_nullable_unions`` helper to collapse nullable unions to the
|
||||
non-null branch while preserving metadata like description/default.
|
||||
|
||||
``keep_nullable_hint=False`` because the Anthropic validator does not
|
||||
recognize the OpenAPI-style ``nullable: true`` extension and strict
|
||||
schema-to-grammar converters may reject unknown keywords.
|
||||
|
||||
Top-level ``oneOf``/``allOf``/``anyOf`` are also stripped here: the
|
||||
Anthropic API rejects union keywords at the schema root with a generic
|
||||
HTTP 400. Several upstream and plugin tools ship schemas with one of
|
||||
these keywords at the top level (commonly for Pydantic discriminated
|
||||
unions). If we land here with those keywords still present after
|
||||
nullable-union stripping, drop them and fall back to a plain object
|
||||
schema so the tool still validates at the Anthropic boundary.
|
||||
"""
|
||||
if not schema:
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
from tools.schema_sanitizer import strip_nullable_unions
|
||||
|
||||
normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
|
||||
if not isinstance(normalized, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
# Strip top-level union keywords that Anthropic's validator rejects.
|
||||
banned = {"oneOf", "allOf", "anyOf"}
|
||||
if banned & normalized.keys():
|
||||
normalized = {k: v for k, v in normalized.items() if k not in banned}
|
||||
if "type" not in normalized:
|
||||
normalized["type"] = "object"
|
||||
if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
|
||||
normalized = {**normalized, "properties": {}}
|
||||
return normalized
|
||||
|
||||
|
||||
def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||
"""Convert OpenAI tool definitions to Anthropic format."""
|
||||
if not tools:
|
||||
return []
|
||||
result = []
|
||||
seen_names: set = set()
|
||||
for t in tools:
|
||||
fn = t.get("function", {})
|
||||
name = fn.get("name", "")
|
||||
# Defensive dedup: Anthropic rejects requests with duplicate tool
|
||||
# names. Upstream injection paths already dedup, but this guard
|
||||
# converts a hard API failure into a warning. See: #18478
|
||||
if name and name in seen_names:
|
||||
logger.warning(
|
||||
"convert_tools_to_anthropic: duplicate tool name '%s' "
|
||||
"— dropping second occurrence",
|
||||
name,
|
||||
)
|
||||
continue
|
||||
if name:
|
||||
seen_names.add(name)
|
||||
result.append({
|
||||
"name": name,
|
||||
"name": fn.get("name", ""),
|
||||
"description": fn.get("description", ""),
|
||||
"input_schema": _normalize_tool_input_schema(
|
||||
fn.get("parameters", {"type": "object", "properties": {}})
|
||||
),
|
||||
"input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
|
||||
})
|
||||
return result
|
||||
|
||||
@@ -1422,7 +1186,6 @@ def _convert_content_to_anthropic(content: Any) -> Any:
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
|
||||
@@ -1434,12 +1197,6 @@ def convert_messages_to_anthropic(
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
|
||||
When *model* is provided and matches the Kimi / Moonshot family (or
|
||||
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
||||
synthesised from ``reasoning_content`` are preserved on replayed
|
||||
assistant tool-call messages — Kimi requires the field to exist, even
|
||||
if empty.
|
||||
"""
|
||||
system = None
|
||||
result = []
|
||||
@@ -1668,16 +1425,7 @@ def convert_messages_to_anthropic(
|
||||
# cache markers can interfere with signature validation.
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
# Kimi /coding and DeepSeek /anthropic share a contract: both speak the
|
||||
# Anthropic Messages protocol upstream but require that thinking blocks
|
||||
# synthesised from reasoning_content round-trip on subsequent turns when
|
||||
# thinking is enabled. Signed Anthropic blocks still have to be stripped
|
||||
# (neither endpoint can validate Anthropic's signatures); unsigned blocks
|
||||
# are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
|
||||
_preserve_unsigned_thinking = (
|
||||
_is_kimi_family_endpoint(base_url, model)
|
||||
or _is_deepseek_anthropic_endpoint(base_url)
|
||||
)
|
||||
_is_kimi = _is_kimi_coding_endpoint(base_url)
|
||||
|
||||
last_assistant_idx = None
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
@@ -1689,22 +1437,22 @@ def convert_messages_to_anthropic(
|
||||
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
|
||||
continue
|
||||
|
||||
if _preserve_unsigned_thinking:
|
||||
# Kimi's /coding and DeepSeek's /anthropic endpoints both enable
|
||||
# thinking server-side and require unsigned thinking blocks on
|
||||
# replayed assistant tool-call messages. Strip signed Anthropic
|
||||
# blocks (neither upstream can validate Anthropic signatures) but
|
||||
# preserve the unsigned ones we synthesised from reasoning_content.
|
||||
if _is_kimi:
|
||||
# Kimi's /coding endpoint enables thinking server-side and
|
||||
# requires unsigned thinking blocks on replayed assistant
|
||||
# tool-call messages. Strip signed Anthropic blocks (Kimi
|
||||
# can't validate signatures) but preserve the unsigned ones
|
||||
# we synthesised from reasoning_content above.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("signature") or b.get("data"):
|
||||
# Anthropic-signed block — upstream can't validate, strip
|
||||
# Anthropic-signed block — Kimi can't validate, strip
|
||||
continue
|
||||
# Unsigned thinking (synthesised from reasoning_content) —
|
||||
# keep it: the upstream needs it for message-history validation.
|
||||
# keep it: Kimi needs it for message-history validation.
|
||||
new_content.append(b)
|
||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||
elif _is_third_party or idx != last_assistant_idx:
|
||||
@@ -1761,7 +1509,6 @@ def build_anthropic_kwargs(
|
||||
context_length: Optional[int] = None,
|
||||
base_url: str | None = None,
|
||||
fast_mode: bool = False,
|
||||
drop_context_1m_beta: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build kwargs for anthropic.messages.create().
|
||||
|
||||
@@ -1801,9 +1548,7 @@ def build_anthropic_kwargs(
|
||||
Currently only supported on native Anthropic endpoints (not third-party
|
||||
compatible ones).
|
||||
"""
|
||||
system, anthropic_messages = convert_messages_to_anthropic(
|
||||
messages, base_url=base_url, model=model
|
||||
)
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||
|
||||
model = normalize_model_name(model, preserve_dots=preserve_dots)
|
||||
@@ -1909,7 +1654,7 @@ def build_anthropic_kwargs(
|
||||
# silently hides reasoning text that Hermes surfaces in its CLI. We
|
||||
# request "summarized" so the reasoning blocks stay populated — matching
|
||||
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
|
||||
_is_kimi_coding = _is_kimi_family_endpoint(base_url, model)
|
||||
_is_kimi_coding = _is_kimi_coding_endpoint(base_url)
|
||||
if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
|
||||
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
|
||||
effort = str(reasoning_config.get("effort", "medium")).lower()
|
||||
@@ -1944,22 +1689,13 @@ def build_anthropic_kwargs(
|
||||
|
||||
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
|
||||
# Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
|
||||
# output speed. Per Anthropic docs, fast mode is only supported on
|
||||
# Opus 4.6 — Opus 4.7 and other models 400 on the speed parameter.
|
||||
# Only for native Anthropic endpoints — third-party providers would
|
||||
# reject the unknown beta header and speed parameter.
|
||||
if (
|
||||
fast_mode
|
||||
and not _is_third_party_anthropic_endpoint(base_url)
|
||||
and _supports_fast_mode(model)
|
||||
):
|
||||
# output speed. Only for native Anthropic endpoints — third-party
|
||||
# providers would reject the unknown beta header and speed parameter.
|
||||
if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
|
||||
kwargs.setdefault("extra_body", {})["speed"] = "fast"
|
||||
# Build extra_headers with ALL applicable betas (the per-request
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
betas = list(_common_betas_for_base_url(
|
||||
base_url,
|
||||
drop_context_1m_beta=drop_context_1m_beta,
|
||||
))
|
||||
betas = list(_common_betas_for_base_url(base_url))
|
||||
if is_oauth:
|
||||
betas.extend(_OAUTH_ONLY_BETAS)
|
||||
betas.append(_FAST_MODE_BETA)
|
||||
|
||||
@@ -291,52 +291,14 @@ def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool:
|
||||
def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str:
|
||||
"""Resolve the AWS region for Bedrock API calls.
|
||||
|
||||
Priority:
|
||||
1. AWS_REGION env var
|
||||
2. AWS_DEFAULT_REGION env var
|
||||
3. boto3/botocore configured region (from ~/.aws/config or SSO profile)
|
||||
4. us-east-1 (hard fallback)
|
||||
|
||||
The boto3 fallback is critical for EU/AP users who configure their region
|
||||
in ~/.aws/config via a named profile rather than env vars — without it,
|
||||
live model discovery would always return us.* profile IDs regardless of
|
||||
the user's actual region.
|
||||
Priority: AWS_REGION → AWS_DEFAULT_REGION → us-east-1 (fallback).
|
||||
"""
|
||||
env = env if env is not None else os.environ
|
||||
explicit = (
|
||||
return (
|
||||
env.get("AWS_REGION", "").strip()
|
||||
or env.get("AWS_DEFAULT_REGION", "").strip()
|
||||
or "us-east-1"
|
||||
)
|
||||
if explicit:
|
||||
return explicit
|
||||
try:
|
||||
import botocore.session
|
||||
region = botocore.session.get_session().get_config_variable("region")
|
||||
if region:
|
||||
return region
|
||||
except Exception:
|
||||
pass
|
||||
return "us-east-1"
|
||||
|
||||
|
||||
def bedrock_model_ids_or_none() -> Optional[List[str]]:
|
||||
"""Live-discover Bedrock model IDs for the active region.
|
||||
|
||||
Returns a list of model ID strings if discovery succeeds and yields
|
||||
at least one model, or ``None`` on failure / empty result. Callers
|
||||
should fall back to the static curated list when ``None`` is returned.
|
||||
|
||||
This helper consolidates the discover → extract-ids → fallback
|
||||
pattern that was previously duplicated across ``provider_model_ids``,
|
||||
``list_authenticated_providers`` section 2, and section 3.
|
||||
"""
|
||||
try:
|
||||
discovered = discover_bedrock_models(resolve_bedrock_region())
|
||||
if discovered:
|
||||
return [m["id"] for m in discovered]
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -227,23 +227,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
|
||||
# Message format conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
|
||||
|
||||
|
||||
def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
|
||||
"""Normalize a Responses assistant message status for replay.
|
||||
|
||||
The API accepts completed/incomplete/in_progress on replayed assistant
|
||||
output messages. Preserve those exactly (modulo case/hyphen spelling) so
|
||||
incomplete Codex continuation turns don't get falsely marked completed.
|
||||
"""
|
||||
if isinstance(value, str):
|
||||
status = value.strip().lower().replace("-", "_").replace(" ", "_")
|
||||
if status in _RESPONSE_MESSAGE_STATUSES:
|
||||
return status
|
||||
return default
|
||||
|
||||
|
||||
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items."""
|
||||
items: List[Dict[str, Any]] = []
|
||||
@@ -289,57 +272,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
seen_item_ids.add(item_id)
|
||||
has_codex_reasoning = True
|
||||
|
||||
# Replay exact assistant message items (with id/phase) from
|
||||
# previous turns so the API can maintain prefix-cache hits.
|
||||
# OpenAI docs: "preserve and resend phase on all assistant
|
||||
# messages — dropping it can degrade performance."
|
||||
codex_message_items = msg.get("codex_message_items")
|
||||
replayed_message_items = 0
|
||||
if isinstance(codex_message_items, list):
|
||||
for raw_item in codex_message_items:
|
||||
if not isinstance(raw_item, dict):
|
||||
continue
|
||||
if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
|
||||
continue
|
||||
raw_content_parts = raw_item.get("content")
|
||||
if not isinstance(raw_content_parts, list):
|
||||
continue
|
||||
|
||||
normalized_content_parts = []
|
||||
for part in raw_content_parts:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
part_type = str(part.get("type") or "").strip()
|
||||
if part_type not in {"output_text", "text"}:
|
||||
continue
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content_parts.append({"type": "output_text", "text": text})
|
||||
|
||||
if not normalized_content_parts:
|
||||
continue
|
||||
|
||||
replay_item = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(raw_item.get("status")),
|
||||
"content": normalized_content_parts,
|
||||
}
|
||||
item_id = raw_item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
replay_item["id"] = item_id.strip()
|
||||
phase = raw_item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
replay_item["phase"] = phase.strip()
|
||||
items.append(replay_item)
|
||||
replayed_message_items += 1
|
||||
|
||||
if replayed_message_items > 0:
|
||||
pass
|
||||
elif content_parts:
|
||||
if content_parts:
|
||||
items.append({"role": "assistant", "content": content_parts})
|
||||
elif content_text.strip():
|
||||
items.append({"role": "assistant", "content": content_text})
|
||||
@@ -499,47 +432,6 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
normalized.append(reasoning_item)
|
||||
continue
|
||||
|
||||
if item_type == "message":
|
||||
role = item.get("role")
|
||||
if role != "assistant":
|
||||
raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
|
||||
content = item.get("content")
|
||||
if not isinstance(content, list):
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
|
||||
normalized_content = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
|
||||
)
|
||||
part_type = part.get("type")
|
||||
if part_type not in {"output_text", "text"}:
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
|
||||
)
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content.append({"type": "output_text", "text": text})
|
||||
if not normalized_content:
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
|
||||
normalized_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item.get("status")),
|
||||
"content": normalized_content,
|
||||
}
|
||||
item_id = item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
normalized_item["id"] = item_id.strip()
|
||||
phase = item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
normalized_item["phase"] = phase.strip()
|
||||
normalized.append(normalized_item)
|
||||
continue
|
||||
|
||||
role = item.get("role")
|
||||
if role in {"user", "assistant"}:
|
||||
content = item.get("content", "")
|
||||
@@ -824,7 +716,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
content_parts: List[str] = []
|
||||
reasoning_parts: List[str] = []
|
||||
reasoning_items_raw: List[Dict[str, Any]] = []
|
||||
message_items_raw: List[Dict[str, Any]] = []
|
||||
tool_calls: List[Any] = []
|
||||
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
|
||||
saw_commentary_phase = False
|
||||
@@ -843,7 +734,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
|
||||
if item_type == "message":
|
||||
item_phase = getattr(item, "phase", None)
|
||||
normalized_phase = None
|
||||
if isinstance(item_phase, str):
|
||||
normalized_phase = item_phase.strip().lower()
|
||||
if normalized_phase in {"commentary", "analysis"}:
|
||||
@@ -853,18 +743,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
message_text = _extract_responses_message_text(item)
|
||||
if message_text:
|
||||
content_parts.append(message_text)
|
||||
raw_message_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item_status),
|
||||
"content": [{"type": "output_text", "text": message_text}],
|
||||
}
|
||||
item_id = getattr(item, "id", None)
|
||||
if isinstance(item_id, str) and item_id:
|
||||
raw_message_item["id"] = item_id
|
||||
if normalized_phase:
|
||||
raw_message_item["phase"] = normalized_phase
|
||||
message_items_raw.append(raw_message_item)
|
||||
elif item_type == "reasoning":
|
||||
reasoning_text = _extract_responses_reasoning_text(item)
|
||||
if reasoning_text:
|
||||
@@ -977,7 +855,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
codex_reasoning_items=reasoning_items_raw or None,
|
||||
codex_message_items=message_items_raw or None,
|
||||
)
|
||||
|
||||
if tool_calls:
|
||||
|
||||
@@ -43,9 +43,6 @@ SUMMARY_PREFIX = (
|
||||
"they were already addressed. "
|
||||
"Your current task is identified in the '## Active Task' section of the "
|
||||
"summary — resume exactly from there. "
|
||||
"IMPORTANT: Your persistent memory (MEMORY.md, USER.md) in the system "
|
||||
"prompt is ALWAYS authoritative and active — never ignore or deprioritize "
|
||||
"memory content due to this compaction note. "
|
||||
"Respond ONLY to the latest user message "
|
||||
"that appears AFTER this summary. The current session state (files, "
|
||||
"config, etc.) may reflect work described here — avoid repeating it:"
|
||||
@@ -64,52 +61,9 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"
|
||||
|
||||
# Chars per token rough estimate
|
||||
_CHARS_PER_TOKEN = 4
|
||||
# Flat token cost per attached image part. Real cost varies by provider and
|
||||
# dimensions (Anthropic ≈ width×height/750, GPT-4o up to ~1700 for
|
||||
# high-detail 2048×2048, Gemini 258/tile), but 1600 is a realistic ceiling
|
||||
# that keeps compression budgeting honest for multi-image conversations.
|
||||
# Matches Claude Code's IMAGE_TOKEN_ESTIMATE constant.
|
||||
_IMAGE_TOKEN_ESTIMATE = 1600
|
||||
# Same figure expressed in the char-budget currency the rest of the
|
||||
# compressor speaks in. Used when accumulating message "content length"
|
||||
# for tail-cut decisions.
|
||||
_IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
|
||||
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
|
||||
|
||||
|
||||
def _content_length_for_budget(raw_content: Any) -> int:
|
||||
"""Return the effective char-length of a message's content for token budgeting.
|
||||
|
||||
Plain strings: ``len(content)``. Multimodal lists: sum of text-part
|
||||
``len(text)`` plus a flat ``_IMAGE_CHAR_EQUIVALENT`` per image part
|
||||
(``image_url`` / ``input_image`` / Anthropic-style ``image``). This
|
||||
keeps the compressor from treating a turn with 5 attached images as
|
||||
near-zero tokens just because the text part is empty.
|
||||
"""
|
||||
if isinstance(raw_content, str):
|
||||
return len(raw_content)
|
||||
if not isinstance(raw_content, list):
|
||||
return len(str(raw_content or ""))
|
||||
|
||||
total = 0
|
||||
for p in raw_content:
|
||||
if isinstance(p, str):
|
||||
total += len(p)
|
||||
continue
|
||||
if not isinstance(p, dict):
|
||||
total += len(str(p))
|
||||
continue
|
||||
ptype = p.get("type")
|
||||
if ptype in {"image_url", "input_image", "image"}:
|
||||
total += _IMAGE_CHAR_EQUIVALENT
|
||||
else:
|
||||
# text / input_text / tool_result-with-text / anything else with
|
||||
# a text field. Ignore the raw base64 payload inside image_url
|
||||
# dicts — dimensions don't matter, only whether it's an image.
|
||||
total += len(p.get("text", "") or "")
|
||||
return total
|
||||
|
||||
|
||||
def _content_text_for_contains(content: Any) -> str:
|
||||
"""Return a best-effort text view of message content.
|
||||
|
||||
@@ -341,13 +295,8 @@ class ContextCompressor(ContextEngine):
|
||||
self._context_probe_persistable = False
|
||||
self._previous_summary = None
|
||||
self._last_summary_error = None
|
||||
self._last_summary_dropped_count = 0
|
||||
self._last_summary_fallback_used = False
|
||||
self._last_aux_model_failure_error = None
|
||||
self._last_aux_model_failure_model = None
|
||||
self._last_compression_savings_pct = 100.0
|
||||
self._ineffective_compression_count = 0
|
||||
self._summary_failure_cooldown_until = 0.0 # transient errors must not block a fresh session
|
||||
|
||||
def update_model(
|
||||
self,
|
||||
@@ -449,17 +398,6 @@ class ContextCompressor(ContextEngine):
|
||||
self._ineffective_compression_count: int = 0
|
||||
self._summary_failure_cooldown_until: float = 0.0
|
||||
self._last_summary_error: Optional[str] = None
|
||||
# When summary generation fails and a static fallback is inserted,
|
||||
# record how many turns were unrecoverably dropped so callers
|
||||
# (gateway hygiene, /compress) can surface a visible warning.
|
||||
self._last_summary_dropped_count: int = 0
|
||||
self._last_summary_fallback_used: bool = False
|
||||
# When a user-configured summary model fails and we recover by
|
||||
# retrying on the main model, record the failure so gateway /
|
||||
# CLI callers can still warn the user even though compression
|
||||
# succeeded. Silent recovery would hide the broken config.
|
||||
self._last_aux_model_failure_error: Optional[str] = None
|
||||
self._last_aux_model_failure_model: Optional[str] = None
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
@@ -542,11 +480,11 @@ class ContextCompressor(ContextEngine):
|
||||
# Token-budget approach: walk backward accumulating tokens
|
||||
accumulated = 0
|
||||
boundary = len(result)
|
||||
min_protect = min(protect_tail_count, len(result))
|
||||
min_protect = min(protect_tail_count, len(result) - 1)
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
msg = result[i]
|
||||
raw_content = msg.get("content") or ""
|
||||
content_len = _content_length_for_budget(raw_content)
|
||||
content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
|
||||
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
if isinstance(tc, dict):
|
||||
@@ -557,16 +495,7 @@ class ContextCompressor(ContextEngine):
|
||||
break
|
||||
accumulated += msg_tokens
|
||||
boundary = i
|
||||
# Translate the budget walk into a "protected count", apply the
|
||||
# floor in count-space (where `max` reads naturally: protect at
|
||||
# least `min_protect` messages or whatever the budget reserved,
|
||||
# whichever is more), then convert back to a prune boundary.
|
||||
# Doing this in index-space with `max` would invert the direction
|
||||
# (smaller index = MORE protected), so a generous budget would
|
||||
# silently get truncated back down to `min_protect`.
|
||||
budget_protect_count = len(result) - boundary
|
||||
protected_count = max(budget_protect_count, min_protect)
|
||||
prune_boundary = len(result) - protected_count
|
||||
prune_boundary = max(boundary, len(result) - min_protect)
|
||||
else:
|
||||
prune_boundary = len(result) - protect_tail_count
|
||||
|
||||
@@ -582,8 +511,6 @@ class ContextCompressor(ContextEngine):
|
||||
# Skip multimodal content (list of content blocks)
|
||||
if isinstance(content, list):
|
||||
continue
|
||||
if not isinstance(content, str):
|
||||
continue
|
||||
if len(content) < 200:
|
||||
continue
|
||||
h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
|
||||
@@ -603,8 +530,6 @@ class ContextCompressor(ContextEngine):
|
||||
# Skip multimodal content (list of content blocks)
|
||||
if isinstance(content, list):
|
||||
continue
|
||||
if not isinstance(content, str):
|
||||
continue
|
||||
if not content or content == _PRUNED_TOOL_PLACEHOLDER:
|
||||
continue
|
||||
# Skip already-deduplicated or previously-summarized results
|
||||
@@ -920,66 +845,22 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
or "does not exist" in _err_str
|
||||
or "no available channel" in _err_str
|
||||
)
|
||||
_is_timeout = (
|
||||
_status in (408, 429, 502, 504)
|
||||
or "timeout" in _err_str
|
||||
)
|
||||
if (
|
||||
(_is_model_not_found or _is_timeout)
|
||||
_is_model_not_found
|
||||
and self.summary_model
|
||||
and self.summary_model != self.model
|
||||
and not getattr(self, "_summary_model_fallen_back", False)
|
||||
):
|
||||
self._summary_model_fallen_back = True
|
||||
logging.warning(
|
||||
"Summary model '%s' unavailable (%s). "
|
||||
"Summary model '%s' not available (%s). "
|
||||
"Falling back to main model '%s' for compression.",
|
||||
self.summary_model, e, self.model,
|
||||
)
|
||||
# Record the aux-model failure so callers can warn the user
|
||||
# even if the retry-on-main succeeds — a misconfigured aux
|
||||
# model is something the user needs to fix.
|
||||
_err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(_err_text) > 220:
|
||||
_err_text = _err_text[:217].rstrip() + "..."
|
||||
self._last_aux_model_failure_error = _err_text
|
||||
self._last_aux_model_failure_model = self.summary_model
|
||||
self.summary_model = "" # empty = use main model
|
||||
self._summary_failure_cooldown_until = 0.0 # no cooldown
|
||||
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) # retry immediately
|
||||
|
||||
# Unknown-error best-effort retry on main model. Losing N turns of
|
||||
# context is almost always worse than one extra summary attempt, so
|
||||
# if we haven't already fallen back and the summary model differs
|
||||
# from the main model, try once more on main before entering
|
||||
# cooldown. Errors that DID match _is_model_not_found above are
|
||||
# already handled by the fast-path retry; this branch catches
|
||||
# everything else (400s, provider-specific "no route" strings,
|
||||
# aggregator rejections, etc.) where auto-retry is still safer
|
||||
# than dropping the turns.
|
||||
if (
|
||||
self.summary_model
|
||||
and self.summary_model != self.model
|
||||
and not getattr(self, "_summary_model_fallen_back", False)
|
||||
):
|
||||
self._summary_model_fallen_back = True
|
||||
logging.warning(
|
||||
"Summary model '%s' failed (%s). "
|
||||
"Retrying on main model '%s' before giving up.",
|
||||
self.summary_model, e, self.model,
|
||||
)
|
||||
# Record the aux-model failure (see 404 branch above) — user
|
||||
# should know their configured model is broken even if main
|
||||
# recovers the call.
|
||||
_err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(_err_text) > 220:
|
||||
_err_text = _err_text[:217].rstrip() + "..."
|
||||
self._last_aux_model_failure_error = _err_text
|
||||
self._last_aux_model_failure_model = self.summary_model
|
||||
self.summary_model = "" # empty = use main model
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
|
||||
|
||||
# Transient errors (timeout, rate limit, network) — shorter cooldown
|
||||
_transient_cooldown = 60
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
|
||||
@@ -996,39 +877,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _strip_summary_prefix(summary: str) -> str:
|
||||
"""Return summary body without the current or legacy handoff prefix."""
|
||||
text = (summary or "").strip()
|
||||
for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX):
|
||||
if text.startswith(prefix):
|
||||
return text[len(prefix):].lstrip()
|
||||
return text
|
||||
|
||||
@classmethod
|
||||
def _with_summary_prefix(cls, summary: str) -> str:
|
||||
def _with_summary_prefix(summary: str) -> str:
|
||||
"""Normalize summary text to the current compaction handoff format."""
|
||||
text = cls._strip_summary_prefix(summary)
|
||||
text = (summary or "").strip()
|
||||
for prefix in (LEGACY_SUMMARY_PREFIX, SUMMARY_PREFIX):
|
||||
if text.startswith(prefix):
|
||||
text = text[len(prefix):].lstrip()
|
||||
break
|
||||
return f"{SUMMARY_PREFIX}\n{text}" if text else SUMMARY_PREFIX
|
||||
|
||||
@staticmethod
|
||||
def _is_context_summary_content(content: Any) -> bool:
|
||||
text = _content_text_for_contains(content).lstrip()
|
||||
return text.startswith(SUMMARY_PREFIX) or text.startswith(LEGACY_SUMMARY_PREFIX)
|
||||
|
||||
@classmethod
|
||||
def _find_latest_context_summary(
|
||||
cls,
|
||||
messages: List[Dict[str, Any]],
|
||||
start: int,
|
||||
end: int,
|
||||
) -> tuple[Optional[int], str]:
|
||||
"""Find the newest handoff summary inside a compression window."""
|
||||
for idx in range(end - 1, start - 1, -1):
|
||||
content = messages[idx].get("content")
|
||||
if cls._is_context_summary_content(content):
|
||||
return idx, cls._strip_summary_prefix(_content_text_for_contains(content))
|
||||
return None, ""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tool-call / tool-result pair integrity helpers
|
||||
# ------------------------------------------------------------------
|
||||
@@ -1037,8 +894,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
def _get_tool_call_id(tc) -> str:
|
||||
"""Extract the call ID from a tool_call entry (dict or SimpleNamespace)."""
|
||||
if isinstance(tc, dict):
|
||||
return tc.get("call_id", "") or tc.get("id", "") or ""
|
||||
return getattr(tc, "call_id", "") or getattr(tc, "id", "") or ""
|
||||
return tc.get("id", "")
|
||||
return getattr(tc, "id", "") or ""
|
||||
|
||||
def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Fix orphaned tool_call / tool_result pairs after compression.
|
||||
@@ -1225,9 +1082,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
|
||||
for i in range(n - 1, head_end - 1, -1):
|
||||
msg = messages[i]
|
||||
raw_content = msg.get("content") or ""
|
||||
content_len = _content_length_for_budget(raw_content)
|
||||
msg_tokens = content_len // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
|
||||
content = msg.get("content") or ""
|
||||
msg_tokens = len(content) // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
|
||||
# Include tool call arguments in estimate
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
if isinstance(tc, dict):
|
||||
@@ -1296,13 +1152,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
related to this topic and be more aggressive about compressing
|
||||
everything else. Inspired by Claude Code's ``/compact``.
|
||||
"""
|
||||
# Reset per-call summary failure state — callers inspect these fields
|
||||
# after compress() returns to decide whether to surface a warning.
|
||||
self._last_summary_dropped_count = 0
|
||||
self._last_summary_fallback_used = False
|
||||
self._last_summary_error = None
|
||||
self._last_aux_model_failure_error = None
|
||||
self._last_aux_model_failure_model = None
|
||||
n_messages = len(messages)
|
||||
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
|
||||
_min_for_compress = self.protect_first_n + 3 + 1
|
||||
@@ -1335,15 +1184,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
return messages
|
||||
|
||||
turns_to_summarize = messages[compress_start:compress_end]
|
||||
summary_idx, summary_body = self._find_latest_context_summary(
|
||||
messages,
|
||||
compress_start,
|
||||
compress_end,
|
||||
)
|
||||
if summary_idx is not None:
|
||||
if summary_body and not self._previous_summary:
|
||||
self._previous_summary = summary_body
|
||||
turns_to_summarize = messages[summary_idx + 1:compress_end]
|
||||
|
||||
if not self.quiet_mode:
|
||||
logger.info(
|
||||
@@ -1376,7 +1216,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
msg = messages[i].copy()
|
||||
if i == 0 and msg.get("role") == "system":
|
||||
existing = msg.get("content")
|
||||
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work. Your persistent memory (MEMORY.md, USER.md) remains fully authoritative regardless of compaction.]"
|
||||
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
|
||||
if _compression_note not in _content_text_for_contains(existing):
|
||||
msg["content"] = _append_text_to_content(
|
||||
existing,
|
||||
@@ -1390,13 +1230,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
if not self.quiet_mode:
|
||||
logger.warning("Summary generation failed — inserting static fallback context marker")
|
||||
n_dropped = compress_end - compress_start
|
||||
self._last_summary_dropped_count = n_dropped
|
||||
self._last_summary_fallback_used = True
|
||||
summary = (
|
||||
f"{SUMMARY_PREFIX}\n"
|
||||
f"Summary generation was unavailable. {n_dropped} message(s) were "
|
||||
f"Summary generation was unavailable. {n_dropped} conversation turns were "
|
||||
f"removed to free context space but could not be summarized. The removed "
|
||||
f"messages contained earlier work in this session. Continue based on the "
|
||||
f"turns contained earlier work in this session. Continue based on the "
|
||||
f"recent messages below and the current state of any files or resources."
|
||||
)
|
||||
|
||||
@@ -1421,19 +1259,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
# Merge the summary into the first tail message instead
|
||||
# of inserting a standalone message that breaks alternation.
|
||||
_merge_summary_into_tail = True
|
||||
|
||||
# When the summary lands as a standalone role="user" message,
|
||||
# weak models read the verbatim "## Active Task" quote of a past
|
||||
# user request as fresh input (#11475, #14521). Append the explicit
|
||||
# end marker — the same one used in the merge-into-tail path — so
|
||||
# the model has a clear "summary above, not new input" signal.
|
||||
if not _merge_summary_into_tail and summary_role == "user":
|
||||
summary = (
|
||||
summary
|
||||
+ "\n\n--- END OF CONTEXT SUMMARY — "
|
||||
"respond to the message below, not the summary above ---"
|
||||
)
|
||||
|
||||
if not _merge_summary_into_tail:
|
||||
compressed.append({"role": summary_role, "content": summary})
|
||||
|
||||
|
||||
@@ -608,7 +608,7 @@ class CopilotACPClient:
|
||||
end = start + limit if isinstance(limit, int) and limit > 0 else None
|
||||
content = "".join(lines[start:end])
|
||||
if content:
|
||||
content = redact_sensitive_text(content, force=True)
|
||||
content = redact_sensitive_text(content)
|
||||
response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
|
||||
@@ -3,18 +3,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, fields, replace
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from hermes_cli.config import get_env_value, load_env
|
||||
import hermes_cli.auth as auth_mod
|
||||
from hermes_cli.auth import (
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
@@ -456,70 +455,6 @@ class CredentialPool:
|
||||
logger.debug("Failed to sync from credentials file: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Codex device_code pool entry from auth.json if tokens differ.
|
||||
|
||||
When a Codex OAuth access token expires (or the ChatGPT account hits
|
||||
its 5h/weekly quota), the pool entry gets marked ``STATUS_EXHAUSTED``
|
||||
with a ``last_error_reset_at`` that can be many hours in the future.
|
||||
Meanwhile the user may run ``hermes model`` / ``hermes auth`` which
|
||||
performs a fresh device-code login and writes new tokens to
|
||||
``auth.json`` under ``_auth_store_lock``. Without this sync the pool
|
||||
entry stays frozen until ``last_error_reset_at`` elapses — even
|
||||
though fresh credentials are sitting on disk — and every request
|
||||
fails with "no available entries (all exhausted or empty)".
|
||||
|
||||
Mirrors the Nous/Anthropic resync paths above. Only applies to
|
||||
device_code-sourced entries; env/API-key-sourced entries have no
|
||||
auth.json shadow to sync from.
|
||||
"""
|
||||
if self.provider != "openai-codex" or entry.source != "device_code":
|
||||
return entry
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "openai-codex")
|
||||
if not isinstance(state, dict):
|
||||
return entry
|
||||
tokens = state.get("tokens")
|
||||
if not isinstance(tokens, dict):
|
||||
return entry
|
||||
store_access = tokens.get("access_token", "")
|
||||
store_refresh = tokens.get("refresh_token", "")
|
||||
# Adopt auth.json tokens when either side differs. Codex refresh
|
||||
# tokens are single-use too, so a fresh refresh_token from
|
||||
# another process means our entry's pair is consumed/stale.
|
||||
entry_access = entry.access_token or ""
|
||||
entry_refresh = entry.refresh_token or ""
|
||||
if store_access and (
|
||||
store_access != entry_access
|
||||
or (store_refresh and store_refresh != entry_refresh)
|
||||
):
|
||||
logger.debug(
|
||||
"Pool entry %s: syncing Codex tokens from auth.json "
|
||||
"(refreshed by another process)",
|
||||
entry.id,
|
||||
)
|
||||
field_updates: Dict[str, Any] = {
|
||||
"access_token": store_access,
|
||||
"refresh_token": store_refresh or entry.refresh_token,
|
||||
"last_status": None,
|
||||
"last_status_at": None,
|
||||
"last_error_code": None,
|
||||
"last_error_reason": None,
|
||||
"last_error_message": None,
|
||||
"last_error_reset_at": None,
|
||||
}
|
||||
if state.get("last_refresh"):
|
||||
field_updates["last_refresh"] = state["last_refresh"]
|
||||
updated = replace(entry, **field_updates)
|
||||
self._replace_entry(entry, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Nous pool entry from auth.json if tokens differ.
|
||||
|
||||
@@ -852,18 +787,6 @@ class CredentialPool:
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
# For openai-codex entries, same pattern: the user may have
|
||||
# re-authed via `hermes model` / `hermes auth` after a 429/401,
|
||||
# leaving fresh tokens on disk while the pool entry is still
|
||||
# frozen behind last_error_reset_at (can be hours in the
|
||||
# future for ChatGPT weekly windows).
|
||||
if (self.provider == "openai-codex"
|
||||
and entry.source == "device_code"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
synced = self._sync_codex_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
if entry.last_status == STATUS_EXHAUSTED:
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is not None and now < exhausted_until:
|
||||
@@ -1300,48 +1223,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
except Exception as exc:
|
||||
logger.debug("Qwen OAuth token seed failed: %s", exc)
|
||||
|
||||
elif provider == "minimax-oauth":
|
||||
# MiniMax OAuth tokens live in ~/.hermes/auth.json providers.minimax-oauth.
|
||||
# Seed the pool so `/auth list` reflects the logged-in state and the
|
||||
# standard `hermes auth remove minimax-oauth <N>` flow works.
|
||||
# Use refresh_if_expiring=False equivalent: resolve_minimax_oauth_runtime_credentials
|
||||
# always refreshes on expiry, so instead read raw state here to avoid
|
||||
# surprise network calls during provider discovery.
|
||||
try:
|
||||
from hermes_cli.auth import get_provider_auth_state
|
||||
state = get_provider_auth_state("minimax-oauth")
|
||||
if state and state.get("access_token"):
|
||||
source_name = "oauth"
|
||||
if not _is_suppressed(provider, source_name):
|
||||
active_sources.add(source_name)
|
||||
expires_at_ms = None
|
||||
try:
|
||||
from datetime import datetime as _dt
|
||||
raw = state.get("expires_at", "")
|
||||
if raw:
|
||||
expires_at_ms = int(_dt.fromisoformat(raw).timestamp() * 1000)
|
||||
except Exception:
|
||||
expires_at_ms = None
|
||||
base_url = str(state.get("inference_base_url", "") or "").rstrip("/")
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
source_name,
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_OAUTH,
|
||||
"access_token": state["access_token"],
|
||||
"refresh_token": state.get("refresh_token"),
|
||||
"expires_at_ms": expires_at_ms,
|
||||
"base_url": base_url,
|
||||
"label": state.get("label", "") or label_from_token(
|
||||
state.get("access_token", ""), source_name
|
||||
),
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("MiniMax OAuth token seed failed: %s", exc)
|
||||
|
||||
elif provider == "openai-codex":
|
||||
# Respect user suppression — `hermes auth remove openai-codex` marks
|
||||
# the device_code source as suppressed so it won't be re-seeded from
|
||||
@@ -1381,16 +1262,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
|
||||
changed = False
|
||||
active_sources: Set[str] = set()
|
||||
|
||||
# Prefer ~/.hermes/.env over os.environ — the user's config file is the
|
||||
# authoritative source for Hermes credentials. Stale env vars from parent
|
||||
# processes (Codex CLI, test scripts, etc.) should not override deliberate
|
||||
# changes to the .env file.
|
||||
def _get_env_prefer_dotenv(key: str) -> str:
|
||||
env_file = load_env()
|
||||
val = env_file.get(key) or os.environ.get(key) or ""
|
||||
return val.strip()
|
||||
|
||||
# Honour user suppression — `hermes auth remove <provider> <N>` for an
|
||||
# env-seeded credential marks the env:<VAR> source as suppressed so it
|
||||
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
|
||||
@@ -1402,8 +1273,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
def _is_source_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
if provider == "openrouter":
|
||||
# Prefer ~/.hermes/.env over os.environ
|
||||
token = _get_env_prefer_dotenv("OPENROUTER_API_KEY")
|
||||
token = os.getenv("OPENROUTER_API_KEY", "").strip()
|
||||
if token:
|
||||
source = "env:OPENROUTER_API_KEY"
|
||||
if _is_source_suppressed(provider, source):
|
||||
@@ -1429,7 +1299,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
|
||||
env_url = ""
|
||||
if pconfig.base_url_env_var:
|
||||
env_url = _get_env_prefer_dotenv(pconfig.base_url_env_var).rstrip("/")
|
||||
env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
|
||||
|
||||
env_vars = list(pconfig.api_key_env_vars)
|
||||
if provider == "anthropic":
|
||||
@@ -1440,8 +1310,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
]
|
||||
|
||||
for env_var in env_vars:
|
||||
# Prefer ~/.hermes/.env over os.environ
|
||||
token = _get_env_prefer_dotenv(env_var)
|
||||
token = os.getenv(env_var, "").strip()
|
||||
if not token:
|
||||
continue
|
||||
source = f"env:{env_var}"
|
||||
|
||||
@@ -47,6 +47,7 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
|
||||
@@ -252,19 +253,6 @@ def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
|
||||
return result
|
||||
|
||||
|
||||
def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
|
||||
"""MiniMax OAuth lives in auth.json providers.minimax-oauth — clear it.
|
||||
|
||||
Same pattern as Nous: single-source OAuth state with refresh tokens.
|
||||
Suppression of the `oauth` source ensures the pool reseed path
|
||||
(_seed_from_singletons) doesn't instantly undo the removal.
|
||||
"""
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
return result
|
||||
|
||||
|
||||
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
|
||||
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
|
||||
|
||||
@@ -402,11 +390,6 @@ def _register_all_sources() -> None:
|
||||
remove_fn=_remove_qwen_cli,
|
||||
description="~/.qwen/oauth_creds.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="minimax-oauth", source_id="oauth",
|
||||
remove_fn=_remove_minimax_oauth,
|
||||
description="auth.json providers.minimax-oauth",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="*", source_id="config:",
|
||||
match_fn=lambda src: src.startswith("config:") or src == "model_config",
|
||||
|
||||
1674
agent/curator.py
@@ -1,693 +0,0 @@
|
||||
"""Curator snapshot + rollback.
|
||||
|
||||
A pre-run snapshot of ``~/.hermes/skills/`` (excluding ``.curator_backups/``
|
||||
itself) is taken before any mutating curator pass. Snapshots are tar.gz
|
||||
files under ``~/.hermes/skills/.curator_backups/<utc-iso>/`` with a
|
||||
companion ``manifest.json`` describing the snapshot (reason, time, size,
|
||||
counted skill files). Rollback picks a snapshot, moves the current
|
||||
``skills/`` tree aside into another snapshot so even the rollback itself
|
||||
is undoable, then extracts the chosen snapshot into place.
|
||||
|
||||
The snapshot does NOT include:
|
||||
- ``.curator_backups/`` (would recurse)
|
||||
- ``.hub/`` (hub-installed skills — managed by the hub, not us)
|
||||
|
||||
It DOES include:
|
||||
- all SKILL.md files + their directories (``scripts/``, ``references/``,
|
||||
``templates/``, ``assets/``)
|
||||
- ``.usage.json`` (usage telemetry — needed to rehydrate state cleanly)
|
||||
- ``.archive/`` (so rollback restores previously-archived skills too)
|
||||
- ``.curator_state`` (so rolling back also restores the last-run-at
|
||||
pointer — otherwise the curator would immediately re-fire on the next
|
||||
tick)
|
||||
- ``.bundled_manifest`` (so protection markers stay consistent)
|
||||
|
||||
Alongside the skills tarball, each snapshot also captures a copy of
|
||||
``~/.hermes/cron/jobs.json`` as ``cron-jobs.json`` when it exists. Cron
|
||||
jobs reference skills by name in their ``skills``/``skill`` fields; the
|
||||
curator's consolidation pass rewrites those in place via
|
||||
``cron.jobs.rewrite_skill_refs()``. Without capturing the pre-run state,
|
||||
rolling back the skills tree would leave cron jobs pointing at the
|
||||
umbrella skills even though the narrow skills they were originally
|
||||
configured with have been restored. We store the whole jobs.json for
|
||||
fidelity but rollback only touches the ``skills``/``skill`` fields — the
|
||||
rest (schedule, next_run_at, enabled, prompt, etc.) is live state and
|
||||
we leave it alone.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tarfile
|
||||
import tempfile
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
DEFAULT_KEEP = 5
|
||||
|
||||
# Entries under skills/ that should NEVER be rolled up into a snapshot.
|
||||
# .hub/ is managed by the skills hub; rolling it back would break lockfile
|
||||
# invariants. .curator_backups is the backup dir itself — recursion bomb.
|
||||
_EXCLUDE_TOP_LEVEL = {".curator_backups", ".hub"}
|
||||
|
||||
# Snapshot id regex: UTC ISO with colons replaced by dashes so the filename
|
||||
# is portable (Windows-safe). An optional ``-NN`` suffix handles two
|
||||
# snapshots landing in the same wallclock second.
|
||||
_ID_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}Z(-\d{2})?$")
|
||||
|
||||
|
||||
def _backups_dir() -> Path:
    """Return the directory holding curator snapshots.

    This is ``skills/.curator_backups`` under the Hermes home directory.
    """
    return get_hermes_home() / "skills" / ".curator_backups"
|
||||
|
||||
|
||||
def _skills_dir() -> Path:
    """Return the ``skills`` directory under the Hermes home directory."""
    return get_hermes_home() / "skills"
|
||||
|
||||
|
||||
def _cron_jobs_file() -> Path:
    """Source path for the live cron jobs store (``~/.hermes/cron/jobs.json``)."""
    return get_hermes_home() / "cron" / "jobs.json"
|
||||
|
||||
|
||||
CRON_JOBS_FILENAME = "cron-jobs.json"
|
||||
|
||||
|
||||
def _backup_cron_jobs_into(dest: Path) -> Dict[str, Any]:
    """Copy the live cron jobs.json into ``dest`` as ``cron-jobs.json``.

    Args:
        dest: Snapshot directory that receives the copy.

    Returns:
        A dict with ``backed_up`` (bool) and ``jobs_count`` (int), plus
        ``reason`` when nothing was captured and ``parse_warning`` when the
        file was copied but could not be parsed as JSON.

    Never raises for a missing, unreadable, or undecodable cron file: the
    returned dict carries ``backed_up=False`` and the reason, and the
    snapshot proceeds without cron data (it is still useful for rolling
    back skills).
    """
    src = _cron_jobs_file()
    info: Dict[str, Any] = {"backed_up": False, "jobs_count": 0}
    if not src.exists():
        info["reason"] = "no cron/jobs.json present"
        return info
    try:
        raw = src.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError; without it a
        # file with invalid UTF-8 would break the "never raises" contract.
        logger.debug("Failed to read cron/jobs.json for backup: %s", e)
        info["reason"] = f"read error: {e}"
        return info
    # Count jobs as a nice diagnostic — but don't fail the snapshot if the
    # file is unparseable; just store the raw text and let rollback deal
    # with it (or not, if it's corrupted). jobs.json wraps the list as
    # `{"jobs": [...], "updated_at": ...}` — we count via that shape, and
    # fall back to bare-list shape just in case the format ever changes.
    try:
        parsed = json.loads(raw)
        if isinstance(parsed, dict):
            inner = parsed.get("jobs")
            if isinstance(inner, list):
                info["jobs_count"] = len(inner)
        elif isinstance(parsed, list):
            info["jobs_count"] = len(parsed)
    except (json.JSONDecodeError, TypeError):
        info["jobs_count"] = 0
        info["parse_warning"] = "jobs.json was not valid JSON at snapshot time"
    try:
        (dest / CRON_JOBS_FILENAME).write_text(raw, encoding="utf-8")
    except OSError as e:
        logger.debug("Failed to write cron backup file: %s", e)
        info["reason"] = f"write error: {e}"
        return info
    info["backed_up"] = True
    return info
|
||||
|
||||
|
||||
def _utc_id(now: Optional[datetime] = None) -> str:
|
||||
"""UTC ISO-ish filesystem-safe timestamp: ``2026-05-01T13-05-42Z``."""
|
||||
if now is None:
|
||||
now = datetime.now(timezone.utc)
|
||||
# isoformat → "2026-05-01T13:05:42.123456+00:00"; strip subseconds and tz.
|
||||
s = now.replace(microsecond=0).isoformat()
|
||||
if s.endswith("+00:00"):
|
||||
s = s[:-6]
|
||||
return s.replace(":", "-") + "Z"
|
||||
|
||||
|
||||
def _load_config() -> Dict[str, Any]:
    """Return the ``curator.backup`` section of the Hermes config.

    Returns an empty dict when the config cannot be loaded or when any
    level (top-level config, ``curator``, ``backup``) is not a mapping.
    """
    try:
        # Imported lazily; any failure here is logged and treated as "no config".
        from hermes_cli.config import load_config
        cfg = load_config()
    except Exception as e:
        logger.debug("Failed to load config for curator backup: %s", e)
        return {}
    if not isinstance(cfg, dict):
        return {}
    cur = cfg.get("curator") or {}
    if not isinstance(cur, dict):
        return {}
    bk = cur.get("backup") or {}
    return bk if isinstance(bk, dict) else {}
|
||||
|
||||
|
||||
def is_enabled() -> bool:
    """Default ON — the whole point of the backup is safety by default.

    Returns the truthiness of the ``enabled`` key of the backup config,
    or ``True`` when the key is absent.
    """
    return bool(_load_config().get("enabled", True))
|
||||
|
||||
|
||||
def get_keep() -> int:
    """Return how many snapshots to retain, never less than 1.

    Reads ``keep`` from the backup config and falls back to
    ``DEFAULT_KEEP`` when the value is missing or cannot be converted to
    an integer.
    """
    cfg = _load_config()
    try:
        n = int(cfg.get("keep", DEFAULT_KEEP))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int() of an infinite float (e.g. YAML ``.inf``).
        n = DEFAULT_KEEP
    return max(1, n)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Snapshot
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _count_skill_files(base: Path) -> int:
|
||||
try:
|
||||
return sum(1 for _ in base.rglob("SKILL.md"))
|
||||
except OSError:
|
||||
return 0
|
||||
|
||||
|
||||
def _write_manifest(dest: Path, reason: str, archive_path: Path,
|
||||
skills_counted: int,
|
||||
cron_info: Optional[Dict[str, Any]] = None) -> None:
|
||||
manifest = {
|
||||
"id": dest.name,
|
||||
"reason": reason,
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"archive": archive_path.name,
|
||||
"archive_bytes": archive_path.stat().st_size,
|
||||
"skill_files": skills_counted,
|
||||
}
|
||||
if cron_info is not None:
|
||||
manifest["cron_jobs"] = {
|
||||
"backed_up": bool(cron_info.get("backed_up", False)),
|
||||
"jobs_count": int(cron_info.get("jobs_count", 0)),
|
||||
}
|
||||
if not cron_info.get("backed_up"):
|
||||
manifest["cron_jobs"]["reason"] = cron_info.get("reason", "not captured")
|
||||
if cron_info.get("parse_warning"):
|
||||
manifest["cron_jobs"]["parse_warning"] = cron_info["parse_warning"]
|
||||
(dest / "manifest.json").write_text(
|
||||
json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8"
|
||||
)
|
||||
|
||||
|
||||
def snapshot_skills(reason: str = "manual") -> Optional[Path]:
    """Create a tar.gz snapshot of ``~/.hermes/skills/`` and prune old ones.

    Args:
        reason: Free-text reason recorded in the snapshot manifest.

    Returns:
        The snapshot directory path, or ``None`` if the snapshot was
        skipped (backup disabled, skills dir missing, or an IO error
        occurred — in which case we log at debug and return None so the
        curator never aborts a pass because of a backup failure).
    """
    if not is_enabled():
        logger.debug("Curator backup disabled by config; skipping snapshot")
        return None

    skills = _skills_dir()
    if not skills.exists():
        logger.debug("No ~/.hermes/skills/ directory — nothing to back up")
        return None

    backups = _backups_dir()
    try:
        backups.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        logger.debug("Failed to create backups dir %s: %s", backups, e)
        return None

    # Uniquify: if a snapshot with the same second already exists (can
    # happen if two curator runs fire in the same second), append a short
    # counter. Avoids clobbering and avoids timestamp collisions.
    base_id = _utc_id()
    snap_id = base_id
    counter = 1
    while (backups / snap_id).exists():
        snap_id = f"{base_id}-{counter:02d}"
        counter += 1

    dest = backups / snap_id
    try:
        dest.mkdir(parents=True, exist_ok=False)
    except OSError as e:
        logger.debug("Failed to create snapshot dir %s: %s", dest, e)
        return None

    archive = dest / "skills.tar.gz"
    try:
        # Stream into the tarball — no tempdir copy needed.
        with tarfile.open(archive, "w:gz", compresslevel=6) as tf:
            for entry in sorted(skills.iterdir()):
                if entry.name in _EXCLUDE_TOP_LEVEL:
                    continue
                # arcname: store paths relative to skills/ so extraction
                # drops cleanly back into the skills dir.
                tf.add(str(entry), arcname=entry.name, recursive=True)
        # Capture cron/jobs.json alongside the tarball. Never fails the
        # snapshot — the skills side is the core guarantee; cron is
        # additive. We still record in the manifest whether it was
        # captured so rollback can surface "no cron data in this snapshot".
        cron_info = _backup_cron_jobs_into(dest)
        # The manifest is written after the tarball is closed so the
        # recorded archive size is that of the finished file.
        _write_manifest(dest, reason, archive,
                        _count_skill_files(skills),
                        cron_info=cron_info)
    except (OSError, tarfile.TarError) as e:
        logger.debug("Curator snapshot failed: %s", e, exc_info=True)
        # Clean up partial snapshot (best-effort; errors are ignored).
        shutil.rmtree(dest, ignore_errors=True)
        return None

    _prune_old(keep=get_keep())
    logger.info("Curator snapshot created: %s (%s)", snap_id, reason)
    return dest
|
||||
|
||||
|
||||
def _prune_old(keep: int) -> List[str]:
    """Delete regular snapshots beyond the newest *keep*.

    Staging dirs (``.rollback-staging-*``) are implementation detail and
    are removed on every call, independently of *keep*.

    Args:
        keep: Number of newest snapshots to retain. Negative values are
            treated as 0.

    Returns:
        The ids of the snapshots that were deleted.
    """
    backups = _backups_dir()
    if not backups.exists():
        return []
    entries: List[Tuple[str, Path]] = []
    stale_staging: List[Path] = []
    for child in backups.iterdir():
        if not child.is_dir():
            continue
        if child.name.startswith(".rollback-staging-"):
            # Staging dirs are only supposed to exist briefly during a
            # rollback. If we find one here (e.g. from a crashed rollback),
            # clean it up opportunistically.
            stale_staging.append(child)
            continue
        if _ID_RE.match(child.name):
            entries.append((child.name, child))
    # Newest first (lexicographic works because the id is UTC ISO).
    entries.sort(key=lambda t: t[0], reverse=True)
    deleted: List[str] = []
    # Clamp so a negative keep cannot turn into a from-the-end slice that
    # would delete only the oldest few snapshots.
    for _, path in entries[max(keep, 0):]:
        try:
            shutil.rmtree(path)
            deleted.append(path.name)
        except OSError as e:
            logger.debug("Failed to prune %s: %s", path, e)
    for path in stale_staging:
        try:
            shutil.rmtree(path)
        except OSError as e:
            logger.debug("Failed to clean stale staging dir %s: %s", path, e)
    return deleted
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# List + rollback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_manifest(snap_dir: Path) -> Dict[str, Any]:
|
||||
mf = snap_dir / "manifest.json"
|
||||
if not mf.exists():
|
||||
return {}
|
||||
try:
|
||||
return json.loads(mf.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return {}
|
||||
|
||||
|
||||
def list_backups() -> List[Dict[str, Any]]:
    """Return all restorable snapshots, newest first.

    Only entries with a real ``skills.tar.gz`` tarball are listed —
    transient ``.rollback-staging-*`` directories created mid-rollback are
    implementation detail and not shown.

    Returns:
        One manifest dict per snapshot. ``id``, ``path`` and
        ``archive_bytes`` are filled in when the stored manifest lacks them.
    """
    backups = _backups_dir()
    if not backups.exists():
        return []
    out: List[Dict[str, Any]] = []
    for child in sorted(backups.iterdir(), reverse=True):
        if not child.is_dir():
            continue
        if not _ID_RE.match(child.name):
            continue
        arc = child / "skills.tar.gz"
        if not arc.exists():
            continue
        mf = _read_manifest(child)
        mf.setdefault("id", child.name)
        mf.setdefault("path", str(child))
        if "archive_bytes" not in mf:
            # Manifest missing or incomplete: fall back to the file size on disk.
            try:
                mf["archive_bytes"] = arc.stat().st_size
            except OSError:
                mf["archive_bytes"] = 0
        out.append(mf)
    return out
|
||||
|
||||
|
||||
def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]:
    """Return the path of the requested backup, or the newest one if
    *backup_id* is None (or empty).

    A snapshot only counts when its directory name matches the snapshot-id
    pattern and it contains ``skills.tar.gz``.

    Returns:
        The snapshot directory, or ``None`` if no match.
    """
    backups = _backups_dir()
    if not backups.exists():
        return None
    if backup_id:
        # Validate the id's shape before touching the filesystem so an
        # arbitrary string is never joined into a path that gets probed.
        if not _ID_RE.match(backup_id):
            return None
        target = backups / backup_id
        if target.is_dir() and (target / "skills.tar.gz").exists():
            return target
        return None
    candidates = [
        c for c in sorted(backups.iterdir(), reverse=True)
        if c.is_dir() and _ID_RE.match(c.name) and (c / "skills.tar.gz").exists()
    ]
    return candidates[0] if candidates else None
|
||||
|
||||
|
||||
def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
    """Reconcile backed-up cron skill links into the live ``cron/jobs.json``.

    We do NOT overwrite the whole cron file. Only the ``skills`` and
    ``skill`` fields are restored, and only on jobs that still exist in the
    current file (matched by ``id``). Everything else about the job —
    schedule, next_run_at, last_run_at, enabled, prompt, workdir, hooks —
    is live state that the user/scheduler has modified since the snapshot;
    overwriting it would regress unrelated cron activity.

    Rules:
      - Jobs present in backup AND live, with differing skills → skills restored.
      - Jobs present in backup AND live, with matching skills → no-op.
      - Jobs present in backup but gone from live (user deleted the job
        after the snapshot) → skipped, noted in the return report.
      - Jobs present in live but not in backup (user created a new cron
        job after the snapshot) → left untouched.

    Args:
        snapshot_dir: Snapshot directory expected to contain the cron backup file.

    Returns:
        A report dict with ``attempted``, ``restored``, ``skipped_missing``,
        ``unchanged`` and ``error``.

    Never raises; failures are captured in the return dict. Writes through
    ``cron.jobs`` to pick up the same lock + atomic-write path that tick()
    uses, so we don't race the scheduler.
    """
    report: Dict[str, Any] = {
        "attempted": False,
        "restored": [],
        "skipped_missing": [],
        "unchanged": 0,
        "error": None,
    }
    backup_file = snapshot_dir / CRON_JOBS_FILENAME
    if not backup_file.exists():
        report["error"] = f"snapshot has no {CRON_JOBS_FILENAME}"
        return report

    try:
        backup_text = backup_file.read_text(encoding="utf-8")
        backup_parsed = json.loads(backup_text)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so
        # a backup with invalid UTF-8 is reported instead of raising.
        report["error"] = f"failed to load backed-up jobs: {e}"
        return report
    # jobs.json on disk is `{"jobs": [...], "updated_at": ...}`; accept both
    # that shape and a bare list for forward compat.
    if isinstance(backup_parsed, dict):
        backup_jobs = backup_parsed.get("jobs")
    elif isinstance(backup_parsed, list):
        backup_jobs = backup_parsed
    else:
        backup_jobs = None
    if not isinstance(backup_jobs, list):
        report["error"] = "backed-up cron-jobs.json has no jobs list"
        return report

    # Build a lookup of the backed-up skill state keyed by job id.
    # We only need the two skill-ish fields (legacy single and modern list).
    backup_by_id: Dict[str, Dict[str, Any]] = {}
    for job in backup_jobs:
        if not isinstance(job, dict):
            continue
        jid = job.get("id")
        if not isinstance(jid, str) or not jid:
            continue
        backup_by_id[jid] = {
            "skills": job.get("skills"),
            "skill": job.get("skill"),
            "name": job.get("name") or jid,
        }

    if not backup_by_id:
        report["attempted"] = True  # we tried but there was nothing to do
        return report

    # Load and rewrite the live jobs under the scheduler's lock.
    try:
        from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
    except ImportError as e:
        report["error"] = f"cron module unavailable: {e}"
        return report

    report["attempted"] = True
    try:
        with _jobs_file_lock:
            live_jobs = load_jobs()
            changed = False

            live_ids = set()
            for live in live_jobs:
                if not isinstance(live, dict):
                    continue
                jid = live.get("id")
                if not isinstance(jid, str) or not jid:
                    continue
                live_ids.add(jid)

                backup = backup_by_id.get(jid)
                if backup is None:
                    continue  # live job didn't exist at snapshot time

                cur_skills = live.get("skills")
                cur_skill = live.get("skill")
                bkp_skills = backup.get("skills")
                bkp_skill = backup.get("skill")

                if cur_skills == bkp_skills and cur_skill == bkp_skill:
                    report["unchanged"] += 1
                    continue

                # Restore. Preserve absence (don't force the key to appear
                # if the backup didn't have it either).
                if bkp_skills is None:
                    live.pop("skills", None)
                else:
                    live["skills"] = bkp_skills
                if bkp_skill is None:
                    live.pop("skill", None)
                else:
                    live["skill"] = bkp_skill

                report["restored"].append({
                    "job_id": jid,
                    "job_name": backup.get("name") or jid,
                    "from": {"skills": cur_skills, "skill": cur_skill},
                    "to": {"skills": bkp_skills, "skill": bkp_skill},
                })
                changed = True

            # Jobs in backup but not in live = user deleted them after snapshot
            for jid, backup in backup_by_id.items():
                if jid not in live_ids:
                    report["skipped_missing"].append({
                        "job_id": jid,
                        "job_name": backup.get("name") or jid,
                    })

            # Only write when at least one job was actually modified.
            if changed:
                save_jobs(live_jobs)
    except Exception as e:  # noqa: BLE001 — rollback must not die mid-restore
        logger.debug("Cron skill-link restore failed: %s", e, exc_info=True)
        report["error"] = f"restore failed mid-flight: {e}"

    return report
|
||||
|
||||
|
||||
|
||||
def _unstage_entries(moved: List[Tuple[Path, Path]], staged: Path) -> bool:
    """Move staged entries back to their original paths.

    Returns True when every entry was moved back. The staging directory is
    removed only in that case, so a failed move-back never deletes the last
    on-disk copy of a skill.
    """
    all_back = True
    for orig, dest in moved:
        try:
            shutil.move(str(dest), str(orig))
        except OSError:
            all_back = False
    if all_back:
        shutil.rmtree(staged, ignore_errors=True)
    return all_back


def _purge_partial_extract(skills: Path) -> None:
    """Best-effort removal of whatever a failed extract left in ``skills``.

    Entries named in ``_EXCLUDE_TOP_LEVEL`` are never touched. Clearing the
    rest first matters because ``shutil.move`` into an existing directory
    nests the source inside it instead of replacing it.
    """
    try:
        leftovers = list(skills.iterdir())
    except OSError:
        return
    for entry in leftovers:
        if entry.name in _EXCLUDE_TOP_LEVEL:
            continue
        try:
            if entry.is_symlink() or not entry.is_dir():
                entry.unlink()
            else:
                shutil.rmtree(entry)
        except OSError:
            # Recovery is best-effort; the move-back below reports failure.
            pass


def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]:
    """Restore ``~/.hermes/skills/`` from a snapshot.

    Strategy:
      1. Resolve the target snapshot (explicit id or newest regular).
      2. Take a safety snapshot of the CURRENT skills tree so the rollback
         itself is undoable.
      3. Move all current top-level entries (except those named in
         ``_EXCLUDE_TOP_LEVEL``) into an internal staging dir.
      4. Extract the chosen snapshot into the skills directory.
      5. On ANY failure during 4, remove the partial extract, move the
         staged entries back (best-effort) and return failure. If some
         entries cannot be moved back, the staging dir is kept and its
         path is reported instead of being deleted.
      6. Reconcile cron skill links; problems there are reported in the
         message but do not fail the rollback.

    Args:
        backup_id: Snapshot id to restore, or ``None`` to let
            ``_resolve_backup`` pick the default snapshot.

    Returns:
        ``(ok, message, snapshot_path)``. ``snapshot_path`` is the restored
        snapshot directory on success and ``None`` on failure.
    """
    target = _resolve_backup(backup_id)
    if target is None:
        return (
            False,
            "no matching backup found"
            + (f" for id '{backup_id}'" if backup_id else "")
            + " (use `hermes curator rollback --list` to see available snapshots)",
            None,
        )
    archive = target / "skills.tar.gz"
    if not archive.exists():
        return (False, f"snapshot {target.name} has no skills.tar.gz — corrupted?", None)

    skills = _skills_dir()
    skills.mkdir(parents=True, exist_ok=True)
    backups = _backups_dir()
    backups.mkdir(parents=True, exist_ok=True)

    # Step 2: safety snapshot of current state FIRST. If this fails we bail
    # out before touching anything — otherwise a failed extract could leave
    # the user with no skills.
    try:
        snapshot_skills(reason=f"pre-rollback to {target.name}")
    except Exception as e:
        return (False, f"pre-rollback safety snapshot failed: {e}", None)

    # Step 3: move current entries into an internal staging dir so the
    # extract happens into an empty skills tree (predictable result). This
    # dir is an implementation detail — the safety snapshot above is the
    # user-facing undo handle.
    staged = backups / f".rollback-staging-{_utc_id()}"
    try:
        staged.mkdir(parents=True, exist_ok=False)
    except OSError as e:
        return (False, f"failed to create staging dir: {e}", None)

    moved: List[Tuple[Path, Path]] = []
    try:
        for entry in list(skills.iterdir()):
            if entry.name in _EXCLUDE_TOP_LEVEL:
                continue
            dest = staged / entry.name
            shutil.move(str(entry), str(dest))
            moved.append((entry, dest))
    except OSError as e:
        # Best-effort rollback of the move.
        if _unstage_entries(moved, staged):
            return (False, f"failed to stage current skills: {e}", None)
        return (
            False,
            f"failed to stage current skills: {e} "
            f"(some entries could not be moved back and remain in {staged})",
            None,
        )

    # Step 4: extract the snapshot into skills/
    try:
        with tarfile.open(archive, "r:gz") as tf:
            # Reject absolute paths and ``..`` components up front; this
            # also protects interpreters without extraction filters.
            for member in tf.getmembers():
                name = member.name
                if name.startswith("/") or ".." in Path(name).parts:
                    raise tarfile.TarError(
                        f"refusing to extract unsafe path: {name!r}"
                    )
            # Feature-detect the filter API instead of catching TypeError:
            # a TypeError raised during extraction must not trigger a
            # silent, unfiltered second extraction.
            if hasattr(tarfile, "data_filter"):
                tf.extractall(str(skills), filter="data")  # type: ignore[call-arg]
            else:
                tf.extractall(str(skills))
    except Exception as e:  # noqa: BLE001 — live skills are staged away; any failure must restore them
        # A truncated or corrupt gzip raises EOFError / zlib.error, which
        # are neither OSError nor TarError, so catch broadly here.
        _purge_partial_extract(skills)
        if _unstage_entries(moved, staged):
            return (False, f"snapshot extract failed (state restored): {e}", None)
        return (
            False,
            f"snapshot extract failed and the previous skills could not be fully "
            f"restored (remaining entries kept in {staged}; the pre-rollback "
            f"snapshot is also available): {e}",
            None,
        )

    # Extract succeeded — the staging dir has served its purpose. The
    # user's undo handle is the safety snapshot tarball we took earlier.
    shutil.rmtree(staged, ignore_errors=True)

    # Reconcile cron skill-links. Surgical: only the skills/skill fields
    # on jobs matched by id. Everything else in jobs.json is live state
    # (schedule, next_run_at, enabled, prompt, etc.) and we leave it
    # alone. Failures here don't fail the overall rollback — the skills
    # tree is already restored, which is the main guarantee.
    cron_report = _restore_cron_skill_links(target)

    summary_bits = [f"restored from snapshot {target.name}"]
    if cron_report.get("attempted"):
        restored_n = len(cron_report.get("restored") or [])
        skipped_n = len(cron_report.get("skipped_missing") or [])
        if cron_report.get("error"):
            summary_bits.append(f"cron links: error — {cron_report['error']}")
        elif restored_n == 0 and skipped_n == 0 and cron_report.get("unchanged", 0) == 0:
            # Attempted but nothing matched — empty snapshot or no overlapping ids.
            pass
        else:
            parts = []
            if restored_n:
                parts.append(f"{restored_n} job(s) had skill links restored")
            if skipped_n:
                parts.append(f"{skipped_n} backed-up job(s) no longer exist (skipped)")
            if cron_report.get("unchanged"):
                parts.append(f"{cron_report['unchanged']} already matched")
            summary_bits.append("cron links: " + ", ".join(parts))

    logger.info("Curator rollback: restored from %s (cron_report=%s)",
                target.name, cron_report)
    return (True, "; ".join(summary_bits), target)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Human-readable summary for CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def format_size(n: int) -> str:
    """Render a byte count as a short human-readable string.

    Values under 1024 are shown verbatim with a ``B`` suffix. Larger values
    are divided by 1024 per step and shown with one decimal in KB, MB or GB;
    GB is the largest unit, so very large inputs stay in GB.
    """
    if n < 1024:
        return f"{n} B"
    scaled = n / 1024
    for suffix in ("KB", "MB"):
        if scaled < 1024:
            return f"{scaled:.1f} {suffix}"
        scaled /= 1024
    # Anything that survived both divisions is reported in GB, however big.
    return f"{scaled:.1f} GB"
|
||||
|
||||
|
||||
def summarize_backups() -> str:
    """Build the plain-text table of curator snapshots shown by the CLI.

    Returns a fixed message when ``list_backups()`` yields nothing. Otherwise
    returns a header, a rule line and one row per snapshot with its id,
    reason (truncated to 40 characters), skill-file count and archive size.

    Fields that are missing, ``None`` or unparseable fall back to ``?`` / 0,
    so a damaged manifest cannot crash the listing. ``dict.get(key, default)``
    alone does not cover a key that is present with a ``None`` value.
    """
    rows = list_backups()
    if not rows:
        return "No curator snapshots yet."

    header = f"{'id':<24} {'reason':<40} {'skills':>6} {'size':>8}"
    lines = [header, "─" * len(header)]
    for r in rows:
        snapshot_id = str(r.get("id") or "?")
        reason = str(r.get("reason") or "?")[:40]
        skill_files = r.get("skill_files") or 0
        try:
            archive_bytes = int(r.get("archive_bytes") or 0)
        except (TypeError, ValueError):
            archive_bytes = 0
        lines.append(
            f"{snapshot_id:<24} "
            f"{reason:<40} "
            f"{skill_files:>6} "
            f"{format_size(archive_bytes):>8}"
        )
    return "\n".join(lines)
|
||||
@@ -42,7 +42,6 @@ class FailoverReason(enum.Enum):
|
||||
# Context / payload
|
||||
context_overflow = "context_overflow" # Context too large — compress, not failover
|
||||
payload_too_large = "payload_too_large" # 413 — compress payload
|
||||
image_too_large = "image_too_large" # Native image part exceeds provider's per-image limit — shrink and retry
|
||||
|
||||
# Model
|
||||
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
|
||||
@@ -54,8 +53,6 @@ class FailoverReason(enum.Enum):
|
||||
# Provider-specific
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
long_context_tier = "long_context_tier" # Anthropic "extra usage" tier gate
|
||||
oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden" # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
|
||||
llama_cpp_grammar_pattern = "llama_cpp_grammar_pattern" # llama.cpp json-schema-to-grammar rejects regex escapes in `pattern` / `format` — strip from tools and retry
|
||||
|
||||
# Catch-all
|
||||
unknown = "unknown" # Unclassifiable — retry with backoff
|
||||
@@ -93,7 +90,6 @@ class ClassifiedError:
|
||||
_BILLING_PATTERNS = [
|
||||
"insufficient credits",
|
||||
"insufficient_quota",
|
||||
"insufficient balance",
|
||||
"credit balance",
|
||||
"credits have been exhausted",
|
||||
"top up your credits",
|
||||
@@ -151,20 +147,6 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
|
||||
"error code: 413",
|
||||
]
|
||||
|
||||
# Image-size patterns. Matched against 400 bodies (not 413) because most
|
||||
# providers return a 400 with a specific image-too-big message before the
|
||||
# whole request hits the 413 size limit. Anthropic's wording is the most
|
||||
# important here (hard 5 MB per image, returned as
|
||||
# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
|
||||
_IMAGE_TOO_LARGE_PATTERNS = [
|
||||
"image exceeds", # Anthropic: "image exceeds 5 MB maximum"
|
||||
"image too large", # generic
|
||||
"image_too_large", # error_code variant
|
||||
"image size exceeds", # variant
|
||||
# "request_too_large" on a request known to contain an image → image is
|
||||
# the likely culprit; we still try the shrink path before giving up.
|
||||
]
|
||||
|
||||
# Context overflow patterns
|
||||
_CONTEXT_OVERFLOW_PATTERNS = [
|
||||
"context length",
|
||||
@@ -452,50 +434,6 @@ def classify_api_error(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Anthropic OAuth subscription rejects the 1M-context beta header.
|
||||
# Observed error body: "The long context beta is not yet available for
|
||||
# this subscription." Returned as HTTP 400 from native Anthropic when
|
||||
# the subscription doesn't include 1M context, even though the request
|
||||
# carries ``anthropic-beta: context-1m-2025-08-07``. The recovery path
|
||||
# in run_agent.py rebuilds the Anthropic client with the beta stripped
|
||||
# and retries once. Pattern is narrow enough that it won't collide with
|
||||
# the 429 tier-gate pattern above (different status, different phrase).
|
||||
if (
|
||||
status_code == 400
|
||||
and "long context beta" in error_msg
|
||||
and "not yet available" in error_msg
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.oauth_long_context_beta_forbidden,
|
||||
retryable=True,
|
||||
should_compress=False,
|
||||
)
|
||||
|
||||
# llama.cpp's ``json-schema-to-grammar`` converter (used by its OAI
|
||||
# server to build GBNF tool-call parsers) rejects regex escape classes
|
||||
# like ``\d``/``\w``/``\s`` and most ``format`` values. MCP servers
|
||||
# routinely emit ``"pattern": "\\d{4}-\\d{2}-\\d{2}"`` for date/phone/
|
||||
# email params. llama.cpp surfaces this as HTTP 400 with one of a few
|
||||
# recognizable phrases; on match we strip ``pattern``/``format`` from
|
||||
# ``self.tools`` in the retry loop and retry once. Cloud providers are
|
||||
# unaffected — they accept these keywords and we never hit this branch.
|
||||
if (
|
||||
status_code == 400
|
||||
and (
|
||||
"error parsing grammar" in error_msg
|
||||
or "json-schema-to-grammar" in error_msg
|
||||
or (
|
||||
"unable to generate parser" in error_msg
|
||||
and "template" in error_msg
|
||||
)
|
||||
)
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.llama_cpp_grammar_pattern,
|
||||
retryable=True,
|
||||
should_compress=False,
|
||||
)
|
||||
|
||||
# ── 2. HTTP status code classification ──────────────────────────
|
||||
|
||||
if status_code is not None:
|
||||
@@ -546,12 +484,7 @@ def classify_api_error(
|
||||
|
||||
is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
|
||||
if is_disconnect and not status_code:
|
||||
# Absolute token/message-count thresholds are only a proxy for smaller
|
||||
# context windows. Large-context sessions can have hundreds of
|
||||
# messages while still being far below their actual token budget.
|
||||
is_large = approx_tokens > context_length * 0.6 or (
|
||||
context_length <= 256000 and (approx_tokens > 120000 or num_messages > 200)
|
||||
)
|
||||
is_large = approx_tokens > context_length * 0.6 or approx_tokens > 120000 or num_messages > 200
|
||||
if is_large:
|
||||
return _result(
|
||||
FailoverReason.context_overflow,
|
||||
@@ -738,15 +671,6 @@ def _classify_400(
|
||||
) -> ClassifiedError:
|
||||
"""Classify 400 Bad Request — context overflow, format error, or generic."""
|
||||
|
||||
# Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
|
||||
# Must be checked BEFORE context_overflow because messages can trip both
|
||||
# patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.image_too_large,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Context overflow from 400
|
||||
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
|
||||
return result_fn(
|
||||
@@ -797,12 +721,7 @@ def _classify_400(
|
||||
if not err_body_msg:
|
||||
err_body_msg = str(body.get("message") or "").strip().lower()
|
||||
is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
|
||||
# Absolute token/message-count thresholds are only a proxy for smaller
|
||||
# context windows. Large-context sessions can have many messages while
|
||||
# still being far below their actual token budget.
|
||||
is_large = approx_tokens > context_length * 0.4 or (
|
||||
context_length <= 256000 and (approx_tokens > 80000 or num_messages > 80)
|
||||
)
|
||||
is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
|
||||
|
||||
if is_generic and is_large:
|
||||
return result_fn(
|
||||
@@ -879,13 +798,6 @@ def _classify_by_message(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Image-too-large patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.image_too_large,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Usage-limit patterns need the same disambiguation as 402: some providers
|
||||
# surface "usage limit" errors without an HTTP status code. A transient
|
||||
# signal ("try again", "resets at", …) means it's a periodic quota, not
|
||||
|
||||
@@ -30,6 +30,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
@@ -41,6 +42,7 @@ from agent import google_oauth
|
||||
from agent.gemini_schema import sanitize_gemini_tool_parameters
|
||||
from agent.google_code_assist import (
|
||||
CODE_ASSIST_ENDPOINT,
|
||||
FREE_TIER_ID,
|
||||
CodeAssistError,
|
||||
ProjectContext,
|
||||
resolve_project_context,
|
||||
|
||||
@@ -679,21 +679,7 @@ def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices:
|
||||
finish_reason_raw = str(cand.get("finishReason") or "")
|
||||
if finish_reason_raw:
|
||||
mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw)
|
||||
finish_chunk = _make_stream_chunk(model=model, finish_reason=mapped)
|
||||
# Attach usage from this event's usageMetadata so the streaming
|
||||
# loop in run_agent.py can record token counts (mirrors the
|
||||
# non-streaming path in translate_gemini_response).
|
||||
usage_meta = event.get("usageMetadata") or {}
|
||||
if usage_meta:
|
||||
finish_chunk.usage = SimpleNamespace(
|
||||
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
|
||||
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
|
||||
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
|
||||
prompt_tokens_details=SimpleNamespace(
|
||||
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
|
||||
),
|
||||
)
|
||||
chunks.append(finish_chunk)
|
||||
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
|
||||
return chunks
|
||||
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
|
||||
# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields
|
||||
|
||||
@@ -29,6 +29,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
@@ -49,13 +49,14 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import socket
|
||||
import stat
|
||||
import threading
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
@@ -97,7 +98,6 @@ _DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}"
|
||||
|
||||
# Regex patterns for fallback scraping from an installed gemini-cli.
|
||||
import re as _re
|
||||
from utils import atomic_replace
|
||||
_CLIENT_ID_PATTERN = _re.compile(
|
||||
r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]"
|
||||
)
|
||||
@@ -489,30 +489,17 @@ def save_credentials(creds: GoogleCredentials) -> Path:
|
||||
"""Atomically write creds to disk with 0o600 permissions."""
|
||||
path = _credentials_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Tighten parent dir to 0o700 so siblings can't traverse to the creds file.
|
||||
# On Windows this is a no-op (POSIX mode bits aren't enforced); ignore failures.
|
||||
try:
|
||||
os.chmod(path.parent, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
payload = json.dumps(creds.to_dict(), indent=2, sort_keys=True) + "\n"
|
||||
|
||||
with _credentials_lock():
|
||||
tmp_path = path.with_suffix(f".tmp.{os.getpid()}.{secrets.token_hex(4)}")
|
||||
try:
|
||||
# Create with 0o600 atomically to close the TOCTOU window where the
|
||||
# default umask (often 0o644) would briefly expose tokens to other
|
||||
# local users between open() and chmod().
|
||||
fd = os.open(
|
||||
str(tmp_path),
|
||||
os.O_WRONLY | os.O_CREAT | os.O_EXCL,
|
||||
stat.S_IRUSR | stat.S_IWUSR,
|
||||
)
|
||||
with os.fdopen(fd, "w", encoding="utf-8") as fh:
|
||||
with open(tmp_path, "w", encoding="utf-8") as fh:
|
||||
fh.write(payload)
|
||||
fh.flush()
|
||||
os.fsync(fh.fileno())
|
||||
atomic_replace(tmp_path, path)
|
||||
os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
|
||||
os.replace(tmp_path, path)
|
||||
finally:
|
||||
try:
|
||||
if tmp_path.exists():
|
||||
|
||||
233
agent/i18n.py
@@ -1,233 +0,0 @@
|
||||
"""Lightweight internationalization (i18n) for Hermes static user-facing messages.
|
||||
|
||||
Scope (thin slice, by design): only the highest-impact static strings shown
|
||||
to the user by Hermes itself -- approval prompts, a handful of gateway slash
|
||||
command replies, restart-drain notices. Agent-generated output, log lines,
|
||||
error tracebacks, tool outputs, and slash-command descriptions all stay in
|
||||
English.
|
||||
|
||||
Catalog files live under ``locales/<lang>.yaml`` at the repo root. Each
|
||||
catalog is a flat dict keyed by dotted paths (e.g. ``approval.choose`` or
|
||||
``gateway.approval_expired``). Missing keys fall back to English; if English
|
||||
is missing too, the key path itself is returned so a broken catalog never
|
||||
crashes the agent.
|
||||
|
||||
Usage::
|
||||
|
||||
from agent.i18n import t
|
||||
print(t("approval.choose_long")) # current lang
|
||||
print(t("gateway.draining", count=3)) # {count} formatted
|
||||
print(t("approval.choose_long", lang="zh")) # explicit override
|
||||
|
||||
Language resolution order:
|
||||
1. Explicit ``lang=`` argument passed to :func:`t`
|
||||
2. ``HERMES_LANGUAGE`` environment variable (for tests / quick override)
|
||||
3. ``display.language`` from config.yaml
|
||||
4. ``"en"`` (baseline)
|
||||
|
||||
Supported languages: en, zh, ja, de, es, fr, tr, uk. Unknown values fall back to en.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SUPPORTED_LANGUAGES: tuple[str, ...] = ("en", "zh", "ja", "de", "es", "fr", "tr", "uk")
|
||||
DEFAULT_LANGUAGE = "en"
|
||||
|
||||
# Accept a few natural aliases so users who type "chinese" / "zh-CN" / "jp"
|
||||
# get the right catalog instead of silently falling back to English.
|
||||
_LANGUAGE_ALIASES: dict[str, str] = {
|
||||
"english": "en", "en-us": "en", "en-gb": "en",
|
||||
"chinese": "zh", "mandarin": "zh", "zh-cn": "zh", "zh-tw": "zh", "zh-hans": "zh", "zh-hant": "zh",
|
||||
"japanese": "ja", "jp": "ja", "ja-jp": "ja",
|
||||
"german": "de", "deutsch": "de", "de-de": "de",
|
||||
"spanish": "es", "español": "es", "espanol": "es", "es-es": "es", "es-mx": "es",
|
||||
"french": "fr", "français": "fr", "france": "fr", "fr-fr": "fr", "fr-be": "fr", "fr-ca": "fr", "fr-ch": "fr",
|
||||
"ukrainian": "uk", "ukrainisch": "uk", "українська": "uk", "uk-ua": "uk", "ua": "uk",
|
||||
"turkish": "tr", "türkçe": "tr", "tr-tr": "tr",
|
||||
}
|
||||
|
||||
_catalog_cache: dict[str, dict[str, str]] = {}
|
||||
_catalog_lock = threading.Lock()
|
||||
|
||||
|
||||
def _locales_dir() -> Path:
|
||||
"""Return the directory containing locale YAML files.
|
||||
|
||||
Lives next to the repo root so both the bundled install and editable
|
||||
checkouts find it without PYTHONPATH gymnastics.
|
||||
"""
|
||||
# agent/i18n.py -> agent/ -> repo root
|
||||
return Path(__file__).resolve().parent.parent / "locales"
|
||||
|
||||
|
||||
def _normalize_lang(value: Any) -> str:
    """Map a user-supplied language value onto a supported language code.

    Lookup order after trimming and lower-casing: exact supported code,
    known alias, then the part before the first ``-`` (so a regional tag
    resolves to its base language). Non-strings, blank strings and anything
    unrecognised yield ``DEFAULT_LANGUAGE``.
    """
    token = value.strip().lower() if isinstance(value, str) else ""
    if not token:
        return DEFAULT_LANGUAGE

    if token in SUPPORTED_LANGUAGES:
        return token
    if token in _LANGUAGE_ALIASES:
        return _LANGUAGE_ALIASES[token]

    # Drop a region suffix: "zh-cn" -> "zh". Unsupported bases fall through.
    base_code = token.partition("-")[0]
    return base_code if base_code in SUPPORTED_LANGUAGES else DEFAULT_LANGUAGE
|
||||
|
||||
|
||||
def _load_catalog(lang: str) -> dict[str, str]:
    """Return the flattened catalog for ``lang``, loading it on first use.

    The locale file ``<locales>/<lang>.yaml`` may be nested; it is flattened
    into dotted keys via ``_flatten_into``. The result is stored in
    ``_catalog_cache``. A missing or unreadable file is cached as an empty
    catalog, so the failure is logged once rather than on every lookup.
    """

    def _remember(entries: dict[str, str]) -> None:
        with _catalog_lock:
            _catalog_cache[lang] = entries

    with _catalog_lock:
        hit = _catalog_cache.get(lang)
    if hit is not None:
        return hit

    catalog_path = _locales_dir() / f"{lang}.yaml"
    if not catalog_path.is_file():
        logger.debug("i18n catalog missing for %s at %s", lang, catalog_path)
        _remember({})
        return {}

    try:
        import yaml  # PyYAML is already a hermes dependency

        with catalog_path.open("r", encoding="utf-8") as handle:
            document = yaml.safe_load(handle) or {}
    except Exception as exc:
        logger.warning("Failed to load i18n catalog %s: %s", catalog_path, exc)
        _remember({})
        return {}

    flattened: dict[str, str] = {}
    _flatten_into(document, "", flattened)
    _remember(flattened)
    return flattened
|
||||
|
||||
|
||||
def _flatten_into(node: Any, prefix: str, out: dict[str, str]) -> None:
|
||||
if isinstance(node, dict):
|
||||
for key, value in node.items():
|
||||
child_key = f"{prefix}.{key}" if prefix else str(key)
|
||||
_flatten_into(value, child_key, out)
|
||||
elif isinstance(node, str):
|
||||
out[prefix] = node
|
||||
# Non-string, non-dict leaves are ignored -- catalogs are text-only.
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def _config_language_cached() -> str | None:
    """Return the normalized ``display.language`` config value, or ``None``.

    The result is memoized with ``lru_cache`` so the config is loaded at
    most once until ``cache_clear()`` is called on this function. Any
    error while loading or reading the config is logged at debug level and
    treated as "no language configured".
    """
    try:
        from hermes_cli.config import load_config

        display_section = load_config().get("display") or {}
        configured = display_section.get("language")
        if configured:
            return _normalize_lang(configured)
    except Exception as exc:
        logger.debug("Could not read display.language from config: %s", exc)
    return None
|
||||
|
||||
|
||||
def reset_language_cache() -> None:
    """Drop every cached catalog and the memoized config language.

    After this call the next lookup re-reads ``display.language`` and
    reloads catalogs from disk, so a running process can pick up a changed
    language setting without a restart.
    """
    with _catalog_lock:
        _catalog_cache.clear()
    _config_language_cached.cache_clear()
|
||||
|
||||
|
||||
def get_language() -> str:
    """Return the active language code.

    Precedence: a non-empty ``HERMES_LANGUAGE`` environment variable, then
    the cached ``display.language`` config value, then ``DEFAULT_LANGUAGE``.
    """
    override = os.environ.get("HERMES_LANGUAGE")
    if override:
        return _normalize_lang(override)
    return _config_language_cached() or DEFAULT_LANGUAGE
|
||||
|
||||
|
||||
def t(key: str, lang: str | None = None, **format_kwargs: Any) -> str:
    """Look up ``key`` in the active catalog and return the localized text.

    Parameters
    ----------
    key
        Dotted catalog path, e.g. ``"approval.choose_long"``.
    lang
        Optional explicit language; when given it wins over the environment
        variable and the config value.
    **format_kwargs
        Values substituted into the entry with ``str.format`` (an entry
        containing ``{count}`` expects ``count=...``).

    Returns
    -------
    The entry for the resolved language; otherwise the default-language
    entry; otherwise ``key`` itself. If formatting fails, the unformatted
    text is returned and a warning is logged.
    """
    target = _normalize_lang(lang) if lang else get_language()

    text = _load_catalog(target).get(key)
    if text is None and target != DEFAULT_LANGUAGE:
        # Prefer the default-language wording over exposing a raw key path.
        text = _load_catalog(DEFAULT_LANGUAGE).get(key)
    if text is None:
        # Nothing anywhere: hand back the key so a broken catalog is merely
        # ugly, never fatal.
        logger.debug("i18n miss: key=%r lang=%r", key, target)
        text = key

    if not format_kwargs:
        return text
    try:
        return text.format(**format_kwargs)
    except (KeyError, IndexError, ValueError) as exc:
        logger.warning(
            "i18n format failed for key=%r lang=%r kwargs=%r: %s",
            key, target, format_kwargs, exc,
        )
        return text
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SUPPORTED_LANGUAGES",
|
||||
"DEFAULT_LANGUAGE",
|
||||
"t",
|
||||
"get_language",
|
||||
"reset_language_cache",
|
||||
]
|
||||
@@ -1,236 +0,0 @@
|
||||
"""Routing helpers for inbound user-attached images.
|
||||
|
||||
Two modes:
|
||||
|
||||
native — attach images as OpenAI-style ``image_url`` content parts on the
|
||||
user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
|
||||
OpenAI chat.completions) already translate these into their
|
||||
vendor-specific multimodal formats.
|
||||
|
||||
text — run ``vision_analyze`` on each image up-front and prepend the
|
||||
description to the user's text. The model never sees the pixels;
|
||||
it only sees a lossy text summary. This is the pre-existing
|
||||
behaviour and still the right choice for non-vision models.
|
||||
|
||||
The decision is made once per message turn by :func:`decide_image_input_mode`.
|
||||
It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
|
||||
| ``text``, default ``auto``) and the active model's capability metadata.
|
||||
|
||||
In ``auto`` mode:
|
||||
- If the user has explicitly configured ``auxiliary.vision.provider``
|
||||
(i.e. not ``auto`` and not empty), we assume they want the text pipeline
|
||||
regardless of the main model — they've opted in to a specific vision
|
||||
backend for a reason (cost, quality, local-only, etc.).
|
||||
- Otherwise, if the active model reports ``supports_vision=True`` in its
|
||||
models.dev metadata, we attach natively.
|
||||
- Otherwise (non-vision model, no explicit override), we fall back to text.
|
||||
|
||||
This keeps ``vision_analyze`` surfaced as a tool in every session — skills
|
||||
and agent flows that chain it (browser screenshots, deeper inspection of
|
||||
URL-referenced images, style-gating loops) keep working. The routing only
|
||||
affects *how user-attached images on the current turn* are presented to the
|
||||
main model.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_VALID_MODES = frozenset({"auto", "native", "text"})
|
||||
|
||||
|
||||
def _coerce_mode(raw: Any) -> str:
    """Reduce a raw config value to a member of ``_VALID_MODES``.

    Strings are trimmed and lower-cased before the membership check.
    Non-strings and unrecognised strings both become ``"auto"``.
    """
    candidate = raw.strip().lower() if isinstance(raw, str) else "auto"
    return candidate if candidate in _VALID_MODES else "auto"
|
||||
|
||||
|
||||
def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
|
||||
"""True when the user configured a specific auxiliary vision backend.
|
||||
|
||||
An explicit override means the user *wants* the text pipeline (they're
|
||||
paying for a dedicated vision model), so we don't silently bypass it.
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
return False
|
||||
aux = cfg.get("auxiliary") or {}
|
||||
if not isinstance(aux, dict):
|
||||
return False
|
||||
vision = aux.get("vision") or {}
|
||||
if not isinstance(vision, dict):
|
||||
return False
|
||||
|
||||
provider = str(vision.get("provider") or "").strip().lower()
|
||||
model = str(vision.get("model") or "").strip()
|
||||
base_url = str(vision.get("base_url") or "").strip()
|
||||
|
||||
# "auto" / "" / blank = not explicit
|
||||
if provider in ("", "auto") and not model and not base_url:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
|
||||
"""Return True/False if we can resolve caps, None if unknown."""
|
||||
if not provider or not model:
|
||||
return None
|
||||
try:
|
||||
from agent.models_dev import get_model_capabilities
|
||||
caps = get_model_capabilities(provider, model)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
|
||||
return None
|
||||
if caps is None:
|
||||
return None
|
||||
return bool(caps.supports_vision)
|
||||
|
||||
|
||||
def decide_image_input_mode(
    provider: str,
    model: str,
    cfg: Optional[Dict[str, Any]],
) -> str:
    """Choose ``"native"`` or ``"text"`` image handling for the current turn.

    Args:
        provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
        model: active model slug as it would be sent to the provider.
        cfg: loaded config.yaml dict, or None. When None, behaves as auto.

    Returns:
        The explicitly configured ``agent.image_input_mode`` when it is
        ``"native"`` or ``"text"``. In auto mode: ``"text"`` if an auxiliary
        vision backend is explicitly configured, otherwise ``"native"`` only
        when the capability lookup positively reports vision support.
    """
    agent_section = (cfg.get("agent") or {}) if isinstance(cfg, dict) else None
    configured = (
        _coerce_mode(agent_section.get("image_input_mode"))
        if isinstance(agent_section, dict)
        else "auto"
    )

    # An explicit user choice always wins.
    if configured in ("native", "text"):
        return configured

    # auto: respect a dedicated auxiliary vision model if one is configured.
    if _explicit_aux_vision_override(cfg):
        return "text"

    # auto: unknown capabilities (None) are treated like "no vision".
    return "native" if _lookup_supports_vision(provider, model) is True else "text"
|
||||
|
||||
|
||||
# Image size handling is REACTIVE rather than proactive: we attempt native
|
||||
# attachment at full size regardless of provider, and rely on
|
||||
# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
|
||||
# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
|
||||
# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
|
||||
#
|
||||
# Why reactive: our knowledge of provider ceilings is partial and evolving
|
||||
# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
|
||||
# A proactive per-provider table would be stale the moment a provider raises
|
||||
# or lowers its limit, and silently degrading quality for users on providers
|
||||
# that would have accepted the full image is the worse failure mode.
|
||||
# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
|
||||
# it fires, which is cheaper than permanent quality loss.
|
||||
|
||||
|
||||
def _guess_mime(path: Path) -> str:
|
||||
mime, _ = mimetypes.guess_type(str(path))
|
||||
if mime and mime.startswith("image/"):
|
||||
return mime
|
||||
# mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
|
||||
# the suffix looks imagey.
|
||||
suffix = path.suffix.lower()
|
||||
return {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
".bmp": "image/bmp",
|
||||
}.get(suffix, "image/jpeg")
|
||||
|
||||
|
||||
def _file_to_data_url(path: Path) -> Optional[str]:
    """Read ``path`` and return it as a ``data:<mime>;base64,...`` URL.

    The image is encoded at its native size; no size limit is applied here.
    Oversized images are handled later by the agent retry loop
    (``run_agent._try_shrink_image_parts_in_messages``), which shrinks after
    a provider rejects the request. That way providers with generous limits
    never receive a needlessly degraded image.

    Returns:
        The data URL, or ``None`` if the file could not be read (missing,
        permission denied, etc.). The failure is logged as a warning and the
        caller is expected to report the path as skipped.
    """
    try:
        payload = path.read_bytes()
    except Exception as exc:
        logger.warning("image_routing: failed to read %s — %s", path, exc)
        return None
    encoded = base64.b64encode(payload).decode("ascii")
    return "data:" + _guess_mime(path) + ";base64," + encoded
|
||||
|
||||
|
||||
def build_native_content_parts(
    user_text: str,
    image_paths: List[str],
) -> Tuple[List[Dict[str, Any]], List[str]]:
    """Assemble an OpenAI-style ``content`` list for a user turn with images.

    Shape:
        [{"type": "text", "text": "..."},
         {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
         ...]

    Images are embedded at native size. If a provider rejects the request
    because an image is too large, the agent's retry loop shrinks and retries
    — see ``run_agent._try_shrink_image_parts_in_messages``.

    Args:
        user_text: the user's message; surrounding whitespace is stripped.
        image_paths: local file paths of the images to attach.

    Returns:
        ``(content_parts, skipped_paths)``. Skipped paths are entries that
        are not existing regular files or could not be read from disk.
    """
    prompt = (user_text or "").strip()
    image_parts: List[Dict[str, Any]] = []
    skipped: List[str] = []

    for raw_path in image_paths:
        candidate = Path(raw_path)
        # is_file() is already False for paths that do not exist.
        data_url = _file_to_data_url(candidate) if candidate.is_file() else None
        if not data_url:
            skipped.append(str(raw_path))
            continue
        image_parts.append({"type": "image_url", "image_url": {"url": data_url}})

    if prompt:
        lead: List[Dict[str, Any]] = [{"type": "text", "text": prompt}]
    elif image_parts:
        # No user text: add a neutral prompt so the turn isn't just images.
        lead = [{"type": "text", "text": "What do you see in this image?"}]
    else:
        lead = []

    return lead + image_parts, skipped
|
||||
|
||||
|
||||
__all__ = [
|
||||
"decide_image_input_mode",
|
||||
"build_native_content_parts",
|
||||
]
|
||||
@@ -1,48 +0,0 @@
|
||||
"""LM Studio reasoning-effort resolution shared by the chat-completions
|
||||
transport and run_agent's iteration-limit summary path.
|
||||
|
||||
LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
|
||||
``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
|
||||
graduated models). We map the user's ``reasoning_config`` onto LM Studio's
|
||||
OpenAI-compatible vocabulary, then clamp against the model's allowed set so
|
||||
the server doesn't 400 on an unsupported effort.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
# LM Studio accepts these top-level reasoning_effort values via its
# OpenAI-compatible chat.completions endpoint.
_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}

# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
# Map them onto the OpenAI-compatible request vocabulary.
_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}


def resolve_lmstudio_effort(
    reasoning_config: Optional[dict],
    allowed_options: Optional[List[str]],
) -> Optional[str]:
    """Return the ``reasoning_effort`` string to send to LM Studio, or ``None``.

    Args:
        reasoning_config: user reasoning settings. ``{"enabled": False}``
            resolves to ``"none"``; otherwise ``"effort"`` is used when it is
            a recognised level or alias. Missing, unrecognised or non-string
            efforts resolve to ``"medium"``.
        allowed_options: the model's published allowed efforts, or a falsy
            value when the probe failed. Entries are matched
            case-insensitively, ignoring surrounding whitespace, after alias
            mapping. Non-string entries are ignored.

    Returns:
        The resolved effort, or ``None`` meaning "omit the field": the user
        picked a level the model can't honor, so let LM Studio fall back to
        the model's declared default rather than silently substituting a
        different effort. When ``allowed_options`` is falsy, clamping is
        skipped and the resolved effort is returned as-is.
    """
    effort = "medium"
    if reasoning_config and isinstance(reasoning_config, dict):
        if reasoning_config.get("enabled") is False:
            effort = "none"
        else:
            raw = reasoning_config.get("effort")
            # Only strings can name an effort. Calling .strip() on a number
            # or other non-string value used to raise AttributeError.
            requested = raw.strip().lower() if isinstance(raw, str) else ""
            requested = _LM_EFFORT_ALIASES.get(requested, requested)
            if requested in _LM_VALID_EFFORTS:
                effort = requested

    if allowed_options:
        allowed = set()
        for option in allowed_options:
            if not isinstance(option, str):
                continue
            # Normalise like the requested effort so "Off" / " low " match.
            key = option.strip().lower()
            allowed.add(_LM_EFFORT_ALIASES.get(key, key))
        if effort not in allowed:
            return None
    return effort
|
||||
@@ -20,25 +20,25 @@ def summarize_manual_compression(
|
||||
headline = f"No changes from compression: {before_count} messages"
|
||||
if after_tokens == before_tokens:
|
||||
token_line = (
|
||||
f"Approx request size: ~{before_tokens:,} tokens (unchanged)"
|
||||
f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)"
|
||||
)
|
||||
else:
|
||||
token_line = (
|
||||
f"Approx request size: ~{before_tokens:,} → "
|
||||
f"Rough transcript estimate: ~{before_tokens:,} → "
|
||||
f"~{after_tokens:,} tokens"
|
||||
)
|
||||
else:
|
||||
headline = f"Compressed: {before_count} → {after_count} messages"
|
||||
token_line = (
|
||||
f"Approx request size: ~{before_tokens:,} → "
|
||||
f"Rough transcript estimate: ~{before_tokens:,} → "
|
||||
f"~{after_tokens:,} tokens"
|
||||
)
|
||||
|
||||
note = None
|
||||
if not noop and after_count < before_count and after_tokens > before_tokens:
|
||||
note = (
|
||||
"Note: fewer messages can still raise this estimate when "
|
||||
"compression rewrites the transcript into denser summaries."
|
||||
"Note: fewer messages can still raise this rough transcript estimate "
|
||||
"when compression rewrites the transcript into denser summaries."
|
||||
)
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
"""MemoryManager — orchestrates memory providers for the agent.
|
||||
"""MemoryManager — orchestrates the built-in memory provider plus at most
|
||||
ONE external plugin memory provider.
|
||||
|
||||
Single integration point in run_agent.py. Replaces scattered per-backend
|
||||
code with one manager that delegates to registered providers.
|
||||
|
||||
Only ONE external plugin provider is allowed at a time — attempting to
|
||||
register a second external provider is rejected with a warning. This
|
||||
The BuiltinMemoryProvider is always registered first and cannot be removed.
|
||||
Only ONE external (non-builtin) provider is allowed at a time — attempting
|
||||
to register a second external provider is rejected with a warning. This
|
||||
prevents tool schema bloat and conflicting memory backends.
|
||||
|
||||
Usage in run_agent.py:
|
||||
self._memory_manager = MemoryManager()
|
||||
self._memory_manager.add_provider(BuiltinMemoryProvider(...))
|
||||
# Only ONE of these:
|
||||
self._memory_manager.add_provider(plugin_provider)
|
||||
|
||||
@@ -25,6 +28,7 @@ Usage in run_agent.py:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import inspect
|
||||
@@ -46,7 +50,7 @@ _INTERNAL_CONTEXT_RE = re.compile(
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_INTERNAL_NOTE_RE = re.compile(
|
||||
r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as (?:informational background data|authoritative reference data[^\]]*)\.\]\s*',
|
||||
r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as informational background data\.\]\s*',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
@@ -59,129 +63,19 @@ def sanitize_context(text: str) -> str:
|
||||
return text
|
||||
|
||||
|
||||
class StreamingContextScrubber:
    """Stateful scrubber for streaming text that may contain split memory-context spans.

    The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
    a ``<memory-context>`` opened in one delta and closed in a later delta
    leaks its payload to the UI because the non-greedy block regex needs
    both tags in one string. This scrubber runs a small state machine
    across deltas, holding back partial-tag tails and discarding
    everything inside a span (including the system-note line).

    Tag matching is case-insensitive. Case folding is restricted to ASCII
    letters so the folded text always has the same length as the original,
    which keeps offsets found in the folded text valid for slicing the
    original buffer.

    Usage::

        scrubber = StreamingContextScrubber()
        for delta in stream:
            visible = scrubber.feed(delta)
            if visible:
                emit(visible)
        trailing = scrubber.flush()  # at end of stream
        if trailing:
            emit(trailing)

    Callers building new top-level responses (new turn) should create a
    fresh scrubber or call ``reset()``.
    """

    _OPEN_TAG = "<memory-context>"
    _CLOSE_TAG = "</memory-context>"

    # Length-preserving ASCII lowercase table. ``str.lower()`` can change
    # the length of a string (e.g. "İ" lowercases to two code points), and
    # that would misalign indices between the folded and original text.
    _ASCII_FOLD = str.maketrans(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"
    )

    def __init__(self) -> None:
        # True while we are between an open tag and its matching close tag.
        self._in_span: bool = False
        # Held-back tail that might be the start of a tag.
        self._buf: str = ""

    def reset(self) -> None:
        """Forget all streaming state so the scrubber can be reused."""
        self._in_span = False
        self._buf = ""

    def feed(self, text: str) -> str:
        """Return the visible portion of ``text`` after scrubbing.

        Any trailing fragment that could be the start of an open/close tag
        is held back in the internal buffer and surfaced on the next
        ``feed()`` call or discarded/emitted by ``flush()``.
        """
        if not text:
            return ""
        buf = self._buf + text
        self._buf = ""
        out: list[str] = []

        while buf:
            # Same length as ``buf``, so indices are interchangeable.
            folded = buf.translate(self._ASCII_FOLD)
            if self._in_span:
                idx = folded.find(self._CLOSE_TAG)
                if idx == -1:
                    # Hold back a potential partial close tag; drop the rest.
                    held = self._max_partial_suffix(buf, self._CLOSE_TAG)
                    self._buf = buf[-held:] if held else ""
                    return "".join(out)
                # Found close: skip span content + tag, continue scanning.
                buf = buf[idx + len(self._CLOSE_TAG):]
                self._in_span = False
            else:
                idx = folded.find(self._OPEN_TAG)
                if idx == -1:
                    # No open tag: hold back a potential partial open tag.
                    held = self._max_partial_suffix(buf, self._OPEN_TAG)
                    if held:
                        out.append(buf[:-held])
                        self._buf = buf[-held:]
                    else:
                        out.append(buf)
                    return "".join(out)
                # Emit text before the tag, then enter the span.
                if idx > 0:
                    out.append(buf[:idx])
                buf = buf[idx + len(self._OPEN_TAG):]
                self._in_span = True

        return "".join(out)

    def flush(self) -> str:
        """Emit any held-back buffer at end-of-stream.

        If we're still inside an unterminated span the remaining content is
        discarded (safer: leaking partial memory context is worse than a
        truncated answer). Otherwise the held-back partial-tag tail is
        emitted verbatim (it turned out not to be a real tag).
        """
        if self._in_span:
            self._buf = ""
            self._in_span = False
            return ""
        tail = self._buf
        self._buf = ""
        return tail

    @staticmethod
    def _max_partial_suffix(buf: str, tag: str) -> int:
        """Return the length of the longest buf-suffix that is a tag-prefix.

        Case-insensitive for ASCII letters. Only proper prefixes of ``tag``
        are considered. Returns 0 if no suffix could start the tag.
        """
        fold = StreamingContextScrubber._ASCII_FOLD
        tag_folded = tag.translate(fold)
        buf_folded = buf.translate(fold)
        max_check = min(len(buf_folded), len(tag_folded) - 1)
        for size in range(max_check, 0, -1):
            if tag_folded.startswith(buf_folded[-size:]):
                return size
        return 0
|
||||
|
||||
|
||||
def build_memory_context_block(raw_context: str) -> str:
|
||||
"""Wrap prefetched memory in a fenced block with system note."""
|
||||
"""Wrap prefetched memory in a fenced block with system note.
|
||||
|
||||
The fence prevents the model from treating recalled context as user
|
||||
discourse. Injected at API-call time only — never persisted.
|
||||
"""
|
||||
if not raw_context or not raw_context.strip():
|
||||
return ""
|
||||
clean = sanitize_context(raw_context)
|
||||
if clean != raw_context:
|
||||
logger.warning("memory provider returned pre-wrapped context; stripped")
|
||||
return (
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, "
|
||||
"NOT new user input. Treat as authoritative reference data — "
|
||||
"this is the agent's persistent memory and should inform all responses.]\n\n"
|
||||
"NOT new user input. Treat as informational background data.]\n\n"
|
||||
f"{clean}\n"
|
||||
"</memory-context>"
|
||||
)
|
||||
@@ -400,41 +294,6 @@ class MemoryManager:
|
||||
provider.name, e,
|
||||
)
|
||||
|
||||
def on_session_switch(
    self,
    new_session_id: str,
    *,
    parent_session_id: str = "",
    reset: bool = False,
    **kwargs,
) -> None:
    """Tell every registered provider that the agent's session_id changed.

    Fires on ``/resume``, ``/branch``, ``/reset``, ``/new``, and context
    compression — any path that reassigns ``AIAgent.session_id`` without
    tearing the provider down. Providers keep running; they only refresh
    cached per-session state so later writes land in the right session's
    record. See ``MemoryProvider.on_session_switch`` for the full contract.

    Does nothing when ``new_session_id`` is empty. A failure in one provider
    is logged at debug level and does not stop the remaining providers from
    being notified.
    """
    if not new_session_id:
        return

    # Same keyword arguments for every provider; build them once.
    forwarded = dict(kwargs, parent_session_id=parent_session_id, reset=reset)
    for backend in self._providers:
        try:
            backend.on_session_switch(new_session_id, **forwarded)
        except Exception as exc:
            logger.debug(
                "Memory provider '%s' on_session_switch failed: %s",
                backend.name, exc,
            )
|
||||
|
||||
def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
|
||||
"""Notify all providers before context compression.
|
||||
|
||||
|
||||
@@ -1,16 +1,17 @@
|
||||
"""Abstract base class for pluggable memory providers.
|
||||
|
||||
Memory providers give the agent persistent recall across sessions.
|
||||
The MemoryManager enforces a one-external-provider limit to prevent
|
||||
tool schema bloat and conflicting memory backends.
|
||||
Memory providers give the agent persistent recall across sessions. One
|
||||
external provider is active at a time alongside the always-on built-in
|
||||
memory (MEMORY.md / USER.md). The MemoryManager enforces this limit.
|
||||
|
||||
External providers (Honcho, Hindsight, Mem0, etc.) are registered
|
||||
and managed via MemoryManager. Only one external provider runs at a
|
||||
time.
|
||||
Built-in memory is always active as the first provider and cannot be removed.
|
||||
External providers (Honcho, Hindsight, Mem0, etc.) are additive — they never
|
||||
disable the built-in store. Only one external provider runs at a time to
|
||||
prevent tool schema bloat and conflicting memory backends.
|
||||
|
||||
Registration:
|
||||
Plugins ship in plugins/memory/<name>/ and are activated via
|
||||
the memory.provider config key.
|
||||
1. Built-in: BuiltinMemoryProvider — always present, not removable.
|
||||
2. Plugins: Ship in plugins/memory/<name>/, activated by memory.provider config.
|
||||
|
||||
Lifecycle (called by MemoryManager, wired in run_agent.py):
|
||||
initialize() — connect, create resources, warm up
|
||||
@@ -24,7 +25,6 @@ Lifecycle (called by MemoryManager, wired in run_agent.py):
|
||||
Optional hooks (override to opt in):
|
||||
on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
|
||||
on_session_end(messages) — end-of-session extraction
|
||||
on_session_switch(new_session_id, **kwargs) — mid-process session_id rotation
|
||||
on_pre_compress(messages) -> str — extract before context compression
|
||||
on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
|
||||
on_delegation(task, result, **kwargs) — parent-side observation of subagent work
|
||||
@@ -160,45 +160,6 @@ class MemoryProvider(ABC):
|
||||
(CLI exit, /reset, gateway session expiry).
|
||||
"""
|
||||
|
||||
def on_session_switch(
    self,
    new_session_id: str,
    *,
    parent_session_id: str = "",
    reset: bool = False,
    **kwargs,
) -> None:
    """Hook invoked when the agent's session_id rotates mid-process.

    Triggered by ``/resume``, ``/branch``, ``/reset``, ``/new`` (CLI), the
    gateway equivalents, and context compression — every path that reassigns
    ``AIAgent.session_id`` while the provider stays alive.

    A provider that caches per-session state in ``initialize()``
    (``_session_id``, ``_document_id``, accumulated turn buffers, counters)
    should update or reset it here so that later writes are recorded against
    the correct session.

    Parameters
    ----------
    new_session_id:
        The session_id the agent has just switched to.
    parent_session_id:
        The previous session_id when it is meaningful: ``/branch`` (fork
        lineage), context compression (continuation lineage) and
        ``/resume`` (the session being left). Empty string when there is
        no lineage.
    reset:
        ``True`` for a genuinely new conversation (``/reset`` / ``/new``);
        providers should flush accumulated per-session buffers such as
        ``_session_turns`` or ``_turn_counter``. ``False`` for ``/resume``,
        ``/branch`` and compression, where the logical conversation
        continues under the new id.

    The default implementation does nothing, for backward compatibility.
    """
|
||||
|
||||
def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
|
||||
"""Called before context compression discards old messages.
|
||||
|
||||
|
||||
@@ -46,13 +46,11 @@ def _resolve_requests_verify() -> bool | str:
|
||||
# are preserved so the full model name reaches cache lookups and server queries.
|
||||
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-oauth", "minimax-cn", "anthropic", "deepseek",
|
||||
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||
"qwen-oauth",
|
||||
"xiaomi",
|
||||
"arcee",
|
||||
"gmi",
|
||||
"tencent-tokenhub",
|
||||
"custom", "local",
|
||||
# Common aliases
|
||||
"google", "google-gemini", "google-ai-studio",
|
||||
@@ -61,9 +59,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"ollama",
|
||||
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"mimo", "xiaomi-mimo",
|
||||
"tencent", "tokenhub", "tencent-cloud", "tencentmaas",
|
||||
"arcee-ai", "arceeai",
|
||||
"gmi-cloud", "gmicloud",
|
||||
"xai", "x-ai", "x.ai", "grok",
|
||||
"nvidia", "nim", "nvidia-nim", "nemotron",
|
||||
"qwen-portal",
|
||||
@@ -110,11 +106,9 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
|
||||
_ENDPOINT_MODEL_CACHE_TTL = 300
|
||||
|
||||
# Descending tiers for context length probing when the model is unknown.
|
||||
# We start at 256K (covers GPT-5.x, many current large-context models) and
|
||||
# step down on context-length errors until one works. Tier[0] is also the
|
||||
# default fallback when no detection method succeeds.
|
||||
# We start at 128K (a safe default for most modern models) and step down
|
||||
# on context-length errors until one works.
|
||||
CONTEXT_PROBE_TIERS = [
|
||||
256_000,
|
||||
128_000,
|
||||
64_000,
|
||||
32_000,
|
||||
@@ -149,11 +143,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"claude": 200000,
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
|
||||
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
|
||||
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
|
||||
# This hardcoded value is only reached when every probe misses.
|
||||
"gpt-5.5": 1050000,
|
||||
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
|
||||
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
|
||||
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
|
||||
"gpt-5.5": 400000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
@@ -169,17 +162,7 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek — V4 family ships with a 1M context window. The legacy
|
||||
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
|
||||
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
|
||||
# and inherit the same 1M window. The ``deepseek`` substring entry
|
||||
# below remains as a 128K fallback for older / unknown DeepSeek model
|
||||
# ids (e.g. via custom endpoints).
|
||||
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
# DeepSeek
|
||||
"deepseek": 128000,
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
@@ -210,8 +193,6 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
|
||||
# Kimi
|
||||
"kimi": 262144,
|
||||
# Tencent — Hy3 Preview (Hunyuan) with 256K context window
|
||||
"hy3-preview": 256000,
|
||||
# Nemotron — NVIDIA's open-weights series (128K context across all sizes)
|
||||
"nemotron": 131072,
|
||||
# Arcee
|
||||
@@ -313,22 +294,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"integrate.api.nvidia.com": "nvidia",
|
||||
"api.xiaomimimo.com": "xiaomi",
|
||||
"xiaomimimo.com": "xiaomi",
|
||||
"api.gmi-serving.com": "gmi",
|
||||
"tokenhub.tencentmaas.com": "tencent-tokenhub",
|
||||
"ollama.com": "ollama-cloud",
|
||||
}
|
||||
|
||||
# Auto-extend with hostnames derived from provider profiles.
|
||||
# Any provider with a base_url not already in the map gets added automatically.
|
||||
try:
|
||||
from providers import list_providers as _list_providers
|
||||
for _pp in _list_providers():
|
||||
_host = _pp.get_hostname()
|
||||
if _host and _host not in _URL_TO_PROVIDER:
|
||||
_URL_TO_PROVIDER[_host] = _pp.name
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _infer_provider_from_url(base_url: str) -> Optional[str]:
|
||||
"""Infer the models.dev provider name from a base URL.
|
||||
@@ -636,6 +604,8 @@ def fetch_endpoint_model_metadata(
|
||||
if isinstance(ctx, int) and ctx > 0:
|
||||
context_length = ctx
|
||||
break
|
||||
if context_length is None:
|
||||
context_length = _extract_context_length(model)
|
||||
if context_length is not None:
|
||||
entry["context_length"] = context_length
|
||||
|
||||
@@ -719,29 +689,6 @@ def fetch_endpoint_model_metadata(
|
||||
return {}
|
||||
|
||||
|
||||
def _resolve_endpoint_context_length(
    model: str,
    base_url: str,
    api_key: str = "",
) -> Optional[int]:
    """Look up ``model``'s context length in an endpoint's live ``/models`` metadata.

    Match order: exact model id; else the only entry when the endpoint lists
    exactly one model; else the first entry whose id contains ``model`` or is
    contained in it.

    Returns:
        The matched entry's integer ``context_length``, or ``None`` when
        nothing matches or the value is missing / not an int.
    """
    catalog = fetch_endpoint_model_metadata(base_url, api_key=api_key)
    entry = catalog.get(model)

    if not entry:
        if len(catalog) == 1:
            # Single-model server: whatever is loaded is the model in use.
            (entry,) = catalog.values()
        else:
            # Fuzzy match: substring in either direction, first hit wins.
            entry = next(
                (
                    meta
                    for name, meta in catalog.items()
                    if model in name or name in model
                ),
                entry,
            )

    if not entry:
        return None
    size = entry.get("context_length")
    return size if isinstance(size, int) else None
|
||||
|
||||
|
||||
def _get_context_cache_path() -> Path:
|
||||
"""Return path to the persistent context length cache file."""
|
||||
from hermes_constants import get_hermes_home
|
||||
@@ -1025,7 +972,10 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
|
||||
ctx = cfg.get("context_length")
|
||||
if ctx and isinstance(ctx, (int, float)):
|
||||
return int(ctx)
|
||||
break
|
||||
# Fall back to max_context_length (theoretical model max)
|
||||
ctx = m.get("max_context_length") or m.get("context_length")
|
||||
if ctx and isinstance(ctx, (int, float)):
|
||||
return int(ctx)
|
||||
|
||||
# LM Studio / vLLM / llama.cpp: try /v1/models/{model}
|
||||
resp = client.get(f"{server_url}/v1/models/{model}")
|
||||
@@ -1243,7 +1193,6 @@ def get_model_context_length(
|
||||
api_key: str = "",
|
||||
config_context_length: int | None = None,
|
||||
provider: str = "",
|
||||
custom_providers: list | None = None,
|
||||
) -> int:
|
||||
"""Get the context length for a model.
|
||||
|
||||
@@ -1258,39 +1207,19 @@ def get_model_context_length(
|
||||
6. Nous suffix-match via OpenRouter cache
|
||||
7. models.dev registry lookup (provider-aware)
|
||||
8. Thin hardcoded defaults (broad family patterns)
|
||||
9. Default fallback (256K)
|
||||
9. Default fallback (128K)
|
||||
"""
|
||||
# 0. Explicit config override — user knows best
|
||||
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
|
||||
return config_context_length
|
||||
|
||||
# 0b. custom_providers per-model override — check before any probe.
|
||||
# This closes the gap where /model switch and display paths used to fall
|
||||
# back to 128K despite the user having a per-model context_length set.
|
||||
# See #15779.
|
||||
if custom_providers and base_url and model:
|
||||
try:
|
||||
from hermes_cli.config import get_custom_provider_context_length
|
||||
cp_ctx = get_custom_provider_context_length(
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if cp_ctx:
|
||||
return cp_ctx
|
||||
except Exception:
|
||||
pass # fall through to probing
|
||||
|
||||
# Normalise provider-prefixed model names (e.g. "local:model-name" →
|
||||
# "model-name") so cache lookups and server queries use the bare ID that
|
||||
# local servers actually know about. Ollama "model:tag" colons are preserved.
|
||||
model = _strip_provider_prefix(model)
|
||||
|
||||
# 1. Check persistent cache (model+provider)
|
||||
# LM Studio is excluded — its loaded context length is transient (the
|
||||
# user can reload the model with a different context_length at any time
|
||||
# via /api/v1/models/load), so a stale cached value would mask reloads.
|
||||
if base_url and provider != "lmstudio":
|
||||
if base_url:
|
||||
cached = get_cached_context_length(model, base_url)
|
||||
if cached is not None:
|
||||
# Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
|
||||
@@ -1335,16 +1264,28 @@ def get_model_context_length(
|
||||
# returns 128k) instead of the model's full context (400k). models.dev
|
||||
# has the correct per-provider values and is checked at step 5+.
|
||||
if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
|
||||
context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
|
||||
if context_length is not None:
|
||||
return context_length
|
||||
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
|
||||
matched = endpoint_metadata.get(model)
|
||||
if not matched:
|
||||
# Single-model servers: if only one model is loaded, use it
|
||||
if len(endpoint_metadata) == 1:
|
||||
matched = next(iter(endpoint_metadata.values()))
|
||||
else:
|
||||
# Fuzzy match: substring in either direction
|
||||
for key, entry in endpoint_metadata.items():
|
||||
if model in key or key in model:
|
||||
matched = entry
|
||||
break
|
||||
if matched:
|
||||
context_length = matched.get("context_length")
|
||||
if isinstance(context_length, int):
|
||||
return context_length
|
||||
if not _is_known_provider_base_url(base_url):
|
||||
# 3. Try querying local server directly
|
||||
if is_local_endpoint(base_url):
|
||||
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
|
||||
if local_ctx and local_ctx > 0:
|
||||
if provider != "lmstudio":
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
return local_ctx
|
||||
logger.info(
|
||||
"Could not detect context length for model %r at %s — "
|
||||
@@ -1402,12 +1343,6 @@ def get_model_context_length(
|
||||
if base_url:
|
||||
save_context_length(model, base_url, codex_ctx)
|
||||
return codex_ctx
|
||||
if effective_provider == "gmi" and base_url:
|
||||
# GMI exposes authoritative context_length via /models, but it is not
|
||||
# in models.dev yet. Preserve that higher-fidelity endpoint lookup.
|
||||
ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
|
||||
if ctx is not None:
|
||||
return ctx
|
||||
if effective_provider:
|
||||
from agent.models_dev import lookup_models_dev_context
|
||||
ctx = lookup_models_dev_context(effective_provider, model)
|
||||
@@ -1417,7 +1352,7 @@ def get_model_context_length(
|
||||
# 6. OpenRouter live API metadata (provider-unaware fallback)
|
||||
metadata = fetch_model_metadata()
|
||||
if model in metadata:
|
||||
return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
|
||||
return metadata[model].get("context_length", 128000)
|
||||
|
||||
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
|
||||
# Only check `default_model in model` (is the key a substring of the input).
|
||||
@@ -1434,11 +1369,10 @@ def get_model_context_length(
|
||||
if base_url and is_local_endpoint(base_url):
|
||||
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
|
||||
if local_ctx and local_ctx > 0:
|
||||
if provider != "lmstudio":
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
return local_ctx
|
||||
|
||||
# 10. Default fallback — 256K
|
||||
# 10. Default fallback — 128K
|
||||
return DEFAULT_FALLBACK_CONTEXT
|
||||
|
||||
|
||||
|
||||
@@ -149,7 +149,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"stepfun": "stepfun",
|
||||
"kimi-coding-cn": "kimi-for-coding",
|
||||
"minimax": "minimax",
|
||||
"minimax-oauth": "minimax",
|
||||
"minimax-cn": "minimax-cn",
|
||||
"deepseek": "deepseek",
|
||||
"alibaba": "alibaba",
|
||||
|
||||
@@ -81,56 +81,15 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
|
||||
return repaired
|
||||
|
||||
# Rule 2: when anyOf is present, type belongs only on the children.
|
||||
# Additionally, Moonshot rejects null-type branches inside anyOf
|
||||
# (enum value (<nil>) does not match any type in [string]).
|
||||
# Collapse the anyOf to the first non-null branch and infer its type.
|
||||
if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
|
||||
repaired.pop("type", None)
|
||||
non_null = [b for b in repaired["anyOf"]
|
||||
if isinstance(b, dict) and b.get("type") != "null"]
|
||||
if non_null and len(non_null) < len(repaired["anyOf"]):
|
||||
# Drop the anyOf wrapper — keep only the non-null branch.
|
||||
# If there's a single non-null branch, promote it and fall
|
||||
# through to Rules 1/3 so nullable/enum cleanup still applies
|
||||
# to the merged node.
|
||||
if len(non_null) == 1:
|
||||
merge = {k: v for k, v in repaired.items() if k != "anyOf"}
|
||||
merge.update(non_null[0])
|
||||
repaired = merge
|
||||
else:
|
||||
repaired["anyOf"] = non_null
|
||||
return repaired
|
||||
else:
|
||||
# Nothing to collapse — parent type stripped, children already
|
||||
# repaired by the recursive walk above.
|
||||
return repaired
|
||||
|
||||
# Moonshot also rejects non-standard keywords like ``nullable`` on
|
||||
# parameter schemas — strip it.
|
||||
repaired.pop("nullable", None)
|
||||
return repaired
|
||||
|
||||
# Rule 1: property schemas without type need one. $ref nodes are exempt
|
||||
# — their type comes from the referenced definition.
|
||||
# Fill missing type BEFORE Rule 3 so enum cleanup can check the type.
|
||||
if "$ref" not in repaired:
|
||||
repaired = _fill_missing_type(repaired)
|
||||
|
||||
# Rule 3: Moonshot rejects null/empty-string values inside enum arrays
|
||||
# when the parent type is a scalar (string, integer, etc.). The error:
|
||||
# "enum value (<nil>) does not match any type in [string]"
|
||||
# Strip null and empty-string from enum values, and if the enum becomes
|
||||
# empty, drop it entirely.
|
||||
if "enum" in repaired and isinstance(repaired["enum"], list):
|
||||
node_type = repaired.get("type")
|
||||
if node_type in ("string", "integer", "number", "boolean"):
|
||||
cleaned = [v for v in repaired["enum"]
|
||||
if v is not None and v != ""]
|
||||
if cleaned:
|
||||
repaired["enum"] = cleaned
|
||||
else:
|
||||
repaired.pop("enum")
|
||||
|
||||
return repaired
|
||||
if "$ref" in repaired:
|
||||
return repaired
|
||||
return _fill_missing_type(repaired)
|
||||
|
||||
|
||||
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
|
||||
|
||||
@@ -18,7 +18,6 @@ import os
|
||||
import tempfile
|
||||
import time
|
||||
from typing import Any, Mapping, Optional
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -119,7 +118,7 @@ def record_nous_rate_limit(
|
||||
try:
|
||||
with os.fdopen(fd, "w") as f:
|
||||
json.dump(state, f)
|
||||
atomic_replace(tmp_path, path)
|
||||
os.replace(tmp_path, path)
|
||||
except Exception:
|
||||
# Clean up temp file on failure
|
||||
try:
|
||||
@@ -181,145 +180,3 @@ def format_remaining(seconds: float) -> str:
|
||||
h, remainder = divmod(s, 3600)
|
||||
m = remainder // 60
|
||||
return f"{h}h {m}m" if m else f"{h}h"
|
||||
|
||||
|
||||
# Buckets with reset windows shorter than this are treated as transient
|
||||
# (upstream jitter, secondary throttling) rather than a genuine quota
|
||||
# exhaustion worth a cross-session breaker trip.
|
||||
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
|
||||
|
||||
|
||||
def is_genuine_nous_rate_limit(
    *,
    headers: Optional[Mapping[str, str]] = None,
    last_known_state: Optional[Any] = None,
) -> bool:
    """Classify a Nous Portal 429 as a real account-level rate limit or not.

    Nous Portal fronts several upstream providers behind a single endpoint,
    so a 429 has two possible meanings:

      (a) one of the caller's own request/token buckets on Nous is used up
          and stays that way until the bucket resets; or
      (b) an upstream provider is briefly out of capacity for one model,
          which is unrelated to the caller's quota and clears quickly.

    Only (a) justifies tripping the cross-session breaker: tripping it on
    (b) would block every Nous model for minutes even though the account
    is healthy and another model would have worked.

    Two pieces of evidence are consulted, in this order:

      1. The ``x-ratelimit-*`` headers on the 429 itself. A bucket with no
         remaining allowance and a reset window of at least
         ``_MIN_RESET_FOR_BREAKER_SECONDS`` counts as proof of (a).
      2. The rate-limit state remembered from an earlier response, if the
         caller supplies one. An exhausted bucket there also counts as (a).

    Args:
        headers: Response headers of the 429, or None when unavailable.
        last_known_state: Previously captured rate-limit state. It is
            inspected via attribute access on ``requests_min``,
            ``requests_hour``, ``tokens_min`` and ``tokens_hour``.

    Returns:
        True when either signal indicates (a); False otherwise, in which
        case the caller should treat the 429 as transient.
    """
    # Signal 1: buckets advertised by the 429 response itself.
    header_buckets = _parse_buckets_from_headers(headers)
    if _has_exhausted_bucket(header_buckets):
        return True

    # Signal 2: only consulted when the headers were inconclusive and the
    # caller actually handed us a remembered state.
    if last_known_state is None:
        return False
    return _has_exhausted_bucket_in_object(last_known_state)
|
||||
|
||||
|
||||
def _parse_buckets_from_headers(
|
||||
headers: Optional[Mapping[str, str]],
|
||||
) -> dict[str, tuple[Optional[int], Optional[float]]]:
|
||||
"""Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
|
||||
|
||||
Returns empty dict when no rate-limit headers are present.
|
||||
"""
|
||||
if not headers:
|
||||
return {}
|
||||
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
if not any(k.startswith("x-ratelimit-") for k in lowered):
|
||||
return {}
|
||||
|
||||
def _maybe_int(raw: Optional[str]) -> Optional[int]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return int(float(raw))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
def _maybe_float(raw: Optional[str]) -> Optional[float]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
result: dict[str, tuple[Optional[int], Optional[float]]] = {}
|
||||
for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
|
||||
remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
|
||||
reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
|
||||
if remaining is not None or reset is not None:
|
||||
result[tag] = (remaining, reset)
|
||||
return result
|
||||
|
||||
|
||||
def _has_exhausted_bucket(
    buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
) -> bool:
    """Report whether any bucket is used up with a long enough reset window.

    A bucket qualifies when its ``remaining`` value is known and not
    positive, and its ``reset`` value is known and at least
    ``_MIN_RESET_FOR_BREAKER_SECONDS``. Buckets with either value missing
    never qualify.
    """
    return any(
        remaining is not None
        and not remaining > 0
        and reset is not None
        and reset >= _MIN_RESET_FOR_BREAKER_SECONDS
        for remaining, reset in buckets.values()
    )
|
||||
|
||||
|
||||
def _has_exhausted_bucket_in_object(state: Any) -> bool:
    """Look for an exhausted bucket on a RateLimitState-like object.

    The buckets are read as the attributes ``requests_min``,
    ``requests_hour``, ``tokens_min`` and ``tokens_hour``; any that are
    missing or None are skipped. On each bucket, ``limit`` and
    ``remaining`` default to 0 when absent or falsy, and the reset window
    is taken from ``remaining_seconds_now`` when that is not None,
    otherwise from ``reset_seconds``.

    Returns:
        True when some bucket has a positive limit, no remaining allowance,
        and a reset window of at least ``_MIN_RESET_FOR_BREAKER_SECONDS``.
    """

    def _bucket_is_exhausted(bucket: Any) -> bool:
        limit = getattr(bucket, "limit", 0) or 0
        remaining = getattr(bucket, "remaining", 0) or 0
        # The adjusted countdown wins over the raw reset value when present.
        reset = getattr(bucket, "remaining_seconds_now", None)
        if reset is None:
            reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
        # A bucket without a positive limit carries no quota information.
        if limit <= 0 or remaining > 0:
            return False
        return bool(reset >= _MIN_RESET_FOR_BREAKER_SECONDS)

    for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
        candidate = getattr(state, attr, None)
        if candidate is not None and _bucket_is_exhausted(candidate):
            return True
    return False
|
||||
|
||||
@@ -1,193 +0,0 @@
|
||||
"""
|
||||
Contextual first-touch onboarding hints.
|
||||
|
||||
Instead of blocking first-run questionnaires, show a one-time hint the *first*
|
||||
time a user hits a behavior fork — message-while-running, first long-running
|
||||
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
|
||||
``onboarding.seen.<flag>``) and then never again.
|
||||
|
||||
Keep this module tiny and dependency-free so both the CLI and gateway can import
|
||||
it without pulling in heavy modules.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Flag names (stable — used as config.yaml keys under onboarding.seen)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
BUSY_INPUT_FLAG = "busy_input_prompt"
|
||||
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
|
||||
OPENCLAW_RESIDUE_FLAG = "openclaw_residue_cleanup"
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Hint content
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def busy_input_hint_gateway(mode: str) -> str:
    """Build the one-time gateway hint for a message sent while the agent is busy.

    ``mode`` is the busy-input mode that was just applied, so the wording
    describes what actually happened. ``"queue"`` and ``"steer"`` have their
    own text; every other value gets the "interrupted" wording.
    """
    opening = "💡 First-time tip — "
    closing = "`/busy status` to check. This notice won't appear again."

    if mode == "queue":
        middle = (
            "I queued your message instead of interrupting. "
            "Send `/busy interrupt` to make new messages stop the current task "
            "immediately, or "
        )
    elif mode == "steer":
        middle = (
            "I steered your message into the current run; "
            "it will arrive after the next tool call instead of interrupting. "
            "Send `/busy interrupt` or `/busy queue` to change this, or "
        )
    else:
        middle = (
            "I just interrupted my current task to answer you. "
            "Send `/busy queue` to queue follow-ups for after the current task instead, "
            "`/busy steer` to inject them mid-run without interrupting, or "
        )
    return opening + middle + closing
|
||||
|
||||
|
||||
def busy_input_hint_cli(mode: str) -> str:
    """Build the plain-text CLI variant of the busy-input hint.

    ``"queue"`` and ``"steer"`` have dedicated wording; any other mode gets
    the "interrupted" wording.
    """
    if mode == "queue":
        detail = (
            "Your message was queued for the next turn. "
            "Use /busy interrupt to make Enter stop the current run instead, "
            "or /busy steer to inject mid-run."
        )
    elif mode == "steer":
        detail = (
            "Your message was steered into the current run; it arrives "
            "after the next tool call. Use /busy interrupt or /busy queue to "
            "change this."
        )
    else:
        detail = (
            "Your message interrupted the current run. "
            "Use /busy queue to queue messages for the next turn instead, "
            "or /busy steer to inject mid-run."
        )
    return "(tip) " + detail + " This tip only shows once."
|
||||
|
||||
|
||||
def tool_progress_hint_gateway() -> str:
    """Return the gateway hint about noisy tool-progress messages."""
    sentences = (
        "💡 First-time tip — that tool took a while and I'm streaming every step.",
        "If the progress messages feel noisy, send `/verbose` to cycle modes "
        "(all → new → off).",
        "This notice won't appear again.",
    )
    return " ".join(sentences)
|
||||
|
||||
|
||||
def tool_progress_hint_cli() -> str:
    """Return the plain-text CLI hint about the /verbose display modes."""
    sentences = (
        "(tip) That tool ran for a while.",
        "Use /verbose to cycle tool-progress display modes "
        "(all -> new -> off -> verbose).",
        "This tip only shows once.",
    )
    return " ".join(sentences)
|
||||
|
||||
|
||||
def openclaw_residue_hint_cli() -> str:
    """Return the banner shown when a legacy ``~/.openclaw/`` directory exists.

    The text recommends ``hermes claw migrate`` first, then mentions
    ``hermes claw cleanup`` for users who have already migrated, with a
    warning that archiving the directory stops OpenClaw from working.
    """
    lines = [
        "A legacy OpenClaw directory was detected at ~/.openclaw/.",
        "To port your config, memory, and skills over to Hermes, run "
        "`hermes claw migrate`.",
        "If you've already migrated and want to archive the old directory, "
        "run `hermes claw cleanup` (renames it to ~/.openclaw.pre-migration — "
        "OpenClaw will stop working after this).",
        "This tip only shows once.",
    ]
    return "\n".join(lines)
|
||||
|
||||
|
||||
def detect_openclaw_residue(home: Optional[Path] = None) -> bool:
    """Return True if a ``.openclaw`` directory exists under the home directory.

    Pure filesystem check with no side effects. Never raises for filesystem
    or home-resolution problems; those are reported as False.

    Args:
        home: Directory to look in instead of the real home directory.
            Exists mainly for tests.

    Returns:
        True when ``<home>/.openclaw`` is a directory, False otherwise.
    """
    try:
        # Path.home() is resolved inside the try: it raises RuntimeError
        # when the home directory cannot be determined, and a detection
        # helper should answer "no" rather than crash in that case.
        base = home or Path.home()
        return (base / ".openclaw").is_dir()
    except (OSError, RuntimeError):
        return False
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# State read / write
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
|
||||
onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
|
||||
if not isinstance(onboarding, Mapping):
|
||||
return {}
|
||||
seen = onboarding.get("seen")
|
||||
return seen if isinstance(seen, Mapping) else {}
|
||||
|
||||
|
||||
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
    """Tell whether the first-touch hint named ``flag`` was already shown.

    Looks ``flag`` up in the ``onboarding.seen`` mapping of ``config`` and
    returns its truthiness; a missing flag counts as not seen.
    """
    recorded = _get_seen_dict(config).get(flag)
    return True if recorded else False
|
||||
|
||||
|
||||
def mark_seen(config_path: Path, flag: str) -> bool:
    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.

    The existing YAML file is loaded if it exists; otherwise an empty
    document is used as the starting point. Missing or non-dict
    ``onboarding`` / ``seen`` entries are replaced with fresh dicts. When
    the flag is already exactly ``True`` nothing is written. The file is
    written through ``utils.atomic_yaml_write`` (presumably atomic, as the
    name suggests; that helper is not defined here).

    Returns:
        True when the flag is set, whether newly written or already
        present. False when the imports fail or any exception occurs while
        reading or writing; onboarding is best-effort and never raises.
    """
    try:
        import yaml
        from utils import atomic_yaml_write
    except Exception as exc:  # pragma: no cover — dependency issue
        logger.debug("onboarding: failed to import yaml/utils: %s", exc)
        return False

    try:
        document: dict = {}
        if config_path.exists():
            with open(config_path, encoding="utf-8") as handle:
                # An empty file loads as None; normalise that to a dict.
                document = yaml.safe_load(handle) or {}

        section = document.get("onboarding")
        if not isinstance(section, dict):
            section = {}
            document["onboarding"] = section

        flags = section.get("seen")
        if not isinstance(flags, dict):
            flags = {}
            section["seen"] = flags

        # Skip the write entirely when the flag is already recorded.
        if flags.get(flag) is not True:
            flags[flag] = True
            atomic_yaml_write(config_path, document)
        return True
    except Exception as exc:
        logger.debug("onboarding: failed to mark flag %s: %s", flag, exc)
        return False
|
||||
|
||||
|
||||
__all__ = [
|
||||
"BUSY_INPUT_FLAG",
|
||||
"TOOL_PROGRESS_FLAG",
|
||||
"OPENCLAW_RESIDUE_FLAG",
|
||||
"busy_input_hint_gateway",
|
||||
"busy_input_hint_cli",
|
||||
"tool_progress_hint_gateway",
|
||||
"tool_progress_hint_cli",
|
||||
"openclaw_residue_hint_cli",
|
||||
"detect_openclaw_residue",
|
||||
"is_seen",
|
||||
"mark_seen",
|
||||
]
|
||||
@@ -141,12 +141,6 @@ DEFAULT_AGENT_IDENTITY = (
|
||||
"Be targeted and efficient in your exploration and investigations."
|
||||
)
|
||||
|
||||
HERMES_AGENT_HELP_GUIDANCE = (
|
||||
"If the user asks about configuring, setting up, or using Hermes Agent "
|
||||
"itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
|
||||
"before answering. Docs: https://hermes-agent.nousresearch.com/docs"
|
||||
)
|
||||
|
||||
MEMORY_GUIDANCE = (
|
||||
"You have persistent memory across sessions. Save durable facts using the memory "
|
||||
"tool: user preferences, environment details, tool quirks, and stable conventions. "
|
||||
@@ -182,64 +176,6 @@ SKILLS_GUIDANCE = (
|
||||
"Skills that aren't maintained become liabilities."
|
||||
)
|
||||
|
||||
KANBAN_GUIDANCE = (
|
||||
"# Kanban task execution protocol\n"
|
||||
"You have been assigned ONE task from "
|
||||
"the shared board at `~/.hermes/kanban.db`. Your task id is in "
|
||||
"`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
|
||||
"The `kanban_*` tools in your schema are your primary coordination surface — "
|
||||
"they write directly to the shared SQLite DB and work regardless of terminal "
|
||||
"backend (local/docker/modal/ssh).\n"
|
||||
"\n"
|
||||
"## Lifecycle\n"
|
||||
"\n"
|
||||
"1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
|
||||
"task). The response includes title, body, parent-task handoffs (summary + "
|
||||
"metadata), any prior attempts on this task if you're a retry, the full "
|
||||
"comment thread, and a pre-formatted `worker_context` you can treat as "
|
||||
"ground truth.\n"
|
||||
"2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
|
||||
"any file operations. The workspace is yours for this run. Don't modify "
|
||||
"files outside it unless the task explicitly asks.\n"
|
||||
"3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
|
||||
"every few minutes during long subprocesses (training, encoding, crawling). "
|
||||
"Skip heartbeats for short tasks.\n"
|
||||
"4. **Block on genuine ambiguity.** If you need a human decision you cannot "
|
||||
"infer (missing credentials, UX choice, paywalled source, peer output you "
|
||||
"need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
|
||||
"The user will unblock with context and the dispatcher will respawn you.\n"
|
||||
"5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
|
||||
"metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
|
||||
"artifacts. `metadata` is machine-readable facts "
|
||||
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
|
||||
"workers read both via their own `kanban_show`. Never put secrets / "
|
||||
"tokens / raw PII in either field — run rows are durable forever.\n"
|
||||
"6. **If follow-up work appears, create it; don't do it.** Use "
|
||||
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
|
||||
"to spawn a child task for the appropriate specialist profile instead of "
|
||||
"scope-creeping into the next thing.\n"
|
||||
"\n"
|
||||
"## Orchestrator mode\n"
|
||||
"\n"
|
||||
"If your task is itself a decomposition task (e.g. a planner profile given "
|
||||
"a high-level goal), use `kanban_create` to fan out into child tasks — one "
|
||||
"per specialist, each with an explicit `assignee` and `parents=[...]` to "
|
||||
"express dependencies. Then `kanban_complete` your own task with a summary "
|
||||
"of the decomposition. Do NOT execute the work yourself; your job is "
|
||||
"routing, not implementation.\n"
|
||||
"\n"
|
||||
"## Do NOT\n"
|
||||
"\n"
|
||||
"- Do not shell out to `hermes kanban <verb>` for board operations. Use "
|
||||
"the `kanban_*` tools — they work across all terminal backends.\n"
|
||||
"- Do not complete a task you didn't actually finish. Block it.\n"
|
||||
"- Do not assign follow-up work to yourself. Assign it to the right "
|
||||
"specialist profile.\n"
|
||||
"- Do not call `delegate_task` as a board substitute. `delegate_task` is "
|
||||
"for short reasoning subtasks inside your own run; board tasks are for "
|
||||
"cross-agent handoffs that outlive one API loop."
|
||||
)
|
||||
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
"# Tool-use enforcement\n"
|
||||
"You MUST use your tools to take action — do not describe what you would do "
|
||||
@@ -368,10 +304,6 @@ PLATFORM_HINTS = {
|
||||
"Standard markdown is automatically converted to Telegram format. "
|
||||
"Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
|
||||
"`inline code`, ```code blocks```, [links](url), and ## headers. "
|
||||
"Telegram has NO table syntax — prefer bullet lists or labeled "
|
||||
"key: value pairs over pipe tables (any tables you do emit are "
|
||||
"auto-rewritten into row-group bullets, which you can produce "
|
||||
"directly for cleaner output). "
|
||||
"You can send media files natively: to deliver a file to the user, "
|
||||
"include MEDIA:/absolute/path/to/file in your response. Images "
|
||||
"(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
|
||||
@@ -490,35 +422,6 @@ PLATFORM_HINTS = {
|
||||
"your response. Images are sent as native photos, and other files arrive as downloadable "
|
||||
"documents."
|
||||
),
|
||||
"yuanbao": (
|
||||
"You are on Yuanbao (腾讯元宝), a Chinese AI assistant platform. "
|
||||
"Markdown formatting is supported (code blocks, tables, bold/italic). "
|
||||
"You CAN send media files natively — to deliver a file to the user, include "
|
||||
"MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
|
||||
"Yuanbao attachment: images (.jpg, .png, .webp, .gif) are sent as photos, "
|
||||
"and other files (.pdf, .docx, .txt, .zip, etc.) arrive as downloadable documents "
|
||||
"(max 50 MB). You can also include image URLs in markdown format  and "
|
||||
"they will be downloaded and sent as native photos. "
|
||||
"Do NOT tell the user you lack file-sending capability — use MEDIA: syntax "
|
||||
"whenever a file delivery is appropriate.\n\n"
|
||||
"Stickers (贴纸 / 表情包 / TIM face): Yuanbao has a built-in sticker catalogue. "
|
||||
"When the user sends a sticker (you see '[emoji: 名称]' in their message) or asks "
|
||||
"you to send/reply-with a 贴纸/表情/表情包, you MUST use the sticker tools:\n"
|
||||
" 1. Call yb_search_sticker with a Chinese keyword (e.g. '666', '比心', '吃瓜', "
|
||||
" '捂脸', '合十') to discover matching sticker_ids.\n"
|
||||
" 2. Call yb_send_sticker with the chosen sticker_id or name — this sends a real "
|
||||
" TIMFaceElem that renders as a native sticker in the chat.\n"
|
||||
"DO NOT draw sticker-like PNGs with execute_code/Pillow/matplotlib and then send "
|
||||
"them via MEDIA: or send_image_file. That produces a fake low-quality 'sticker' "
|
||||
"image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
|
||||
"— when a sticker is the right response, use yb_send_sticker."
|
||||
),
|
||||
"api_server": (
|
||||
"You're responding through an API server. The rendering layer is unknown — "
|
||||
"assume plain text. No markdown formatting (no asterisks, bullets, headers, "
|
||||
"code fences). Treat this like a conversation, not a document. Keep responses "
|
||||
"brief and natural."
|
||||
),
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -922,11 +825,6 @@ def build_skills_system_prompt(
|
||||
"Skills also encode the user's preferred approach, conventions, and quality standards "
|
||||
"for tasks like code review, planning, and testing — load them even for tasks you "
|
||||
"already know how to do, because the skill defines how it should be done here.\n"
|
||||
"Whenever the user asks you to configure, set up, install, enable, disable, modify, "
|
||||
"or troubleshoot Hermes Agent itself — its CLI, config, models, providers, tools, "
|
||||
"skills, voice, gateway, plugins, or any feature — load the `hermes-agent` skill "
|
||||
"first. It has the actual commands (e.g. `hermes config set …`, `hermes tools`, "
|
||||
"`hermes setup`) so you don't have to guess or invent workarounds.\n"
|
||||
"If a skill has issues, fix it with skill_manage(action='patch').\n"
|
||||
"After difficult/iterative tasks, offer to save as a skill. "
|
||||
"If a skill you loaded was missing steps, had wrong commands, or needed "
|
||||
|
||||
@@ -56,12 +56,8 @@ _SENSITIVE_BODY_KEYS = frozenset({
|
||||
})
|
||||
|
||||
# Snapshot at import time so runtime env mutations (e.g. LLM-generated
|
||||
# `export HERMES_REDACT_SECRETS=true`) cannot enable/disable redaction
|
||||
# mid-session. OFF by default — user must opt in via
|
||||
# `security.redact_secrets: true` in config.yaml (bridged to this env var
|
||||
# in hermes_cli/main.py and gateway/run.py) or `HERMES_REDACT_SECRETS=true`
|
||||
# in ~/.hermes/.env.
|
||||
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("1", "true", "yes", "on")
|
||||
# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
|
||||
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
|
||||
|
||||
# Known API key prefixes -- match the prefix + contiguous token chars
|
||||
_PREFIX_PATTERNS = [
|
||||
@@ -184,59 +180,11 @@ _PREFIX_RE = re.compile(
|
||||
)
|
||||
|
||||
|
||||
def mask_secret(
    value: str,
    *,
    head: int = 4,
    tail: int = 4,
    floor: int = 12,
    placeholder: str = "***",
    empty: str = "",
) -> str:
    """Mask a secret for display, preserving ``head`` and ``tail`` characters.

    Canonical helper for display-time redaction: shows just enough of a
    secret to recognise it while keeping the bulk hidden.

    Args:
        value: The secret to mask. ``None``/empty returns ``empty``.
        head: Leading characters to preserve. Default 4. Negative values
            are treated as 0.
        tail: Trailing characters to preserve. Default 4. Negative values
            are treated as 0.
        floor: Values shorter than ``floor`` characters are fully masked
            (returns ``placeholder``). Default 12.
        placeholder: Value returned for inputs that are too short to be
            partially shown. Default ``"***"``.
        empty: Value returned when ``value`` is falsy (None, ""). Callers
            can override this, e.g. with ``"(not set)"``.

    Returns:
        ``empty`` for falsy input; ``placeholder`` when the value is
        shorter than ``floor`` or not longer than ``head + tail`` (showing
        both ends would reveal all of it); otherwise
        ``"<head chars>...<tail chars>"``.

    Examples:
        >>> mask_secret("sk-proj-abcdef1234567890")
        'sk-p...7890'
        >>> mask_secret("short")                      # fully masked
        '***'
        >>> mask_secret("")                           # empty default
        ''
        >>> mask_secret("", empty="(not set)")        # empty override
        '(not set)'
        >>> mask_secret("long-token", head=6, tail=4, floor=18)
        '***'
    """
    if not value:
        return empty

    keep_head = max(head, 0)
    keep_tail = max(tail, 0)

    # Fully mask when the value is below the floor, or when head and tail
    # together would cover (and therefore disclose) the whole secret.
    if len(value) < floor or len(value) <= keep_head + keep_tail:
        return placeholder

    # value[-0:] is the WHOLE string, so a zero tail must be special-cased
    # or the "masked" output would contain the full secret.
    suffix = value[-keep_tail:] if keep_tail else ""
    return f"{value[:keep_head]}...{suffix}"
|
||||
|
||||
|
||||
def _mask_token(token: str) -> str:
|
||||
"""Mask a log token — conservative 18-char floor, preserves 6 prefix / 4 suffix."""
|
||||
# Empty input: historically this returned "***" rather than "". Preserve.
|
||||
if not token:
|
||||
"""Mask a token, preserving prefix for long tokens."""
|
||||
if len(token) < 18:
|
||||
return "***"
|
||||
return mask_secret(token, head=6, tail=4, floor=18)
|
||||
return f"{token[:6]}...{token[-4:]}"
|
||||
|
||||
|
||||
def _redact_query_string(query: str) -> str:
|
||||
@@ -305,18 +253,11 @@ def _redact_form_body(text: str) -> str:
|
||||
return _redact_query_string(text.strip())
|
||||
|
||||
|
||||
def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str:
|
||||
def redact_sensitive_text(text: str) -> str:
|
||||
"""Apply all redaction patterns to a block of text.
|
||||
|
||||
Safe to call on any string -- non-matching text passes through unchanged.
|
||||
Disabled by default — enable via security.redact_secrets: true in config.yaml.
|
||||
Set force=True for safety boundaries that must never return raw secrets
|
||||
regardless of the user's global logging redaction preference.
|
||||
|
||||
Set code_file=True to skip the ENV-assignment and JSON-field regex
|
||||
patterns when the text is known to be source code (e.g. MAX_TOKENS=***
|
||||
constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
|
||||
private keys, DB connstrings, JWTs, and URL secrets are still redacted.
|
||||
Disabled when security.redact_secrets is false in config.yaml.
|
||||
"""
|
||||
if text is None:
|
||||
return None
|
||||
@@ -324,24 +265,23 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
|
||||
text = str(text)
|
||||
if not text:
|
||||
return text
|
||||
if not (force or _REDACT_ENABLED):
|
||||
if not _REDACT_ENABLED:
|
||||
return text
|
||||
|
||||
# Known prefixes (sk-, ghp_, etc.)
|
||||
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
|
||||
|
||||
# ENV assignments: OPENAI_API_KEY=*** (skip for code files — false positives)
|
||||
if not code_file:
|
||||
def _redact_env(m):
|
||||
name, quote, value = m.group(1), m.group(2), m.group(3)
|
||||
return f"{name}={quote}{_mask_token(value)}{quote}"
|
||||
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
|
||||
# ENV assignments: OPENAI_API_KEY=sk-abc...
|
||||
def _redact_env(m):
|
||||
name, quote, value = m.group(1), m.group(2), m.group(3)
|
||||
return f"{name}={quote}{_mask_token(value)}{quote}"
|
||||
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
|
||||
|
||||
# JSON fields: "apiKey": "***" (skip for code files — false positives)
|
||||
def _redact_json(m):
|
||||
key, value = m.group(1), m.group(2)
|
||||
return f'{key}: "{_mask_token(value)}"'
|
||||
text = _JSON_FIELD_RE.sub(_redact_json, text)
|
||||
# JSON fields: "apiKey": "value"
|
||||
def _redact_json(m):
|
||||
key, value = m.group(1), m.group(2)
|
||||
return f'{key}: "{_mask_token(value)}"'
|
||||
text = _JSON_FIELD_RE.sub(_redact_json, text)
|
||||
|
||||
# Authorization headers
|
||||
text = _AUTH_HEADER_RE.sub(
|
||||
|
||||
@@ -76,7 +76,6 @@ except ImportError: # pragma: no cover
|
||||
fcntl = None # type: ignore[assignment]
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -569,7 +568,7 @@ def save_allowlist(data: Dict[str, Any]) -> None:
|
||||
try:
|
||||
with os.fdopen(fd, "w") as fh:
|
||||
fh.write(json.dumps(data, indent=2, sort_keys=True))
|
||||
atomic_replace(tmp_path, p)
|
||||
os.replace(tmp_path, p)
|
||||
except Exception:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
@@ -755,11 +754,7 @@ def _resolve_effective_accept(
|
||||
if env in ("1", "true", "yes", "on"):
|
||||
return True
|
||||
cfg_val = cfg.get("hooks_auto_accept", False)
|
||||
if isinstance(cfg_val, bool):
|
||||
return cfg_val
|
||||
if isinstance(cfg_val, str):
|
||||
return cfg_val.strip().lower() in ("1", "true", "yes", "on")
|
||||
return False
|
||||
return bool(cfg_val)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -6,7 +6,6 @@ can invoke skills via /skill-name commands.
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
@@ -21,35 +20,10 @@ from agent.skill_preprocessing import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
_skill_commands_platform: Optional[str] = None
|
||||
# Patterns for sanitizing skill names into clean hyphen-separated slugs.
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
|
||||
def _resolve_skill_commands_platform() -> Optional[str]:
|
||||
"""Return the current platform scope used for disabled-skill filtering.
|
||||
|
||||
Used to detect when the active platform has shifted so
|
||||
:func:`get_skill_commands` can drop a stale cache that was populated
|
||||
for a different platform's ``skills.platform_disabled`` view (#14536).
|
||||
|
||||
Resolves from (in order) ``HERMES_PLATFORM`` env var and
|
||||
``HERMES_SESSION_PLATFORM`` from the gateway session context. Returns
|
||||
``None`` when no platform scope is active (e.g. classic CLI, RL
|
||||
rollouts, standalone scripts).
|
||||
"""
|
||||
try:
|
||||
from gateway.session_context import get_session_env
|
||||
|
||||
resolved_platform = (
|
||||
os.getenv("HERMES_PLATFORM")
|
||||
or get_session_env("HERMES_SESSION_PLATFORM")
|
||||
)
|
||||
except Exception:
|
||||
resolved_platform = os.getenv("HERMES_PLATFORM")
|
||||
return resolved_platform or None
|
||||
|
||||
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
||||
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
|
||||
raw_identifier = (skill_identifier or "").strip()
|
||||
@@ -244,8 +218,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
Returns:
|
||||
Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}.
|
||||
"""
|
||||
global _skill_commands, _skill_commands_platform
|
||||
_skill_commands_platform = _resolve_skill_commands_platform()
|
||||
global _skill_commands
|
||||
_skill_commands = {}
|
||||
try:
|
||||
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
|
||||
@@ -261,7 +234,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
|
||||
for scan_dir in dirs_to_scan:
|
||||
for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
|
||||
if any(part in ('.git', '.github', '.hub', '.archive') for part in skill_md.parts):
|
||||
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
|
||||
continue
|
||||
try:
|
||||
content = skill_md.read_text(encoding='utf-8')
|
||||
@@ -305,85 +278,12 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
|
||||
|
||||
def get_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
"""Return the current skill commands mapping (scan first if empty).
|
||||
|
||||
Rescans when the active platform scope changes (e.g. a gateway
|
||||
process serving Telegram and Discord concurrently) so each platform
|
||||
sees its own ``skills.platform_disabled`` view (#14536).
|
||||
"""
|
||||
if (
|
||||
not _skill_commands
|
||||
or _skill_commands_platform != _resolve_skill_commands_platform()
|
||||
):
|
||||
"""Return the current skill commands mapping (scan first if empty)."""
|
||||
if not _skill_commands:
|
||||
scan_skill_commands()
|
||||
return _skill_commands
|
||||
|
||||
|
||||
def reload_skills() -> Dict[str, Any]:
|
||||
"""Re-scan the skills directory and return a diff of what changed.
|
||||
|
||||
Rescans ``~/.hermes/skills/`` and any ``skills.external_dirs`` so the
|
||||
slash-command map (``agent.skill_commands._skill_commands``) reflects
|
||||
skills added or removed on disk.
|
||||
|
||||
This does NOT invalidate the skills system-prompt cache. Skills are
|
||||
called by name via ``/skill-name``, ``skills_list``, or ``skill_view``
|
||||
— they don't need to be in the system prompt for the model to use them.
|
||||
Keeping the prompt cache intact preserves prefix caching across the
|
||||
reload, so a user invoking ``/reload-skills`` pays no cache-reset cost.
|
||||
|
||||
Returns:
|
||||
Dict with keys::
|
||||
|
||||
{
|
||||
"added": [{"name": str, "description": str}, ...],
|
||||
"removed": [{"name": str, "description": str}, ...],
|
||||
"unchanged": [skill names present before and after],
|
||||
"total": total skill count after rescan,
|
||||
"commands": total /slash-skill count after rescan,
|
||||
}
|
||||
|
||||
``description`` is the skill's full SKILL.md frontmatter
|
||||
``description:`` field — the same string the system prompt renders
|
||||
as `` - name: description`` for pre-existing skills.
|
||||
"""
|
||||
# Snapshot pre-reload state (name -> description) from the current
|
||||
# slash-command cache. Using dicts lets the post-rescan diff carry
|
||||
# descriptions for newly-visible or just-removed skills without a
|
||||
# second disk walk.
|
||||
def _snapshot(cmds: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
|
||||
out: Dict[str, str] = {}
|
||||
for slash_key, info in cmds.items():
|
||||
bare = slash_key.lstrip("/")
|
||||
out[bare] = (info or {}).get("description") or ""
|
||||
return out
|
||||
|
||||
before = _snapshot(_skill_commands)
|
||||
|
||||
# Rescan the skills dir. ``scan_skill_commands`` resets
|
||||
# ``_skill_commands = {}`` internally and repopulates it.
|
||||
new_commands = scan_skill_commands()
|
||||
|
||||
after = _snapshot(new_commands)
|
||||
|
||||
added_names = sorted(set(after) - set(before))
|
||||
removed_names = sorted(set(before) - set(after))
|
||||
unchanged = sorted(set(after) & set(before))
|
||||
|
||||
added = [{"name": n, "description": after[n]} for n in added_names]
|
||||
# For removed skills, use the description we had cached pre-rescan
|
||||
# (the skill file is gone so we can't re-read it).
|
||||
removed = [{"name": n, "description": before[n]} for n in removed_names]
|
||||
|
||||
return {
|
||||
"added": added,
|
||||
"removed": removed,
|
||||
"unchanged": unchanged,
|
||||
"total": len(after),
|
||||
"commands": len(new_commands),
|
||||
}
|
||||
|
||||
|
||||
def resolve_skill_command_key(command: str) -> Optional[str]:
|
||||
"""Resolve a user-typed /command to its canonical skill_cmds key.
|
||||
|
||||
@@ -428,16 +328,8 @@ def build_skill_invocation_message(
|
||||
return f"[Failed to load skill: {skill_info['name']}]"
|
||||
|
||||
loaded_skill, skill_dir, skill_name = loaded
|
||||
|
||||
# Track active usage for Curator lifecycle management (#17782)
|
||||
try:
|
||||
from tools.skill_usage import bump_use
|
||||
bump_use(skill_name)
|
||||
except Exception:
|
||||
pass # Non-critical — skill invocation proceeds regardless
|
||||
|
||||
activation_note = (
|
||||
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
|
||||
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
|
||||
"you to follow its instructions. The full skill content is loaded below.]"
|
||||
)
|
||||
return _build_skill_message(
|
||||
@@ -475,16 +367,8 @@ def build_preloaded_skills_prompt(
|
||||
continue
|
||||
|
||||
loaded_skill, skill_dir, skill_name = loaded
|
||||
|
||||
# Track active usage for Curator lifecycle management (#17782)
|
||||
try:
|
||||
from tools.skill_usage import bump_use
|
||||
bump_use(skill_name)
|
||||
except Exception:
|
||||
pass # Non-critical
|
||||
|
||||
activation_note = (
|
||||
f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
|
||||
f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
|
||||
"preloaded. Treat its instructions as active guidance for the duration of this "
|
||||
"session unless the user overrides them.]"
|
||||
)
|
||||
|
||||
@@ -24,7 +24,7 @@ PLATFORM_MAP = {
|
||||
"windows": "win32",
|
||||
}
|
||||
|
||||
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
|
||||
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub"))
|
||||
|
||||
# ── Lazy YAML loader ─────────────────────────────────────────────────────
|
||||
|
||||
@@ -200,9 +200,6 @@ def get_external_skills_dirs() -> List[Path]:
|
||||
if not isinstance(raw_dirs, list):
|
||||
return []
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
hermes_home = get_hermes_home()
|
||||
local_skills = get_skills_dir().resolve()
|
||||
seen: Set[Path] = set()
|
||||
result: List[Path] = []
|
||||
@@ -213,12 +210,7 @@ def get_external_skills_dirs() -> List[Path]:
|
||||
continue
|
||||
# Expand ~ and environment variables
|
||||
expanded = os.path.expanduser(os.path.expandvars(entry))
|
||||
p = Path(expanded)
|
||||
# Resolve relative paths against HERMES_HOME, not cwd
|
||||
if not p.is_absolute():
|
||||
p = (hermes_home / p).resolve()
|
||||
else:
|
||||
p = p.resolve()
|
||||
p = Path(expanded).resolve()
|
||||
if p == local_skills:
|
||||
continue
|
||||
if p in seen:
|
||||
@@ -440,7 +432,7 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
|
||||
def iter_skill_index_files(skills_dir: Path, filename: str):
|
||||
"""Walk skills_dir yielding sorted paths matching *filename*.
|
||||
|
||||
Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
|
||||
Excludes ``.git``, ``.github``, ``.hub`` directories.
|
||||
"""
|
||||
matches = []
|
||||
for root, dirs, files in os.walk(skills_dir, followlinks=True):
|
||||
|
||||
@@ -1,386 +0,0 @@
|
||||
"""Stateful scrubber for reasoning/thinking blocks in streamed assistant text.
|
||||
|
||||
``run_agent._strip_think_blocks`` is regex-based and correct for a complete
|
||||
string, but when it runs *per-delta* in ``_fire_stream_delta`` it destroys
|
||||
the state that downstream consumers (CLI ``_stream_delta``, gateway
|
||||
``GatewayStreamConsumer._filter_and_accumulate``) rely on.
|
||||
|
||||
Concretely, when MiniMax-M2.7 streams
|
||||
|
||||
delta1 = "<think>"
|
||||
delta2 = "Let me check their config"
|
||||
delta3 = "</think>"
|
||||
|
||||
the per-delta regex erases delta1 entirely (case 2: unterminated-open at
|
||||
boundary matches ``^<think>...``), so the downstream state machine never
|
||||
sees the open tag, treats delta2 as regular content, and leaks reasoning
|
||||
to the user. Consumers that don't run their own state machine (ACP,
|
||||
api_server, TTS) never had any defence at all — they just emitted
|
||||
whatever survived the upstream regex.
|
||||
|
||||
This module centralises the tag-suppression state machine at the
|
||||
upstream layer so every stream_delta_callback sees text that has
|
||||
already had reasoning blocks removed. Partial tags at delta
|
||||
boundaries are held back until the next delta resolves them, and
|
||||
end-of-stream flushing surfaces any held-back prose that turned out
|
||||
not to be a real tag.
|
||||
|
||||
Usage::
|
||||
|
||||
scrubber = StreamingThinkScrubber()
|
||||
for delta in stream:
|
||||
visible = scrubber.feed(delta)
|
||||
if visible:
|
||||
emit(visible)
|
||||
tail = scrubber.flush() # at end of stream
|
||||
if tail:
|
||||
emit(tail)
|
||||
|
||||
The scrubber is re-entrant per agent instance. Call ``reset()`` at
|
||||
the top of each new turn so a hung block from an interrupted prior
|
||||
stream cannot taint the next turn's output.
|
||||
|
||||
Tag variants handled (case-insensitive):
|
||||
``<think>``, ``<thinking>``, ``<reasoning>``, ``<thought>``,
|
||||
``<REASONING_SCRATCHPAD>``.
|
||||
|
||||
Block-boundary rule for opens: an opening tag is only treated as a
|
||||
reasoning-block opener when it appears at the start of the stream,
|
||||
after a newline (optionally followed by whitespace), or when only
|
||||
whitespace has been emitted on the current line. This prevents prose
|
||||
that *mentions* the tag name (e.g. ``"use <think> tags here"``) from
|
||||
being incorrectly suppressed. Closed pairs (``<think>X</think>``) are
|
||||
always suppressed regardless of boundary; a closed pair is an
|
||||
intentional, bounded construct.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
__all__ = ["StreamingThinkScrubber"]
|
||||
|
||||
|
||||
class StreamingThinkScrubber:
|
||||
"""Stateful scrubber for streaming reasoning/thinking blocks.
|
||||
|
||||
State machine:
|
||||
- ``_in_block``: True while inside an opened block, waiting for
|
||||
a close tag. All text inside is discarded.
|
||||
- ``_buf``: held-back partial-tag tail. Emitted / discarded on
|
||||
the next ``feed()`` call or by ``flush()``.
|
||||
- ``_last_emitted_ended_newline``: True iff the most recent
|
||||
emission to the consumer ended with ``\\n``, or nothing has
|
||||
been emitted yet (start-of-stream counts as a boundary). Used
|
||||
to decide whether an open tag at buffer position 0 is at a
|
||||
block boundary.
|
||||
"""
|
||||
|
||||
_OPEN_TAG_NAMES: Tuple[str, ...] = (
|
||||
"think",
|
||||
"thinking",
|
||||
"reasoning",
|
||||
"thought",
|
||||
"REASONING_SCRATCHPAD",
|
||||
)
|
||||
|
||||
# Materialise literal tag strings so the hot path does string
|
||||
# operations, not regex compilation per feed().
|
||||
_OPEN_TAGS: Tuple[str, ...] = tuple(f"<{name}>" for name in _OPEN_TAG_NAMES)
|
||||
_CLOSE_TAGS: Tuple[str, ...] = tuple(f"</{name}>" for name in _OPEN_TAG_NAMES)
|
||||
|
||||
# Pre-compute the longest tag (for partial-tag hold-back bound).
|
||||
_MAX_TAG_LEN: int = max(len(tag) for tag in _OPEN_TAGS + _CLOSE_TAGS)
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._in_block: bool = False
|
||||
self._buf: str = ""
|
||||
self._last_emitted_ended_newline: bool = True
|
||||
|
||||
def reset(self) -> None:
|
||||
"""Reset all state. Call at the top of every new turn."""
|
||||
self._in_block = False
|
||||
self._buf = ""
|
||||
self._last_emitted_ended_newline = True
|
||||
|
||||
def feed(self, text: str) -> str:
|
||||
"""Feed one delta; return the scrubbed visible portion.
|
||||
|
||||
May return an empty string when the entire delta is reasoning
|
||||
content or is being held back pending resolution of a partial
|
||||
tag at the boundary.
|
||||
"""
|
||||
if not text:
|
||||
return ""
|
||||
buf = self._buf + text
|
||||
self._buf = ""
|
||||
out: list[str] = []
|
||||
|
||||
while buf:
|
||||
if self._in_block:
|
||||
# Hunt for the earliest close tag.
|
||||
close_idx, close_len = self._find_first_tag(
|
||||
buf, self._CLOSE_TAGS,
|
||||
)
|
||||
if close_idx == -1:
|
||||
# No close yet — hold back a potential partial
|
||||
# close-tag prefix; discard everything else.
|
||||
held = self._max_partial_suffix(buf, self._CLOSE_TAGS)
|
||||
self._buf = buf[-held:] if held else ""
|
||||
return "".join(out)
|
||||
# Found close: discard block content + tag, continue.
|
||||
buf = buf[close_idx + close_len:]
|
||||
self._in_block = False
|
||||
else:
|
||||
# Priority 1 — closed <tag>X</tag> pair anywhere in
|
||||
# buf. Closed pairs are always an intentional,
|
||||
# bounded construct (even mid-line prose containing
|
||||
# an open/close pair is almost certainly a model
|
||||
# leaking reasoning inline), so no boundary gating.
|
||||
pair = self._find_earliest_closed_pair(buf)
|
||||
# Priority 2 — unterminated open tag at a block
|
||||
# boundary. Boundary-gated so prose that mentions
|
||||
# '<think>' isn't over-stripped.
|
||||
open_idx, open_len = self._find_open_at_boundary(
|
||||
buf, out,
|
||||
)
|
||||
|
||||
# Pick whichever match comes earliest in the buffer.
|
||||
if pair is not None and (
|
||||
open_idx == -1 or pair[0] <= open_idx
|
||||
):
|
||||
start_idx, end_idx = pair
|
||||
preceding = buf[:start_idx]
|
||||
if preceding:
|
||||
preceding = self._strip_orphan_close_tags(preceding)
|
||||
if preceding:
|
||||
out.append(preceding)
|
||||
self._last_emitted_ended_newline = (
|
||||
preceding.endswith("\n")
|
||||
)
|
||||
buf = buf[end_idx:]
|
||||
continue
|
||||
|
||||
if open_idx != -1:
|
||||
# Unterminated open at boundary — emit preceding,
|
||||
# enter block, continue loop with remainder.
|
||||
preceding = buf[:open_idx]
|
||||
if preceding:
|
||||
preceding = self._strip_orphan_close_tags(preceding)
|
||||
if preceding:
|
||||
out.append(preceding)
|
||||
self._last_emitted_ended_newline = (
|
||||
preceding.endswith("\n")
|
||||
)
|
||||
self._in_block = True
|
||||
buf = buf[open_idx + open_len:]
|
||||
continue
|
||||
|
||||
# No resolvable tag structure in buf. Hold back any
|
||||
# partial-tag prefix at the tail so a split tag
|
||||
# across deltas isn't missed, then emit the rest.
|
||||
held = self._max_partial_suffix(buf, self._OPEN_TAGS)
|
||||
held_close = self._max_partial_suffix(
|
||||
buf, self._CLOSE_TAGS,
|
||||
)
|
||||
held = max(held, held_close)
|
||||
if held:
|
||||
emit_text = buf[:-held]
|
||||
self._buf = buf[-held:]
|
||||
else:
|
||||
emit_text = buf
|
||||
self._buf = ""
|
||||
if emit_text:
|
||||
emit_text = self._strip_orphan_close_tags(emit_text)
|
||||
if emit_text:
|
||||
out.append(emit_text)
|
||||
self._last_emitted_ended_newline = (
|
||||
emit_text.endswith("\n")
|
||||
)
|
||||
return "".join(out)
|
||||
|
||||
return "".join(out)
|
||||
|
||||
def flush(self) -> str:
|
||||
"""End-of-stream flush.
|
||||
|
||||
If still inside an unterminated block, held-back content is
|
||||
discarded — leaking partial reasoning is worse than a
|
||||
truncated answer. Otherwise the held-back partial-tag tail is
|
||||
emitted verbatim (it turned out not to be a real tag prefix).
|
||||
"""
|
||||
if self._in_block:
|
||||
self._buf = ""
|
||||
self._in_block = False
|
||||
return ""
|
||||
tail = self._buf
|
||||
self._buf = ""
|
||||
if not tail:
|
||||
return ""
|
||||
tail = self._strip_orphan_close_tags(tail)
|
||||
if tail:
|
||||
self._last_emitted_ended_newline = tail.endswith("\n")
|
||||
return tail
|
||||
|
||||
# ── internal helpers ───────────────────────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
def _find_first_tag(
|
||||
buf: str, tags: Tuple[str, ...],
|
||||
) -> Tuple[int, int]:
|
||||
"""Return (earliest_index, tag_length) over *tags*, or (-1, 0).
|
||||
|
||||
Case-insensitive match.
|
||||
"""
|
||||
buf_lower = buf.lower()
|
||||
best_idx = -1
|
||||
best_len = 0
|
||||
for tag in tags:
|
||||
idx = buf_lower.find(tag.lower())
|
||||
if idx != -1 and (best_idx == -1 or idx < best_idx):
|
||||
best_idx = idx
|
||||
best_len = len(tag)
|
||||
return best_idx, best_len
|
||||
|
||||
def _find_earliest_closed_pair(self, buf: str):
|
||||
"""Return (start_idx, end_idx) of the earliest closed pair, else None.
|
||||
|
||||
A closed pair is ``<tag>...</tag>`` of any variant. Matches are
|
||||
case-insensitive and non-greedy (the closest close tag after
|
||||
an open tag wins), matching the regex ``<tag>.*?</tag>``
|
||||
semantics of ``_strip_think_blocks`` case 1. When two tag
|
||||
variants could both match, the one whose open tag appears
|
||||
earlier wins.
|
||||
"""
|
||||
buf_lower = buf.lower()
|
||||
best: "tuple[int, int] | None" = None
|
||||
for open_tag, close_tag in zip(self._OPEN_TAGS, self._CLOSE_TAGS):
|
||||
open_lower = open_tag.lower()
|
||||
close_lower = close_tag.lower()
|
||||
open_idx = buf_lower.find(open_lower)
|
||||
if open_idx == -1:
|
||||
continue
|
||||
close_idx = buf_lower.find(
|
||||
close_lower, open_idx + len(open_lower),
|
||||
)
|
||||
if close_idx == -1:
|
||||
continue
|
||||
end_idx = close_idx + len(close_lower)
|
||||
if best is None or open_idx < best[0]:
|
||||
best = (open_idx, end_idx)
|
||||
return best
|
||||
|
||||
def _find_open_at_boundary(
|
||||
self, buf: str, already_emitted: list[str],
|
||||
) -> Tuple[int, int]:
|
||||
"""Return the earliest block-boundary open-tag (idx, len).
|
||||
|
||||
Returns (-1, 0) if no boundary-legal opener is present.
|
||||
"""
|
||||
buf_lower = buf.lower()
|
||||
best_idx = -1
|
||||
best_len = 0
|
||||
for tag in self._OPEN_TAGS:
|
||||
tag_lower = tag.lower()
|
||||
search_start = 0
|
||||
while True:
|
||||
idx = buf_lower.find(tag_lower, search_start)
|
||||
if idx == -1:
|
||||
break
|
||||
if self._is_block_boundary(buf, idx, already_emitted):
|
||||
if best_idx == -1 or idx < best_idx:
|
||||
best_idx = idx
|
||||
best_len = len(tag)
|
||||
break # first boundary hit for this tag is enough
|
||||
search_start = idx + 1
|
||||
return best_idx, best_len
|
||||
|
||||
def _is_block_boundary(
|
||||
self, buf: str, idx: int, already_emitted: list[str],
|
||||
) -> bool:
|
||||
"""True iff position *idx* in *buf* is a block boundary.
|
||||
|
||||
A block boundary is:
|
||||
- buf position 0 AND the most recent emission ended with
|
||||
a newline (or nothing has been emitted yet)
|
||||
- any position whose preceding text on the current line
|
||||
(since the last newline in buf) is whitespace-only, AND
|
||||
if there is no newline in the preceding buf portion, the
|
||||
most recent prior emission ended with a newline
|
||||
"""
|
||||
if idx == 0:
|
||||
# Check whether the last already-emitted chunk in THIS
|
||||
# feed() call ended with a newline, otherwise fall back
|
||||
# to the cross-feed flag.
|
||||
if already_emitted:
|
||||
return already_emitted[-1].endswith("\n")
|
||||
return self._last_emitted_ended_newline
|
||||
preceding = buf[:idx]
|
||||
last_nl = preceding.rfind("\n")
|
||||
if last_nl == -1:
|
||||
# No newline in buf before the tag — boundary only if the
|
||||
# prior emission ended with a newline AND everything since
|
||||
# is whitespace.
|
||||
if already_emitted:
|
||||
prior_newline = already_emitted[-1].endswith("\n")
|
||||
else:
|
||||
prior_newline = self._last_emitted_ended_newline
|
||||
return prior_newline and preceding.strip() == ""
|
||||
# Newline present — text between it and the tag must be
|
||||
# whitespace-only.
|
||||
return preceding[last_nl + 1:].strip() == ""
|
||||
|
||||
@classmethod
|
||||
def _max_partial_suffix(
|
||||
cls, buf: str, tags: Tuple[str, ...],
|
||||
) -> int:
|
||||
"""Return the longest buf-suffix that is a prefix of any tag.
|
||||
|
||||
Only prefixes strictly shorter than the tag itself count
|
||||
(full-length suffixes are the tag and are handled as matches,
|
||||
not held-back partials). Case-insensitive.
|
||||
"""
|
||||
if not buf:
|
||||
return 0
|
||||
buf_lower = buf.lower()
|
||||
max_check = min(len(buf_lower), cls._MAX_TAG_LEN - 1)
|
||||
for i in range(max_check, 0, -1):
|
||||
suffix = buf_lower[-i:]
|
||||
for tag in tags:
|
||||
tag_lower = tag.lower()
|
||||
if len(tag_lower) > i and tag_lower.startswith(suffix):
|
||||
return i
|
||||
return 0
|
||||
|
||||
@classmethod
|
||||
def _strip_orphan_close_tags(cls, text: str) -> str:
|
||||
"""Remove any close tags from *text* (orphan-close handling).
|
||||
|
||||
An orphan close tag has no matching open in the current
|
||||
scrubber state; it's always noise, stripped with any trailing
|
||||
whitespace so the surrounding prose flows naturally.
|
||||
"""
|
||||
if "</" not in text:
|
||||
return text
|
||||
text_lower = text.lower()
|
||||
out: list[str] = []
|
||||
i = 0
|
||||
while i < len(text):
|
||||
matched = False
|
||||
if text_lower[i:i + 2] == "</":
|
||||
for tag in cls._CLOSE_TAGS:
|
||||
tag_lower = tag.lower()
|
||||
tag_len = len(tag_lower)
|
||||
if text_lower[i:i + tag_len] == tag_lower:
|
||||
# Skip the tag and any trailing whitespace,
|
||||
# matching _strip_think_blocks case 3.
|
||||
j = i + tag_len
|
||||
while j < len(text) and text[j] in " \t\n\r":
|
||||
j += 1
|
||||
i = j
|
||||
matched = True
|
||||
break
|
||||
if not matched:
|
||||
out.append(text[i])
|
||||
i += 1
|
||||
return "".join(out)
|
||||
@@ -6,19 +6,12 @@ adds latency to the user-facing reply.
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Callable, Optional
|
||||
from typing import Optional
|
||||
|
||||
from agent.auxiliary_client import call_llm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Callback signature: (task_name, exception) -> None. Used to surface
|
||||
# auxiliary failures to the user through AIAgent._emit_auxiliary_failure
|
||||
# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
|
||||
# become visible instead of piling up as NULL session titles.
|
||||
FailureCallback = Callable[[str, BaseException], None]
|
||||
TitleCallback = Callable[[str], None]
|
||||
|
||||
_TITLE_PROMPT = (
|
||||
"Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
|
||||
"following exchange. The title should capture the main topic or intent. "
|
||||
@@ -26,23 +19,11 @@ _TITLE_PROMPT = (
|
||||
)
|
||||
|
||||
|
||||
def generate_title(
|
||||
user_message: str,
|
||||
assistant_response: str,
|
||||
timeout: float = 30.0,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
) -> Optional[str]:
|
||||
def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
|
||||
"""Generate a session title from the first exchange.
|
||||
|
||||
Uses the main runtime's model when available, falling back to the
|
||||
auxiliary LLM client (cheapest/fastest available model).
|
||||
Uses the auxiliary LLM client (cheapest/fastest available model).
|
||||
Returns the title string or None on failure.
|
||||
|
||||
``failure_callback`` is invoked with ``(task, exception)`` when the
|
||||
auxiliary call raises — the caller typically wires this to
|
||||
``AIAgent._emit_auxiliary_failure`` so the user sees a warning instead
|
||||
of silently accumulating untitled sessions.
|
||||
"""
|
||||
# Truncate long messages to keep the request small
|
||||
user_snippet = user_message[:500] if user_message else ""
|
||||
@@ -60,7 +41,6 @@ def generate_title(
|
||||
max_tokens=500,
|
||||
temperature=0.3,
|
||||
timeout=timeout,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
title = (response.choices[0].message.content or "").strip()
|
||||
# Clean up: remove quotes, trailing punctuation, prefixes like "Title: "
|
||||
@@ -72,15 +52,7 @@ def generate_title(
|
||||
title = title[:77] + "..."
|
||||
return title if title else None
|
||||
except Exception as e:
|
||||
# Log at WARNING so this shows up in agent.log without debug mode.
|
||||
# Full detail at debug level for operators who need the stack.
|
||||
logger.warning("Title generation failed: %s", e)
|
||||
logger.debug("Title generation traceback", exc_info=True)
|
||||
if failure_callback is not None:
|
||||
try:
|
||||
failure_callback("title generation", e)
|
||||
except Exception:
|
||||
logger.debug("Title generation failure_callback raised", exc_info=True)
|
||||
logger.debug("Title generation failed: %s", e)
|
||||
return None
|
||||
|
||||
|
||||
@@ -89,9 +61,6 @@ def auto_title_session(
|
||||
session_id: str,
|
||||
user_message: str,
|
||||
assistant_response: str,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
title_callback: Optional[TitleCallback] = None,
|
||||
) -> None:
|
||||
"""Generate and set a session title if one doesn't already exist.
|
||||
|
||||
@@ -112,20 +81,13 @@ def auto_title_session(
|
||||
except Exception:
|
||||
return
|
||||
|
||||
title = generate_title(
|
||||
user_message, assistant_response, failure_callback=failure_callback, main_runtime=main_runtime
|
||||
)
|
||||
title = generate_title(user_message, assistant_response)
|
||||
if not title:
|
||||
return
|
||||
|
||||
try:
|
||||
session_db.set_session_title(session_id, title)
|
||||
logger.debug("Auto-generated session title: %s", title)
|
||||
if title_callback is not None:
|
||||
try:
|
||||
title_callback(title)
|
||||
except Exception:
|
||||
logger.debug("Auto-title callback failed", exc_info=True)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to set auto-generated title: %s", e)
|
||||
|
||||
@@ -136,9 +98,6 @@ def maybe_auto_title(
|
||||
user_message: str,
|
||||
assistant_response: str,
|
||||
conversation_history: list,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
title_callback: Optional[TitleCallback] = None,
|
||||
) -> None:
|
||||
"""Fire-and-forget title generation after the first exchange.
|
||||
|
||||
@@ -160,11 +119,6 @@ def maybe_auto_title(
|
||||
thread = threading.Thread(
|
||||
target=auto_title_session,
|
||||
args=(session_db, session_id, user_message, assistant_response),
|
||||
kwargs={
|
||||
"failure_callback": failure_callback,
|
||||
"main_runtime": main_runtime,
|
||||
"title_callback": title_callback,
|
||||
},
|
||||
daemon=True,
|
||||
name="auto-title",
|
||||
)
|
||||
|
||||
@@ -1,455 +0,0 @@
|
||||
"""Pure tool-call loop guardrail primitives.
|
||||
|
||||
The controller in this module is intentionally side-effect free: it tracks
|
||||
per-turn tool-call observations and returns decisions. Runtime code owns whether
|
||||
those decisions become warning guidance, synthetic tool results, or controlled
|
||||
turn halts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Mapping
|
||||
|
||||
from utils import safe_json_loads
|
||||
|
||||
|
||||
IDEMPOTENT_TOOL_NAMES = frozenset(
|
||||
{
|
||||
"read_file",
|
||||
"search_files",
|
||||
"web_search",
|
||||
"web_extract",
|
||||
"session_search",
|
||||
"browser_snapshot",
|
||||
"browser_console",
|
||||
"browser_get_images",
|
||||
"mcp_filesystem_read_file",
|
||||
"mcp_filesystem_read_text_file",
|
||||
"mcp_filesystem_read_multiple_files",
|
||||
"mcp_filesystem_list_directory",
|
||||
"mcp_filesystem_list_directory_with_sizes",
|
||||
"mcp_filesystem_directory_tree",
|
||||
"mcp_filesystem_get_file_info",
|
||||
"mcp_filesystem_search_files",
|
||||
}
|
||||
)
|
||||
|
||||
MUTATING_TOOL_NAMES = frozenset(
|
||||
{
|
||||
"terminal",
|
||||
"execute_code",
|
||||
"write_file",
|
||||
"patch",
|
||||
"todo",
|
||||
"memory",
|
||||
"skill_manage",
|
||||
"browser_click",
|
||||
"browser_type",
|
||||
"browser_press",
|
||||
"browser_scroll",
|
||||
"browser_navigate",
|
||||
"send_message",
|
||||
"cronjob",
|
||||
"delegate_task",
|
||||
"process",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ToolCallGuardrailConfig:
    """Immutable thresholds used for per-turn tool-call loop detection.

    Warnings default to on and never stop a tool from running. Hard stops
    default to off, so interactive CLI/TUI sessions only get a nudge unless
    the user opts into circuit-breaker behavior in config.yaml.
    """

    warnings_enabled: bool = True
    hard_stop_enabled: bool = False
    exact_failure_warn_after: int = 2
    exact_failure_block_after: int = 5
    same_tool_failure_warn_after: int = 3
    same_tool_failure_halt_after: int = 8
    no_progress_warn_after: int = 2
    no_progress_block_after: int = 5
    idempotent_tools: frozenset[str] = field(default_factory=lambda: IDEMPOTENT_TOOL_NAMES)
    mutating_tools: frozenset[str] = field(default_factory=lambda: MUTATING_TOOL_NAMES)

    @classmethod
    def from_mapping(cls, data: Mapping[str, Any] | None) -> "ToolCallGuardrailConfig":
        """Create a config from the `tool_loop_guardrails` config.yaml section.

        Each threshold is read from the nested ``warn_after`` /
        ``hard_stop_after`` mapping when the key is present there, otherwise
        from the flat ``<field name>`` key on ``data``. Missing or invalid
        values fall back to the dataclass defaults. Anything that is not a
        mapping yields an all-defaults config.
        """
        if not isinstance(data, Mapping):
            return cls()

        def section(name: str) -> Mapping[str, Any]:
            # A nested section that is absent or not a mapping is ignored.
            candidate = data.get(name)
            return candidate if isinstance(candidate, Mapping) else {}

        warn_section = section("warn_after")
        stop_section = section("hard_stop_after")
        base = cls()

        def threshold(nested: Mapping[str, Any], nested_key: str, field_name: str) -> int:
            # The flat key shares its name with the dataclass field.
            raw = nested.get(nested_key, data.get(field_name))
            return _positive_int(raw, getattr(base, field_name))

        return cls(
            warnings_enabled=_as_bool(data.get("warnings_enabled"), base.warnings_enabled),
            hard_stop_enabled=_as_bool(data.get("hard_stop_enabled"), base.hard_stop_enabled),
            exact_failure_warn_after=threshold(
                warn_section, "exact_failure", "exact_failure_warn_after"
            ),
            same_tool_failure_warn_after=threshold(
                warn_section, "same_tool_failure", "same_tool_failure_warn_after"
            ),
            no_progress_warn_after=threshold(
                warn_section, "idempotent_no_progress", "no_progress_warn_after"
            ),
            exact_failure_block_after=threshold(
                stop_section, "exact_failure", "exact_failure_block_after"
            ),
            same_tool_failure_halt_after=threshold(
                stop_section, "same_tool_failure", "same_tool_failure_halt_after"
            ),
            no_progress_block_after=threshold(
                stop_section, "idempotent_no_progress", "no_progress_block_after"
            ),
        )
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ToolCallSignature:
    """Hashable identity of a tool call: the tool name plus a digest of its args.

    Only the SHA-256 digest of the canonical argument JSON is stored, so raw
    argument values are not kept on the signature.
    """

    tool_name: str
    args_hash: str

    @classmethod
    def from_call(cls, tool_name: str, args: Mapping[str, Any] | None) -> "ToolCallSignature":
        """Build a signature from a tool name and its parsed arguments.

        ``None`` or empty ``args`` are treated as an empty mapping.
        """
        serialized = canonical_tool_args(args if args else {})
        digest = _sha256(serialized)
        return cls(tool_name=tool_name, args_hash=digest)

    def to_metadata(self) -> dict[str, str]:
        """Expose the signature as a plain dict (name and hash only)."""
        return dict(tool_name=self.tool_name, args_hash=self.args_hash)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ToolGuardrailDecision:
    """Outcome produced by the guardrail controller for a single tool call."""

    action: str = "allow"  # one of: allow | warn | block | halt
    code: str = "allow"
    message: str = ""
    tool_name: str = ""
    count: int = 0
    signature: ToolCallSignature | None = None

    @property
    def allows_execution(self) -> bool:
        """True when the action is ``allow`` or ``warn``."""
        return self.action in ("allow", "warn")

    @property
    def should_halt(self) -> bool:
        """True when the action is ``block`` or ``halt``."""
        return self.action in ("block", "halt")

    def to_metadata(self) -> dict[str, Any]:
        """Serialize the decision to a dict.

        The ``signature`` key is only present when a signature is attached.
        """
        payload: dict[str, Any] = dict(
            action=self.action,
            code=self.code,
            message=self.message,
            tool_name=self.tool_name,
            count=self.count,
        )
        if self.signature is None:
            return payload
        payload["signature"] = self.signature.to_metadata()
        return payload
|
||||
|
||||
|
||||
def canonical_tool_args(args: Mapping[str, Any]) -> str:
    """Serialize parsed tool arguments to compact JSON with sorted keys.

    Values that JSON cannot encode natively are stringified via ``str``.

    Raises:
        TypeError: if ``args`` is not a mapping.
    """
    if isinstance(args, Mapping):
        dump_options = {
            "ensure_ascii": False,
            "sort_keys": True,
            "separators": (",", ":"),
            "default": str,
        }
        return json.dumps(args, **dump_options)
    raise TypeError(f"tool args must be a mapping, got {type(args).__name__}")
|
||||
|
||||
|
||||
def classify_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
|
||||
"""Safety-fallback classifier used only when callers don't pass ``failed``.
|
||||
|
||||
Mirrors ``agent.display._detect_tool_failure`` exactly so the guardrail
|
||||
never disagrees with the CLI's user-visible ``[error]`` tag. Production
|
||||
callers in ``run_agent.py`` always pass an explicit ``failed=`` derived
|
||||
from ``_detect_tool_failure``; this function exists so standalone callers
|
||||
(tests, tooling) still get consistent behavior.
|
||||
"""
|
||||
if result is None:
|
||||
return False, ""
|
||||
|
||||
if tool_name == "terminal":
|
||||
data = safe_json_loads(result)
|
||||
if isinstance(data, dict):
|
||||
exit_code = data.get("exit_code")
|
||||
if exit_code is not None and exit_code != 0:
|
||||
return True, f" [exit {exit_code}]"
|
||||
return False, ""
|
||||
|
||||
if tool_name == "memory":
|
||||
data = safe_json_loads(result)
|
||||
if isinstance(data, dict):
|
||||
if data.get("success") is False and "exceed the limit" in data.get("error", ""):
|
||||
return True, " [full]"
|
||||
|
||||
lower = result[:500].lower()
|
||||
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
|
||||
return True, " [error]"
|
||||
|
||||
return False, ""
|
||||
|
||||
|
||||
class ToolCallGuardrailController:
    """Tracks repeated failing or non-progressing tool calls within one turn.

    ``before_call`` may veto a call (only when hard stops are enabled);
    ``after_call`` records the outcome and may return a warning or a halt.
    All counters live on the instance and are cleared by ``reset_for_turn``.
    """

    def __init__(self, config: ToolCallGuardrailConfig | None = None):
        self.config = config or ToolCallGuardrailConfig()
        self.reset_for_turn()

    def reset_for_turn(self) -> None:
        """Clear every counter and any stored block/halt decision."""
        # Failure count per exact (tool name, args hash) signature.
        self._exact_failure_counts: dict[ToolCallSignature, int] = {}
        # Failure count per tool name, regardless of arguments.
        self._same_tool_failure_counts: dict[str, int] = {}
        # signature -> (hash of the last result, times in a row it repeated)
        self._no_progress: dict[ToolCallSignature, tuple[str, int]] = {}
        self._halt_decision: ToolGuardrailDecision | None = None

    @property
    def halt_decision(self) -> ToolGuardrailDecision | None:
        """Most recent block/halt decision issued since the last reset, if any."""
        return self._halt_decision

    def before_call(self, tool_name: str, args: Mapping[str, Any] | None) -> ToolGuardrailDecision:
        """Decide whether a tool call may run.

        Always allows when hard stops are disabled. Otherwise blocks a call
        whose exact signature has already failed ``exact_failure_block_after``
        times, or an idempotent call whose result has repeated
        ``no_progress_block_after`` times.
        """
        signature = ToolCallSignature.from_call(tool_name, _coerce_args(args))
        settings = self.config

        if settings.hard_stop_enabled:
            prior_failures = self._exact_failure_counts.get(signature, 0)
            if prior_failures >= settings.exact_failure_block_after:
                return self._remember_halt(
                    ToolGuardrailDecision(
                        action="block",
                        code="repeated_exact_failure_block",
                        message=(
                            f"Blocked {tool_name}: the same tool call failed {prior_failures} "
                            "times with identical arguments. Stop retrying it unchanged; "
                            "change strategy or explain the blocker."
                        ),
                        tool_name=tool_name,
                        count=prior_failures,
                        signature=signature,
                    )
                )

            if self._is_idempotent(tool_name):
                seen = self._no_progress.get(signature)
                if seen is not None and seen[1] >= settings.no_progress_block_after:
                    repeats = seen[1]
                    return self._remember_halt(
                        ToolGuardrailDecision(
                            action="block",
                            code="idempotent_no_progress_block",
                            message=(
                                f"Blocked {tool_name}: this read-only call returned the same "
                                f"result {repeats} times. Stop repeating it unchanged; "
                                "use the result already provided or try a different query."
                            ),
                            tool_name=tool_name,
                            count=repeats,
                            signature=signature,
                        )
                    )

        return ToolGuardrailDecision(tool_name=tool_name, signature=signature)

    def after_call(
        self,
        tool_name: str,
        args: Mapping[str, Any] | None,
        result: str | None,
        *,
        failed: bool | None = None,
    ) -> ToolGuardrailDecision:
        """Record a finished tool call and return the resulting decision.

        When ``failed`` is ``None`` the outcome is inferred from ``result``
        via ``classify_tool_failure``.
        """
        args = _coerce_args(args)
        signature = ToolCallSignature.from_call(tool_name, args)
        if failed is None:
            failed, _ = classify_tool_failure(tool_name, result)

        if failed:
            return self._record_failure(tool_name, signature)
        return self._record_success(tool_name, signature, result)

    def _remember_halt(self, decision: ToolGuardrailDecision) -> ToolGuardrailDecision:
        """Store ``decision`` as the current halt decision and hand it back."""
        self._halt_decision = decision
        return decision

    def _record_failure(self, tool_name: str, signature: ToolCallSignature) -> ToolGuardrailDecision:
        """Bump failure counters and pick halt / warning / allow, in that order."""
        settings = self.config

        exact_failures = self._exact_failure_counts.get(signature, 0) + 1
        self._exact_failure_counts[signature] = exact_failures
        # A failure breaks any run of identical successful results.
        self._no_progress.pop(signature, None)

        tool_failures = self._same_tool_failure_counts.get(tool_name, 0) + 1
        self._same_tool_failure_counts[tool_name] = tool_failures

        if settings.hard_stop_enabled and tool_failures >= settings.same_tool_failure_halt_after:
            return self._remember_halt(
                ToolGuardrailDecision(
                    action="halt",
                    code="same_tool_failure_halt",
                    message=(
                        f"Stopped {tool_name}: it failed {tool_failures} times this turn. "
                        "Stop retrying the same failing tool path and choose a different approach."
                    ),
                    tool_name=tool_name,
                    count=tool_failures,
                    signature=signature,
                )
            )

        if settings.warnings_enabled:
            # The exact-signature warning takes precedence over the per-tool one.
            if exact_failures >= settings.exact_failure_warn_after:
                return ToolGuardrailDecision(
                    action="warn",
                    code="repeated_exact_failure_warning",
                    message=(
                        f"{tool_name} has failed {exact_failures} times with identical arguments. "
                        "This looks like a loop; inspect the error and change strategy "
                        "instead of retrying it unchanged."
                    ),
                    tool_name=tool_name,
                    count=exact_failures,
                    signature=signature,
                )
            if tool_failures >= settings.same_tool_failure_warn_after:
                return ToolGuardrailDecision(
                    action="warn",
                    code="same_tool_failure_warning",
                    message=(
                        f"{tool_name} has failed {tool_failures} times this turn. "
                        "This looks like a loop; change approach before retrying."
                    ),
                    tool_name=tool_name,
                    count=tool_failures,
                    signature=signature,
                )

        return ToolGuardrailDecision(tool_name=tool_name, count=exact_failures, signature=signature)

    def _record_success(
        self,
        tool_name: str,
        signature: ToolCallSignature,
        result: str | None,
    ) -> ToolGuardrailDecision:
        """Reset failure counters and track repeated identical results."""
        # Any success clears both failure counters for this call and tool.
        self._exact_failure_counts.pop(signature, None)
        self._same_tool_failure_counts.pop(tool_name, None)

        if not self._is_idempotent(tool_name):
            self._no_progress.pop(signature, None)
            return ToolGuardrailDecision(tool_name=tool_name, signature=signature)

        digest = _result_hash(result)
        earlier = self._no_progress.get(signature)
        if earlier is not None and earlier[0] == digest:
            repeats = earlier[1] + 1
        else:
            # First observation, or the result changed: start a new run.
            repeats = 1
        self._no_progress[signature] = (digest, repeats)

        if self.config.warnings_enabled and repeats >= self.config.no_progress_warn_after:
            return ToolGuardrailDecision(
                action="warn",
                code="idempotent_no_progress_warning",
                message=(
                    f"{tool_name} returned the same result {repeats} times. "
                    "Use the result already provided or change the query instead of "
                    "repeating it unchanged."
                ),
                tool_name=tool_name,
                count=repeats,
                signature=signature,
            )

        return ToolGuardrailDecision(tool_name=tool_name, count=repeats, signature=signature)

    def _is_idempotent(self, tool_name: str) -> bool:
        """True for configured idempotent tools; mutating tools always lose."""
        settings = self.config
        return tool_name not in settings.mutating_tools and tool_name in settings.idempotent_tools
|
||||
|
||||
|
||||
def toolguard_synthetic_result(decision: ToolGuardrailDecision) -> str:
    """Render a blocked tool call as a JSON string for role=tool content.

    The payload carries the decision message under ``error`` and the full
    decision metadata under ``guardrail``. Non-ASCII text is kept verbatim.
    """
    payload = {
        "error": decision.message,
        "guardrail": decision.to_metadata(),
    }
    return json.dumps(payload, ensure_ascii=False)
|
||||
|
||||
|
||||
def append_toolguard_guidance(result: str, decision: ToolGuardrailDecision) -> str:
    """Add a bracketed guardrail note to the end of a tool result.

    Only ``warn`` and ``halt`` decisions with a non-empty message produce a
    note; for anything else ``result`` is returned untouched. A falsy
    ``result`` is treated as an empty string when the note is appended.
    """
    wants_note = decision.action in ("warn", "halt")
    if not (wants_note and decision.message):
        return result

    if decision.action == "halt":
        label = "Tool loop hard stop"
    else:
        label = "Tool loop warning"

    note = f"\n\n[{label}: {decision.code}; count={decision.count}; {decision.message}]"
    base = result or ""
    return base + note
|
||||
|
||||
|
||||
def _coerce_args(args: Mapping[str, Any] | None) -> Mapping[str, Any]:
|
||||
return args if isinstance(args, Mapping) else {}
|
||||
|
||||
|
||||
def _result_hash(result: str | None) -> str:
    """Hash a tool result so repeated identical results can be detected.

    When ``safe_json_loads`` yields a non-``None`` value, the hash is taken
    over its compact, key-sorted JSON form; otherwise the raw text is hashed.
    ``None`` is hashed as the empty string.
    """
    raw = result or ""
    # NOTE(review): presumably safe_json_loads returns None for text that is
    # not valid JSON — confirm against utils.safe_json_loads.
    parsed = safe_json_loads(raw)
    if parsed is None:
        return _sha256(raw)

    try:
        canonical = json.dumps(
            parsed,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            default=str,
        )
    except TypeError:
        # Fall back to the plain string form if the value cannot be dumped.
        canonical = str(parsed)
    return _sha256(canonical)
|
||||
|
||||
|
||||
def _as_bool(value: Any, default: bool) -> bool:
|
||||
if value is None:
|
||||
return default
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, (int, float)):
|
||||
return bool(value)
|
||||
if isinstance(value, str):
|
||||
lowered = value.strip().lower()
|
||||
if lowered in {"1", "true", "yes", "on", "enabled"}:
|
||||
return True
|
||||
if lowered in {"0", "false", "no", "off", "disabled"}:
|
||||
return False
|
||||
return default
|
||||
|
||||
|
||||
def _positive_int(value: Any, default: int) -> int:
|
||||
if value is None:
|
||||
return default
|
||||
try:
|
||||
parsed = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
return parsed if parsed >= 1 else default
|
||||
|
||||
|
||||
def _sha256(value: str) -> str:
|
||||
return hashlib.sha256(value.encode("utf-8")).hexdigest()
|
||||
@@ -6,16 +6,9 @@ Usage:
|
||||
result = transport.normalize_response(raw_response)
|
||||
"""
|
||||
|
||||
from agent.transports.types import (
|
||||
NormalizedResponse,
|
||||
ToolCall,
|
||||
Usage,
|
||||
build_tool_call,
|
||||
map_finish_reason,
|
||||
) # noqa: F401
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
|
||||
|
||||
_REGISTRY: dict = {}
|
||||
_discovered: bool = False
|
||||
|
||||
|
||||
def register_transport(api_mode: str, transport_cls: type) -> None:
|
||||
@@ -30,17 +23,9 @@ def get_transport(api_mode: str):
|
||||
This allows gradual migration — call sites can check for None
|
||||
and fall back to the legacy code path.
|
||||
"""
|
||||
global _discovered
|
||||
if not _discovered:
|
||||
if not _REGISTRY:
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
# The registry can be partially populated when a specific transport
|
||||
# module was imported directly (for example chat_completions before
|
||||
# codex). Discover on misses, not only when the registry is empty, so
|
||||
# test/order-dependent imports do not make valid api_modes unavailable.
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
return None
|
||||
return cls()
|
||||
@@ -48,8 +33,6 @@ def get_transport(api_mode: str):
|
||||
|
||||
def _discover_transports() -> None:
|
||||
"""Import all transport modules to trigger auto-registration."""
|
||||
global _discovered
|
||||
_discovered = True
|
||||
try:
|
||||
import agent.transports.anthropic # noqa: F401
|
||||
except ImportError:
|
||||
|
||||
@@ -58,7 +58,6 @@ class AnthropicTransport(ProviderTransport):
|
||||
context_length: int | None
|
||||
base_url: str | None
|
||||
fast_mode: bool
|
||||
drop_context_1m_beta: bool
|
||||
"""
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
|
||||
@@ -74,7 +73,6 @@ class AnthropicTransport(ProviderTransport):
|
||||
context_length=params.get("context_length"),
|
||||
base_url=params.get("base_url"),
|
||||
fast_mode=params.get("fast_mode", False),
|
||||
drop_context_1m_beta=params.get("drop_context_1m_beta", False),
|
||||
)
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
|
||||
@@ -12,93 +12,12 @@ reasoning configuration, temperature handling, and extra_body assembly.
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.lmstudio_reasoning import resolve_lmstudio_effort
|
||||
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
|
||||
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
|
||||
"""Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig."""
|
||||
if reasoning_config is None or not isinstance(reasoning_config, dict):
|
||||
return None
|
||||
|
||||
normalized_model = (model or "").strip().lower()
|
||||
if normalized_model.startswith("google/"):
|
||||
normalized_model = normalized_model.split("/", 1)[1]
|
||||
|
||||
# ``thinking_config`` is a Gemini-only request parameter. The same
|
||||
# ``gemini`` provider also serves Gemma (and historically PaLM/Bard);
|
||||
# those reject the field with HTTP 400 "Unknown name 'thinking_config':
|
||||
# Cannot find field" — including the polite ``{"includeThoughts": False}``
|
||||
# form. Omit the field entirely on non-Gemini models. (#17426)
|
||||
if not normalized_model.startswith("gemini"):
|
||||
return None
|
||||
|
||||
if reasoning_config.get("enabled") is False:
|
||||
# Gemini can hide thought parts even when internal thinking still
|
||||
# happens; omit thinkingLevel to avoid model-specific validation quirks.
|
||||
return {"includeThoughts": False}
|
||||
|
||||
effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
|
||||
if effort == "none":
|
||||
return {"includeThoughts": False}
|
||||
|
||||
thinking_config: Dict[str, Any] = {"includeThoughts": True}
|
||||
|
||||
# Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
|
||||
# coarse effort levels. ``includeThoughts`` alone is enough to surface
|
||||
# thought parts without risking request validation errors.
|
||||
if normalized_model.startswith("gemini-2.5-"):
|
||||
return thinking_config
|
||||
|
||||
if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
|
||||
effort = "medium"
|
||||
|
||||
# Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
|
||||
# is stricter (low/high). Clamp Hermes' wider effort set to what each
|
||||
# family accepts so we never forward an undocumented level verbatim.
|
||||
if normalized_model.startswith(("gemini-3", "gemini-3.1")):
|
||||
if "flash" in normalized_model:
|
||||
if effort in {"minimal", "low"}:
|
||||
thinking_config["thinkingLevel"] = "low"
|
||||
elif effort in {"high", "xhigh"}:
|
||||
thinking_config["thinkingLevel"] = "high"
|
||||
else:
|
||||
thinking_config["thinkingLevel"] = "medium"
|
||||
elif "pro" in normalized_model:
|
||||
thinking_config["thinkingLevel"] = (
|
||||
"high" if effort in {"high", "xhigh"} else "low"
|
||||
)
|
||||
|
||||
return thinking_config
|
||||
|
||||
|
||||
def _snake_case_gemini_thinking_config(config: dict | None) -> dict | None:
|
||||
"""Convert Gemini thinking config keys to the OpenAI-compat field names."""
|
||||
if not isinstance(config, dict) or not config:
|
||||
return None
|
||||
|
||||
translated: Dict[str, Any] = {}
|
||||
if isinstance(config.get("includeThoughts"), bool):
|
||||
translated["include_thoughts"] = config["includeThoughts"]
|
||||
if isinstance(config.get("thinkingLevel"), str) and config["thinkingLevel"].strip():
|
||||
translated["thinking_level"] = config["thinkingLevel"].strip().lower()
|
||||
if isinstance(config.get("thinkingBudget"), (int, float)):
|
||||
translated["thinking_budget"] = int(config["thinkingBudget"])
|
||||
return translated or None
|
||||
|
||||
|
||||
def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
|
||||
normalized = str(base_url or "").strip().rstrip("/").lower()
|
||||
if not normalized:
|
||||
return False
|
||||
if "generativelanguage.googleapis.com" not in normalized:
|
||||
return False
|
||||
return normalized.endswith("/openai")
|
||||
|
||||
|
||||
class ChatCompletionsTransport(ProviderTransport):
|
||||
"""Transport for api_mode='chat_completions'.
|
||||
|
||||
@@ -109,28 +28,24 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
def api_mode(self) -> str:
|
||||
return "chat_completions"
|
||||
|
||||
def convert_messages(
|
||||
self, messages: list[dict[str, Any]], **kwargs
|
||||
) -> list[dict[str, Any]]:
|
||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id``/``response_item_id``
|
||||
on tool_calls) that strict chat-completions providers reject with 400/422.
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` on the
|
||||
message, ``call_id``/``response_item_id`` on tool_calls) that strict
|
||||
chat-completions providers reject with 400/422.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
|
||||
if "codex_reasoning_items" in msg:
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict) and (
|
||||
"call_id" in tc or "response_item_id" in tc
|
||||
):
|
||||
if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
|
||||
needs_sanitize = True
|
||||
break
|
||||
if needs_sanitize:
|
||||
@@ -144,7 +59,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
msg.pop("codex_message_items", None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
@@ -153,42 +67,39 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
tc.pop("response_item_id", None)
|
||||
return sanitized
|
||||
|
||||
def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Tools are already in OpenAI format — identity."""
|
||||
return tools
|
||||
|
||||
def build_kwargs(
|
||||
self,
|
||||
model: str,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
messages: List[Dict[str, Any]],
|
||||
tools: Optional[List[Dict[str, Any]]] = None,
|
||||
**params,
|
||||
) -> dict[str, Any]:
|
||||
) -> Dict[str, Any]:
|
||||
"""Build chat.completions.create() kwargs.
|
||||
|
||||
params (all optional):
|
||||
This is the most complex transport method — it handles ~16 providers
|
||||
via params rather than subclasses.
|
||||
|
||||
params:
|
||||
timeout: float — API call timeout
|
||||
max_tokens: int | None — user-configured max tokens
|
||||
ephemeral_max_output_tokens: int | None — one-shot override
|
||||
ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
|
||||
max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
|
||||
reasoning_config: dict | None
|
||||
request_overrides: dict | None
|
||||
session_id: str | None
|
||||
qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
|
||||
model_lower: str — lowercase model name for pattern matching
|
||||
# Provider profile path (all per-provider quirks live in providers/)
|
||||
provider_profile: ProviderProfile | None — when present, delegates to
|
||||
_build_kwargs_from_profile(); all flag params below are bypassed.
|
||||
# Legacy-path flags — only used when provider_profile is None
|
||||
# (i.e. custom / unregistered providers). Known providers all go
|
||||
# through provider_profile.
|
||||
# Provider detection flags (all optional, default False)
|
||||
is_openrouter: bool
|
||||
is_nous: bool
|
||||
is_qwen_portal: bool
|
||||
is_github_models: bool
|
||||
is_nvidia_nim: bool
|
||||
is_kimi: bool
|
||||
is_tokenhub: bool
|
||||
is_lmstudio: bool
|
||||
is_custom_provider: bool
|
||||
ollama_num_ctx: int | None
|
||||
# Provider routing
|
||||
@@ -196,31 +107,36 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
# Qwen-specific
|
||||
qwen_prepare_fn: callable | None — runs AFTER codex sanitization
|
||||
qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
|
||||
qwen_session_metadata: dict | None
|
||||
# Temperature
|
||||
fixed_temperature: Any — from _fixed_temperature_for_model()
|
||||
omit_temperature: bool
|
||||
# Reasoning
|
||||
supports_reasoning: bool
|
||||
github_reasoning_extra: dict | None
|
||||
lmstudio_reasoning_options: list[str] | None # raw allowed_options from /api/v1/models
|
||||
# Claude on OpenRouter/Nous max output
|
||||
anthropic_max_output: int | None
|
||||
extra_body_additions: dict | None
|
||||
# Extra
|
||||
extra_body_additions: dict | None — pre-built extra_body entries
|
||||
"""
|
||||
# Codex sanitization: drop reasoning_items / call_id / response_item_id
|
||||
sanitized = self.convert_messages(messages)
|
||||
|
||||
# ── Provider profile: single-path when present ──────────────────
|
||||
_profile = params.get("provider_profile")
|
||||
if _profile:
|
||||
return self._build_kwargs_from_profile(
|
||||
_profile, model, sanitized, tools, params
|
||||
)
|
||||
|
||||
# ── Legacy fallback (unregistered / unknown provider) ───────────
|
||||
# Reached only when get_provider_profile() returned None.
|
||||
# Known providers always go through the profile path above.
|
||||
# Qwen portal prep AFTER codex sanitization. If sanitize already
|
||||
# deepcopied, reuse that copy via the in-place variant to avoid a
|
||||
# second deepcopy.
|
||||
is_qwen = params.get("is_qwen_portal", False)
|
||||
if is_qwen:
|
||||
qwen_prep = params.get("qwen_prepare_fn")
|
||||
qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
|
||||
if sanitized is messages:
|
||||
if qwen_prep is not None:
|
||||
sanitized = qwen_prep(sanitized)
|
||||
else:
|
||||
# Already deepcopied — transform in place
|
||||
if qwen_prep_inplace is not None:
|
||||
qwen_prep_inplace(sanitized)
|
||||
elif qwen_prep is not None:
|
||||
sanitized = qwen_prep(sanitized)
|
||||
|
||||
# Developer role swap for GPT-5/Codex models
|
||||
model_lower = params.get("model_lower", (model or "").lower())
|
||||
@@ -233,7 +149,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
sanitized = list(sanitized)
|
||||
sanitized[0] = {**sanitized[0], "role": "developer"}
|
||||
|
||||
api_kwargs: dict[str, Any] = {
|
||||
api_kwargs: Dict[str, Any] = {
|
||||
"model": model,
|
||||
"messages": sanitized,
|
||||
}
|
||||
@@ -242,6 +158,19 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if timeout is not None:
|
||||
api_kwargs["timeout"] = timeout
|
||||
|
||||
# Temperature
|
||||
fixed_temp = params.get("fixed_temperature")
|
||||
omit_temp = params.get("omit_temperature", False)
|
||||
if omit_temp:
|
||||
api_kwargs.pop("temperature", None)
|
||||
elif fixed_temp is not None:
|
||||
api_kwargs["temperature"] = fixed_temp
|
||||
|
||||
# Qwen metadata (caller precomputes {sessionId, promptId})
|
||||
qwen_meta = params.get("qwen_session_metadata")
|
||||
if qwen_meta and is_qwen:
|
||||
api_kwargs["metadata"] = qwen_meta
|
||||
|
||||
# Tools
|
||||
if tools:
|
||||
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
|
||||
@@ -258,13 +187,19 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
anthropic_max_out = params.get("anthropic_max_output")
|
||||
is_nvidia_nim = params.get("is_nvidia_nim", False)
|
||||
is_kimi = params.get("is_kimi", False)
|
||||
is_tokenhub = params.get("is_tokenhub", False)
|
||||
reasoning_config = params.get("reasoning_config")
|
||||
|
||||
if ephemeral is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(ephemeral))
|
||||
elif max_tokens is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(max_tokens))
|
||||
elif is_nvidia_nim and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(16384))
|
||||
elif is_qwen and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(65536))
|
||||
elif is_kimi and max_tokens_fn:
|
||||
# Kimi/Moonshot: 32000 matches Kimi CLI's default
|
||||
api_kwargs.update(max_tokens_fn(32000))
|
||||
elif anthropic_max_out is not None:
|
||||
api_kwargs["max_tokens"] = anthropic_max_out
|
||||
|
||||
@@ -283,41 +218,12 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
_kimi_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _kimi_effort
|
||||
|
||||
# Tencent TokenHub: top-level reasoning_effort (unless thinking disabled)
|
||||
if is_tokenhub:
|
||||
_tokenhub_thinking_off = bool(
|
||||
reasoning_config
|
||||
and isinstance(reasoning_config, dict)
|
||||
and reasoning_config.get("enabled") is False
|
||||
)
|
||||
if not _tokenhub_thinking_off:
|
||||
_tokenhub_effort = "high"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_e = (reasoning_config.get("effort") or "").strip().lower()
|
||||
if _e in ("low", "medium", "high"):
|
||||
_tokenhub_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _tokenhub_effort
|
||||
|
||||
# LM Studio: top-level reasoning_effort. Only emit when the model
|
||||
# declares reasoning support via /api/v1/models capabilities (gated
|
||||
# upstream by params["supports_reasoning"]). resolve_lmstudio_effort
|
||||
# is shared with run_agent's summary path so both stay in sync.
|
||||
if params.get("is_lmstudio", False) and params.get("supports_reasoning", False):
|
||||
_lm_effort = resolve_lmstudio_effort(
|
||||
reasoning_config,
|
||||
params.get("lmstudio_reasoning_options"),
|
||||
)
|
||||
if _lm_effort is not None:
|
||||
api_kwargs["reasoning_effort"] = _lm_effort
|
||||
|
||||
# extra_body assembly
|
||||
extra_body: dict[str, Any] = {}
|
||||
extra_body: Dict[str, Any] = {}
|
||||
|
||||
is_openrouter = params.get("is_openrouter", False)
|
||||
is_nous = params.get("is_nous", False)
|
||||
is_github_models = params.get("is_github_models", False)
|
||||
provider_name = str(params.get("provider_name") or "").strip().lower()
|
||||
base_url = params.get("base_url")
|
||||
|
||||
provider_prefs = params.get("provider_preferences")
|
||||
if provider_prefs and is_openrouter:
|
||||
@@ -333,32 +239,42 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
"type": "enabled" if _kimi_thinking_enabled else "disabled",
|
||||
}
|
||||
|
||||
# Reasoning. LM Studio is handled above via top-level reasoning_effort,
|
||||
# so skip emitting extra_body.reasoning for it.
|
||||
if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
|
||||
# Reasoning
|
||||
if params.get("supports_reasoning", False):
|
||||
if is_github_models:
|
||||
gh_reasoning = params.get("github_reasoning_extra")
|
||||
if gh_reasoning is not None:
|
||||
extra_body["reasoning"] = gh_reasoning
|
||||
else:
|
||||
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
|
||||
if reasoning_config is not None:
|
||||
rc = dict(reasoning_config)
|
||||
if is_nous and rc.get("enabled") is False:
|
||||
pass # omit for Nous when disabled
|
||||
else:
|
||||
extra_body["reasoning"] = rc
|
||||
else:
|
||||
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
|
||||
|
||||
if provider_name == "gemini":
|
||||
raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||
if _is_gemini_openai_compat_base_url(base_url):
|
||||
thinking_config = _snake_case_gemini_thinking_config(raw_thinking_config)
|
||||
if thinking_config:
|
||||
openai_compat_extra = extra_body.get("extra_body", {})
|
||||
google_extra = openai_compat_extra.get("google", {})
|
||||
google_extra["thinking_config"] = thinking_config
|
||||
openai_compat_extra["google"] = google_extra
|
||||
extra_body["extra_body"] = openai_compat_extra
|
||||
elif raw_thinking_config:
|
||||
extra_body["thinking_config"] = raw_thinking_config
|
||||
elif provider_name == "google-gemini-cli":
|
||||
thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||
if thinking_config:
|
||||
extra_body["thinking_config"] = thinking_config
|
||||
if is_nous:
|
||||
extra_body["tags"] = ["product=hermes-agent"]
|
||||
|
||||
# Ollama num_ctx
|
||||
ollama_ctx = params.get("ollama_num_ctx")
|
||||
if ollama_ctx:
|
||||
options = extra_body.get("options", {})
|
||||
options["num_ctx"] = ollama_ctx
|
||||
extra_body["options"] = options
|
||||
|
||||
# Ollama/custom think=false
|
||||
if params.get("is_custom_provider", False):
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_effort = (reasoning_config.get("effort") or "").strip().lower()
|
||||
_enabled = reasoning_config.get("enabled", True)
|
||||
if _effort == "none" or _enabled is False:
|
||||
extra_body["think"] = False
|
||||
|
||||
if is_qwen:
|
||||
extra_body["vl_high_resolution_images"] = True
|
||||
|
||||
# Merge any pre-built extra_body additions
|
||||
additions = params.get("extra_body_additions")
|
||||
@@ -375,120 +291,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
|
||||
return api_kwargs
|
||||
|
||||
def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
    """Assemble chat-completions request kwargs from a ProviderProfile.

    This is the profile-driven path: provider quirks are delegated to hooks
    on ``profile`` instead of being read from legacy boolean flags.

    Args:
        profile: Object exposing ``prepare_messages``, ``fixed_temperature``,
            ``default_max_tokens``, ``build_api_kwargs_extras`` and
            ``build_extra_body``.
        model: Model name, sent as ``"model"``.
        sanitized: Message list; first passed through
            ``profile.prepare_messages``.
        tools: Optional tool schemas; left out of the request when falsy.
        params: Mapping of per-request options, read with ``params.get``.

    Returns:
        The dict of keyword arguments for the API call.
    """
    from providers.base import OMIT_TEMPERATURE

    # Message preprocessing is owned by the profile.
    messages = profile.prepare_messages(sanitized)

    # A leading "system" message becomes "developer" when the model name
    # contains any entry of DEVELOPER_ROLE_MODELS. The list and its first
    # message are copied so the caller's objects are not mutated.
    model_key = (model or "").lower()
    if (
        messages
        and isinstance(messages[0], dict)
        and messages[0].get("role") == "system"
    ):
        if any(marker in model_key for marker in DEVELOPER_ROLE_MODELS):
            head = dict(messages[0])
            head["role"] = "developer"
            messages = [head, *messages[1:]]

    request: dict[str, Any] = {"model": model, "messages": messages}

    # Temperature: the OMIT_TEMPERATURE sentinel suppresses the key entirely,
    # a fixed profile value wins next, otherwise the caller's value (if any).
    pinned_temp = profile.fixed_temperature
    if pinned_temp is not OMIT_TEMPERATURE:
        chosen_temp = (
            pinned_temp if pinned_temp is not None else params.get("temperature")
        )
        if chosen_temp is not None:
            request["temperature"] = chosen_temp

    # Timeout
    request_timeout = params.get("timeout")
    if request_timeout is not None:
        request["timeout"] = request_timeout

    # Tools: Moonshot/Kimi models get their schemas sanitized first.
    if tools:
        request["tools"] = (
            sanitize_moonshot_tools(tools) if is_moonshot_model(model) else tools
        )

    # max_tokens priority: ephemeral > user > profile default, each rendered
    # through the caller-supplied parameter builder; the Anthropic cap is the
    # last resort and is written as a plain "max_tokens".
    to_max_tokens = params.get("max_tokens_param_fn")
    ephemeral_cap = params.get("ephemeral_max_output_tokens")
    user_cap = params.get("max_tokens")
    anthropic_cap = params.get("anthropic_max_output")

    if ephemeral_cap is not None and to_max_tokens:
        request.update(to_max_tokens(ephemeral_cap))
    elif user_cap is not None and to_max_tokens:
        request.update(to_max_tokens(user_cap))
    elif profile.default_max_tokens and to_max_tokens:
        request.update(to_max_tokens(profile.default_max_tokens))
    elif anthropic_cap is not None:
        request["max_tokens"] = anthropic_cap

    # Provider-specific extras: the profile returns a pair of
    # (extra_body entries, top-level kwargs).
    reasoning_config = params.get("reasoning_config")
    reasoning_body, top_level = profile.build_api_kwargs_extras(
        reasoning_config=reasoning_config,
        supports_reasoning=params.get("supports_reasoning", False),
        qwen_session_metadata=params.get("qwen_session_metadata"),
        model=model,
        ollama_num_ctx=params.get("ollama_num_ctx"),
    )
    request.update(top_level)

    # extra_body is layered; later layers overwrite earlier keys:
    # profile body -> profile reasoning entries -> caller additions ->
    # user overrides.
    body: dict[str, Any] = {}
    layers = (
        profile.build_extra_body(
            session_id=params.get("session_id"),
            provider_preferences=params.get("provider_preferences"),
            model=model,
            base_url=params.get("base_url"),
            reasoning_config=reasoning_config,
        ),
        reasoning_body,
        params.get("extra_body_additions"),
    )
    for layer in layers:
        if layer:
            body.update(layer)

    # Request overrides (user config): a dict under "extra_body" is merged
    # into the body; every other key replaces the top-level kwarg outright.
    for key, value in (params.get("request_overrides") or {}).items():
        if key == "extra_body" and isinstance(value, dict):
            body.update(value)
        else:
            request[key] = value

    if body:
        request["extra_body"] = body

    return request
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
|
||||
|
||||
@@ -510,7 +312,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
# Gemini 3 thinking models attach extra_content with
|
||||
# thought_signature — without replay on the next turn the API
|
||||
# rejects the request with 400.
|
||||
tc_provider_data: dict[str, Any] = {}
|
||||
tc_provider_data: Dict[str, Any] = {}
|
||||
extra = getattr(tc, "extra_content", None)
|
||||
if extra is None and hasattr(tc, "model_extra"):
|
||||
extra = (tc.model_extra or {}).get("extra_content")
|
||||
@@ -521,14 +323,12 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
except Exception:
|
||||
pass
|
||||
tc_provider_data["extra_content"] = extra
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=tc.id,
|
||||
name=tc.function.name,
|
||||
arguments=tc.function.arguments,
|
||||
provider_data=tc_provider_data or None,
|
||||
)
|
||||
)
|
||||
tool_calls.append(ToolCall(
|
||||
id=tc.id,
|
||||
name=tc.function.name,
|
||||
arguments=tc.function.arguments,
|
||||
provider_data=tc_provider_data or None,
|
||||
))
|
||||
|
||||
usage = None
|
||||
if hasattr(response, "usage") and response.usage:
|
||||
@@ -545,13 +345,9 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
# so keep them apart in provider_data rather than merging.
|
||||
reasoning = getattr(msg, "reasoning", None)
|
||||
reasoning_content = getattr(msg, "reasoning_content", None)
|
||||
if reasoning_content is None and hasattr(msg, "model_extra"):
|
||||
model_extra = getattr(msg, "model_extra", None) or {}
|
||||
if isinstance(model_extra, dict) and "reasoning_content" in model_extra:
|
||||
reasoning_content = model_extra["reasoning_content"]
|
||||
|
||||
provider_data: Dict[str, Any] = {}
|
||||
if reasoning_content is not None:
|
||||
if reasoning_content:
|
||||
provider_data["reasoning_content"] = reasoning_content
|
||||
rd = getattr(msg, "reasoning_details", None)
|
||||
if rd:
|
||||
@@ -576,7 +372,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
return False
|
||||
return True
|
||||
|
||||
def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
|
||||
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
|
||||
"""Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
|
||||
usage = getattr(response, "usage", None)
|
||||
if usage is None:
|
||||
|
||||
@@ -8,7 +8,7 @@ streaming, or the _run_codex_stream() call path.
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
class ResponsesApiTransport(ProviderTransport):
|
||||
@@ -120,41 +120,12 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
if request_overrides:
|
||||
kwargs.update(request_overrides)
|
||||
|
||||
if is_codex_backend:
|
||||
prompt_cache_key = kwargs.get("prompt_cache_key")
|
||||
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
|
||||
if cache_scope_id:
|
||||
existing_extra_headers = kwargs.get("extra_headers")
|
||||
merged_extra_headers: Dict[str, str] = {}
|
||||
if isinstance(existing_extra_headers, dict):
|
||||
merged_extra_headers.update(
|
||||
{
|
||||
str(key): str(value)
|
||||
for key, value in existing_extra_headers.items()
|
||||
if key and value is not None
|
||||
}
|
||||
)
|
||||
merged_extra_headers["session_id"] = cache_scope_id
|
||||
merged_extra_headers["x-client-request-id"] = cache_scope_id
|
||||
kwargs["extra_headers"] = merged_extra_headers
|
||||
|
||||
max_tokens = params.get("max_tokens")
|
||||
if max_tokens is not None and not is_codex_backend:
|
||||
kwargs["max_output_tokens"] = max_tokens
|
||||
|
||||
if is_xai_responses and session_id:
|
||||
existing_extra_headers = kwargs.get("extra_headers")
|
||||
merged_extra_headers: Dict[str, str] = {}
|
||||
if isinstance(existing_extra_headers, dict):
|
||||
merged_extra_headers.update(
|
||||
{
|
||||
str(key): str(value)
|
||||
for key, value in existing_extra_headers.items()
|
||||
if key and value is not None
|
||||
}
|
||||
)
|
||||
merged_extra_headers["x-grok-conv-id"] = session_id
|
||||
kwargs["extra_headers"] = merged_extra_headers
|
||||
kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
|
||||
|
||||
return kwargs
|
||||
|
||||
@@ -162,6 +133,8 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
"""Normalize Codex Responses API response to NormalizedResponse."""
|
||||
from agent.codex_responses_adapter import (
|
||||
_normalize_codex_response,
|
||||
_extract_responses_message_text,
|
||||
_extract_responses_reasoning_text,
|
||||
)
|
||||
|
||||
# _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
|
||||
@@ -187,8 +160,6 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
provider_data = {}
|
||||
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
|
||||
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
|
||||
if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
|
||||
provider_data["codex_message_items"] = msg.codex_message_items
|
||||
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
|
||||
provider_data["reasoning_details"] = msg.reasoning_details
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -32,10 +32,10 @@ class ToolCall:
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
id: str | None
|
||||
id: Optional[str]
|
||||
name: str
|
||||
arguments: str # JSON string
|
||||
provider_data: dict[str, Any] | None = field(default=None, repr=False)
|
||||
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
|
||||
|
||||
# ── Backward compatibility ──────────────────────────────────
|
||||
# The agent loop reads tc.function.name / tc.function.arguments
|
||||
@@ -47,17 +47,17 @@ class ToolCall:
|
||||
return "function"
|
||||
|
||||
@property
|
||||
def function(self) -> ToolCall:
|
||||
def function(self) -> "ToolCall":
|
||||
"""Return self so tc.function.name / tc.function.arguments work."""
|
||||
return self
|
||||
|
||||
@property
|
||||
def call_id(self) -> str | None:
|
||||
def call_id(self) -> Optional[str]:
|
||||
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
|
||||
return (self.provider_data or {}).get("call_id")
|
||||
|
||||
@property
|
||||
def response_item_id(self) -> str | None:
|
||||
def response_item_id(self) -> Optional[str]:
|
||||
"""Codex response_item_id from provider_data."""
|
||||
return (self.provider_data or {}).get("response_item_id")
|
||||
|
||||
@@ -97,22 +97,22 @@ class NormalizedResponse:
|
||||
Response-level ``provider_data`` examples:
|
||||
|
||||
* Anthropic: ``{"reasoning_details": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...]}``
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
content: str | None
|
||||
tool_calls: list[ToolCall] | None
|
||||
content: Optional[str]
|
||||
tool_calls: Optional[List[ToolCall]]
|
||||
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
|
||||
reasoning: str | None = None
|
||||
usage: Usage | None = None
|
||||
provider_data: dict[str, Any] | None = field(default=None, repr=False)
|
||||
reasoning: Optional[str] = None
|
||||
usage: Optional[Usage] = None
|
||||
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
|
||||
|
||||
# ── Backward compatibility ──────────────────────────────────
|
||||
# The shim _nr_to_assistant_message() mapped these from provider_data.
|
||||
# These properties let NormalizedResponse pass through directly.
|
||||
@property
|
||||
def reasoning_content(self) -> str | None:
|
||||
def reasoning_content(self) -> Optional[str]:
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("reasoning_content")
|
||||
|
||||
@@ -126,19 +126,13 @@ class NormalizedResponse:
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_reasoning_items")
|
||||
|
||||
@property
|
||||
def codex_message_items(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_message_items")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Factory helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def build_tool_call(
|
||||
id: str | None,
|
||||
id: Optional[str],
|
||||
name: str,
|
||||
arguments: Any,
|
||||
**provider_fields: Any,
|
||||
@@ -152,7 +146,7 @@ def build_tool_call(
|
||||
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
|
||||
|
||||
|
||||
def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
|
||||
def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
|
||||
"""Translate a provider-specific stop reason to the normalised set.
|
||||
|
||||
Falls back to ``"stop"`` for unknown or ``None`` reasons.
|
||||
|
||||
@@ -359,25 +359,6 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
|
||||
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||
pricing_version="bedrock-pricing-2026-04",
|
||||
),
|
||||
# MiniMax
|
||||
(
|
||||
"minimax",
|
||||
"minimax-m2.7",
|
||||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("0.30"),
|
||||
output_cost_per_million=Decimal("1.20"),
|
||||
source="official_docs_snapshot",
|
||||
pricing_version="minimax-pricing-2026-04",
|
||||
),
|
||||
(
|
||||
"minimax-cn",
|
||||
"minimax-m2.7",
|
||||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("0.30"),
|
||||
output_cost_per_million=Decimal("1.20"),
|
||||
source="official_docs_snapshot",
|
||||
pricing_version="minimax-pricing-2026-04",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -419,8 +400,6 @@ def resolve_billing_route(
|
||||
return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
|
||||
if provider_name == "openai":
|
||||
return BillingRoute(provider="openai", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
|
||||
if provider_name in {"minimax", "minimax-cn"}:
|
||||
return BillingRoute(provider=provider_name, model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
|
||||
if provider_name in {"custom", "local"} or (base and "localhost" in base):
|
||||
return BillingRoute(provider=provider_name or "custom", model=model, base_url=base_url or "", billing_mode="unknown")
|
||||
return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
|
||||
|
||||
58
apps/README.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# Hermes Apps
|
||||
|
||||
Platform apps live here. The first app is a cross-platform GUI shell around the
|
||||
existing Hermes dashboard; it should not fork chat, config, logs, or session UI.
|
||||
|
||||
## Shape
|
||||
|
||||
```text
|
||||
apps/
|
||||
gui/ # cross-platform app shell: dev Chrome shell now, Tauri native next
|
||||
shared/ # runtime bundle notes/scripts used by Windows + macOS packaging
|
||||
```
|
||||
|
||||
## Desktop Dev
|
||||
|
||||
The backend-only GUI mode is:
|
||||
|
||||
```bash
|
||||
hermes dashboard --gui
|
||||
```
|
||||
|
||||
The fast GUI shell is:
|
||||
|
||||
```powershell
|
||||
cd \\wsl$\<distro>\home\<user>\hermes-agent\apps\gui
|
||||
npm run dev
|
||||
```
|
||||
|
||||
The native Tauri shell is:
|
||||
|
||||
```powershell
|
||||
cd \\wsl$\<distro>\home\<user>\hermes-agent\apps\gui
|
||||
npm run dev:tauri
|
||||
```
|
||||
|
||||
`--gui` implies the embedded TUI; do not pass `--tui` separately for GUI mode.
|
||||
|
||||
## MVP Boundary
|
||||
|
||||
Included:
|
||||
|
||||
- bundled Python runtime
|
||||
- bundled Node/TUI runtime
|
||||
- CLI install to PATH
|
||||
- profile picker and first-run setup
|
||||
- dashboard health/reconnect state
|
||||
- tray controls
|
||||
- desktop notifications
|
||||
- Windows installer
|
||||
|
||||
Deferred:
|
||||
|
||||
- code signing
|
||||
- native self-updater
|
||||
- store distribution
|
||||
|
||||
For MVP updates, the desktop UI should run the existing `hermes update` flow and
|
||||
surface progress/finish notifications.
|
||||
|
Before Width: | Height: | Size: 3.7 MiB |
@@ -1,46 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Copy font and asset folders from @nous-research/ui into public/ for Vite.
|
||||
*
|
||||
* Locates @nous-research/ui by walking up from this script looking for
|
||||
* node_modules/@nous-research/ui — works whether the dep is co-located
|
||||
* (non-workspace layout) or hoisted to the repo root (npm workspaces).
|
||||
*/
|
||||
const fs = require('node:fs')
|
||||
const path = require('node:path')
|
||||
|
||||
const DASHBOARD_ROOT = path.resolve(__dirname, '..')

/**
 * Find the installed @nous-research/ui package directory.
 *
 * Starts at DASHBOARD_ROOT and checks every ancestor directory for
 * node_modules/@nous-research/ui/package.json, stopping at the filesystem
 * root.
 *
 * @returns {string} directory of the first match
 * @throws {Error} when no ancestor contains the package
 */
function locateUiPackage() {
  const fsRoot = path.parse(DASHBOARD_ROOT).root
  let current = DASHBOARD_ROOT
  for (;;) {
    const pkgDir = path.join(current, 'node_modules', '@nous-research', 'ui')
    const manifest = path.join(pkgDir, 'package.json')
    if (fs.existsSync(manifest)) {
      return pkgDir
    }
    if (current === fsRoot) {
      // Reached the top without a hit.
      throw new Error(
        '@nous-research/ui not found. Run `npm install` from the repo root.'
      )
    }
    current = path.dirname(current)
  }
}

const uiRoot = locateUiPackage()
const distRoot = path.join(uiRoot, 'dist')
const publicDir = path.join(DASHBOARD_ROOT, 'public')

// [folder inside the package's dist/, destination under public/]
const mappings = [
  ['fonts', path.join(publicDir, 'fonts')],
  ['assets', path.join(publicDir, 'ds-assets')],
]

// Replace each destination wholesale so stale files never linger.
mappings.forEach(([srcName, destPath]) => {
  const srcPath = path.join(distRoot, srcName)
  if (!fs.existsSync(srcPath)) {
    throw new Error(`Missing ${srcPath} in @nous-research/ui — rebuild that package.`)
  }
  fs.rmSync(destPath, { recursive: true, force: true })
  fs.cpSync(srcPath, destPath, { recursive: true })
  console.log(`synced ${path.relative(DASHBOARD_ROOT, destPath)}`)
})
|
||||
@@ -1,36 +0,0 @@
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Typography } from "@/components/NouiTypography";
|
||||
import { useI18n } from "@/i18n/context";
|
||||
|
||||
/**
 * Compact language toggle button.
 *
 * Renders a flag (plus a short text label from the `sm` breakpoint up) for
 * the active locale and flips between "en" and "zh" through `setLocale`
 * when clicked.
 *
 * NOTE(review): persisting the choice is presumably handled by the i18n
 * context behind `setLocale` — nothing in this component stores it.
 */
export function LanguageSwitcher() {
  const { locale, setLocale, t } = useI18n();
  const isEnglish = locale === "en";
  const switchLabel = t.language.switchTo;

  function handleToggle() {
    setLocale(isEnglish ? "zh" : "en");
  }

  return (
    <Button
      ghost
      onClick={handleToggle}
      title={switchLabel}
      aria-label={switchLabel}
      className="px-2 py-1 normal-case tracking-normal font-normal text-xs text-muted-foreground hover:text-foreground"
    >
      <span className="inline-flex items-center gap-1.5">
        <span className="text-base leading-none">
          {isEnglish ? "🇬🇧" : "🇨🇳"}
        </span>

        <Typography
          mondwest
          className="hidden sm:inline tracking-wide uppercase text-[0.65rem]"
        >
          {isEnglish ? "EN" : "中文"}
        </Typography>
      </span>
    </Button>
  );
}
|
||||
@@ -1,63 +0,0 @@
|
||||
import { forwardRef, type ElementType, type HTMLAttributes, type ReactNode } from "react";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
/** Props for `Typography`: standard HTML attributes plus font and size switches. */
type TypographyProps = HTMLAttributes<HTMLElement> & {
  /** Element or component to render; defaults to "span". */
  as?: ElementType;
  children?: ReactNode;
  compressed?: boolean;
  courier?: boolean;
  expanded?: boolean;
  mondwest?: boolean;
  mono?: boolean;
  sans?: boolean;
  variant?: "sm" | "md" | "lg" | "xl";
};

// Size / leading / tracking classes per variant.
// NOTE(review): "md" and "lg" map to the same classes — confirm this is intended.
const variantClasses: Record<NonNullable<TypographyProps["variant"]>, string> = {
  sm: "leading-[1.4] text-[.9375rem] tracking-[0.1875rem]",
  md: "text-[2.625rem] leading-[1] tracking-[0.0525rem]",
  lg: "text-[2.625rem] leading-[1] tracking-[0.0525rem]",
  xl: "text-[4.5rem] leading-[1] tracking-[0.135rem]",
};

/**
 * Text primitive that maps boolean font flags and an optional size variant
 * to utility classes. `font-sans` is applied when `sans` is set or when no
 * font flag is set at all. The caller's `className` is passed last to `cn`.
 */
export const Typography = forwardRef<HTMLElement, TypographyProps>(function Typography(
  allProps,
  ref,
) {
  const {
    as: Component = "span",
    className,
    compressed,
    courier,
    expanded,
    mondwest,
    mono,
    sans,
    variant,
    ...rest
  } = allProps;

  const hasFontVariant = compressed || courier || expanded || mondwest || mono || sans;
  const sizeClasses = variant && variantClasses[variant];

  const classes = cn(
    compressed && "font-compressed",
    courier && "font-courier",
    expanded && "font-expanded",
    mondwest && "font-mondwest tracking-[0.1875rem]",
    mono && "font-mono",
    (!hasFontVariant || sans) && "font-sans",
    sizeClasses,
    className,
  );

  return <Component className={classes} ref={ref} {...rest} />;
});
|
||||
|
||||
/** Bold `<h2>` built on `Typography`; `variant` defaults to "lg". */
export const H2 = forwardRef<HTMLHeadingElement, Omit<TypographyProps, "as">>(function H2(
  headingProps,
  ref,
) {
  const { className, variant = "lg", ...rest } = headingProps;
  const headingClasses = cn("font-bold", className);

  return (
    <Typography
      as="h2"
      className={headingClasses}
      variant={variant}
      ref={ref}
      {...rest}
    />
  );
});
|
||||
@@ -1,36 +0,0 @@
|
||||
import {
|
||||
JsonRpcGatewayClient,
|
||||
type ConnectionState,
|
||||
type GatewayEvent,
|
||||
type GatewayEventName,
|
||||
} from "@hermes/shared";
|
||||
|
||||
export type { ConnectionState, GatewayEvent, GatewayEventName };
|
||||
|
||||
/**
|
||||
* Browser wrapper for the shared tui_gateway JSON-RPC client.
|
||||
*
|
||||
* Dashboard resolves its token and host from the served page. Desktop uses the
|
||||
* same shared protocol client, but supplies an absolute wsUrl from Electron.
|
||||
*/
|
||||
export class GatewayClient extends JsonRpcGatewayClient {
  /**
   * Open the gateway WebSocket on the host that served the current page.
   *
   * @param token Optional session token; when omitted,
   *   `window.__HERMES_SESSION_TOKEN__` is used.
   * @throws Error when neither source yields a non-empty token.
   */
  async connect(token?: string): Promise<void> {
    const sessionToken = token ?? window.__HERMES_SESSION_TOKEN__ ?? "";
    if (!sessionToken) {
      throw new Error(
        "Session token not available — page must be served by the Hermes dashboard",
      );
    }

    // Match the page's transport security: https pages use wss.
    const wsScheme = location.protocol === "https:" ? "wss:" : "ws:";
    const query = `token=${encodeURIComponent(sessionToken)}`;
    await super.connect(`${wsScheme}//${location.host}/api/ws?${query}`);
  }
}
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
__HERMES_SESSION_TOKEN__?: string;
|
||||
}
|
||||
}
|
||||
@@ -1,543 +0,0 @@
|
||||
import { useCallback, useEffect, useLayoutEffect, useMemo, useState } from "react";
|
||||
import {
|
||||
ArrowDown,
|
||||
ArrowUp,
|
||||
ArrowUpDown,
|
||||
BarChart3,
|
||||
Brain,
|
||||
Cpu,
|
||||
RefreshCw,
|
||||
TrendingUp,
|
||||
} from "lucide-react";
|
||||
import { api } from "@/lib/api";
|
||||
import type {
|
||||
AnalyticsResponse,
|
||||
AnalyticsDailyEntry,
|
||||
AnalyticsModelEntry,
|
||||
AnalyticsSkillEntry,
|
||||
} from "@/lib/api";
|
||||
import { timeAgo } from "@/lib/utils";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { Stats } from "@nous-research/ui/ui/components/stats";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { PluginSlot } from "@/plugins";
|
||||
|
||||
const PERIODS = [
|
||||
{ label: "7d", days: 7 },
|
||||
{ label: "30d", days: 30 },
|
||||
{ label: "90d", days: 90 },
|
||||
] as const;
|
||||
|
||||
const CHART_HEIGHT_PX = 160;
|
||||
|
||||
/**
 * Format a token count compactly: "999", "1.5K", "2.3M".
 *
 * @param n Token count.
 * @returns The count with one decimal and a K/M suffix, or the plain number
 *   below 1,000.
 */
function formatTokens(n: number): string {
  // Switch to "M" at 999,950 rather than 1,000,000: from there on the value
  // rounds to 1000.0 at one decimal, which would print "1000.0K".
  if (n >= 999_950) return `${(n / 1_000_000).toFixed(1)}M`;
  if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
  return String(n);
}
|
||||
|
||||
/**
 * Format a day string as a short local date such as "Jan 5".
 *
 * @param day Date string that gets "T00:00:00" appended before parsing
 *   (presumably "YYYY-MM-DD" — confirm against the API).
 * @returns The localized month/day label, or `day` unchanged when it cannot
 *   be parsed or formatted.
 */
function formatDate(day: string): string {
  try {
    const d = new Date(day + "T00:00:00");
    // An unparseable string yields an Invalid Date instead of throwing, so
    // the catch below never sees it; check explicitly to avoid rendering
    // the literal text "Invalid Date".
    if (Number.isNaN(d.getTime())) return day;
    return d.toLocaleDateString(undefined, { month: "short", day: "numeric" });
  } catch {
    return day;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Sorting
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Sort state for a table: keeps the active column and direction and returns
 * a sorted copy of `data`.
 *
 * @param data Rows to sort; the input array is never mutated.
 * @param defaultKey Column sorted initially.
 * @param defaultDir Initial direction; defaults to "desc".
 * @returns `sorted` rows, the current `sortKey` / `sortDir`, and `toggle`,
 *   which flips the direction for the active column or switches to a new
 *   column in descending order.
 */
function useTableSort<T>(
  data: T[],
  defaultKey: keyof T & string,
  defaultDir: "asc" | "desc" = "desc",
) {
  const [sortKey, setSortKey] = useState<string>(defaultKey);
  const [sortDir, setSortDir] = useState<"asc" | "desc">(defaultDir);

  const sorted = useMemo(() => {
    return [...data].sort((a, b) => {
      const aVal = a[sortKey as keyof T];
      const bVal = b[sortKey as keyof T];
      // Nulls always last regardless of direction. Two missing values must
      // compare equal: answering "a after b" for both argument orders makes
      // the comparator inconsistent and the resulting order unspecified.
      if (aVal === null || aVal === undefined) {
        return bVal === null || bVal === undefined ? 0 : 1;
      }
      if (bVal === null || bVal === undefined) return -1;
      if (aVal === bVal) return 0;
      const cmp = aVal > bVal ? 1 : -1;
      return sortDir === "asc" ? cmp : -cmp;
    });
  }, [data, sortKey, sortDir]);

  const toggle = useCallback(
    (key: string) => {
      if (key === sortKey) {
        setSortDir((d) => (d === "asc" ? "desc" : "asc"));
      } else {
        setSortKey(key);
        setSortDir("desc");
      }
    },
    [sortKey],
  );

  return { sorted, sortKey, sortDir, toggle };
}
|
||||
|
||||
/**
 * Clickable `<th>` showing a column label and a sort indicator: an up or
 * down arrow when this column is the active sort key, a dimmed two-way
 * arrow otherwise. Clicking calls `toggle(col)`.
 */
function SortHeader(props: {
  label: string;
  col: string;
  sortKey: string;
  sortDir: "asc" | "desc";
  toggle: (key: string) => void;
  className?: string;
}) {
  const { label, col, sortKey, sortDir, toggle, className } = props;

  let indicator;
  if (col !== sortKey) {
    indicator = <ArrowUpDown className="h-3 w-3 text-muted-foreground/40 shrink-0" />;
  } else if (sortDir === "asc") {
    indicator = <ArrowUp className="h-3.5 w-3.5 text-foreground/80 shrink-0" />;
  } else {
    indicator = <ArrowDown className="h-3.5 w-3.5 text-foreground/80 shrink-0" />;
  }

  return (
    <th
      onClick={() => toggle(col)}
      className={`cursor-pointer select-none ${className ?? ""}`}
    >
      <span className="inline-flex items-center gap-1.5 rounded px-1 -mx-1 py-0.5 hover:bg-muted/40 transition-colors">
        {label}
        {indicator}
      </span>
    </th>
  );
}
|
||||
|
||||
|
||||
|
||||
/**
 * Stacked bar chart of daily input/output token usage with a hover tooltip
 * per day and first / middle / last date labels underneath.
 * Renders nothing when `daily` is empty.
 */
function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
  const { t } = useI18n();
  if (daily.length === 0) return null;

  // Tallest day sets the scale; the floor of 1 avoids dividing by zero.
  const peak = Math.max(
    ...daily.map((d) => d.input_tokens + d.output_tokens),
    1,
  );
  const toPx = (tokens: number) => Math.round((tokens / peak) * CHART_HEIGHT_PX);

  const bars = daily.map((entry) => {
    const { day, input_tokens: inputTokens, output_tokens: outputTokens } = entry;
    const total = inputTokens + outputTokens;
    // NOTE(review): the input bar's 1px minimum keys off `total`, while the
    // output bar keys off its own count — confirm the asymmetry is intended.
    const inputPx = Math.max(toPx(inputTokens), total > 0 ? 1 : 0);
    const outputPx = Math.max(toPx(outputTokens), outputTokens > 0 ? 1 : 0);

    return (
      <div
        key={day}
        className="flex-1 min-w-0 group relative flex flex-col justify-end"
        style={{ height: CHART_HEIGHT_PX }}
      >
        <div className="absolute bottom-full left-1/2 -translate-x-1/2 mb-2 hidden group-hover:block z-10 pointer-events-none">
          <div className="bg-card border border-border px-2.5 py-1.5 text-[10px] text-foreground shadow-lg whitespace-nowrap">
            <div className="font-medium">{formatDate(day)}</div>
            <div>
              {t.analytics.input}: {formatTokens(inputTokens)}
            </div>
            <div>
              {t.analytics.output}: {formatTokens(outputTokens)}
            </div>
            <div>
              {t.analytics.total}: {formatTokens(total)}
            </div>
          </div>
        </div>

        <div className="w-full bg-[#ffe6cb]/70" style={{ height: inputPx }} />

        <div className="w-full bg-emerald-500/70" style={{ height: outputPx }} />
      </div>
    );
  });

  // Axis labels; `daily` is non-empty here because of the early return.
  const firstLabel = formatDate(daily[0].day);
  const middleLabel =
    daily.length > 2 ? formatDate(daily[Math.floor(daily.length / 2)].day) : null;
  const lastLabel = daily.length > 1 ? formatDate(daily[daily.length - 1].day) : "";

  return (
    <Card>
      <CardHeader>
        <div className="flex items-center gap-2">
          <BarChart3 className="h-5 w-5 text-muted-foreground" />
          <CardTitle className="text-base">
            {t.analytics.dailyTokenUsage}
          </CardTitle>
        </div>
        <div className="flex items-center gap-4 text-xs text-muted-foreground">
          <div className="flex items-center gap-1.5">
            <div className="h-2.5 w-2.5 bg-[#ffe6cb]" />
            {t.analytics.input}
          </div>
          <div className="flex items-center gap-1.5">
            <div className="h-2.5 w-2.5 bg-emerald-500" />
            {t.analytics.output}
          </div>
        </div>
      </CardHeader>
      <CardContent>
        <div
          className="flex items-end gap-[2px]"
          style={{ height: CHART_HEIGHT_PX }}
        >
          {bars}
        </div>

        <div className="flex justify-between mt-2 text-[10px] text-muted-foreground">
          <span>{firstLabel}</span>
          {middleLabel !== null && <span>{middleLabel}</span>}
          <span>{lastLabel}</span>
        </div>
      </CardContent>
    </Card>
  );
}
|
||||
|
||||
/**
 * Sortable per-day breakdown table with date, sessions, input-token and
 * output-token columns.
 *
 * Sorting is delegated to useTableSort, seeded with the "day" column and the
 * "desc" direction. Renders nothing when `daily` is empty.
 */
function DailyTable({ daily }: { daily: AnalyticsDailyEntry[] }) {
  const { t } = useI18n();
  const { sorted, sortKey, sortDir, toggle } = useTableSort(daily, "day", "desc");

  if (daily.length === 0) return null;

  // Props shared by every column header.
  const headerProps = { sortKey, sortDir, toggle };

  const bodyRows = sorted.map((entry) => (
    <tr
      key={entry.day}
      className="border-b border-border/50 hover:bg-secondary/20 transition-colors"
    >
      <td className="py-2 pr-4 font-medium">{formatDate(entry.day)}</td>
      <td className="text-right py-2 px-4 text-muted-foreground">
        {entry.sessions}
      </td>
      <td className="text-right py-2 px-4">
        <span className="text-[#ffe6cb]">
          {formatTokens(entry.input_tokens)}
        </span>
      </td>
      <td className="text-right py-2 pl-4">
        <span className="text-emerald-400">
          {formatTokens(entry.output_tokens)}
        </span>
      </td>
    </tr>
  ));

  return (
    <Card>
      <CardHeader>
        <div className="flex items-center gap-2">
          <TrendingUp className="h-5 w-5 text-muted-foreground" />
          <CardTitle className="text-base">
            {t.analytics.dailyBreakdown}
          </CardTitle>
        </div>
      </CardHeader>
      <CardContent>
        <div className="overflow-x-auto">
          <table className="w-full text-sm">
            <thead>
              <tr className="border-b border-border text-muted-foreground text-xs">
                <SortHeader
                  label={t.analytics.date}
                  col="day"
                  {...headerProps}
                  className="text-left py-2 pr-4 font-medium"
                />
                <SortHeader
                  label={t.sessions.title}
                  col="sessions"
                  {...headerProps}
                  className="text-right py-2 px-4 font-medium"
                />
                <SortHeader
                  label={t.analytics.input}
                  col="input_tokens"
                  {...headerProps}
                  className="text-right py-2 px-4 font-medium"
                />
                <SortHeader
                  label={t.analytics.output}
                  col="output_tokens"
                  {...headerProps}
                  className="text-right py-2 pl-4 font-medium"
                />
              </tr>
            </thead>
            <tbody>{bodyRows}</tbody>
          </table>
        </div>
      </CardContent>
    </Card>
  );
}
|
||||
|
||||
/**
 * Sortable per-model breakdown table with model name, sessions, and an
 * "input / output" token cell.
 *
 * Sorting is delegated to useTableSort, seeded with "input_tokens" / "desc".
 * The tokens header sorts on `input_tokens` even though the cell shows both
 * counts. Renders nothing when `models` is empty.
 */
function ModelTable({ models }: { models: AnalyticsModelEntry[] }) {
  const { t } = useI18n();
  const { sorted, sortKey, sortDir, toggle } = useTableSort(models, "input_tokens", "desc");

  if (models.length === 0) return null;

  // Props shared by every column header.
  const headerProps = { sortKey, sortDir, toggle };

  const bodyRows = sorted.map((entry) => (
    <tr
      key={entry.model}
      className="border-b border-border/50 hover:bg-secondary/20 transition-colors"
    >
      <td className="py-2 pr-4">
        <span className="font-mono-ui text-xs">{entry.model}</span>
      </td>
      <td className="text-right py-2 px-4 text-muted-foreground">
        {entry.sessions}
      </td>
      <td className="text-right py-2 pl-4">
        <span className="text-[#ffe6cb]">
          {formatTokens(entry.input_tokens)}
        </span>
        {" / "}
        <span className="text-emerald-400">
          {formatTokens(entry.output_tokens)}
        </span>
      </td>
    </tr>
  ));

  return (
    <Card>
      <CardHeader>
        <div className="flex items-center gap-2">
          <Cpu className="h-5 w-5 text-muted-foreground" />
          <CardTitle className="text-base">
            {t.analytics.perModelBreakdown}
          </CardTitle>
        </div>
      </CardHeader>
      <CardContent>
        <div className="overflow-x-auto">
          <table className="w-full text-sm">
            <thead>
              <tr className="border-b border-border text-muted-foreground text-xs">
                <SortHeader
                  label={t.analytics.model}
                  col="model"
                  {...headerProps}
                  className="text-left py-2 pr-4 font-medium"
                />
                <SortHeader
                  label={t.sessions.title}
                  col="sessions"
                  {...headerProps}
                  className="text-right py-2 px-4 font-medium"
                />
                <SortHeader
                  label={t.analytics.tokens}
                  col="input_tokens"
                  {...headerProps}
                  className="text-right py-2 pl-4 font-medium"
                />
              </tr>
            </thead>
            <tbody>{bodyRows}</tbody>
          </table>
        </div>
      </CardContent>
    </Card>
  );
}
|
||||
|
||||
/**
 * Sortable skill-usage table with skill name, loads, edits, total, and
 * last-used columns.
 *
 * Sorting is delegated to useTableSort, seeded with "total_count" / "desc".
 * A falsy `last_used_at` is shown as an em dash. Renders nothing when
 * `skills` is empty.
 */
function SkillTable({ skills }: { skills: AnalyticsSkillEntry[] }) {
  const { t } = useI18n();
  const { sorted, sortKey, sortDir, toggle } = useTableSort(skills, "total_count", "desc");

  if (skills.length === 0) return null;

  // Props shared by every column header.
  const headerProps = { sortKey, sortDir, toggle };

  const bodyRows = sorted.map((entry) => {
    const lastUsed = entry.last_used_at ? timeAgo(entry.last_used_at) : "—";
    return (
      <tr
        key={entry.skill}
        className="border-b border-border/50 hover:bg-secondary/20 transition-colors"
      >
        <td className="py-2 pr-4">
          <span className="font-mono-ui text-xs">{entry.skill}</span>
        </td>
        <td className="text-right py-2 px-4 text-muted-foreground">
          {entry.view_count}
        </td>
        <td className="text-right py-2 px-4 text-muted-foreground">
          {entry.manage_count}
        </td>
        <td className="text-right py-2 px-4">{entry.total_count}</td>
        <td className="text-right py-2 pl-4 text-muted-foreground">
          {lastUsed}
        </td>
      </tr>
    );
  });

  return (
    <Card>
      <CardHeader>
        <div className="flex items-center gap-2">
          <Brain className="h-5 w-5 text-muted-foreground" />
          <CardTitle className="text-base">{t.analytics.topSkills}</CardTitle>
        </div>
      </CardHeader>
      <CardContent>
        <div className="overflow-x-auto">
          <table className="w-full text-sm">
            <thead>
              <tr className="border-b border-border text-muted-foreground text-xs">
                <SortHeader
                  label={t.analytics.skill}
                  col="skill"
                  {...headerProps}
                  className="text-left py-2 pr-4 font-medium"
                />
                <SortHeader
                  label={t.analytics.loads}
                  col="view_count"
                  {...headerProps}
                  className="text-right py-2 px-4 font-medium"
                />
                <SortHeader
                  label={t.analytics.edits}
                  col="manage_count"
                  {...headerProps}
                  className="text-right py-2 px-4 font-medium"
                />
                <SortHeader
                  label={t.analytics.total}
                  col="total_count"
                  {...headerProps}
                  className="text-right py-2 px-4 font-medium"
                />
                <SortHeader
                  label={t.analytics.lastUsed}
                  col="last_used_at"
                  {...headerProps}
                  className="text-right py-2 pl-4 font-medium"
                />
              </tr>
            </thead>
            <tbody>{bodyRows}</tbody>
          </table>
        </div>
      </CardContent>
    </Card>
  );
}
|
||||
|
||||
/**
 * Analytics page: totals, daily token chart, and the daily / per-model /
 * skill tables for a selectable period.
 *
 * Data is fetched through `api.getAnalytics(days)` on mount and whenever the
 * period changes. The period buttons and refresh button are pushed into the
 * page header via usePageHeader and cleared again on cleanup.
 */
export default function AnalyticsPage() {
  const [days, setDays] = useState(30);
  const [data, setData] = useState<AnalyticsResponse | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const { t } = useI18n();
  const { setAfterTitle, setEnd } = usePageHeader();

  // Fetch analytics for the current period; previous data stays on screen
  // while the request is in flight.
  const load = useCallback(() => {
    setLoading(true);
    setError(null);
    const pending = api.getAnalytics(days);
    pending
      .then((response) => setData(response))
      .catch((err) => setError(String(err)))
      .finally(() => setLoading(false));
  }, [days]);

  // Publish the header widgets: loading spinner + period badge after the
  // title, period buttons + refresh button at the end.
  useLayoutEffect(() => {
    const activePeriod = PERIODS.find((p) => p.days === days);
    const periodLabel = activePeriod?.label ?? `${days}d`;

    const titleSuffix = (
      <span className="flex items-center gap-2">
        {loading && <Spinner className="shrink-0 text-base text-primary" />}
        <Badge tone="secondary" className="text-[10px]">
          {periodLabel}
        </Badge>
      </span>
    );

    const headerControls = (
      <div className="flex w-full min-w-0 flex-wrap items-center justify-end gap-2 sm:gap-2">
        <div className="flex flex-wrap items-center gap-1.5">
          {PERIODS.map((p) => (
            <Button
              key={p.label}
              type="button"
              size="sm"
              outlined={days !== p.days}
              onClick={() => setDays(p.days)}
            >
              {p.label}
            </Button>
          ))}
        </div>
        <Button
          type="button"
          size="sm"
          outlined
          onClick={load}
          disabled={loading}
          prefix={loading ? <Spinner /> : <RefreshCw />}
        >
          {t.common.refresh}
        </Button>
      </div>
    );

    setAfterTitle(titleSuffix);
    setEnd(headerControls);

    return () => {
      setAfterTitle(null);
      setEnd(null);
    };
  }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh]);

  useEffect(() => {
    load();
  }, [load]);

  // Rows for the totals card. API calls fall back to the summed daily
  // session counts when the totals carry no `total_api_calls`.
  const buildStatItems = (snapshot: AnalyticsResponse) => {
    const { totals } = snapshot;
    const perDay = (totals.total_sessions / days).toFixed(1);
    return [
      {
        label: t.analytics.totalTokens,
        value: formatTokens(totals.total_input + totals.total_output),
      },
      {
        label: t.analytics.input,
        value: formatTokens(totals.total_input),
      },
      {
        label: t.analytics.output,
        value: formatTokens(totals.total_output),
      },
      {
        label: t.analytics.totalSessions,
        value: `${totals.total_sessions} (~${perDay}${t.analytics.perDayAvg})`,
      },
      {
        label: t.analytics.apiCalls,
        value: String(
          totals.total_api_calls ??
            snapshot.daily.reduce((sum, d) => sum + d.sessions, 0),
        ),
      },
    ];
  };

  const showInitialSpinner = loading && !data;
  const nothingRecorded =
    !!data &&
    data.daily.length === 0 &&
    data.by_model.length === 0 &&
    data.skills.top_skills.length === 0;

  return (
    <div className="flex flex-col gap-6">
      <PluginSlot name="analytics:top" />
      {showInitialSpinner && (
        <div className="flex items-center justify-center py-24">
          <Spinner className="text-2xl text-primary" />
        </div>
      )}

      {error && (
        <Card>
          <CardContent className="py-6">
            <p className="text-sm text-destructive text-center">{error}</p>
          </CardContent>
        </Card>
      )}

      {data && (
        <>
          <div className="grid gap-6 lg:grid-cols-2">
            <Card>
              <CardContent className="py-6">
                <Stats items={buildStatItems(data)} />
              </CardContent>
            </Card>

            <TokenBarChart daily={data.daily} />
          </div>

          <DailyTable daily={data.daily} />
          <ModelTable models={data.by_model} />
          <SkillTable skills={data.skills.top_skills} />
        </>
      )}

      {nothingRecorded && (
        <Card>
          <CardContent className="py-12">
            <div className="flex flex-col items-center text-muted-foreground">
              <BarChart3 className="h-8 w-8 mb-3 opacity-40" />
              <p className="text-sm font-medium">{t.analytics.noUsageData}</p>
              <p className="text-xs mt-1 text-muted-foreground/60">
                {t.analytics.startSession}
              </p>
            </div>
          </CardContent>
        </Card>
      )}
      <PluginSlot name="analytics:bottom" />
    </div>
  );
}
|
||||
@@ -1,817 +0,0 @@
|
||||
import { useCallback, useEffect, useLayoutEffect, useRef, useState } from "react";
|
||||
import {
|
||||
Brain,
|
||||
ChevronDown,
|
||||
Cpu,
|
||||
DollarSign,
|
||||
Eye,
|
||||
RefreshCw,
|
||||
Settings2,
|
||||
Star,
|
||||
Wrench,
|
||||
Zap,
|
||||
} from "lucide-react";
|
||||
import { api } from "@/lib/api";
|
||||
import type {
|
||||
AuxiliaryModelsResponse,
|
||||
AuxiliaryTaskAssignment,
|
||||
ModelsAnalyticsModelEntry,
|
||||
ModelsAnalyticsResponse,
|
||||
} from "@/lib/api";
|
||||
import { timeAgo } from "@/lib/utils";
|
||||
import { formatTokenCount } from "@/lib/format";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { Stats } from "@nous-research/ui/ui/components/stats";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { PluginSlot } from "@/plugins";
|
||||
import { ModelPickerDialog } from "@/components/ModelPickerDialog";
|
||||
|
||||
/** Selectable periods: button label plus the matching number of days. */
const PERIODS = [
  { days: 7, label: "7d" },
  { days: 30, label: "30d" },
  { days: 90, label: "90d" },
] as const;
|
||||
|
||||
/** One auxiliary task slot: config key, display label, and a short hint. */
type AuxTaskDescriptor = { key: string; label: string; hint: string };

// Keep in sync with _AUX_TASK_SLOTS in hermes_cli/web_server.py.
const AUX_TASKS: readonly AuxTaskDescriptor[] = [
  { key: "vision", label: "Vision", hint: "Image analysis" },
  { key: "web_extract", label: "Web Extract", hint: "Page summarization" },
  { key: "compression", label: "Compression", hint: "Context compaction" },
  { key: "session_search", label: "Session Search", hint: "Recall queries" },
  { key: "skills_hub", label: "Skills Hub", hint: "Skill search" },
  { key: "approval", label: "Approval", hint: "Smart auto-approve" },
  { key: "mcp", label: "MCP", hint: "MCP tool routing" },
  { key: "title_generation", label: "Title Gen", hint: "Session titles" },
  { key: "curator", label: "Curator", hint: "Skill-usage review" },
];
|
||||
|
||||
/**
 * Format a token count compactly: "1.2M" for millions, "3.4K" for thousands,
 * and the plain number below 1000.
 *
 * @param n Token count.
 * @returns The abbreviated string with one decimal place for K/M values.
 */
function formatTokens(n: number): string {
  if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;
  if (n >= 1_000) {
    const thousands = (n / 1_000).toFixed(1);
    // Values just under one million round up to "1000.0"; promote them to
    // the next unit instead of printing "1000.0K".
    return thousands === "1000.0" ? "1.0M" : `${thousands}K`;
  }
  return String(n);
}
|
||||
|
||||
/**
 * Format a dollar amount with precision that grows as the value shrinks:
 * two decimals from $1, three from $0.01, four below that.
 *
 * @param n Cost in dollars.
 * @returns "$0" for zero or negative input; "<$0.0001" for a positive amount
 *   too small to show at four decimals; otherwise the formatted amount.
 */
function formatCost(n: number): string {
  if (n >= 1) return `$${n.toFixed(2)}`;
  if (n >= 0.01) return `$${n.toFixed(3)}`;
  if (n > 0) {
    const fine = n.toFixed(4);
    // A non-zero cost must not read as zero once rounded.
    return fine === "0.0000" ? "<$0.0001" : `$${fine}`;
  }
  return "$0";
}
|
||||
|
||||
/**
 * Drop the leading "vendor/" segment from a model id, e.g. "openrouter/x" → "x".
 * Only the part up to the first slash is removed; ids without a slash, or
 * with the slash at position 0, are returned unchanged.
 */
function shortModelName(model: string): string {
  const firstSlash = model.indexOf("/");
  return firstSlash > 0 ? model.slice(firstSlash + 1) : model;
}
|
||||
|
||||
/**
 * Return the vendor segment before the first slash of a model id,
 * e.g. "anthropic/claude-opus-4.7" → "anthropic".
 * When there is no such segment, returns `fallback`, or "" if that is falsy.
 */
function modelVendor(model: string, fallback?: string): string {
  const firstSlash = model.indexOf("/");
  return firstSlash > 0 ? model.slice(0, firstSlash) : fallback || "";
}
|
||||
|
||||
/**
 * Horizontal bar splitting a model's tokens into cache-read, reasoning,
 * input and output segments, followed by a legend with per-segment counts.
 *
 * Segment widths are each value's share of the four-way total. Zero-valued
 * segments are omitted, and nothing is rendered when the total is 0.
 */
function TokenBar({
  input,
  output,
  cacheRead,
  reasoning,
}: {
  input: number;
  output: number;
  cacheRead: number;
  reasoning: number;
}) {
  const total = input + output + cacheRead + reasoning;
  if (total === 0) return null;

  const segments = [
    { value: cacheRead, color: "bg-blue-400/60", label: "Cache Read" },
    { value: reasoning, color: "bg-purple-400/60", label: "Reasoning" },
    { value: input, color: "bg-[#ffe6cb]/70", label: "Input" },
    { value: output, color: "bg-emerald-500/70", label: "Output" },
  ].filter((s) => s.value > 0);

  // Keys use the unique label, not the array index: the list is filtered, so
  // indices shift when a segment appears or disappears, and React would then
  // reuse (and animate) the wrong element.
  return (
    <div className="space-y-1">
      <div className="flex h-2 w-full overflow-hidden rounded-sm bg-muted/30">
        {segments.map((s) => (
          <div
            key={s.label}
            className={`${s.color} transition-all duration-300`}
            style={{ width: `${(s.value / total) * 100}%` }}
          />
        ))}
      </div>
      <div className="flex flex-wrap gap-x-3 gap-y-0.5 text-[10px] text-muted-foreground">
        {segments.map((s) => (
          <span key={s.label} className="flex items-center gap-1">
            <span className={`inline-block h-1.5 w-1.5 rounded-full ${s.color}`} />
            {s.label} {formatTokens(s.value)}
          </span>
        ))}
      </div>
    </div>
  );
}
|
||||
|
||||
/**
 * Row of small badges for a model's capability flags (tools, vision,
 * reasoning) plus its model family when one is set.
 * Renders nothing when none of the four fields is truthy.
 */
function CapabilityBadges({
  capabilities,
}: {
  capabilities: ModelsAnalyticsModelEntry["capabilities"];
}) {
  const {
    supports_tools: tools,
    supports_vision: vision,
    supports_reasoning: reasoning,
    model_family: family,
  } = capabilities;

  if (!(tools || vision || reasoning || family)) return null;

  return (
    <div className="flex flex-wrap items-center gap-1.5">
      {tools && (
        <span className="inline-flex items-center gap-1 bg-emerald-500/10 px-1.5 py-0.5 text-[10px] font-medium text-emerald-600 dark:text-emerald-400">
          <Wrench className="h-2.5 w-2.5" /> Tools
        </span>
      )}
      {vision && (
        <span className="inline-flex items-center gap-1 bg-blue-500/10 px-1.5 py-0.5 text-[10px] font-medium text-blue-600 dark:text-blue-400">
          <Eye className="h-2.5 w-2.5" /> Vision
        </span>
      )}
      {reasoning && (
        <span className="inline-flex items-center gap-1 bg-purple-500/10 px-1.5 py-0.5 text-[10px] font-medium text-purple-600 dark:text-purple-400">
          <Brain className="h-2.5 w-2.5" /> Reasoning
        </span>
      )}
      {family && (
        <span className="inline-flex items-center bg-muted px-1.5 py-0.5 text-[10px] font-medium text-muted-foreground">
          {family}
        </span>
      )}
    </div>
  );
}
|
||||
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
/* Per-card "Use as" menu */
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * "Use as" dropdown shown on a model card.
 *
 * Lets the user assign the card's provider/model as the main model, as the
 * model for all auxiliary tasks, or as the model for a single auxiliary task,
 * via `api.setModelAssignment`. On success `onAssigned` is called and the
 * menu closes; on failure the error message is shown inside the menu.
 */
function UseAsMenu({
  provider,
  model,
  isMain,
  mainAuxTask,
  onAssigned,
}: {
  provider: string;
  model: string;
  /** True when this card's model+provider match config.yaml's main slot. */
  isMain: boolean;
  /** If this model is assigned to a specific aux task, that task's key. */
  mainAuxTask: string | null;
  onAssigned(): void;
}) {
  const [open, setOpen] = useState(false);
  const [busy, setBusy] = useState(false);
  const [error, setError] = useState<string | null>(null);
  // Root element of THIS menu, used to tell inside clicks from outside ones.
  const rootRef = useRef<HTMLDivElement | null>(null);

  // An empty `task` with scope "auxiliary" is the "All auxiliary tasks" entry.
  const assign = async (
    scope: "main" | "auxiliary",
    task: string,
  ) => {
    if (!provider || !model) {
      setError("Missing provider/model");
      return;
    }
    setBusy(true);
    setError(null);
    try {
      await api.setModelAssignment({ scope, provider, model, task });
      onAssigned();
      setOpen(false);
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e));
    } finally {
      setBusy(false);
    }
  };

  // Close on outside click. The check is scoped to this instance's root:
  // matching any `[data-use-as-menu]` ancestor would treat a click on another
  // card's menu as "inside" and leave both menus open.
  useEffect(() => {
    if (!open) return;
    const onDown = (e: MouseEvent) => {
      const root = rootRef.current;
      const target = e.target;
      if (root && target instanceof Node && root.contains(target)) return;
      setOpen(false);
    };
    window.addEventListener("mousedown", onDown);
    return () => window.removeEventListener("mousedown", onDown);
  }, [open]);

  return (
    <div className="relative" data-use-as-menu ref={rootRef}>
      <Button
        size="sm"
        outlined
        onClick={() => setOpen((v) => !v)}
        disabled={busy}
        className="text-[10px] h-6 px-2"
        prefix={busy ? <Spinner /> : null}
      >
        Use as <ChevronDown className="h-3 w-3" />
      </Button>
      {open && (
        <div className="absolute right-0 top-full mt-1 z-50 min-w-[220px] border border-border bg-card shadow-lg">
          <button
            type="button"
            onClick={() => assign("main", "")}
            disabled={busy}
            className="flex w-full items-center justify-between px-3 py-2 text-xs hover:bg-muted/50 disabled:opacity-40"
          >
            <span className="flex items-center gap-2">
              <Star className="h-3 w-3" />
              Main model
            </span>
            {isMain && (
              <span className="text-[9px] uppercase tracking-wider text-primary/80">
                current
              </span>
            )}
          </button>

          <div className="border-t border-border/50 px-3 py-1.5 text-[9px] uppercase tracking-wider text-muted-foreground">
            Auxiliary task
          </div>

          <button
            type="button"
            onClick={() => assign("auxiliary", "")}
            disabled={busy}
            className="flex w-full items-center justify-between px-3 py-1.5 text-xs hover:bg-muted/50 disabled:opacity-40"
          >
            <span>All auxiliary tasks</span>
          </button>

          {AUX_TASKS.map((t) => (
            <button
              key={t.key}
              type="button"
              onClick={() => assign("auxiliary", t.key)}
              disabled={busy}
              className="flex w-full items-center justify-between px-3 py-1.5 text-xs hover:bg-muted/50 disabled:opacity-40"
            >
              <span>{t.label}</span>
              {mainAuxTask === t.key && (
                <span className="text-[9px] uppercase tracking-wider text-primary/80">
                  current
                </span>
              )}
            </button>
          ))}

          {error && (
            <div className="px-3 py-2 text-[10px] text-destructive border-t border-border/50">
              {error}
            </div>
          )}
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
/* ModelCard */
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * Card summarising one model's usage: rank, name, provider, token split,
 * session / average / API-call counts, cost, tool calls, last use, and
 * capability badges, plus a "Use as" menu for assigning the model.
 *
 * @param entry Usage and capability data for the model.
 * @param rank Number shown as "#rank" next to the name.
 * @param main Current main provider/model; a match highlights the card.
 * @param aux Current auxiliary task assignments; the first one using this
 *   model is shown as an "aux · task" tag.
 * @param onAssigned Forwarded to the "Use as" menu.
 */
function ModelCard({
  entry,
  rank,
  main,
  aux,
  onAssigned,
}: {
  entry: ModelsAnalyticsModelEntry;
  rank: number;
  main: { provider: string; model: string } | null;
  aux: AuxiliaryTaskAssignment[];
  onAssigned(): void;
}) {
  const { t } = useI18n();
  // Fall back to the vendor prefix of the model id when no provider is given.
  const provider = entry.provider || modelVendor(entry.model);
  const totalTokens = entry.input_tokens + entry.output_tokens;
  const caps = entry.capabilities;

  // Normalise the optional limits to numbers so the JSX guards below are real
  // booleans. `value && value > 0 && <jsx>` evaluates to the number 0 when
  // the value is 0, and React renders that as a literal "0".
  const contextWindow = caps.context_window ?? 0;
  const maxOutputTokens = caps.max_output_tokens ?? 0;

  const isMain =
    !!main &&
    main.provider === provider &&
    main.model === entry.model;

  // First aux task currently using this model (if any).
  const mainAuxTask =
    aux.find(
      (a) => a.provider === provider && a.model === entry.model,
    )?.task ?? null;

  return (
    <Card className={isMain ? "ring-1 ring-primary/40" : undefined}>
      <CardHeader className="pb-3">
        <div className="flex items-start justify-between gap-2">
          <div className="min-w-0 flex-1">
            <div className="flex items-center gap-2">
              <span className="text-muted-foreground/50 text-xs font-mono">
                #{rank}
              </span>
              <CardTitle className="text-sm font-mono-ui truncate">
                {shortModelName(entry.model)}
              </CardTitle>
              {isMain && (
                <span className="inline-flex items-center gap-0.5 bg-primary/15 px-1.5 py-0.5 text-[9px] font-medium uppercase tracking-wider text-primary">
                  <Star className="h-2.5 w-2.5" /> main
                </span>
              )}
              {mainAuxTask && (
                <span className="inline-flex items-center bg-purple-500/10 px-1.5 py-0.5 text-[9px] font-medium uppercase tracking-wider text-purple-600 dark:text-purple-400">
                  aux · {mainAuxTask}
                </span>
              )}
            </div>
            <div className="flex items-center gap-2 mt-1">
              {provider && (
                <Badge tone="secondary" className="text-[9px]">
                  {provider}
                </Badge>
              )}
              {contextWindow > 0 && (
                <span className="text-[10px] text-muted-foreground">
                  {formatTokenCount(contextWindow)} ctx
                </span>
              )}
              {maxOutputTokens > 0 && (
                <span className="text-[10px] text-muted-foreground">
                  {formatTokenCount(maxOutputTokens)} out
                </span>
              )}
            </div>
          </div>
          <div className="flex flex-col items-end gap-1 shrink-0">
            <div className="text-right">
              <div className="text-xs font-mono font-semibold">
                {formatTokens(totalTokens)}
              </div>
              <div className="text-[10px] text-muted-foreground">
                {t.models.tokens}
              </div>
            </div>
            <UseAsMenu
              provider={provider}
              model={entry.model}
              isMain={isMain}
              mainAuxTask={mainAuxTask}
              onAssigned={onAssigned}
            />
          </div>
        </div>
      </CardHeader>
      <CardContent className="space-y-3 pt-0">
        <TokenBar
          input={entry.input_tokens}
          output={entry.output_tokens}
          cacheRead={entry.cache_read_tokens}
          reasoning={entry.reasoning_tokens}
        />

        <div className="grid grid-cols-3 gap-2 text-xs">
          <div className="text-center">
            <div className="font-mono font-semibold">{entry.sessions}</div>
            <div className="text-[10px] text-muted-foreground">
              {t.models.sessions}
            </div>
          </div>
          <div className="text-center">
            <div className="font-mono font-semibold">
              {formatTokens(entry.avg_tokens_per_session)}
            </div>
            <div className="text-[10px] text-muted-foreground">
              {t.models.avgPerSession}
            </div>
          </div>
          <div className="text-center">
            <div className="font-mono font-semibold">
              {entry.api_calls > 0 ? formatTokens(entry.api_calls) : "—"}
            </div>
            <div className="text-[10px] text-muted-foreground">
              {t.models.apiCalls}
            </div>
          </div>
        </div>

        <div className="flex items-center justify-between text-[10px] text-muted-foreground border-t border-border/30 pt-2">
          <div className="flex items-center gap-3">
            {entry.estimated_cost > 0 && (
              <span className="flex items-center gap-0.5">
                <DollarSign className="h-2.5 w-2.5" />
                {formatCost(entry.estimated_cost)}
              </span>
            )}
            {entry.tool_calls > 0 && (
              <span className="flex items-center gap-0.5">
                <Zap className="h-2.5 w-2.5" />
                {entry.tool_calls} {t.models.toolCalls}
              </span>
            )}
          </div>
          {entry.last_used_at > 0 && (
            <span>{timeAgo(entry.last_used_at)}</span>
          )}
        </div>

        <CapabilityBadges capabilities={entry.capabilities} />
      </CardContent>
    </Card>
  );
}
|
||||
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
/* Model Settings panel (top of page) */
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
|
||||
/** Slot the model picker is editing: the main model or one auxiliary task. */
type PickerTarget = { kind: "main" } | { kind: "aux"; task: string };
|
||||
|
||||
/**
 * Settings card for the main model and, when expanded, each auxiliary task.
 *
 * Every row has a "Change" button that opens ModelPickerDialog; the chosen
 * provider/model is saved through `api.setModelAssignment` and `onSaved` is
 * called afterwards. "Reset all to auto" asks for confirmation and then sends
 * the "__reset__" task for the auxiliary scope.
 *
 * @param aux Current main and auxiliary assignments, or null if not loaded.
 * @param refreshKey Included in the picker dialog's React key.
 * @param onSaved Called after a successful assignment or reset.
 */
function ModelSettingsPanel({
  aux,
  refreshKey,
  onSaved,
}: {
  aux: AuxiliaryModelsResponse | null;
  refreshKey: number;
  onSaved(): void;
}) {
  const [expanded, setExpanded] = useState(false);
  const [picker, setPicker] = useState<PickerTarget | null>(null);
  const [resetBusy, setResetBusy] = useState(false);
  const [resetError, setResetError] = useState<string | null>(null);

  const mainProv = aux?.main.provider ?? "";
  const mainModel = aux?.main.model ?? "";

  // Errors propagate to the caller (the picker dialog's onApply).
  const applyAssignment = async ({
    scope,
    task,
    provider,
    model,
  }: {
    scope: "main" | "auxiliary";
    task: string;
    provider: string;
    model: string;
  }) => {
    await api.setModelAssignment({ scope, task, provider, model });
    onSaved();
  };

  const resetAllAux = async () => {
    if (!window.confirm("Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set.")) {
      return;
    }
    setResetBusy(true);
    setResetError(null);
    try {
      await api.setModelAssignment({
        scope: "auxiliary",
        task: "__reset__",
        provider: "",
        model: "",
      });
      onSaved();
    } catch (e) {
      // Show the failure; this runs from a click handler, so an uncaught
      // rejection would vanish with no feedback to the user.
      setResetError(e instanceof Error ? e.message : String(e));
    } finally {
      setResetBusy(false);
    }
  };

  return (
    <Card>
      <CardHeader className="pb-3">
        <div className="flex items-center justify-between gap-3 flex-wrap">
          <div className="flex items-center gap-2">
            <Settings2 className="h-4 w-4 text-muted-foreground" />
            <CardTitle className="text-sm">Model Settings</CardTitle>
            <span className="text-[10px] text-muted-foreground">
              applies to new sessions
            </span>
          </div>
          <Button
            size="sm"
            outlined
            onClick={() => setExpanded((v) => !v)}
            className="text-xs"
          >
            {expanded ? "Hide auxiliary" : "Show auxiliary"}
            <ChevronDown
              className={`h-3 w-3 transition-transform ${expanded ? "rotate-180" : ""}`}
            />
          </Button>
        </div>
      </CardHeader>

      <CardContent className="space-y-3 pt-0">
        {/* Main row */}
        <div className="flex items-center justify-between gap-3 bg-muted/20 border border-border/50 px-3 py-2">
          <div className="min-w-0 flex-1">
            <div className="flex items-center gap-2 mb-0.5">
              <Star className="h-3 w-3 text-primary" />
              <span className="text-xs font-medium uppercase tracking-wider">
                Main model
              </span>
            </div>
            {/* The separator is always rendered so an unset side cannot run
                into its neighbour ("(unset)(unset)"). */}
            <div className="text-xs font-mono text-muted-foreground truncate">
              {mainProv || "(unset)"}
              {" · "}
              {mainModel || "(unset)"}
            </div>
          </div>
          <Button
            size="sm"
            onClick={() => setPicker({ kind: "main" })}
            className="text-xs"
          >
            Change
          </Button>
        </div>

        {/* Auxiliary rows */}
        {expanded && (
          <div className="space-y-1 border-t border-border/50 pt-3">
            <div className="flex items-center justify-between pb-1">
              <div className="text-[10px] uppercase tracking-wider text-muted-foreground">
                Auxiliary tasks
              </div>
              <Button
                size="sm"
                outlined
                onClick={resetAllAux}
                disabled={resetBusy}
                className="text-[10px] h-6"
                prefix={resetBusy ? <Spinner /> : null}
              >
                Reset all to auto
              </Button>
            </div>

            {resetError && (
              <div className="text-[10px] text-destructive pb-1">
                {resetError}
              </div>
            )}

            <p className="text-[10px] text-muted-foreground/80 pb-2">
              Auxiliary tasks handle side-jobs like vision, session search, and
              compression. <span className="font-mono">auto</span> means
              "use the main model". Override per-task when you want a
              cheap/fast model for a specific job.
            </p>

            {AUX_TASKS.map((t) => {
              const cur = aux?.tasks.find((a) => a.task === t.key);
              // No entry, an empty provider, or the literal "auto" all count
              // as "use the main model".
              const isAuto =
                !cur || cur.provider === "auto" || !cur.provider;
              return (
                <div
                  key={t.key}
                  className="flex items-center justify-between gap-3 px-3 py-1.5 border border-border/30 bg-card/50 hover:bg-muted/20 transition-colors"
                >
                  <div className="min-w-0 flex-1">
                    <div className="flex items-baseline gap-2">
                      <span className="text-xs font-medium">{t.label}</span>
                      <span className="text-[10px] text-muted-foreground/60">
                        {t.hint}
                      </span>
                    </div>
                    <div className="text-[10px] font-mono text-muted-foreground truncate">
                      {isAuto
                        ? "auto (use main model)"
                        : `${cur?.provider} · ${cur?.model || "(provider default)"}`}
                    </div>
                  </div>
                  <Button
                    size="sm"
                    outlined
                    onClick={() => setPicker({ kind: "aux", task: t.key })}
                    className="text-[10px] h-6"
                  >
                    Change
                  </Button>
                </div>
              );
            })}
          </div>
        )}

        {picker && (
          <ModelPickerDialog
            key={`picker-${refreshKey}`}
            loader={api.getModelOptions}
            alwaysGlobal
            title={
              picker.kind === "main"
                ? "Set Main Model"
                : `Set Auxiliary: ${
                    AUX_TASKS.find((t) => t.key === picker.task)?.label ??
                    picker.task
                  }`
            }
            onApply={async ({ provider, model }) => {
              await applyAssignment({
                scope: picker.kind === "main" ? "main" : "auxiliary",
                task: picker.kind === "main" ? "" : picker.task,
                provider,
                model,
              });
            }}
            onClose={() => setPicker(null)}
          />
        )}
      </CardContent>
    </Card>
  );
}
|
||||
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
/* Page */
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * Models dashboard page.
 *
 * Fetches per-model analytics for the selected period together with the
 * current main/auxiliary model assignments, publishes the period switcher and
 * refresh button into the shared page header, and renders the settings panel,
 * the totals strip, and one card per model.
 */
export default function ModelsPage() {
  const [days, setDays] = useState(30);
  const [analytics, setAnalytics] = useState<ModelsAnalyticsResponse | null>(null);
  const [aux, setAux] = useState<AuxiliaryModelsResponse | null>(null);
  const [loading, setLoading] = useState(true);
  const [loadError, setLoadError] = useState<string | null>(null);
  const [saveCount, setSaveCount] = useState(0);
  const { t } = useI18n();
  const { setAfterTitle, setEnd } = usePageHeader();

  // Fetch analytics and assignments together. A failing assignments request
  // resolves to null so it cannot fail the analytics load.
  const load = useCallback(() => {
    setLoading(true);
    setLoadError(null);
    const analyticsRequest = api.getModelsAnalytics(days);
    const auxRequest = api.getAuxiliaryModels().catch(() => null);
    Promise.all([analyticsRequest, auxRequest])
      .then(([models, auxData]) => {
        setAnalytics(models);
        setAux(auxData);
      })
      .catch((err) => setLoadError(String(err)))
      .finally(() => setLoading(false));
  }, [days]);

  // After any assignment change: re-read the assignments (best effort) and
  // bump the counter handed to the settings panel as `refreshKey`.
  const onAssigned = useCallback(() => {
    api
      .getAuxiliaryModels()
      .then(setAux)
      .catch(() => {});
    setSaveCount((count) => count + 1);
  }, []);

  // Publish header content; cleared again on cleanup.
  useLayoutEffect(() => {
    const activePeriod = PERIODS.find((p) => p.days === days);
    const periodLabel = activePeriod?.label ?? `${days}d`;

    const titleExtras = (
      <span className="flex items-center gap-2">
        {loading && <Spinner className="shrink-0 text-base text-primary" />}
        <Badge tone="secondary" className="text-[10px]">
          {periodLabel}
        </Badge>
      </span>
    );

    const periodButtons = PERIODS.map((p) => (
      <Button
        key={p.label}
        type="button"
        size="sm"
        outlined={days !== p.days}
        onClick={() => setDays(p.days)}
      >
        {p.label}
      </Button>
    ));

    setAfterTitle(titleExtras);
    setEnd(
      <div className="flex w-full min-w-0 flex-wrap items-center justify-end gap-2 sm:gap-2">
        <div className="flex flex-wrap items-center gap-1.5">{periodButtons}</div>
        <Button
          type="button"
          size="sm"
          outlined
          onClick={load}
          disabled={loading}
          prefix={loading ? <Spinner /> : <RefreshCw />}
        >
          {t.common.refresh}
        </Button>
      </div>,
    );

    return () => {
      setAfterTitle(null);
      setEnd(null);
    };
  }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh]);

  // Initial load, and reload whenever `load` changes (i.e. the period changes).
  useEffect(() => {
    load();
  }, [load]);

  // Totals strip plus either the model grid or the empty state.
  const renderAnalytics = (result: ModelsAnalyticsResponse) => {
    const totals = result.totals;
    const statItems = [
      { label: t.models.modelsUsed, value: String(totals.distinct_models) },
      {
        label: t.analytics.totalTokens,
        value: formatTokens(totals.total_input + totals.total_output),
      },
      { label: t.analytics.input, value: formatTokens(totals.total_input) },
      { label: t.analytics.output, value: formatTokens(totals.total_output) },
      { label: t.models.estimatedCost, value: formatCost(totals.total_estimated_cost) },
      { label: t.analytics.totalSessions, value: String(totals.total_sessions) },
    ];

    const modelList =
      result.models.length > 0 ? (
        <div className="grid gap-4 md:grid-cols-2 xl:grid-cols-3">
          {result.models.map((m, i) => (
            <ModelCard
              key={`${m.model}:${m.provider}`}
              entry={m}
              rank={i + 1}
              main={aux?.main ?? null}
              aux={aux?.tasks ?? []}
              onAssigned={onAssigned}
            />
          ))}
        </div>
      ) : (
        <Card>
          <CardContent className="py-12">
            <div className="flex flex-col items-center text-muted-foreground">
              <Cpu className="h-8 w-8 mb-3 opacity-40" />
              <p className="text-sm font-medium">{t.models.noModelsData}</p>
              <p className="text-xs mt-1 text-muted-foreground/60">
                {t.models.startSession}
              </p>
            </div>
          </CardContent>
        </Card>
      );

    return (
      <>
        <Card>
          <CardContent className="py-6">
            <Stats items={statItems} />
          </CardContent>
        </Card>

        {modelList}
      </>
    );
  };

  return (
    <div className="flex flex-col gap-6">
      <PluginSlot name="models:top" />

      <ModelSettingsPanel aux={aux} refreshKey={saveCount} onSaved={onAssigned} />

      {loading && !analytics && (
        <div className="flex items-center justify-center py-24">
          <Spinner className="text-2xl text-primary" />
        </div>
      )}

      {loadError && (
        <Card>
          <CardContent className="py-6">
            <p className="text-sm text-destructive text-center">{loadError}</p>
          </CardContent>
        </Card>
      )}

      {analytics && renderAnalytics(analytics)}

      <PluginSlot name="models:bottom" />
    </div>
  );
}
|
||||
@@ -1,581 +0,0 @@
|
||||
import { useCallback, useEffect, useState } from "react";
|
||||
import { ExternalLink, RefreshCw, Puzzle, Trash2, Eye, EyeOff } from "lucide-react";
|
||||
import type { Translations } from "@/i18n/types";
|
||||
import { Link } from "react-router-dom";
|
||||
import { api } from "@/lib/api";
|
||||
import type { HubAgentPluginRow, PluginsHubResponse } from "@/lib/api";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
|
||||
import { Switch } from "@nous-research/ui/ui/components/switch";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { CommandBlock } from "@nous-research/ui/ui/components/command-block";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { useToast } from "@/hooks/useToast";
|
||||
import { Toast } from "@/components/Toast";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { PluginSlot } from "@/plugins";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
|
||||
/** Select value for built-in memory (`config` uses empty string). Never use `""` — UI Select maps empty value to an empty label. */
|
||||
// Translated in both directions by the page: an empty `memory_provider` coming
// from the hub is shown as this sentinel, and the sentinel is sent back as ""
// when providers are saved.
const MEMORY_PROVIDER_BUILTIN = "__hermes_memory_builtin__";
|
||||
|
||||
/**
 * Plugins hub page.
 *
 * Shows the memory-provider / context-engine selectors, the install form, the
 * list of agent plugins (one `PluginRowCard` each), and any orphan dashboard
 * plugins reported by the hub. A rescan button is published into the shared
 * page header.
 */
export default function PluginsPage() {
  const [hub, setHub] = useState<PluginsHubResponse | null>(null);
  const [loading, setLoading] = useState(true);
  const [installId, setInstallId] = useState("");
  const [installForce, setInstallForce] = useState(false);
  const [installEnable, setInstallEnable] = useState(true);
  const [installBusy, setInstallBusy] = useState(false);
  const [rescanBusy, setRescanBusy] = useState(false);
  const [memorySel, setMemorySel] = useState(MEMORY_PROVIDER_BUILTIN);
  const [contextSel, setContextSel] = useState("compressor");
  const [providerBusy, setProviderBusy] = useState(false);
  // Name of the plugin row with an action in flight, or null when idle.
  const [rowBusy, setRowBusy] = useState<string | null>(null);

  const { toast, showToast } = useToast();
  const { t } = useI18n();
  const { setEnd } = usePageHeader();

  // Fetch the hub and sync both selectors to it. Failures are reported via a
  // toast and never rejected, so callers can simply `await loadHub()`.
  const loadHub = useCallback(() => {
    return api
      .getPluginsHub()
      .then((h) => {
        setHub(h);
        const p = h.providers;
        setMemorySel(p.memory_provider ? p.memory_provider : MEMORY_PROVIDER_BUILTIN);
        setContextSel(p.context_engine || "compressor");
      })
      .catch((e) =>
        // Surface the actual failure; the "loading" label is not an error message.
        showToast(e instanceof Error ? e.message : "Failed to load plugins", "error"),
      );
  }, [showToast]);

  useEffect(() => {
    setLoading(true);
    void loadHub().finally(() => setLoading(false));
  }, [loadHub]);

  // Memoized so the header effect below can list it as a dependency instead of
  // capturing whichever closure happened to exist when the effect last ran.
  const onRescan = useCallback(async () => {
    setRescanBusy(true);
    try {
      const rc = await api.rescanPlugins();
      showToast(`${t.pluginsPage.refreshDashboard} (${rc.count})`, "success");
      await loadHub();
    } catch (e) {
      showToast(e instanceof Error ? e.message : "Rescan failed", "error");
    } finally {
      setRescanBusy(false);
    }
  }, [loadHub, showToast, t.pluginsPage.refreshDashboard]);

  // Publish the rescan button into the page header; cleared on cleanup.
  useEffect(() => {
    setEnd(
      <Button
        ghost
        size="sm"
        className="shrink-0 gap-2"
        disabled={loading || rescanBusy}
        onClick={() => void onRescan()}
      >
        {rescanBusy ? <Spinner /> : <RefreshCw className="h-3.5 w-3.5" />}
        {t.pluginsPage.refreshDashboard}
      </Button>,
    );
    return () => setEnd(null);
  }, [loading, onRescan, rescanBusy, setEnd, t.pluginsPage.refreshDashboard]);

  // Install the plugin named in the form, then report warnings / missing env
  // vars returned by the API and reload the hub.
  const onInstall = async () => {
    const id = installId.trim();
    if (!id) {
      showToast(t.pluginsPage.installHint, "error");
      return;
    }
    setInstallBusy(true);
    try {
      const r = await api.installAgentPlugin({
        identifier: id,
        force: installForce,
        enable: installEnable,
      });
      showToast(`${r.plugin_name ?? id} installed`, "success");
      if ((r.warnings?.length ?? 0) > 0) showToast(r.warnings!.join(" "), "error");
      if ((r.missing_env?.length ?? 0) > 0)
        showToast(`${t.pluginsPage.missingEnvWarn} ${r.missing_env!.join(", ")}`, "error");
      setInstallId("");
      await loadHub();
    } catch (e) {
      showToast(e instanceof Error ? e.message : "Install failed", "error");
    } finally {
      setInstallBusy(false);
    }
  };

  // Persist the selectors; the built-in sentinel is sent as an empty string.
  const onSaveProviders = async () => {
    setProviderBusy(true);
    try {
      await api.savePluginProviders({
        memory_provider: memorySel === MEMORY_PROVIDER_BUILTIN ? "" : memorySel,
        context_engine: contextSel,
      });
      showToast(t.pluginsPage.savedProviders, "success");
      await loadHub();
    } catch (e) {
      showToast(e instanceof Error ? e.message : "Save failed", "error");
    } finally {
      setProviderBusy(false);
    }
  };

  // Run a per-row action with that row marked busy, then reload the hub.
  // Errors from `fn` are toasted rather than rethrown.
  const setRuntimeLoading = async (name: string, fn: () => Promise<unknown>) => {
    setRowBusy(name);
    try {
      await fn();
      await loadHub();
    } catch (e) {
      showToast(e instanceof Error ? e.message : "Failed", "error");
    } finally {
      setRowBusy(null);
    }
  };

  const rows = hub?.plugins ?? [];
  const providers = hub?.providers;
  const orphans = hub?.orphan_dashboard_plugins ?? [];

  return (
    <div className="flex flex-col gap-4">
      <PluginSlot name="plugins:top" />

      <div className={cn("flex w-full flex-col gap-8")}>
        {providers && (
          <Card>
            <CardHeader>
              <CardTitle>{t.pluginsPage.providersHeading}</CardTitle>
              <p className="text-[0.7rem] tracking-[0.08em] text-midground/55 normal-case">
                {t.pluginsPage.providersHint}
              </p>
            </CardHeader>

            <CardContent className="flex flex-col gap-6">
              <div className="grid gap-6 sm:grid-cols-2 max-w-full">
                <div className="grid gap-2 min-w-0">
                  <Label htmlFor="mem-provider">{t.pluginsPage.memoryProviderLabel}</Label>

                  <Select
                    id="mem-provider"
                    className="w-full"
                    value={memorySel}
                    onValueChange={setMemorySel}
                  >
                    <SelectOption value={MEMORY_PROVIDER_BUILTIN}>
                      {`(${t.pluginsPage.providerDefaults})`}
                    </SelectOption>

                    {providers.memory_options.map((o) => (
                      <SelectOption key={o.name} value={o.name}>
                        {o.name}
                      </SelectOption>
                    ))}
                  </Select>
                </div>

                <div className="grid gap-2 min-w-0">
                  <Label htmlFor="ctx-engine">{t.pluginsPage.contextEngineLabel}</Label>

                  <Select
                    id="ctx-engine"
                    className="w-full"
                    value={contextSel}
                    onValueChange={setContextSel}
                  >
                    <SelectOption value="compressor">compressor</SelectOption>

                    {providers.context_options
                      .filter((o) => o.name !== "compressor")
                      .map((o) => (
                        <SelectOption key={o.name} value={o.name}>
                          {o.name}
                        </SelectOption>
                      ))}
                  </Select>
                </div>
              </div>

              <Button
                className="w-fit gap-2"
                size="sm"
                disabled={providerBusy}
                onClick={() => void onSaveProviders()}
              >
                {providerBusy ? <Spinner /> : null}
                {t.pluginsPage.saveProviders}
              </Button>
            </CardContent>
          </Card>
        )}

        <Card>
          <CardHeader>
            <CardTitle>{t.pluginsPage.installHeading}</CardTitle>
            <p className="text-[0.7rem] tracking-[0.08em] text-midground/55 normal-case">
              {t.pluginsPage.installHint}
            </p>
          </CardHeader>

          <CardContent className="flex flex-col gap-4">
            <div className="flex flex-col gap-2">
              <Label htmlFor="install-url">{t.pluginsPage.identifierLabel}</Label>

              <Input
                className="normal-case font-sans lowercase"
                id="install-url"
                placeholder="owner/repo or https://..."
                spellCheck={false}
                value={installId}
                onChange={(e) => setInstallId(e.target.value)}
              />
            </div>

            <div className="flex flex-wrap items-center gap-8">
              <div className="flex items-center gap-3">
                <Switch checked={installForce} onCheckedChange={setInstallForce} />

                <span className="text-[0.7rem] tracking-[0.06em] text-midforeground/85 normal-case">
                  {t.pluginsPage.forceReinstall}
                </span>
              </div>

              <div className="flex items-center gap-3">
                <Switch checked={installEnable} onCheckedChange={setInstallEnable} />

                <span className="text-[0.7rem] tracking-[0.06em] text-midforeground/85 normal-case">
                  {t.pluginsPage.enableAfterInstall}
                </span>
              </div>
            </div>

            <Button
              className="w-fit gap-2"
              size="sm"
              disabled={installBusy}
              onClick={() => void onInstall()}
            >
              {installBusy ? <Spinner /> : <Puzzle className="h-3.5 w-3.5" />}
              {t.pluginsPage.installBtn}
            </Button>

            <p className="text-[0.65rem] tracking-[0.06em] text-midforeground/55 normal-case">
              {t.pluginsPage.rescanHint}
            </p>

            <p className="text-[0.65rem] tracking-[0.06em] text-midforeground/55 normal-case">
              {t.pluginsPage.removeHint}
            </p>
          </CardContent>
        </Card>

        <div className="flex flex-col gap-3">
          <h3 className="font-mondwest text-[0.75rem] tracking-[0.12em] text-midground/85">
            {t.pluginsPage.pluginListHeading}
          </h3>

          {loading ? (
            <div className="flex items-center gap-2 py-8 text-[0.8rem] text-midforeground/65">
              <Spinner />
              <span>{t.common.loading}</span>
            </div>
          ) : rows.length === 0 ? (
            <p className="text-[0.75rem] text-midforeground/55 normal-case">{t.common.noResults}</p>
          ) : (
            <ul className="flex flex-col gap-3">
              {rows.map((row: HubAgentPluginRow) => (
                <li key={row.name}>
                  <PluginRowCard
                    {...{ row, rowBusy, setRuntimeLoading, showToast, t }}
                  />
                </li>
              ))}
            </ul>
          )}
        </div>

        {orphans.length > 0 ? (
          <div className="flex flex-col gap-3 opacity-95">
            <h3 className="font-mondwest text-[0.75rem] tracking-[0.12em] text-midforeground/85">
              {t.pluginsPage.orphanHeading}
            </h3>

            <ul className="flex flex-col gap-2 rounded border border-current/15 p-4">
              {orphans.map((m) => (
                <li className="text-[0.7rem] normal-case opacity-85" key={m.name}>
                  {m.label ?? m.name} — {m.description || m.tab?.path}

                  {/* Only link when a tab exists and is visible: a missing
                      `tab` must not reach `m.tab.path`. */}
                  {m.tab && !m.tab.hidden ? (
                    <Link className="ml-3 inline-flex items-center gap-1 underline" to={m.tab.path}>
                      <ExternalLink className="h-3 w-3 opacity-65" />

                      {t.pluginsPage.openTab}
                    </Link>
                  ) : null}
                </li>
              ))}
            </ul>
          </div>
        ) : null}
      </div>

      <Toast toast={toast} />
      <PluginSlot name="plugins:bottom" />
    </div>
  );
}
|
||||
|
||||
/** Props accepted by `PluginRowCard`. */
interface PluginRowCardProps {
  /** The plugin entry this card renders. */
  row: HubAgentPluginRow;
  /** Name of the row whose action is currently in flight, or null when idle. */
  rowBusy: string | null;
  /** Wrapper the card uses to run each of its actions for the named row. */
  setRuntimeLoading: (name: string, fn: () => Promise<unknown>) => Promise<void>;
  /** Shows a toast with the given message and variant. */
  showToast: (msg: string, variant: "success" | "error") => void;
  /** Active translation table. */
  t: Translations;
}
|
||||
|
||||
/**
 * Card for a single agent plugin: name and status badges, optional
 * description, and the row actions (enable, disable, open tab, git update,
 * sidebar visibility, remove). Every action runs through `setRuntimeLoading`
 * keyed by the row name.
 */
function PluginRowCard({
  row,
  rowBusy,
  setRuntimeLoading,
  showToast,
  t,
}: PluginRowCardProps) {
  const labels = t.pluginsPage;
  const manifest = row.dashboard_manifest;
  const isBusy = rowBusy === row.name;

  // Link target for the plugin's dashboard tab; null when there is no tab or
  // it is hidden. An explicit override wins over the manifest path.
  const visibleTab = manifest?.tab && !manifest.tab.hidden ? manifest.tab : null;
  const tabPath = visibleTab ? visibleTab.override ?? visibleTab.path : null;

  let statusTone: "success" | "destructive" | "outline" = "outline";
  if (row.runtime_status === "enabled") {
    statusTone = "success";
  } else if (row.runtime_status === "disabled") {
    statusTone = "destructive";
  }

  const visibilityLabel = row.user_hidden ? labels.showInSidebar : labels.hideFromSidebar;

  const handleEnable = () => {
    void setRuntimeLoading(row.name, async () => {
      await api.enableAgentPlugin(row.name);
      showToast(labels.enableRuntime, "success");
    });
  };

  const handleDisable = () => {
    void setRuntimeLoading(row.name, async () => {
      await api.disableAgentPlugin(row.name);
      showToast(labels.disableRuntime, "success");
    });
  };

  const handleUpdate = () => {
    void setRuntimeLoading(row.name, async () => {
      await api.updateAgentPlugin(row.name);
      showToast(labels.updateGit, "success");
    });
  };

  const handleToggleVisibility = () => {
    void setRuntimeLoading(row.name, async () => {
      await api.setPluginVisibility(row.name, !row.user_hidden);
    });
  };

  // Removal needs an explicit confirmation; without a `window` nothing happens.
  const handleRemove = () => {
    if (typeof window === "undefined") return;
    if (!window.confirm(labels.removeConfirm)) return;

    void setRuntimeLoading(row.name, async () => {
      await api.removeAgentPlugin(row.name);
      showToast(`${row.name} removed`, "success");
    });
  };

  return (
    <Card className={cn(isBusy ? "opacity-70" : undefined)}>
      <CardContent className="flex flex-col gap-4 px-6 py-4">
        <div className="flex flex-wrap items-start justify-between gap-4">
          <div className="min-w-0 flex-1">
            <div className="flex flex-wrap items-center gap-3">
              <span className="truncate font-semibold">{row.name}</span>

              <Badge tone="outline">
                {labels.sourceBadge}: {row.source}
              </Badge>

              <Badge tone="outline">v{row.version || "—"}</Badge>

              <Badge tone={statusTone}>{row.runtime_status}</Badge>

              {row.auth_required ? (
                <Badge tone="destructive">{labels.authRequired}</Badge>
              ) : null}
            </div>

            {row.description ? (
              <p className="mt-2 max-w-2xl text-[0.7rem] tracking-[0.06em] text-midforeground/75 normal-case">
                {row.description}
              </p>
            ) : null}
          </div>

          <div className="flex flex-wrap items-center gap-2 shrink-0">
            <Button
              disabled={isBusy || row.runtime_status === "enabled"}
              ghost
              size="sm"
              onClick={handleEnable}
            >
              {labels.enableRuntime}
            </Button>

            <Button
              disabled={isBusy || row.runtime_status === "disabled"}
              ghost
              size="sm"
              onClick={handleDisable}
            >
              {labels.disableRuntime}
            </Button>

            {tabPath ? (
              <Link
                className={cn(
                  "inline-flex items-center rounded-none px-3 py-1.5",
                  "border border-current/25 hover:bg-current/10",
                  "font-mondwest text-[0.65rem] tracking-[0.1em] uppercase",
                )}
                to={tabPath}
              >
                {labels.openTab}
              </Link>
            ) : null}

            {row.can_update_git ? (
              <Button disabled={isBusy} ghost size="sm" onClick={handleUpdate}>
                {isBusy ? <Spinner /> : null}
                {labels.updateGit}
              </Button>
            ) : null}

            {row.has_dashboard_manifest ? (
              <Button
                disabled={isBusy}
                ghost
                size="sm"
                title={visibilityLabel}
                onClick={handleToggleVisibility}
              >
                {row.user_hidden ? (
                  <EyeOff className="h-3.5 w-3.5" />
                ) : (
                  <Eye className="h-3.5 w-3.5" />
                )}
                {visibilityLabel}
              </Button>
            ) : null}

            {row.can_remove ? (
              <Button destructive disabled={isBusy} ghost size="sm" onClick={handleRemove}>
                {isBusy ? <Spinner /> : <Trash2 className="h-3.5 w-3.5" />}
              </Button>
            ) : null}
          </div>
        </div>

        {manifest?.slots?.length ? (
          <p className="text-[0.65rem] tracking-[0.05em] text-midforeground/55 normal-case">
            {labels.dashboardSlots}: {manifest.slots.join(", ")}
          </p>
        ) : null}

        {row.auth_required ? (
          <CommandBlock label={labels.authRequiredHint} code={row.auth_command} />
        ) : null}

        {!row.has_dashboard_manifest && !manifest ? (
          <p className="text-[0.65rem] italic text-midforeground/45 normal-case">
            {labels.noDashboardTab}
          </p>
        ) : null}
      </CardContent>
    </Card>
  );
}
|
||||
@@ -1,444 +0,0 @@
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
import { ChevronDown, Pencil, Plus, Terminal, Trash2, Users } from "lucide-react";
|
||||
import { H2 } from "@/components/NouiTypography";
|
||||
import { api } from "@/lib/api";
|
||||
import type { ProfileInfo } from "@/lib/api";
|
||||
import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
|
||||
import { useToast } from "@/hooks/useToast";
|
||||
import { useConfirmDelete } from "@/hooks/useConfirmDelete";
|
||||
import { Toast } from "@/components/Toast";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { useI18n } from "@/i18n";
|
||||
|
||||
// Mirrors hermes_cli/profiles.py::_PROFILE_ID_RE so we can reject obviously
|
||||
// invalid names (uppercase, spaces, …) before round-tripping a doomed POST.
|
||||
// One lowercase letter or digit, followed by up to 63 more characters from
// [a-z0-9_-] (1–64 characters in total).
const PROFILE_NAME_RE = /^[a-z0-9][a-z0-9_-]{0,63}$/;
|
||||
|
||||
export default function ProfilesPage() {
|
||||
const [profiles, setProfiles] = useState<ProfileInfo[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const { toast, showToast } = useToast();
|
||||
const { t } = useI18n();
|
||||
|
||||
// Create form
|
||||
const [newName, setNewName] = useState("");
|
||||
const [cloneFromDefault, setCloneFromDefault] = useState(true);
|
||||
const [creating, setCreating] = useState(false);
|
||||
|
||||
// Inline rename state
|
||||
const [renamingFrom, setRenamingFrom] = useState<string | null>(null);
|
||||
const [renameTo, setRenameTo] = useState("");
|
||||
|
||||
// Inline SOUL editor state
|
||||
const [editingSoulFor, setEditingSoulFor] = useState<string | null>(null);
|
||||
const [soulText, setSoulText] = useState("");
|
||||
const [soulSaving, setSoulSaving] = useState(false);
|
||||
// Tracks the latest SOUL request so out-of-order responses don't overwrite
|
||||
// newer state when the user switches profiles or closes the editor.
|
||||
const activeSoulRequest = useRef<string | null>(null);
|
||||
|
||||
const load = useCallback(() => {
|
||||
api
|
||||
.getProfiles()
|
||||
.then((res) => setProfiles(res.profiles))
|
||||
.catch((e) => showToast(`${t.status.error}: ${e}`, "error"))
|
||||
.finally(() => setLoading(false));
|
||||
}, [showToast, t.status.error]);
|
||||
|
||||
useEffect(() => {
|
||||
load();
|
||||
}, [load]);
|
||||
|
||||
const handleCreate = async () => {
|
||||
const name = newName.trim();
|
||||
if (!name) {
|
||||
showToast(t.profiles.nameRequired, "error");
|
||||
return;
|
||||
}
|
||||
if (!PROFILE_NAME_RE.test(name)) {
|
||||
showToast(`${t.profiles.invalidName}: ${t.profiles.nameRule}`, "error");
|
||||
return;
|
||||
}
|
||||
setCreating(true);
|
||||
try {
|
||||
await api.createProfile({ name, clone_from_default: cloneFromDefault });
|
||||
showToast(`${t.profiles.created}: ${name}`, "success");
|
||||
setNewName("");
|
||||
load();
|
||||
} catch (e) {
|
||||
showToast(`${t.status.error}: ${e}`, "error");
|
||||
} finally {
|
||||
setCreating(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleRenameSubmit = async () => {
|
||||
if (!renamingFrom) return;
|
||||
const target = renameTo.trim();
|
||||
if (!target || target === renamingFrom) {
|
||||
setRenamingFrom(null);
|
||||
setRenameTo("");
|
||||
return;
|
||||
}
|
||||
if (!PROFILE_NAME_RE.test(target)) {
|
||||
showToast(`${t.profiles.invalidName}: ${t.profiles.nameRule}`, "error");
|
||||
return;
|
||||
}
|
||||
try {
|
||||
await api.renameProfile(renamingFrom, target);
|
||||
showToast(`${t.profiles.renamed}: ${renamingFrom} → ${target}`, "success");
|
||||
setRenamingFrom(null);
|
||||
setRenameTo("");
|
||||
load();
|
||||
} catch (e) {
|
||||
showToast(`${t.status.error}: ${e}`, "error");
|
||||
}
|
||||
};
|
||||
|
||||
const openSoulEditor = useCallback(
|
||||
async (name: string) => {
|
||||
if (editingSoulFor === name) {
|
||||
activeSoulRequest.current = null;
|
||||
setEditingSoulFor(null);
|
||||
return;
|
||||
}
|
||||
setEditingSoulFor(name);
|
||||
setSoulText("");
|
||||
activeSoulRequest.current = name;
|
||||
try {
|
||||
const soul = await api.getProfileSoul(name);
|
||||
if (activeSoulRequest.current === name) {
|
||||
setSoulText(soul.content);
|
||||
}
|
||||
} catch (e) {
|
||||
if (activeSoulRequest.current === name) {
|
||||
showToast(`${t.status.error}: ${e}`, "error");
|
||||
}
|
||||
}
|
||||
},
|
||||
[editingSoulFor, showToast, t.status.error],
|
||||
);
|
||||
|
||||
const handleSaveSoul = async (name: string) => {
|
||||
setSoulSaving(true);
|
||||
try {
|
||||
await api.updateProfileSoul(name, soulText);
|
||||
showToast(`${t.profiles.soulSaved}: ${name}`, "success");
|
||||
} catch (e) {
|
||||
showToast(`${t.status.error}: ${e}`, "error");
|
||||
} finally {
|
||||
setSoulSaving(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleCopyTerminalCommand = async (name: string) => {
|
||||
let cmd: string;
|
||||
try {
|
||||
const res = await api.getProfileSetupCommand(name);
|
||||
cmd = res.command;
|
||||
} catch (e) {
|
||||
showToast(`${t.status.error}: ${e}`, "error");
|
||||
return;
|
||||
}
|
||||
try {
|
||||
await navigator.clipboard.writeText(cmd);
|
||||
showToast(`${t.profiles.commandCopied}: ${cmd}`, "success");
|
||||
} catch {
|
||||
showToast(`${t.profiles.copyFailed}: ${cmd}`, "error");
|
||||
}
|
||||
};
|
||||
|
||||
const profileDelete = useConfirmDelete<string>({
|
||||
onDelete: useCallback(
|
||||
async (name: string) => {
|
||||
try {
|
||||
await api.deleteProfile(name);
|
||||
showToast(`${t.profiles.deleted}: ${name}`, "success");
|
||||
load();
|
||||
} catch (e) {
|
||||
showToast(`${t.status.error}: ${e}`, "error");
|
||||
throw e;
|
||||
}
|
||||
},
|
||||
[load, showToast, t.profiles.deleted, t.status.error],
|
||||
),
|
||||
});
|
||||
|
||||
const pendingName = profileDelete.pendingId;
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
<div className="flex items-center justify-center py-24">
|
||||
<div className="h-6 w-6 animate-spin rounded-full border-2 border-primary border-t-transparent" />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
// Profile names, model slugs, and paths are case-sensitive; opt out of
|
||||
// the app shell's global ``uppercase`` so they render as the user typed.
|
||||
// Children that explicitly opt back in (Badges, etc.) keep their casing.
|
||||
<div className="flex flex-col gap-6 normal-case">
|
||||
<Toast toast={toast} />
|
||||
|
||||
<DeleteConfirmDialog
|
||||
open={profileDelete.isOpen}
|
||||
onCancel={profileDelete.cancel}
|
||||
onConfirm={profileDelete.confirm}
|
||||
title={t.profiles.confirmDeleteTitle}
|
||||
description={
|
||||
pendingName
|
||||
? t.profiles.confirmDeleteMessage.replace("{name}", pendingName)
|
||||
: t.profiles.confirmDeleteMessage
|
||||
}
|
||||
loading={profileDelete.isDeleting}
|
||||
/>
|
||||
|
||||
{/* Create new profile */}
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="flex items-center gap-2 text-base">
|
||||
<Plus className="h-4 w-4" />
|
||||
{t.profiles.newProfile}
|
||||
</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<div className="grid gap-4">
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="profile-name">{t.profiles.name}</Label>
|
||||
<Input
|
||||
id="profile-name"
|
||||
placeholder={t.profiles.namePlaceholder}
|
||||
value={newName}
|
||||
onChange={(e) => setNewName(e.target.value)}
|
||||
aria-invalid={
|
||||
newName.trim() !== "" &&
|
||||
!PROFILE_NAME_RE.test(newName.trim())
|
||||
}
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
{t.profiles.nameRule}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<label className="flex items-center gap-2 text-sm cursor-pointer">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={cloneFromDefault}
|
||||
onChange={(e) => setCloneFromDefault(e.target.checked)}
|
||||
/>
|
||||
{t.profiles.cloneFromDefault}
|
||||
</label>
|
||||
|
||||
<div>
|
||||
<Button onClick={handleCreate} disabled={creating}>
|
||||
<Plus className="h-3 w-3" />
|
||||
{creating ? t.common.creating : t.common.create}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
{/* List */}
|
||||
<div className="flex flex-col gap-3">
|
||||
<H2
|
||||
variant="sm"
|
||||
className="flex items-center gap-2 text-muted-foreground"
|
||||
>
|
||||
<Users className="h-4 w-4" />
|
||||
{t.profiles.allProfiles} ({profiles.length})
|
||||
</H2>
|
||||
|
||||
{profiles.length === 0 && (
|
||||
<Card>
|
||||
<CardContent className="py-8 text-center text-sm text-muted-foreground">
|
||||
{t.profiles.noProfiles}
|
||||
</CardContent>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{profiles.map((p) => {
|
||||
const isRenaming = renamingFrom === p.name;
|
||||
const isEditingSoul = editingSoulFor === p.name;
|
||||
return (
|
||||
<Card key={p.name}>
|
||||
<CardContent className="flex items-center gap-4 py-4">
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="flex items-center gap-2 mb-1 flex-wrap">
|
||||
{isRenaming ? (
|
||||
<Input
|
||||
autoFocus
|
||||
value={renameTo}
|
||||
onChange={(e) => setRenameTo(e.target.value)}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter") handleRenameSubmit();
|
||||
if (e.key === "Escape") setRenamingFrom(null);
|
||||
}}
|
||||
aria-invalid={
|
||||
renameTo.trim() !== "" &&
|
||||
renameTo.trim() !== p.name &&
|
||||
!PROFILE_NAME_RE.test(renameTo.trim())
|
||||
}
|
||||
className="max-w-xs"
|
||||
/>
|
||||
) : (
|
||||
<span className="font-medium text-sm truncate">
|
||||
{p.name}
|
||||
</span>
|
||||
)}
|
||||
{p.is_default && (
|
||||
<Badge tone="secondary">{t.profiles.defaultBadge}</Badge>
|
||||
)}
|
||||
{p.has_env && (
|
||||
<Badge tone="outline">{t.profiles.hasEnv}</Badge>
|
||||
)}
|
||||
</div>
|
||||
{isRenaming &&
|
||||
(() => {
|
||||
const trimmed = renameTo.trim();
|
||||
const invalid =
|
||||
trimmed !== "" &&
|
||||
trimmed !== p.name &&
|
||||
!PROFILE_NAME_RE.test(trimmed);
|
||||
return (
|
||||
<p
|
||||
className={
|
||||
"text-xs mb-1 " +
|
||||
(invalid
|
||||
? "text-destructive"
|
||||
: "text-muted-foreground")
|
||||
}
|
||||
>
|
||||
{invalid
|
||||
? `${t.profiles.invalidName}: ${t.profiles.nameRule}`
|
||||
: t.profiles.nameRule}
|
||||
</p>
|
||||
);
|
||||
})()}
|
||||
<div className="flex items-center gap-4 text-xs text-muted-foreground flex-wrap">
|
||||
{p.model && (
|
||||
<span>
|
||||
{t.profiles.model}: {p.model}
|
||||
{p.provider ? ` (${p.provider})` : ""}
|
||||
</span>
|
||||
)}
|
||||
<span>
|
||||
{t.profiles.skills}: {p.skill_count}
|
||||
</span>
|
||||
<span className="font-mono truncate max-w-[28rem]">
|
||||
{p.path}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center gap-1 shrink-0">
|
||||
{isRenaming ? (
|
||||
<>
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={handleRenameSubmit}
|
||||
>
|
||||
{t.common.save}
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
ghost
|
||||
onClick={() => setRenamingFrom(null)}
|
||||
>
|
||||
{t.common.cancel}
|
||||
</Button>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Button
|
||||
ghost
|
||||
size="icon"
|
||||
title={t.profiles.editSoul}
|
||||
aria-label={t.profiles.editSoul}
|
||||
onClick={() => openSoulEditor(p.name)}
|
||||
>
|
||||
{isEditingSoul ? (
|
||||
<ChevronDown className="h-4 w-4" />
|
||||
) : (
|
||||
<span aria-hidden className="text-xs font-bold">
|
||||
S
|
||||
</span>
|
||||
)}
|
||||
</Button>
|
||||
<Button
|
||||
ghost
|
||||
size="icon"
|
||||
title={t.profiles.openInTerminal}
|
||||
aria-label={t.profiles.openInTerminal}
|
||||
onClick={() => handleCopyTerminalCommand(p.name)}
|
||||
>
|
||||
<Terminal className="h-4 w-4" />
|
||||
</Button>
|
||||
{!p.is_default && (
|
||||
<Button
|
||||
ghost
|
||||
size="icon"
|
||||
title={t.profiles.rename}
|
||||
aria-label={t.profiles.rename}
|
||||
onClick={() => {
|
||||
setRenamingFrom(p.name);
|
||||
setRenameTo(p.name);
|
||||
}}
|
||||
>
|
||||
<Pencil className="h-4 w-4" />
|
||||
</Button>
|
||||
)}
|
||||
{!p.is_default && (
|
||||
<Button
|
||||
ghost
|
||||
size="icon"
|
||||
title={t.common.delete}
|
||||
aria-label={t.common.delete}
|
||||
onClick={() => profileDelete.requestDelete(p.name)}
|
||||
>
|
||||
<Trash2 className="h-4 w-4 text-destructive" />
|
||||
</Button>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</CardContent>
|
||||
|
||||
{isEditingSoul && (
|
||||
<div className="border-t border-border px-4 pb-4 pt-3 flex flex-col gap-2">
|
||||
<Label
|
||||
htmlFor={`soul-editor-${p.name}`}
|
||||
className="flex items-center gap-2 text-xs uppercase tracking-wider text-muted-foreground"
|
||||
>
|
||||
{t.profiles.soulSection}
|
||||
</Label>
|
||||
<textarea
|
||||
id={`soul-editor-${p.name}`}
|
||||
className="flex min-h-[180px] w-full border border-input bg-transparent px-3 py-2 text-sm font-mono shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring"
|
||||
placeholder={t.profiles.soulPlaceholder}
|
||||
value={soulText}
|
||||
onChange={(e) => setSoulText(e.target.value)}
|
||||
/>
|
||||
<div>
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={() => handleSaveSoul(p.name)}
|
||||
disabled={soulSaving}
|
||||
>
|
||||
{soulSaving ? t.common.saving : t.profiles.saveSoul}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</Card>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
{
|
||||
"arrowParens": "avoid",
|
||||
"bracketSpacing": true,
|
||||
"endOfLine": "auto",
|
||||
"printWidth": 120,
|
||||
"semi": false,
|
||||
"singleQuote": true,
|
||||
"tabWidth": 2,
|
||||
"trailingComma": "none",
|
||||
"useTabs": false
|
||||
}
|
||||
@@ -1,195 +0,0 @@
|
||||
# Hermes Desktop
|
||||
|
||||
Native Electron shell for Hermes. It packages the desktop renderer, a bundled Hermes source payload, and installer targets for macOS and Windows.
|
||||
|
||||
## Setup
|
||||
|
||||
Install workspace dependencies from the repo root so `apps/desktop`, `apps/dashboard`, and `apps/shared` stay linked:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
Use the normal Hermes Python environment for local runs:
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
python -m pip install -e .
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
cd apps/desktop
|
||||
npm run dev
|
||||
```
|
||||
|
||||
`npm run dev` starts Vite on `127.0.0.1:5174`, launches Electron, and lets Electron boot the Hermes dashboard backend on an open port in `9120-9199`. This path is for UI iteration and may still show Electron/dev identities in OS prompts.
|
||||
|
||||
Useful overrides:
|
||||
|
||||
```bash
|
||||
HERMES_DESKTOP_HERMES_ROOT=/path/to/hermes-agent npm run dev
|
||||
HERMES_DESKTOP_PYTHON=/path/to/python npm run dev
|
||||
HERMES_DESKTOP_CWD=/path/to/project npm run dev
|
||||
HERMES_DESKTOP_IGNORE_EXISTING=1 npm run dev
|
||||
```
|
||||
|
||||
`HERMES_DESKTOP_IGNORE_EXISTING=1` skips any `hermes` CLI already on `PATH`, which is useful when testing the bundled/runtime bootstrap path.
|
||||
|
||||
## Dashboard Dev
|
||||
|
||||
Run the Python dashboard backend with embedded chat enabled:
|
||||
|
||||
```bash
|
||||
hermes dashboard --tui --no-open
|
||||
```
|
||||
|
||||
For dashboard HMR, start Vite in another terminal:
|
||||
|
||||
```bash
|
||||
cd apps/dashboard
|
||||
npm run dev
|
||||
```
|
||||
|
||||
Open the Vite URL. The dev server proxies `/api`, `/api/pty`, and plugin assets to `http://127.0.0.1:9119` and fetches the live dashboard HTML so the ephemeral session token matches the running backend.
|
||||
|
||||
## Build
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
npm run pack # unpacked app at release/mac-<arch>/Hermes.app
|
||||
npm run dist:mac # macOS DMG + zip
|
||||
npm run dist:mac:dmg # DMG only
|
||||
npm run dist:mac:zip # zip only
|
||||
npm run dist:win # NSIS + MSI
|
||||
```
|
||||
|
||||
Before packaging, `stage:hermes` copies the Python Hermes payload into `build/hermes-agent`. Electron Builder then ships it as `Contents/Resources/hermes-agent`.
|
||||
|
||||
## Automated Releases
|
||||
|
||||
Desktop installers are published by [`.github/workflows/desktop-release.yml`](../../.github/workflows/desktop-release.yml) with two channels:
|
||||
|
||||
- **Stable:** runs on published GitHub releases and uploads signed artifacts to that release tag.
|
||||
- **Nightly:** runs on `main` pushes and updates the rolling `desktop-nightly` prerelease.
|
||||
|
||||
The workflow injects a channel-aware desktop version at build time:
|
||||
|
||||
- stable: derived from the release tag (for example `v2026.5.5` -> `2026.5.5`)
|
||||
- nightly: `0.0.0-nightly.YYYYMMDD.<sha>`
|
||||
|
||||
Artifact names include channel, platform, and architecture:
|
||||
|
||||
```text
|
||||
Hermes-<version>-<channel>-<platform>-<arch>.<ext>
|
||||
```
|
||||
|
||||
Each run also publishes `SHA256SUMS-<platform>.txt` so installers can be verified.
|
||||
|
||||
### Stable release gates
|
||||
|
||||
Stable builds fail fast if signing credentials are missing:
|
||||
|
||||
- macOS signing + notarization: `CSC_LINK`, `CSC_KEY_PASSWORD`, `APPLE_API_KEY`, `APPLE_API_KEY_ID`, `APPLE_API_ISSUER`
|
||||
- Windows signing: `WIN_CSC_LINK`, `WIN_CSC_KEY_PASSWORD`
|
||||
|
||||
Stable macOS builds also validate stapling and Gatekeeper assessment in CI before upload.
|
||||
|
||||
## Icons
|
||||
|
||||
Desktop icons live in `assets/`:
|
||||
|
||||
- `assets/icon.icns`
|
||||
- `assets/icon.ico`
|
||||
- `assets/icon.png`
|
||||
|
||||
The builder config points at `assets/icon`. Replace these files directly if the app icon changes.
|
||||
|
||||
## Testing Install Paths
|
||||
|
||||
Use the package-local test scripts from this directory:
|
||||
|
||||
```bash
|
||||
npm run test:desktop:all
|
||||
npm run test:desktop:existing
|
||||
npm run test:desktop:fresh
|
||||
npm run test:desktop:dmg
|
||||
```
|
||||
|
||||
`test:desktop:existing` builds the packaged app and opens it normally. It should use an existing `hermes` CLI if one is on `PATH`, preserving the user’s real `~/.hermes` config.
|
||||
|
||||
`test:desktop:fresh` builds the packaged app, deletes the bundled desktop runtime, sets `HERMES_DESKTOP_IGNORE_EXISTING=1`, and launches the app through the bundled payload path. Use this repeatedly to test first-run bootstrap.
|
||||
|
||||
`test:desktop:dmg` builds and opens the DMG.
|
||||
|
||||
For fast reruns without rebuilding:
|
||||
|
||||
```bash
|
||||
HERMES_DESKTOP_SKIP_BUILD=1 npm run test:desktop:fresh
|
||||
HERMES_DESKTOP_SKIP_BUILD=1 npm run test:desktop:existing
|
||||
HERMES_DESKTOP_SKIP_BUILD=1 npm run test:desktop:dmg
|
||||
```
|
||||
|
||||
## Installing Locally
|
||||
|
||||
```bash
|
||||
npm run dist:mac:dmg
|
||||
open release/Hermes-0.0.0-mac-arm64.dmg
|
||||
```
|
||||
|
||||
Drag `Hermes` to Applications. If testing repeated installs, replace the existing app.
|
||||
|
||||
## Runtime Bootstrap
|
||||
|
||||
Packaged desktop startup resolves Hermes in this order:
|
||||
|
||||
1. `HERMES_DESKTOP_HERMES_ROOT`
|
||||
2. existing `hermes` CLI, unless `HERMES_DESKTOP_IGNORE_EXISTING=1`
|
||||
3. bundled `Contents/Resources/hermes-agent`
|
||||
4. dev repo source
|
||||
5. installed `python -m hermes_cli.main`
|
||||
|
||||
When the bundled path is used, Electron creates or reuses the following directory (macOS path shown):
|
||||
|
||||
```text
|
||||
~/Library/Application Support/Hermes/hermes-runtime
|
||||
```
|
||||
|
||||
The runtime is validated before use. If required dashboard imports are missing, it reinstalls the desktop runtime dependencies and retries.
|
||||
|
||||
## Debugging
|
||||
|
||||
Desktop boot logs are written to the following file (macOS path shown):
|
||||
|
||||
```text
|
||||
~/Library/Application Support/Hermes/desktop.log
|
||||
```
|
||||
|
||||
If the UI reports `Desktop boot failed`, check that log first. It includes the backend command output and recent Python traceback context.
|
||||
|
||||
To reset bundled runtime state:
|
||||
|
||||
```bash
|
||||
rm -rf "$HOME/Library/Application Support/Hermes/hermes-runtime"
|
||||
```
|
||||
|
||||
To reset stale macOS microphone permission prompts:
|
||||
|
||||
```bash
|
||||
tccutil reset Microphone com.github.Electron
|
||||
tccutil reset Microphone com.nousresearch.hermes
|
||||
```
|
||||
|
||||
## Verification
|
||||
|
||||
Run before handing off installer changes:
|
||||
|
||||
```bash
|
||||
npm run fix
|
||||
npm run type-check
|
||||
npm run lint
|
||||
npm run test:desktop:all
|
||||
```
|
||||
|
||||
Current lint may report existing warnings, but it should exit with no errors.
|
||||
|
Before Width: | Height: | Size: 78 KiB |
|
Before Width: | Height: | Size: 674 KiB |
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"$schema": "https://ui.shadcn.com/schema.json",
|
||||
"style": "new-york",
|
||||
"rsc": false,
|
||||
"tsx": true,
|
||||
"tailwind": {
|
||||
"config": "",
|
||||
"css": "src/styles.css",
|
||||
"baseColor": "neutral",
|
||||
"cssVariables": true,
|
||||
"prefix": ""
|
||||
},
|
||||
"aliases": {
|
||||
"components": "@/components",
|
||||
"utils": "@/lib/utils",
|
||||
"ui": "@/components/ui",
|
||||
"lib": "@/lib",
|
||||
"hooks": "@/hooks"
|
||||
},
|
||||
"iconLibrary": "lucide"
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>com.apple.security.cs.allow-jit</key>
|
||||
<true/>
|
||||
<key>com.apple.security.cs.allow-unsigned-executable-memory</key>
|
||||
<true/>
|
||||
<key>com.apple.security.cs.disable-library-validation</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</plist>
|
||||
@@ -1,14 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>com.apple.security.cs.allow-jit</key>
|
||||
<true/>
|
||||
<key>com.apple.security.cs.allow-unsigned-executable-memory</key>
|
||||
<true/>
|
||||
<key>com.apple.security.cs.disable-library-validation</key>
|
||||
<true/>
|
||||
<key>com.apple.security.device.audio-input</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</plist>
|
||||
@@ -1,44 +0,0 @@
|
||||
const { contextBridge, ipcRenderer, webUtils } = require('electron')
|
||||
|
||||
// Preload bridge: publishes a `hermesDesktop` object into the renderer's main world
// through contextBridge. Every member is a thin wrapper around an IPC call on a
// `hermes:*` channel; the handlers for those channels are not defined in this file.
contextBridge.exposeInMainWorld('hermesDesktop', {
|
||||
// Request/response members: each returns whatever ipcRenderer.invoke returns
// for the named channel, forwarding its argument(s) unchanged.
getConnection: () => ipcRenderer.invoke('hermes:connection'),
|
||||
api: request => ipcRenderer.invoke('hermes:api', request),
|
||||
notify: payload => ipcRenderer.invoke('hermes:notify', payload),
|
||||
requestMicrophoneAccess: () => ipcRenderer.invoke('hermes:requestMicrophoneAccess'),
|
||||
readFileDataUrl: filePath => ipcRenderer.invoke('hermes:readFileDataUrl', filePath),
|
||||
readFileText: filePath => ipcRenderer.invoke('hermes:readFileText', filePath),
|
||||
selectPaths: options => ipcRenderer.invoke('hermes:selectPaths', options),
|
||||
writeClipboard: text => ipcRenderer.invoke('hermes:writeClipboard', text),
|
||||
saveImageFromUrl: url => ipcRenderer.invoke('hermes:saveImageFromUrl', url),
|
||||
// The two positional arguments are packed into a single { data, ext } payload.
saveImageBuffer: (data, ext) => ipcRenderer.invoke('hermes:saveImageBuffer', { data, ext }),
|
||||
saveClipboardImage: () => ipcRenderer.invoke('hermes:saveClipboardImage'),
|
||||
// Synchronous, no IPC: asks webUtils for the path behind `file` and returns ''
// when that call yields a falsy value or throws.
getPathForFile: file => {
|
||||
try {
|
||||
return webUtils.getPathForFile(file) || ''
|
||||
} catch {
|
||||
return ''
|
||||
}
|
||||
},
|
||||
// Unlike saveImageBuffer, both arguments are forwarded positionally.
normalizePreviewTarget: (target, baseDir) => ipcRenderer.invoke('hermes:normalizePreviewTarget', target, baseDir),
|
||||
watchPreviewFile: url => ipcRenderer.invoke('hermes:watchPreviewFile', url),
|
||||
stopPreviewFileWatch: id => ipcRenderer.invoke('hermes:stopPreviewFileWatch', id),
|
||||
// One-way message (send rather than invoke), so no result is returned to the caller.
// The flag is coerced with Boolean() before it is sent.
setPreviewShortcutActive: active => ipcRenderer.send('hermes:previewShortcutActive', Boolean(active)),
|
||||
openExternal: url => ipcRenderer.invoke('hermes:openExternal', url),
|
||||
readDir: dirPath => ipcRenderer.invoke('hermes:fs:readDir', dirPath),
|
||||
gitRoot: startPath => ipcRenderer.invoke('hermes:fs:gitRoot', startPath),
|
||||
// Subscription members: each registers a listener with ipcRenderer.on and returns a
// function that removes that same listener. The IPC event object is never handed
// to `callback`.
// Here `callback` is invoked with no arguments.
onClosePreviewRequested: callback => {
|
||||
const listener = () => callback()
|
||||
ipcRenderer.on('hermes:close-preview-requested', listener)
|
||||
return () => ipcRenderer.removeListener('hermes:close-preview-requested', listener)
|
||||
},
|
||||
// `callback` receives only the message payload.
onPreviewFileChanged: callback => {
|
||||
const listener = (_event, payload) => callback(payload)
|
||||
ipcRenderer.on('hermes:preview-file-changed', listener)
|
||||
return () => ipcRenderer.removeListener('hermes:preview-file-changed', listener)
|
||||
},
|
||||
// `callback` receives only the message payload.
onBackendExit: callback => {
|
||||
const listener = (_event, payload) => callback(payload)
|
||||
ipcRenderer.on('hermes:backend-exit', listener)
|
||||
return () => ipcRenderer.removeListener('hermes:backend-exit', listener)
|
||||
}
|
||||
})
|
||||
@@ -1,122 +0,0 @@
|
||||
import js from '@eslint/js'
|
||||
import typescriptEslint from '@typescript-eslint/eslint-plugin'
|
||||
import typescriptParser from '@typescript-eslint/parser'
|
||||
import perfectionist from 'eslint-plugin-perfectionist'
|
||||
import reactPlugin from 'eslint-plugin-react'
|
||||
import reactCompiler from 'eslint-plugin-react-compiler'
|
||||
import hooksPlugin from 'eslint-plugin-react-hooks'
|
||||
import unusedImports from 'eslint-plugin-unused-imports'
|
||||
import globals from 'globals'
|
||||
|
||||
// Rule stub: declares an empty options schema and a create() that returns no AST
// visitors, so the rule never reports anything.
const noopRule = {
|
||||
meta: { schema: [], type: 'problem' },
|
||||
create: () => ({})
|
||||
}
|
||||
|
||||
// Local plugin object (registered under the 'custom-rules' namespace in the exported
// config) whose five rules are all the no-op stub.
// NOTE(review): presumably this exists so that `custom-rules/*` references elsewhere
// (e.g. in disable comments) resolve to a known rule — confirm the intent.
const customRules = {
|
||||
rules: {
|
||||
'no-process-cwd': noopRule,
|
||||
'no-process-env-top-level': noopRule,
|
||||
'no-sync-fs': noopRule,
|
||||
'no-top-level-dynamic-import': noopRule,
|
||||
'no-top-level-side-effects': noopRule
|
||||
}
|
||||
}
|
||||
|
||||
// ESLint flat-config array. Entries: an ignores-only object, the @eslint/js recommended
// preset, a TypeScript/TSX block, a CommonJS block for .js/.cjs, and a final
// ignores-only object.
export default [
|
||||
// Ignores-only entry: node_modules, dist output, and any .js file under src/.
{
|
||||
ignores: ['**/node_modules/**', '**/dist/**', 'src/**/*.js']
|
||||
},
|
||||
js.configs.recommended,
|
||||
// TypeScript / TSX sources: parsed with the typescript-eslint parser as ES modules
// with JSX enabled; both browser and Node globals are declared.
{
|
||||
files: ['**/*.{ts,tsx}'],
|
||||
languageOptions: {
|
||||
globals: {
|
||||
...globals.browser,
|
||||
...globals.node
|
||||
},
|
||||
parser: typescriptParser,
|
||||
parserOptions: {
|
||||
ecmaFeatures: { jsx: true },
|
||||
ecmaVersion: 'latest',
|
||||
sourceType: 'module'
|
||||
}
|
||||
},
|
||||
plugins: {
|
||||
'@typescript-eslint': typescriptEslint,
|
||||
'custom-rules': customRules,
|
||||
perfectionist,
|
||||
react: reactPlugin,
|
||||
'react-compiler': reactCompiler,
|
||||
'react-hooks': hooksPlugin,
|
||||
'unused-imports': unusedImports
|
||||
},
|
||||
rules: {
|
||||
'@typescript-eslint/consistent-type-imports': ['error', { prefer: 'type-imports' }],
|
||||
// Both unused-variable rules (core and typescript-eslint) are switched off in this
// block; of the unused-* checks only unused-imports/no-unused-imports is enabled.
'@typescript-eslint/no-unused-vars': 'off',
|
||||
curly: ['error', 'all'],
|
||||
'no-fallthrough': ['error', { allowEmptyCase: true }],
|
||||
'no-undef': 'off',
|
||||
'no-unused-vars': 'off',
|
||||
// Blank-line layout rules. The leading 1 is the numeric severity for 'warn'.
'padding-line-between-statements': [
|
||||
1,
|
||||
{
|
||||
blankLine: 'always',
|
||||
next: [
|
||||
'block-like',
|
||||
'block',
|
||||
'return',
|
||||
'if',
|
||||
'class',
|
||||
'continue',
|
||||
'debugger',
|
||||
'break',
|
||||
'multiline-const',
|
||||
'multiline-let'
|
||||
],
|
||||
prev: '*'
|
||||
},
|
||||
{
|
||||
blankLine: 'always',
|
||||
next: '*',
|
||||
prev: ['case', 'default', 'multiline-const', 'multiline-let', 'multiline-block-like']
|
||||
},
|
||||
{ blankLine: 'never', next: ['block', 'block-like'], prev: ['case', 'default'] },
|
||||
{ blankLine: 'always', next: ['block', 'block-like'], prev: ['block', 'block-like'] },
|
||||
{ blankLine: 'always', next: ['empty'], prev: 'export' },
|
||||
{ blankLine: 'never', next: 'iife', prev: ['block', 'block-like', 'empty'] }
|
||||
],
|
||||
// Sorting rules: all use ascending natural order and are enforced as errors.
'perfectionist/sort-exports': ['error', { order: 'asc', type: 'natural' }],
|
||||
'perfectionist/sort-imports': [
|
||||
'error',
|
||||
{
|
||||
groups: ['side-effect', 'builtin', 'external', 'internal', 'parent', 'sibling', 'index'],
|
||||
order: 'asc',
|
||||
type: 'natural'
|
||||
}
|
||||
],
|
||||
'perfectionist/sort-jsx-props': ['error', { order: 'asc', type: 'natural' }],
|
||||
'perfectionist/sort-named-exports': ['error', { order: 'asc', type: 'natural' }],
|
||||
'perfectionist/sort-named-imports': ['error', { order: 'asc', type: 'natural' }],
|
||||
'react-compiler/react-compiler': 'warn',
|
||||
'react-hooks/exhaustive-deps': 'warn',
|
||||
'react-hooks/rules-of-hooks': 'error',
|
||||
'unused-imports/no-unused-imports': 'error'
|
||||
},
|
||||
settings: {
|
||||
react: { version: 'detect' }
|
||||
}
|
||||
},
|
||||
// Plain .js / .cjs files: linted as CommonJS with Node globals only.
{
|
||||
files: ['**/*.js', '**/*.cjs'],
|
||||
ignores: ['**/node_modules/**', '**/dist/**'],
|
||||
languageOptions: {
|
||||
ecmaVersion: 'latest',
|
||||
globals: { ...globals.node },
|
||||
sourceType: 'commonjs'
|
||||
}
|
||||
},
|
||||
// Second ignores-only entry: skips files matching *.config.*.
// NOTE(review): the pattern has no directory part, so it presumably only matches
// config files next to this one — confirm that nested config files need no ignore.
{
|
||||
ignores: ['*.config.*']
|
||||
}
|
||||
]
|
||||
@@ -1,14 +0,0 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<link rel="icon" href="/apple-touch-icon.png" />
|
||||
<link rel="apple-touch-icon" href="/apple-touch-icon.png" />
|
||||
<title>Hermes</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
17571
apps/desktop/package-lock.json
generated
@@ -1,183 +0,0 @@
|
||||
{
|
||||
"name": "hermes",
|
||||
"productName": "Hermes",
|
||||
"private": true,
|
||||
"version": "0.0.0",
|
||||
"description": "Native desktop shell for Hermes Agent.",
|
||||
"author": "Nous Research",
|
||||
"type": "module",
|
||||
"main": "electron/main.cjs",
|
||||
"scripts": {
|
||||
"dev": "concurrently -k \"npm:dev:renderer\" \"npm:dev:electron\"",
|
||||
"dev:renderer": "vite --host 127.0.0.1 --port 5174",
|
||||
"dev:electron": "wait-on http://127.0.0.1:5174 && cross-env HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron .",
|
||||
"profile:main": "wait-on http://127.0.0.1:5174 && cross-env HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron --inspect=9229 .",
|
||||
"profile:main:cpu": "wait-on http://127.0.0.1:5174 && cross-env NODE_OPTIONS=--cpu-prof HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron .",
|
||||
"start": "npm run build && electron .",
|
||||
"build": "tsc -b && vite build",
|
||||
"stage:hermes": "node scripts/stage-hermes-payload.mjs",
|
||||
"pack": "npm run build && npm run stage:hermes && electron-builder --dir",
|
||||
"dist": "npm run build && npm run stage:hermes && electron-builder",
|
||||
"dist:mac": "npm run build && npm run stage:hermes && electron-builder --mac",
|
||||
"dist:mac:dmg": "npm run build && npm run stage:hermes && electron-builder --mac dmg",
|
||||
"dist:mac:zip": "npm run build && npm run stage:hermes && electron-builder --mac zip",
|
||||
"dist:win": "npm run build && npm run stage:hermes && electron-builder --win",
|
||||
"dist:win:msi": "npm run build && npm run stage:hermes && electron-builder --win msi",
|
||||
"dist:win:nsis": "npm run build && npm run stage:hermes && electron-builder --win nsis",
|
||||
"test:desktop": "node scripts/test-desktop.mjs",
|
||||
"test:desktop:all": "node scripts/test-desktop.mjs all",
|
||||
"test:desktop:dmg": "node scripts/test-desktop.mjs dmg",
|
||||
"test:desktop:existing": "node scripts/test-desktop.mjs existing",
|
||||
"test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
|
||||
"type-check": "tsc -b",
|
||||
"lint": "eslint src/ electron/",
|
||||
"lint:fix": "eslint src/ electron/ --fix",
|
||||
"fmt": "prettier --write 'src/**/*.{ts,tsx}' 'electron/**/*.{js,cjs}' 'vite.config.ts'",
|
||||
"fix": "npm run lint:fix && npm run fmt",
|
||||
"test:ui": "vitest run --environment jsdom",
|
||||
"preview": "vite preview --host 127.0.0.1 --port 4174"
|
||||
},
|
||||
"dependencies": {
|
||||
"@assistant-ui/react": "^0.12.28",
|
||||
"@assistant-ui/react-streamdown": "^0.1.11",
|
||||
"@audiowave/react": "^0.6.2",
|
||||
"@chenglou/pretext": "^0.0.6",
|
||||
"@hermes/shared": "file:../shared",
|
||||
"@nanostores/react": "^1.1.0",
|
||||
"@radix-ui/react-slot": "^1.2.4",
|
||||
"@streamdown/code": "^1.1.1",
|
||||
"@tabler/icons-react": "^3.41.1",
|
||||
"@tailwindcss/vite": "^4.2.4",
|
||||
"@tanstack/react-query": "^5.100.6",
|
||||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"cmdk": "^1.1.1",
|
||||
"ignore": "^7.0.5",
|
||||
"liquid-glass-react": "^1.1.1",
|
||||
"lucide-react": "^0.577.0",
|
||||
"nanostores": "^1.3.0",
|
||||
"radix-ui": "^1.4.3",
|
||||
"react": "^19.2.5",
|
||||
"react-arborist": "^3.5.0",
|
||||
"react-dom": "^19.2.5",
|
||||
"react-router-dom": "^7.14.2",
|
||||
"react-shiki": "^0.9.3",
|
||||
"shiki": "^4.0.2",
|
||||
"streamdown": "^2.5.0",
|
||||
"tailwind-merge": "^3.5.0",
|
||||
"tailwindcss": "^4.2.4",
|
||||
"tw-shimmer": "^0.4.11",
|
||||
"unicode-animations": "^1.0.3",
|
||||
"use-stick-to-bottom": "^1.1.4",
|
||||
"web-haptics": "^0.0.6"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/js": "^9.39.4",
|
||||
"@testing-library/react": "^16.3.2",
|
||||
"@types/node": "^24.12.2",
|
||||
"@types/react": "^19.2.14",
|
||||
"@types/react-dom": "^19.2.3",
|
||||
"@typescript-eslint/eslint-plugin": "^8.59.1",
|
||||
"@typescript-eslint/parser": "^8.59.1",
|
||||
"@vitejs/plugin-react": "^6.0.1",
|
||||
"concurrently": "^9.2.1",
|
||||
"cross-env": "^10.1.0",
|
||||
"electron": "^40.9.3",
|
||||
"electron-builder": "^26.8.1",
|
||||
"eslint": "^9.39.4",
|
||||
"eslint-plugin-perfectionist": "^5.9.0",
|
||||
"eslint-plugin-react": "^7.37.5",
|
||||
"eslint-plugin-react-compiler": "^19.1.0-rc.2",
|
||||
"eslint-plugin-react-hooks": "^7.1.1",
|
||||
"eslint-plugin-unused-imports": "^4.4.1",
|
||||
"globals": "^16.5.0",
|
||||
"jsdom": "^29.1.1",
|
||||
"prettier": "^3.8.3",
|
||||
"typescript": "^6.0.3",
|
||||
"vite": "^8.0.10",
|
||||
"vitest": "^4.1.5",
|
||||
"wait-on": "^9.0.5"
|
||||
},
|
||||
"build": {
|
||||
"appId": "com.nousresearch.hermes",
|
||||
"productName": "Hermes",
|
||||
"executableName": "Hermes",
|
||||
"artifactName": "Hermes-${version}-${os}-${arch}.${ext}",
|
||||
"icon": "assets/icon",
|
||||
"directories": {
|
||||
"output": "release"
|
||||
},
|
||||
"files": [
|
||||
"dist/**",
|
||||
"assets/**",
|
||||
"electron/**",
|
||||
"public/**",
|
||||
"package.json"
|
||||
],
|
||||
"extraResources": [
|
||||
{
|
||||
"from": "build/hermes-agent",
|
||||
"to": "hermes-agent"
|
||||
}
|
||||
],
|
||||
"asar": true,
|
||||
"afterSign": "scripts/notarize.cjs",
|
||||
"asarUnpack": [
|
||||
"**/*.node"
|
||||
],
|
||||
"mac": {
|
||||
"category": "public.app-category.developer-tools",
|
||||
"entitlements": "electron/entitlements.mac.plist",
|
||||
"entitlementsInherit": "electron/entitlements.mac.inherit.plist",
|
||||
"extendInfo": {
|
||||
"CFBundleDisplayName": "Hermes",
|
||||
"CFBundleExecutable": "Hermes",
|
||||
"CFBundleName": "Hermes",
|
||||
"NSAudioCaptureUsageDescription": "Hermes uses audio capture for voice conversations.",
|
||||
"NSMicrophoneUsageDescription": "Hermes uses the microphone for voice input and voice conversations."
|
||||
},
|
||||
"gatekeeperAssess": false,
|
||||
"hardenedRuntime": true,
|
||||
"target": [
|
||||
"dmg",
|
||||
"zip"
|
||||
]
|
||||
},
|
||||
"dmg": {
|
||||
"title": "Install Hermes",
|
||||
"backgroundColor": "#f5f5f7",
|
||||
"iconSize": 96,
|
||||
"window": {
|
||||
"width": 560,
|
||||
"height": 360
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"x": 160,
|
||||
"y": 170,
|
||||
"type": "file"
|
||||
},
|
||||
{
|
||||
"x": 400,
|
||||
"y": 170,
|
||||
"type": "link",
|
||||
"path": "/Applications"
|
||||
}
|
||||
]
|
||||
},
|
||||
"win": {
|
||||
"legalTrademarks": "Hermes",
|
||||
"target": [
|
||||
"nsis",
|
||||
"msi"
|
||||
]
|
||||
},
|
||||
"nsis": {
|
||||
"oneClick": false,
|
||||
"allowToChangeInstallationDirectory": true,
|
||||
"perMachine": false,
|
||||
"shortcutName": "Hermes",
|
||||
"uninstallDisplayName": "Hermes"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
||||
<title>Preview Demo</title>
|
||||
<style>
|
||||
:root { color-scheme: dark; }
|
||||
html, body { height: 100%; margin: 0; }
|
||||
body {
|
||||
font-family: ui-sans-serif, system-ui, -apple-system, "SF Pro Text", sans-serif;
|
||||
background: radial-gradient(1200px 600px at 20% 10%, #4a1a33 0%, #2a1020 40%, #120810 100%);
|
||||
color: #ffe4f1;
|
||||
display: grid;
|
||||
place-items: center;
|
||||
padding: 2rem;
|
||||
}
|
||||
.card {
|
||||
max-width: 520px;
|
||||
padding: 2rem 2.25rem;
|
||||
border: 1px solid rgba(255,182,214,0.18);
|
||||
border-radius: 14px;
|
||||
background: rgba(28,14,22,0.6);
|
||||
backdrop-filter: blur(6px);
|
||||
box-shadow: 0 10px 40px rgba(0,0,0,0.4);
|
||||
}
|
||||
h1 {
|
||||
margin: 0 0 0.5rem;
|
||||
font-size: 1.5rem;
|
||||
letter-spacing: 0.01em;
|
||||
}
|
||||
p { margin: 0.35rem 0; opacity: 0.85; line-height: 1.5; }
|
||||
.dot {
|
||||
display: inline-block; width: 10px; height: 10px; border-radius: 50%;
|
||||
background: #ff6fb5; margin-right: 0.5rem;
|
||||
box-shadow: 0 0 12px #ff6fb5;
|
||||
animation: pulse 1.6s ease-in-out infinite;
|
||||
}
|
||||
@keyframes pulse {
|
||||
0%,100% { transform: scale(1); opacity: 1; }
|
||||
50% { transform: scale(1.4); opacity: 0.6; }
|
||||
}
|
||||
code {
|
||||
background: rgba(255,182,214,0.10);
|
||||
padding: 0.1rem 0.35rem;
|
||||
border-radius: 4px;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
.time { font-variant-numeric: tabular-nums; opacity: 0.7; font-size: 0.85rem; margin-top: 1rem; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="card">
|
||||
<h1><span class="dot"></span>preview-demo.html</h1>
|
||||
<p>Tiny standalone HTML artifact — no server, no build step.</p>
|
||||
<p>Open directly in a browser via <code>file://</code>.</p>
|
||||
<p class="time" id="t"></p>
|
||||
</div>
|
||||
<script>
|
||||
const el = document.getElementById('t');
|
||||
const tick = () => { el.textContent = new Date().toLocaleString(); };
|
||||
tick(); setInterval(tick, 1000);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
Before Width: | Height: | Size: 1.1 MiB |
|
Before Width: | Height: | Size: 132 KiB |
|
Before Width: | Height: | Size: 115 KiB |
|
Before Width: | Height: | Size: 109 KiB |
|
Before Width: | Height: | Size: 76 KiB |