mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-19 16:40:38 +08:00
Compare commits
231 Commits
feat/provi
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6880ee3088 | ||
|
|
6278bca055 | ||
|
|
12dfcfdf73 | ||
|
|
a64fc490fe | ||
|
|
245b95b094 | ||
|
|
c02192ff6a | ||
|
|
cfb55de5ea | ||
|
|
e4452ffb8a | ||
|
|
620fd59b8e | ||
|
|
28d887ca18 | ||
|
|
d06104a9ee | ||
|
|
8568988b01 | ||
|
|
e48554a3e0 | ||
|
|
62c71ebd8f | ||
|
|
1d2e359678 | ||
|
|
9ae98e07a7 | ||
|
|
c10aa5dc9c | ||
|
|
0403f41f9c | ||
|
|
2c6e266e88 | ||
|
|
36851fa576 | ||
|
|
d2c53ff558 | ||
|
|
03d9a95a74 | ||
|
|
cbe44bf890 | ||
|
|
769f307042 | ||
|
|
f1ff8459db | ||
|
|
3ead2bdd0d | ||
|
|
2944b3c394 | ||
|
|
f8d8f045fa | ||
|
|
1ea2b27993 | ||
|
|
c23c370b8b | ||
|
|
49596b70cb | ||
|
|
3042045540 | ||
|
|
9705e7944a | ||
|
|
4ed2f33994 | ||
|
|
0879d5cc8f | ||
|
|
81ff916e57 | ||
|
|
73cd8622f9 | ||
|
|
d573e7c9e1 | ||
|
|
81eaedd0f5 | ||
|
|
51ee5b2c94 | ||
|
|
07e785d60a | ||
|
|
0fa7d6f660 | ||
|
|
38c8a9c10f | ||
|
|
2fa16ec2d2 | ||
|
|
fd12e59e6b | ||
|
|
c37fdec2d9 | ||
|
|
4af16b5da2 | ||
|
|
5ffbfed193 | ||
|
|
58ad6942d9 | ||
|
|
25c590ccd0 | ||
|
|
f1254c8eaf | ||
|
|
41babc702e | ||
|
|
3c3ac19d9c | ||
|
|
2e5c04aaf7 | ||
|
|
b39ec2fc37 | ||
|
|
646cd1b43e | ||
|
|
ef4b897a18 | ||
|
|
92e6d8c858 | ||
|
|
2f7c4858a7 | ||
|
|
8abdab24c9 | ||
|
|
67316fdc94 | ||
|
|
feff283e17 | ||
|
|
a14bae6bcc | ||
|
|
2a5d51c16e | ||
|
|
426f321e84 | ||
|
|
ca28c630c7 | ||
|
|
9b2f7d2cb1 | ||
|
|
0787ea07c8 | ||
|
|
f4fbaa6cda | ||
|
|
e1d10ec1ed | ||
|
|
860cf5133a | ||
|
|
f6fac60e66 | ||
|
|
b4356135f2 | ||
|
|
40ed67ccfe | ||
|
|
0b54a33a34 | ||
|
|
737007e335 | ||
|
|
6777916068 | ||
|
|
481f0417d8 | ||
|
|
085fc5d001 | ||
|
|
edcde6b26f | ||
|
|
5494c1e9b6 | ||
|
|
832d5967f8 | ||
|
|
eaa0984210 | ||
|
|
6752da9a77 | ||
|
|
1153b42b24 | ||
|
|
c661634537 | ||
|
|
9c3c5da356 | ||
|
|
0ddd21c74e | ||
|
|
4440d77bf3 | ||
|
|
3769dff5dd | ||
|
|
c276b017ad | ||
|
|
fcf6cb3d73 | ||
|
|
c5eb64b9f7 | ||
|
|
6f89e17a33 | ||
|
|
4b7a186003 | ||
|
|
020e59d3cf | ||
|
|
86f2946fbe | ||
|
|
9ba4615db2 | ||
|
|
c1f9eb0ec4 | ||
|
|
acc8916ac7 | ||
|
|
237fa7d29c | ||
|
|
6b03874d07 | ||
|
|
6e20c1992f | ||
|
|
3db9b3e616 | ||
|
|
c28a02b49d | ||
|
|
e74577ed0f | ||
|
|
5feec8b4cf | ||
|
|
c803661cec | ||
|
|
c366466d70 | ||
|
|
ab1a42fcea | ||
|
|
a3cdd8c39d | ||
|
|
d0133fd8e4 | ||
|
|
259e78e175 | ||
|
|
b0999c82f3 | ||
|
|
3db49381d6 | ||
|
|
53d9b98305 | ||
|
|
e9a2ce6585 | ||
|
|
6092be413d | ||
|
|
f8098c6b6f | ||
|
|
016bce1a09 | ||
|
|
fd674af47f | ||
|
|
7fbb8c9df5 | ||
|
|
ee41aa0c1a | ||
|
|
5a00bd1518 | ||
|
|
22b6942fc2 | ||
|
|
394cdf48ce | ||
|
|
c835448908 | ||
|
|
33b1d14459 | ||
|
|
b07b7894ec | ||
|
|
0c1e8d0ba9 | ||
|
|
1e6c4ba74f | ||
|
|
4de4a4e2da | ||
|
|
49d7481dfb | ||
|
|
aa6f77596b | ||
|
|
eaddeaf2e6 | ||
|
|
cc9f37e77c | ||
|
|
3d21666b2f | ||
|
|
c2fa302e93 | ||
|
|
c6c8abbadb | ||
|
|
f10f7114f9 | ||
|
|
0138282f97 | ||
|
|
992b922389 | ||
|
|
cbfa018aef | ||
|
|
06d907dc4e | ||
|
|
dc86d48a3e | ||
|
|
674e8b098a | ||
|
|
f80381c456 | ||
|
|
49ef0241eb | ||
|
|
f4100f4394 | ||
|
|
fc1119ca66 | ||
|
|
7bbffceb9c | ||
|
|
e48803daec | ||
|
|
4d39a603d1 | ||
|
|
435c706e8e | ||
|
|
f9c8d95e43 | ||
|
|
b70a4e7533 | ||
|
|
3d37869295 | ||
|
|
a7ec334448 | ||
|
|
9901141d64 | ||
|
|
ca6542f602 | ||
|
|
99a20f8d9a | ||
|
|
fbaad3031a | ||
|
|
f48b312037 | ||
|
|
3ac6551ba3 | ||
|
|
b82eca2beb | ||
|
|
547a014e7e | ||
|
|
00c045b43f | ||
|
|
f3b813c027 | ||
|
|
91e9459e10 | ||
|
|
eddbf291a4 | ||
|
|
a30b40c73a | ||
|
|
813a4e3838 | ||
|
|
5e01a5dbf1 | ||
|
|
36ae958473 | ||
|
|
bd7fc8fdcd | ||
|
|
b7f0c9cd52 | ||
|
|
d1ecebcbfd | ||
|
|
db44af004c | ||
|
|
1b962f001e | ||
|
|
9137b86a52 | ||
|
|
7493de7fc3 | ||
|
|
1039e90b5e | ||
|
|
8ed16a7a0c | ||
|
|
3f80bcac56 | ||
|
|
01ae9b853e | ||
|
|
db01910e3a | ||
|
|
b7fa62c530 | ||
|
|
f4ef70f6fc | ||
|
|
bbc842d31e | ||
|
|
28f92478e3 | ||
|
|
e76e7b5073 | ||
|
|
8fa562a399 | ||
|
|
44e5848e74 | ||
|
|
6ebc449915 | ||
|
|
f6a42b1acf | ||
|
|
b2da39a0f3 | ||
|
|
17251e865b | ||
|
|
658ac1d866 | ||
|
|
c2c55c4443 | ||
|
|
e3adbb5ae9 | ||
|
|
e236bb87eb | ||
|
|
cf52370253 | ||
|
|
d7668aaff5 | ||
|
|
5094325140 | ||
|
|
166d2457b2 | ||
|
|
315fdae5f8 | ||
|
|
2c2ca0443b | ||
|
|
3c76dac4fd | ||
|
|
2b972472ce | ||
|
|
a893d77d8d | ||
|
|
94523764fc | ||
|
|
70f53f36cb | ||
|
|
7f76cf7195 | ||
|
|
b0e25c9cb2 | ||
|
|
2dace37f6b | ||
|
|
c6e99ab375 | ||
|
|
80e4b8985e | ||
|
|
7d938cc5c9 | ||
|
|
cb6b4127e7 | ||
|
|
a68ac0c49a | ||
|
|
16fc717091 | ||
|
|
925b0d1ab5 | ||
|
|
e65d74bc6f | ||
|
|
4858942c55 | ||
|
|
ee7b8a4672 | ||
|
|
630b43892d | ||
|
|
dd0e3e0a05 | ||
|
|
a0ec4f52b9 | ||
|
|
0e81d2fb71 | ||
|
|
989d5d0cb7 | ||
|
|
c92a95a130 |
@@ -102,6 +102,3 @@ acp_registry/
|
||||
.gitattributes
|
||||
.hadolint.yaml
|
||||
.mailmap
|
||||
|
||||
# Top-level LICENSE (not matched by *.md); not needed inside the container
|
||||
LICENSE
|
||||
|
||||
BIN
.github/pr-screenshots/45449/billing-confirm.png
vendored
Normal file
BIN
.github/pr-screenshots/45449/billing-confirm.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 138 KiB |
BIN
.github/pr-screenshots/45449/billing-overview.png
vendored
Normal file
BIN
.github/pr-screenshots/45449/billing-overview.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 148 KiB |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -5,6 +5,7 @@
|
||||
*.pyc*
|
||||
__pycache__/
|
||||
.venv/
|
||||
.venv
|
||||
.vscode/
|
||||
.env
|
||||
.env.local
|
||||
|
||||
57
Dockerfile
57
Dockerfile
@@ -9,8 +9,11 @@ FROM ghcr.io/astral-sh/uv:0.11.6-python3.13-trixie@sha256:b3c543b6c4f23a5f2df228
|
||||
FROM node:22-bookworm-slim@sha256:7af03b14a13c8cdd38e45058fd957bf00a72bbe17feac43b1c15a689c029c732 AS node_source
|
||||
FROM debian:13.4
|
||||
|
||||
# Disable Python stdout buffering to ensure logs are printed immediately
|
||||
# Disable Python stdout buffering to ensure logs are printed immediately.
|
||||
# Do not write .pyc files at runtime: /opt/hermes is immutable in the
|
||||
# published container and writable state belongs under /opt/data.
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
|
||||
# Store Playwright browsers outside the volume mount so the build-time
|
||||
# install survives the /opt/data volume overlay at runtime.
|
||||
@@ -186,36 +189,38 @@ RUN cd web && npm run build && \
|
||||
|
||||
# ---------- Source code ----------
|
||||
# .dockerignore excludes node_modules, so the installs above survive.
|
||||
COPY --chown=hermes:hermes . .
|
||||
COPY . .
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
# The venv needs to be traversable too.
|
||||
# node_modules trees additionally need to be writable by the hermes user
|
||||
# so the runtime `npm install` triggered by _tui_need_npm_install() in
|
||||
# hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
|
||||
# only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
|
||||
# not chowned here.
|
||||
# /opt/hermes/gateway is runtime-writable: Python may create __pycache__ and
|
||||
# gateway state artifacts beneath the package after services drop privileges,
|
||||
# especially when the hermes UID is remapped at boot (#27221).
|
||||
# The .venv MUST remain hermes-writable so lazy_deps.py can install
|
||||
# remaining optional platform packages and future pin bumps at first use.
|
||||
# Without this, `uv pip install` fails with EACCES and adapters silently
|
||||
# fail to load. See tools/lazy_deps.py.
|
||||
# Link hermes-agent itself (editable). Deps are already installed in the
|
||||
# cached layer above; `--no-deps` makes this a fast egg-link creation with no
|
||||
# resolution or downloads.
|
||||
RUN uv pip install --no-cache-dir --no-deps -e "."
|
||||
|
||||
# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
|
||||
# instances must not be able to self-edit the installed source or venv; user
|
||||
# data, skills, plugins, config, logs, and dashboard uploads live under
|
||||
# /opt/data instead. Root can still repair the image during build/boot, but
|
||||
# supervised Hermes processes drop to the non-root hermes user.
|
||||
USER root
|
||||
RUN chmod -R a+rX /opt/hermes && \
|
||||
chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/gateway /opt/hermes/node_modules
|
||||
RUN mkdir -p /opt/hermes/bin && \
|
||||
cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
|
||||
chmod 0755 /opt/hermes/bin/hermes && \
|
||||
printf 'docker\n' > /opt/hermes/.install_method && \
|
||||
chown -R root:root /opt/hermes && \
|
||||
chmod -R a+rX /opt/hermes && \
|
||||
chmod -R a-w /opt/hermes
|
||||
# The ``.install_method`` stamp is baked next to the running code (the install
|
||||
# tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
|
||||
# volume that is commonly bind-mounted from the host and even shared with a
|
||||
# host-side Desktop/CLI install; stamping it at boot used to clobber that
|
||||
# host install's marker and wrongly block its ``hermes update``. A code-scoped
|
||||
# stamp is read first by detect_install_method() and is immune to the share.
|
||||
# Start as root so the s6-overlay stage2 hook can usermod/groupmod and chown
|
||||
# the data volume. Each supervised service then drops to the hermes user via
|
||||
# `s6-setuidgid hermes` in its run script. If HERMES_UID is unset, services
|
||||
# run as the default hermes user (UID 10000).
|
||||
|
||||
# ---------- Link hermes-agent itself (editable) ----------
|
||||
# Deps are already installed in the cached layer above; `--no-deps` makes
|
||||
# this a fast (~1s) egg-link creation with no resolution or downloads.
|
||||
RUN uv pip install --no-cache-dir --no-deps -e "."
|
||||
|
||||
# ---------- Bake build-time git revision ----------
|
||||
# .dockerignore excludes .git, so `git rev-parse HEAD` from inside the
|
||||
# container always returns nothing — meaning `hermes dump` reports
|
||||
@@ -235,8 +240,9 @@ RUN uv pip install --no-cache-dir --no-deps -e "."
|
||||
# every published image has it.
|
||||
ARG HERMES_GIT_SHA=
|
||||
RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
|
||||
chmod u+w /opt/hermes && \
|
||||
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
|
||||
chown hermes:hermes /opt/hermes/.hermes_build_sha; \
|
||||
chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
|
||||
fi
|
||||
|
||||
# ---------- s6-overlay service wiring ----------
|
||||
@@ -282,6 +288,8 @@ ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
|
||||
# check. (A separate launcher hardening is tracked independently.)
|
||||
ENV HERMES_TUI_DIR=/opt/hermes/ui-tui
|
||||
ENV HERMES_HOME=/opt/data
|
||||
ENV HERMES_WRITE_SAFE_ROOT=/opt/data
|
||||
ENV HERMES_DISABLE_LAZY_INSTALLS=1
|
||||
|
||||
# `docker exec` privilege-drop shim. When operators run
|
||||
# `docker exec <c> hermes ...` they default to root, and any file the
|
||||
@@ -294,7 +302,6 @@ ENV HERMES_HOME=/opt/data
|
||||
# Recursion is impossible because the shim exec's the venv binary by
|
||||
# absolute path (/opt/hermes/.venv/bin/hermes). See the shim source for
|
||||
# the opt-out env var (HERMES_DOCKER_EXEC_AS_ROOT=1).
|
||||
COPY --chmod=0755 docker/hermes-exec-shim.sh /opt/hermes/bin/hermes
|
||||
|
||||
# Pre-s6 entrypoint.sh did `source .venv/bin/activate` which exported
|
||||
# the venv bin onto PATH; Architecture B's main-wrapper.sh does the
|
||||
|
||||
@@ -27,7 +27,7 @@ import threading
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||
|
||||
from agent.context_compressor import ContextCompressor
|
||||
@@ -195,6 +195,7 @@ def init_agent(
|
||||
status_callback: callable = None,
|
||||
notice_callback: callable = None,
|
||||
notice_clear_callback: callable = None,
|
||||
event_callback: Optional[Callable[[str, dict], None]] = None,
|
||||
max_tokens: int = None,
|
||||
reasoning_config: Dict[str, Any] = None,
|
||||
service_tier: str = None,
|
||||
@@ -426,6 +427,7 @@ def init_agent(
|
||||
agent.status_callback = status_callback
|
||||
agent.notice_callback = notice_callback
|
||||
agent.notice_clear_callback = notice_clear_callback
|
||||
agent.event_callback = event_callback
|
||||
agent.tool_gen_callback = tool_gen_callback
|
||||
|
||||
|
||||
@@ -597,6 +599,7 @@ def init_agent(
|
||||
# (e.g. CLI voice mode adds a temporary prefix for the live call only).
|
||||
agent._persist_user_message_idx = None
|
||||
agent._persist_user_message_override = None
|
||||
agent._persist_user_message_timestamp = None
|
||||
|
||||
# Cache anthropic image-to-text fallbacks per image payload/URL so a
|
||||
# single tool loop does not repeatedly re-run auxiliary vision on the
|
||||
@@ -1153,6 +1156,9 @@ def init_agent(
|
||||
"hermes_home": str(get_hermes_home()),
|
||||
"agent_context": "primary",
|
||||
}
|
||||
if _init_kwargs["platform"] == "cli":
|
||||
_init_kwargs["warning_callback"] = agent._emit_warning
|
||||
_init_kwargs["status_callback"] = agent._emit_status
|
||||
# Thread session title for memory provider scoping
|
||||
# (e.g. honcho uses this to derive chat-scoped session keys)
|
||||
if agent._session_db:
|
||||
@@ -1221,12 +1227,35 @@ def init_agent(
|
||||
# targets.
|
||||
agent._task_completion_guidance = bool(_agent_section.get("task_completion_guidance", True))
|
||||
|
||||
# Universal parallel-tool-call guidance toggle. Default True. Separate
|
||||
# flag from task_completion_guidance because a user may want one but not
|
||||
# the other. Steers the model to batch independent tool calls into a
|
||||
# single turn; the runtime already executes such batches concurrently.
|
||||
agent._parallel_tool_call_guidance = bool(_agent_section.get("parallel_tool_call_guidance", True))
|
||||
|
||||
# Local Python toolchain probe toggle. Default True. When False,
|
||||
# the probe is skipped entirely (no subprocess calls, no system-prompt
|
||||
# line). Useful for users on exotic setups where the probe heuristics
|
||||
# are noisy.
|
||||
agent._environment_probe = bool(_agent_section.get("environment_probe", True))
|
||||
|
||||
# Per-platform prompt-hint overrides (config.yaml → platform_hints).
|
||||
# Lets an enterprise admin append to or replace Hermes' built-in
|
||||
# platform hint for a single messaging platform (e.g. WhatsApp) without
|
||||
# affecting other platforms. Shape:
|
||||
# platform_hints:
|
||||
# whatsapp:
|
||||
# append: "When tabular output would help, invoke the ... skill."
|
||||
# slack:
|
||||
# replace: "Custom Slack hint that fully replaces the default."
|
||||
# Stored verbatim; resolution happens in agent/system_prompt.py against
|
||||
# the active platform. Invalid shapes are ignored defensively so a bad
|
||||
# config entry can never break prompt assembly.
|
||||
_platform_hints_cfg = _agent_cfg.get("platform_hints", {})
|
||||
if not isinstance(_platform_hints_cfg, dict):
|
||||
_platform_hints_cfg = {}
|
||||
agent._platform_hint_overrides = _platform_hints_cfg
|
||||
|
||||
# App-level API retry count (wraps each model API call). Default 3,
|
||||
# overridable via agent.api_max_retries in config.yaml. See #11616.
|
||||
try:
|
||||
|
||||
@@ -1839,28 +1839,42 @@ def invoke_tool(agent, function_name: str, function_args: dict, effective_task_i
|
||||
elif function_name == "memory":
|
||||
def _execute(next_args: dict) -> Any:
|
||||
target = next_args.get("target", "memory")
|
||||
operations = next_args.get("operations")
|
||||
from tools.memory_tool import memory_tool as _memory_tool
|
||||
result = _memory_tool(
|
||||
action=next_args.get("action"),
|
||||
target=target,
|
||||
content=next_args.get("content"),
|
||||
old_text=next_args.get("old_text"),
|
||||
operations=operations,
|
||||
store=agent._memory_store,
|
||||
)
|
||||
# Bridge: notify external memory provider of built-in memory writes
|
||||
if agent._memory_manager and next_args.get("action") in {"add", "replace"}:
|
||||
try:
|
||||
agent._memory_manager.on_memory_write(
|
||||
next_args.get("action", ""),
|
||||
target,
|
||||
next_args.get("content", ""),
|
||||
metadata=agent._build_memory_write_metadata(
|
||||
task_id=effective_task_id,
|
||||
tool_call_id=tool_call_id,
|
||||
),
|
||||
# Bridge: notify external memory provider of built-in memory writes.
|
||||
# Covers both the single-op shape and each add/replace inside a batch.
|
||||
if agent._memory_manager:
|
||||
if operations:
|
||||
_mem_ops = [
|
||||
op for op in operations
|
||||
if isinstance(op, dict) and op.get("action") in {"add", "replace"}
|
||||
]
|
||||
else:
|
||||
_mem_ops = (
|
||||
[{"action": next_args.get("action"), "content": next_args.get("content")}]
|
||||
if next_args.get("action") in {"add", "replace"} else []
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
for _op in _mem_ops:
|
||||
try:
|
||||
agent._memory_manager.on_memory_write(
|
||||
_op.get("action", ""),
|
||||
target,
|
||||
_op.get("content", "") or "",
|
||||
metadata=agent._build_memory_write_metadata(
|
||||
task_id=effective_task_id,
|
||||
tool_call_id=tool_call_id,
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return _finish_agent_tool(result, next_args)
|
||||
elif agent._memory_manager and agent._memory_manager.has_tool(function_name):
|
||||
def _execute(next_args: dict) -> Any:
|
||||
|
||||
@@ -372,7 +372,7 @@ def _detect_claude_code_version() -> str:
|
||||
|
||||
|
||||
_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
|
||||
_MCP_TOOL_PREFIX = "mcp_"
|
||||
_MCP_TOOL_PREFIX = "mcp__"
|
||||
|
||||
|
||||
def _get_claude_code_version() -> str:
|
||||
@@ -2349,25 +2349,46 @@ def build_anthropic_kwargs(
|
||||
text = text.replace("Nous Research", "Anthropic")
|
||||
block["text"] = text
|
||||
|
||||
# 3. Prefix tool names with mcp_ (Claude Code convention)
|
||||
# Skip names that already begin with the marker — native MCP server
|
||||
# tools (from mcp_servers: in config.yaml) are registered under their
|
||||
# full mcp_<server>_<tool> name and would double-prefix otherwise,
|
||||
# breaking round-trip registry lookup in normalize_response. GH-25255.
|
||||
# 3. Normalize tool names so NOTHING goes on the OAuth wire with a
|
||||
# single-underscore ``mcp_`` prefix. Anthropic's subscription/OAuth
|
||||
# billing classifier treats a single-underscore ``mcp_`` tool name as
|
||||
# a third-party-app fingerprint and rejects the request with HTTP 400
|
||||
# "Third-party apps now draw from extra usage, not plan limits"
|
||||
# (verified empirically: a single ``mcp_foo`` tool flips a request
|
||||
# from plan-billing to the extra-usage lane; ``mcp__foo`` is accepted).
|
||||
#
|
||||
# Two cases, both must land on the double-underscore ``mcp__`` form:
|
||||
# a) bare Hermes-native tools (``read_file``) -> ``mcp__read_file``
|
||||
# b) native MCP server tools registered under their full
|
||||
# single-underscore ``mcp_<server>_<tool>`` name
|
||||
# (``mcp_linear_get_issue``) -> ``mcp__linear_get_issue``
|
||||
# Case (b) is the gap that the bare ``mcp_``->``mcp__`` constant swap
|
||||
# left open: those tools were *skipped* and stayed single-underscore,
|
||||
# so any session with an MCP server configured still tripped the
|
||||
# classifier. normalize_response reverses both forms via registry
|
||||
# lookup so the dispatcher still sees the original name. GH-25255.
|
||||
def _to_oauth_wire_name(name: str) -> str:
|
||||
if name.startswith("mcp__"):
|
||||
return name # already correct, don't double-prefix
|
||||
if name.startswith("mcp_"):
|
||||
# single-underscore native MCP tool -> promote to double
|
||||
return "mcp__" + name[len("mcp_"):]
|
||||
return _MCP_TOOL_PREFIX + name # bare name -> mcp__<name>
|
||||
|
||||
if anthropic_tools:
|
||||
for tool in anthropic_tools:
|
||||
if "name" in tool and not tool["name"].startswith(_MCP_TOOL_PREFIX):
|
||||
tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
|
||||
if "name" in tool:
|
||||
tool["name"] = _to_oauth_wire_name(tool["name"])
|
||||
|
||||
# 4. Prefix tool names in message history (tool_use and tool_result blocks)
|
||||
# 4. Apply the same normalization to tool names in message history
|
||||
# (tool_use blocks) so replayed turns match the wire names above.
|
||||
for msg in anthropic_messages:
|
||||
content = msg.get("content")
|
||||
if isinstance(content, list):
|
||||
for block in content:
|
||||
if isinstance(block, dict):
|
||||
if block.get("type") == "tool_use" and "name" in block:
|
||||
if not block["name"].startswith(_MCP_TOOL_PREFIX):
|
||||
block["name"] = _MCP_TOOL_PREFIX + block["name"]
|
||||
block["name"] = _to_oauth_wire_name(block["name"])
|
||||
elif block.get("type") == "tool_result" and "tool_use_id" in block:
|
||||
pass # tool_result uses ID, not name
|
||||
|
||||
|
||||
@@ -3079,23 +3079,20 @@ def _try_configured_fallback_chain(
|
||||
if not fb_provider or fb_provider.lower() == skip:
|
||||
continue
|
||||
fb_model = str(entry.get("model", "")).strip() or None
|
||||
fb_base_url = str(entry.get("base_url", "")).strip() or None
|
||||
fb_api_key = str(entry.get("api_key", "")).strip() or None
|
||||
|
||||
label = f"fallback_chain[{i}]({fb_provider})"
|
||||
|
||||
try:
|
||||
fb_client = _resolve_single_provider(
|
||||
fb_provider, fb_model, fb_base_url, fb_api_key)
|
||||
fb_client, resolved_model = _resolve_fallback_entry(entry)
|
||||
except Exception:
|
||||
fb_client = None
|
||||
fb_client, resolved_model = None, None
|
||||
|
||||
if fb_client is not None:
|
||||
logger.info(
|
||||
"Auxiliary %s: %s on %s — configured fallback to %s (%s)",
|
||||
task, reason, failed_provider, label, fb_model or "default",
|
||||
task, reason, failed_provider, label, resolved_model or fb_model or "default",
|
||||
)
|
||||
return fb_client, fb_model, label
|
||||
return fb_client, resolved_model or fb_model, label
|
||||
tried.append(label)
|
||||
|
||||
if tried:
|
||||
@@ -3106,6 +3103,103 @@ def _try_configured_fallback_chain(
|
||||
return None, None, ""
|
||||
|
||||
|
||||
def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]:
|
||||
"""Resolve inline or env-backed API key from a fallback-chain entry."""
|
||||
explicit = str(entry.get("api_key") or "").strip()
|
||||
if explicit:
|
||||
return explicit
|
||||
key_env = str(entry.get("key_env") or entry.get("api_key_env") or "").strip()
|
||||
if key_env:
|
||||
return os.getenv(key_env, "").strip() or None
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_fallback_entry(entry: Dict[str, Any]) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Resolve one fallback entry through the central provider router."""
|
||||
provider = str(entry.get("provider") or "").strip()
|
||||
model = str(entry.get("model") or "").strip() or None
|
||||
if not provider or not model:
|
||||
return None, None
|
||||
base_url = str(entry.get("base_url") or "").strip() or None
|
||||
api_key = _fallback_entry_api_key(entry)
|
||||
api_mode = str(entry.get("api_mode") or entry.get("transport") or "").strip() or None
|
||||
return resolve_provider_client(
|
||||
provider,
|
||||
model=model,
|
||||
explicit_base_url=base_url,
|
||||
explicit_api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
)
|
||||
|
||||
|
||||
def _try_main_fallback_chain(
|
||||
task: Optional[str],
|
||||
failed_provider: str = "",
|
||||
reason: str = "error",
|
||||
) -> Tuple[Optional[Any], Optional[str], str]:
|
||||
"""Try the top-level main-agent fallback chain for an auxiliary call.
|
||||
|
||||
``provider: auto`` auxiliary tasks should respect the user's declared
|
||||
main fallback policy before dropping into Hermes' built-in discovery
|
||||
chain. The top-level chain is read through ``get_fallback_chain`` so
|
||||
both modern ``fallback_providers`` and legacy ``fallback_model`` entries
|
||||
participate in the same order as the main agent.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.fallback_config import get_fallback_chain
|
||||
|
||||
chain = get_fallback_chain(load_config())
|
||||
except Exception as exc:
|
||||
logger.debug("Auxiliary %s: could not load main fallback chain: %s", task or "call", exc)
|
||||
return None, None, ""
|
||||
|
||||
if not chain:
|
||||
return None, None, ""
|
||||
|
||||
failed_norm = (failed_provider or "").strip().lower()
|
||||
main_norm = (_read_main_provider() or "").strip().lower()
|
||||
skip = {p for p in (failed_norm, main_norm, "auto") if p}
|
||||
tried: List[str] = []
|
||||
|
||||
for i, entry in enumerate(chain):
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
fb_provider = str(entry.get("provider") or "").strip()
|
||||
fb_model = str(entry.get("model") or "").strip()
|
||||
if not fb_provider or not fb_model:
|
||||
continue
|
||||
fb_norm = fb_provider.lower()
|
||||
label = f"fallback_providers[{i}]({fb_provider})"
|
||||
if fb_norm in skip:
|
||||
tried.append(f"{label} (skipped)")
|
||||
continue
|
||||
if _is_provider_unhealthy(fb_norm):
|
||||
_log_skip_unhealthy(fb_norm, task)
|
||||
tried.append(f"{label} (unhealthy)")
|
||||
continue
|
||||
try:
|
||||
fb_client, resolved_model = _resolve_fallback_entry(entry)
|
||||
except Exception as exc:
|
||||
logger.debug("Auxiliary %s: main fallback %s failed to resolve: %s", task or "call", label, exc)
|
||||
fb_client, resolved_model = None, None
|
||||
if fb_client is not None:
|
||||
logger.info(
|
||||
"Auxiliary %s: %s on %s — main fallback chain to %s (%s)",
|
||||
task or "call", reason, failed_provider or "auto", label,
|
||||
resolved_model or fb_model,
|
||||
)
|
||||
return fb_client, resolved_model or fb_model, fb_provider
|
||||
tried.append(label)
|
||||
|
||||
if tried:
|
||||
logger.debug(
|
||||
"Auxiliary %s: main fallback chain exhausted (tried: %s)",
|
||||
task or "call", ", ".join(tried),
|
||||
)
|
||||
return None, None, ""
|
||||
|
||||
|
||||
def _resolve_single_provider(
|
||||
provider: str,
|
||||
model: Optional[str] = None,
|
||||
@@ -3116,16 +3210,19 @@ def _resolve_single_provider(
|
||||
|
||||
Uses the existing provider resolution infrastructure where possible.
|
||||
"""
|
||||
# Reuse resolve_provider_client which handles provider→client mapping
|
||||
# Reuse resolve_provider_client which handles provider→client mapping.
|
||||
client, resolved_model = resolve_provider_client(
|
||||
provider=provider,
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
explicit_base_url=base_url,
|
||||
explicit_api_key=api_key,
|
||||
)
|
||||
return client
|
||||
|
||||
def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
def _resolve_auto(
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
task: Optional[str] = None,
|
||||
) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Full auto-detection chain.
|
||||
|
||||
Priority:
|
||||
@@ -3223,7 +3320,22 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
|
||||
main_provider, resolved or main_model)
|
||||
return client, resolved or main_model
|
||||
|
||||
# ── Step 2: aggregator / fallback chain ──────────────────────────────
|
||||
# ── Step 2: user-configured fallback policy ─────────────────────────
|
||||
# In auto mode, respect the task-specific fallback chain first, then the
|
||||
# main agent's top-level fallback_providers/fallback_model chain. The
|
||||
# hardcoded provider discovery chain below is only the convenience default
|
||||
# for users who have not declared a fallback policy.
|
||||
if task:
|
||||
fb_client, fb_model, _fb_label = _try_configured_fallback_chain(
|
||||
task, main_provider or "auto", reason="main provider unavailable")
|
||||
if fb_client is not None:
|
||||
return fb_client, fb_model
|
||||
fb_client, fb_model, _fb_label = _try_main_fallback_chain(
|
||||
task, main_provider or "auto", reason="main provider unavailable")
|
||||
if fb_client is not None:
|
||||
return fb_client, fb_model
|
||||
|
||||
# ── Step 3: aggregator / fallback chain ──────────────────────────────
|
||||
tried = []
|
||||
for label, try_fn in _get_provider_chain():
|
||||
if _is_provider_unhealthy(label):
|
||||
@@ -3344,6 +3456,7 @@ def resolve_provider_client(
|
||||
api_mode: str = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
is_vision: bool = False,
|
||||
task: Optional[str] = None,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Central router: given a provider name and optional model, return a
|
||||
configured client with the correct auth, base URL, and API format.
|
||||
@@ -3464,7 +3577,7 @@ def resolve_provider_client(
|
||||
|
||||
# ── Auto: try all providers in priority order ────────────────────
|
||||
if provider == "auto":
|
||||
client, resolved = _resolve_auto(main_runtime=main_runtime)
|
||||
client, resolved = _resolve_auto(main_runtime=main_runtime, task=task)
|
||||
if client is None:
|
||||
return None, None
|
||||
# When auto-detection lands on a non-OpenRouter provider (e.g. a
|
||||
@@ -4357,11 +4470,16 @@ def _client_cache_key(
|
||||
api_mode: Optional[str] = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
is_vision: bool = False,
|
||||
task: Optional[str] = None,
|
||||
) -> tuple:
|
||||
runtime = _normalize_main_runtime(main_runtime)
|
||||
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
|
||||
# `auto` can now resolve through task-specific or main fallback policy,
|
||||
# so the task participates in the cache key. Non-auto providers keep the
|
||||
# old cache shape because the explicit provider/model tuple is sufficient.
|
||||
task_key = (task or "") if provider == "auto" else ""
|
||||
pool_hint = _pool_cache_hint(provider, main_runtime=main_runtime)
|
||||
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, pool_hint)
|
||||
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, task_key, pool_hint)
|
||||
|
||||
|
||||
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
|
||||
@@ -4554,6 +4672,7 @@ def _get_cached_client(
|
||||
api_mode: str = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
is_vision: bool = False,
|
||||
task: Optional[str] = None,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Get or create a cached client for the given provider.
|
||||
|
||||
@@ -4591,6 +4710,7 @@ def _get_cached_client(
|
||||
api_mode=api_mode,
|
||||
main_runtime=main_runtime,
|
||||
is_vision=is_vision,
|
||||
task=task,
|
||||
)
|
||||
with _client_cache_lock:
|
||||
if cache_key in _client_cache:
|
||||
@@ -4635,6 +4755,7 @@ def _get_cached_client(
|
||||
api_mode=api_mode,
|
||||
main_runtime=runtime,
|
||||
is_vision=is_vision,
|
||||
task=task,
|
||||
)
|
||||
if client is not None:
|
||||
# For async clients, remember which loop they were created on so we
|
||||
@@ -5140,7 +5261,7 @@ def call_llm(
|
||||
if not resolved_base_url:
|
||||
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
|
||||
task or "call", resolved_provider)
|
||||
client, final_model = _get_cached_client("auto", main_runtime=main_runtime)
|
||||
client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task)
|
||||
if client is None:
|
||||
raise RuntimeError(
|
||||
f"No LLM provider configured for task={task} provider={resolved_provider}. "
|
||||
@@ -5466,14 +5587,19 @@ def call_llm(
|
||||
|
||||
# Fallback order (#26882, #26803):
|
||||
# 1. User-configured fallback_chain (per-task) if set
|
||||
# 2. Main agent model (last-resort safety net)
|
||||
# For auto users (no explicit aux provider), use the full
|
||||
# auto-detection chain instead — its Step 1 IS the main agent
|
||||
# model, so users on `auto` already get main-model fallback.
|
||||
# 2. For auto: top-level main fallback_providers/fallback_model
|
||||
# 3. For auto: built-in auxiliary discovery chain
|
||||
# 4. For explicit aux providers: main agent model safety net
|
||||
fb_client, fb_model, fb_label = (None, None, "")
|
||||
if is_auto:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_main_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
else:
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
@@ -5636,7 +5762,7 @@ async def async_call_llm(
|
||||
if not resolved_base_url:
|
||||
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
|
||||
task or "call", resolved_provider)
|
||||
client, final_model = _get_cached_client("auto", async_mode=True)
|
||||
client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task)
|
||||
if client is None:
|
||||
raise RuntimeError(
|
||||
f"No LLM provider configured for task={task} provider={resolved_provider}. "
|
||||
@@ -5904,13 +6030,19 @@ async def async_call_llm(
|
||||
|
||||
# Fallback order (#26882, #26803):
|
||||
# 1. User-configured fallback_chain (per-task) if set
|
||||
# 2. Main agent model (last-resort safety net)
|
||||
# Auto users get the full auto-detection chain instead — its
|
||||
# Step 1 IS the main agent model.
|
||||
# 2. For auto: top-level main fallback_providers/fallback_model
|
||||
# 3. For auto: built-in auxiliary discovery chain
|
||||
# 4. For explicit aux providers: main agent model safety net
|
||||
fb_client, fb_model, fb_label = (None, None, "")
|
||||
if is_auto:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_main_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
else:
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
|
||||
@@ -300,6 +300,7 @@ def summarize_background_review_actions(
|
||||
"target": args.get("target", "memory"),
|
||||
"content": args.get("content", ""),
|
||||
"old_text": args.get("old_text", ""),
|
||||
"operations": args.get("operations") or [],
|
||||
"name": args.get("name", ""),
|
||||
"old_string": args.get("old_string", ""),
|
||||
"new_string": args.get("new_string", ""),
|
||||
@@ -353,6 +354,7 @@ def summarize_background_review_actions(
|
||||
content = detail.get("content", "")
|
||||
old_text = detail.get("old_text", "")
|
||||
skill_name = detail.get("name", "")
|
||||
operations = detail.get("operations") or []
|
||||
max_preview = 120
|
||||
if is_skill:
|
||||
change = data.get("_change", {})
|
||||
@@ -376,6 +378,21 @@ def summarize_background_review_actions(
|
||||
actions.append(f"📝 Skill '{skill_name}' rewritten: {description}")
|
||||
else:
|
||||
actions.append(f"📝 {message}" if message else f"Skill {action}")
|
||||
elif operations:
|
||||
for op in operations:
|
||||
op = op or {}
|
||||
op_act = op.get("action", "")
|
||||
op_content = (op.get("content") or "")
|
||||
op_old = (op.get("old_text") or "")
|
||||
if op_act == "add" and op_content:
|
||||
preview = op_content[:max_preview] + ("…" if len(op_content) > max_preview else "")
|
||||
actions.append(f"{label} ➕ {preview}")
|
||||
elif op_act == "replace" and op_content:
|
||||
preview = op_content[:max_preview] + ("…" if len(op_content) > max_preview else "")
|
||||
actions.append(f"{label} ✏️ {preview}")
|
||||
elif op_act == "remove" and op_old:
|
||||
preview = op_old[:60] + ("…" if len(op_old) > 60 else "")
|
||||
actions.append(f"{label} ➖ {preview}")
|
||||
elif action == "add" and content:
|
||||
preview = content[:max_preview] + ("…" if len(content) > max_preview else "")
|
||||
actions.append(f"{label} ➕ {preview}")
|
||||
@@ -391,6 +408,7 @@ def summarize_background_review_actions(
|
||||
"added" in message_lower
|
||||
or "replaced" in message_lower
|
||||
or "removed" in message_lower
|
||||
or "applied" in message_lower
|
||||
or (target and "add" in message.lower())
|
||||
or "Entry added" in message
|
||||
):
|
||||
|
||||
295
agent/billing_view.py
Normal file
295
agent/billing_view.py
Normal file
@@ -0,0 +1,295 @@
|
||||
"""Surface-agnostic core for the Phase 2b terminal-billing screens.
|
||||
|
||||
One fetch/parse per concern, consumed identically by the CLI handler
|
||||
(``cli.py::_show_billing``), the TUI JSON-RPC methods
|
||||
(``tui_gateway/server.py``), and any other surface. Mirrors the proven
|
||||
``agent/account_usage.py::build_credits_view`` pattern: parse the server payload
|
||||
into a frozen dataclass; **fail open** — when not logged in or the portal is
|
||||
unreachable, return a struct with ``logged_in=False`` and let the surface degrade
|
||||
gracefully (never crash).
|
||||
|
||||
Money discipline: the server emits decimal STRINGS (``"142.5"``, not fixed 2dp).
|
||||
We keep them as :class:`decimal.Decimal` end-to-end and only format for display.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Decimal money helpers
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def parse_money(value: Any) -> Optional[Decimal]:
|
||||
"""Parse a server money value (decimal string) into :class:`Decimal`.
|
||||
|
||||
Returns None for missing/invalid input. Never raises. Accepts str/int (and,
|
||||
defensively, float — though the server always sends strings).
|
||||
"""
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
# Decimal(str(...)) avoids binary-float artifacts if a float ever sneaks in.
|
||||
return Decimal(str(value).strip())
|
||||
except (InvalidOperation, ValueError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
def format_money(value: Optional[Decimal]) -> str:
|
||||
"""Format a Decimal as ``$X`` / ``$X.YY`` for display.
|
||||
|
||||
Whole dollars show no decimals; any fractional amount shows exactly 2dp:
|
||||
``Decimal("142.5")`` → ``"$142.50"``, ``Decimal("100")`` → ``"$100"``,
|
||||
``Decimal("0.01")`` → ``"$0.01"``.
|
||||
"""
|
||||
if value is None:
|
||||
return "—"
|
||||
if value == value.to_integral_value():
|
||||
# Whole dollars — no decimal point. format(..., "f") avoids 1E+3 for 1000.
|
||||
return f"${format(value.to_integral_value(), 'f')}"
|
||||
# Fractional — always show 2dp.
|
||||
return f"${format(value.quantize(Decimal('0.01')), 'f')}"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Parsed sub-structures
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class CardInfo:
|
||||
brand: str
|
||||
last4: str
|
||||
|
||||
@property
|
||||
def masked(self) -> str:
|
||||
return f"{self.brand} ····{self.last4}"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MonthlyCap:
|
||||
limit_usd: Optional[Decimal] = None
|
||||
spent_this_month_usd: Optional[Decimal] = None
|
||||
is_default_ceiling: bool = False
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AutoReload:
|
||||
enabled: bool = False
|
||||
threshold_usd: Optional[Decimal] = None
|
||||
reload_to_usd: Optional[Decimal] = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BillingState:
|
||||
"""Parsed ``GET /api/billing/state`` — the overview screen's data.
|
||||
|
||||
Fail-open: ``logged_in=False`` (and empty fields) when not logged in or the
|
||||
portal is unreachable.
|
||||
"""
|
||||
|
||||
logged_in: bool
|
||||
org_id: Optional[str] = None
|
||||
org_slug: Optional[str] = None
|
||||
org_name: Optional[str] = None
|
||||
role: Optional[str] = None # "OWNER" | "ADMIN" | "MEMBER"
|
||||
balance_usd: Optional[Decimal] = None
|
||||
cli_billing_enabled: bool = False
|
||||
charge_presets: tuple[Decimal, ...] = ()
|
||||
min_usd: Optional[Decimal] = None
|
||||
max_usd: Optional[Decimal] = None
|
||||
card: Optional[CardInfo] = None
|
||||
monthly_cap: Optional[MonthlyCap] = None
|
||||
auto_reload: Optional[AutoReload] = None
|
||||
portal_url: Optional[str] = None
|
||||
# When the fetch failed (vs cleanly not-logged-in), the message for the surface.
|
||||
error: Optional[str] = None
|
||||
|
||||
@property
|
||||
def is_admin(self) -> bool:
|
||||
"""True for OWNER/ADMIN — the roles that can manage billing."""
|
||||
return (self.role or "").upper() in ("OWNER", "ADMIN")
|
||||
|
||||
@property
|
||||
def can_charge(self) -> bool:
|
||||
"""True when the UI should offer charge/auto-reload actions.
|
||||
|
||||
Admin role AND the per-org kill-switch on. (The server still enforces;
|
||||
this is just for graying out actions the user can't take.)
|
||||
"""
|
||||
return self.is_admin and self.cli_billing_enabled
|
||||
|
||||
|
||||
def _parse_card(raw: Any) -> Optional[CardInfo]:
|
||||
if not isinstance(raw, dict):
|
||||
return None
|
||||
brand = raw.get("brand")
|
||||
last4 = raw.get("last4")
|
||||
if isinstance(brand, str) and isinstance(last4, str):
|
||||
return CardInfo(brand=brand, last4=last4)
|
||||
return None
|
||||
|
||||
|
||||
def _parse_monthly_cap(raw: Any) -> Optional[MonthlyCap]:
|
||||
if not isinstance(raw, dict):
|
||||
return None
|
||||
return MonthlyCap(
|
||||
limit_usd=parse_money(raw.get("limitUsd")),
|
||||
spent_this_month_usd=parse_money(raw.get("spentThisMonthUsd")),
|
||||
is_default_ceiling=bool(raw.get("isDefaultCeiling")),
|
||||
)
|
||||
|
||||
|
||||
def _parse_auto_reload(raw: Any) -> Optional[AutoReload]:
|
||||
if not isinstance(raw, dict):
|
||||
return None
|
||||
return AutoReload(
|
||||
enabled=bool(raw.get("enabled")),
|
||||
threshold_usd=parse_money(raw.get("thresholdUsd")),
|
||||
reload_to_usd=parse_money(raw.get("reloadToUsd")),
|
||||
)
|
||||
|
||||
|
||||
def billing_state_from_payload(
|
||||
payload: dict[str, Any], *, portal_url: Optional[str] = None
|
||||
) -> BillingState:
|
||||
"""Map a raw ``/api/billing/state`` JSON dict into :class:`BillingState`."""
|
||||
raw_org = payload.get("org")
|
||||
org: dict[str, Any] = raw_org if isinstance(raw_org, dict) else {}
|
||||
raw_bounds = payload.get("bounds")
|
||||
bounds: dict[str, Any] = raw_bounds if isinstance(raw_bounds, dict) else {}
|
||||
|
||||
presets: list[Decimal] = []
|
||||
for item in payload.get("chargePresets") or ():
|
||||
parsed = parse_money(item)
|
||||
if parsed is not None:
|
||||
presets.append(parsed)
|
||||
|
||||
return BillingState(
|
||||
logged_in=True,
|
||||
org_id=org.get("id"),
|
||||
org_slug=org.get("slug"),
|
||||
org_name=org.get("name"),
|
||||
role=org.get("role"),
|
||||
balance_usd=parse_money(payload.get("balanceUsd")),
|
||||
cli_billing_enabled=bool(payload.get("cliBillingEnabled")),
|
||||
charge_presets=tuple(presets),
|
||||
min_usd=parse_money(bounds.get("minUsd")),
|
||||
max_usd=parse_money(bounds.get("maxUsd")),
|
||||
card=_parse_card(payload.get("card")),
|
||||
monthly_cap=_parse_monthly_cap(payload.get("monthlyCap")),
|
||||
auto_reload=_parse_auto_reload(payload.get("autoReload")),
|
||||
portal_url=portal_url,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Fail-open builders (the surface front doors)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def build_billing_state(*, timeout: float = 15.0) -> BillingState:
|
||||
"""Fetch + parse ``/api/billing/state``. Fail-open.
|
||||
|
||||
Returns ``BillingState(logged_in=False)`` when not logged in. On a portal/HTTP
|
||||
failure, returns ``logged_in=False`` with ``error`` set so the surface can show
|
||||
a clear message rather than crashing.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.nous_billing import (
|
||||
BillingAuthError,
|
||||
BillingError,
|
||||
_absolutize_portal_url,
|
||||
get_billing_state,
|
||||
resolve_portal_base_url,
|
||||
)
|
||||
except Exception:
|
||||
return BillingState(logged_in=False, error="billing client unavailable")
|
||||
|
||||
try:
|
||||
payload = get_billing_state(timeout=timeout)
|
||||
except BillingAuthError:
|
||||
return BillingState(logged_in=False)
|
||||
except BillingError as exc:
|
||||
logger.debug("billing ▸ /state fetch failed (fail-open)", exc_info=True)
|
||||
return BillingState(logged_in=False, error=str(exc))
|
||||
except Exception:
|
||||
logger.debug("billing ▸ /state unexpected error (fail-open)", exc_info=True)
|
||||
return BillingState(logged_in=False, error="could not load billing state")
|
||||
|
||||
# Prefer a server-supplied portalUrl if present (resolved to absolute in case
|
||||
# it's relative); else build the standard one.
|
||||
raw_portal = payload.get("portalUrl") if isinstance(payload, dict) else None
|
||||
portal_url = _absolutize_portal_url(raw_portal) if raw_portal else None
|
||||
if not portal_url:
|
||||
try:
|
||||
portal_url = _fallback_portal_url(resolve_portal_base_url())
|
||||
except Exception:
|
||||
portal_url = None
|
||||
|
||||
return billing_state_from_payload(payload, portal_url=portal_url)
|
||||
|
||||
|
||||
def _fallback_portal_url(base: str) -> str:
|
||||
"""Standard billing deep-link when the server omits ``portalUrl``."""
|
||||
return f"{base.rstrip('/')}/billing?topup=open"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Idempotency
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def new_idempotency_key() -> str:
|
||||
"""Fresh UUID for a user-confirmed purchase (reuse on retry of the SAME buy).
|
||||
|
||||
The ``Idempotency-Key`` header is mandatory on ``POST /charge``; generate one
|
||||
per confirmed purchase and reuse it across retries so a double-submit collapses
|
||||
to a single charge. Never reuse a key across different amounts (the server
|
||||
returns 409 idempotency_conflict).
|
||||
"""
|
||||
return str(uuid.uuid4())
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Amount validation (Screen 3 custom input)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AmountValidation:
|
||||
ok: bool
|
||||
amount: Optional[Decimal] = None
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
def validate_charge_amount(
|
||||
raw: str, *, min_usd: Optional[Decimal], max_usd: Optional[Decimal]
|
||||
) -> AmountValidation:
|
||||
"""Validate a custom charge amount against bounds + 2dp (multipleOf 0.01).
|
||||
|
||||
Mirrors the server's accept/reject so the UI can give instant feedback rather
|
||||
than round-tripping a sure-to-fail charge. The server is still authoritative.
|
||||
"""
|
||||
cleaned = (raw or "").strip().lstrip("$").strip()
|
||||
amount = parse_money(cleaned)
|
||||
if amount is None:
|
||||
return AmountValidation(ok=False, error="Enter a dollar amount, e.g. 100")
|
||||
if amount <= 0:
|
||||
return AmountValidation(ok=False, error="Amount must be greater than $0")
|
||||
# multipleOf 0.01 — reject sub-cent precision.
|
||||
if amount != amount.quantize(Decimal("0.01")):
|
||||
return AmountValidation(ok=False, error="Amount can't be smaller than a cent")
|
||||
if min_usd is not None and amount < min_usd:
|
||||
return AmountValidation(ok=False, error=f"Minimum is {format_money(min_usd)}")
|
||||
if max_usd is not None and amount > max_usd:
|
||||
return AmountValidation(ok=False, error=f"Maximum is {format_money(max_usd)}")
|
||||
return AmountValidation(ok=True, amount=amount)
|
||||
@@ -262,6 +262,26 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
|
||||
return converted or None
|
||||
|
||||
|
||||
# Provider-executed built-in tool *declaration* types accepted on the
|
||||
# Responses ``tools`` array. These are declared by ``type`` alone (no
|
||||
# client-side name/parameters schema) and run server-side — the provider
|
||||
# owns the implementation and reports progress via the matching ``*_call``
|
||||
# output items. Hermes injects xAI's native ``web_search`` for the xAI
|
||||
# transport (see agent/transports/codex.py); the rest are listed so the
|
||||
# preflight validator passes them through rather than rejecting them as
|
||||
# "unsupported type". Mirrors the ``*_call`` item-type set used in
|
||||
# _normalize_codex_response.
|
||||
_RESPONSES_BUILTIN_TOOL_TYPES = {
|
||||
"web_search",
|
||||
"web_search_preview",
|
||||
"file_search",
|
||||
"code_interpreter",
|
||||
"image_generation",
|
||||
"computer_use_preview",
|
||||
"local_shell",
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Message format conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -802,7 +822,22 @@ def _preflight_codex_api_kwargs(
|
||||
for idx, tool in enumerate(tools):
|
||||
if not isinstance(tool, dict):
|
||||
raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
|
||||
if tool.get("type") != "function":
|
||||
|
||||
tool_type = tool.get("type")
|
||||
|
||||
# Provider-executed built-in tools (xAI native web_search, code
|
||||
# interpreter, etc.) are declared by ``type`` alone and carry no
|
||||
# ``name``/``parameters`` schema — the provider owns the
|
||||
# implementation. Pass them through verbatim instead of forcing
|
||||
# them through the function-tool validation below (which would
|
||||
# otherwise reject them with "unsupported type"). See
|
||||
# agent/transports/codex.py for where xAI's native web_search is
|
||||
# injected.
|
||||
if tool_type in _RESPONSES_BUILTIN_TOOL_TYPES:
|
||||
normalized_tools.append(dict(tool))
|
||||
continue
|
||||
|
||||
if tool_type != "function":
|
||||
raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
|
||||
|
||||
name = tool.get("name")
|
||||
@@ -1086,6 +1121,33 @@ def _normalize_codex_response(
|
||||
saw_final_answer_phase = False
|
||||
saw_reasoning_item = False
|
||||
|
||||
# Server-side built-in tool calls (xAI's native web_search, code
|
||||
# interpreter, etc.) are executed by the provider and reported as
|
||||
# discrete ``*_call`` output items. xAI's /v1/responses surface
|
||||
# (e.g. grok-composer-2.5-fast on SuperGrok OAuth) routinely leaves
|
||||
# these items at ``status="in_progress"`` even when the overall
|
||||
# ``response.status == "completed"`` — the search ran to completion
|
||||
# server-side, the per-item status simply isn't reconciled. These
|
||||
# are NOT a signal that the model's turn is unfinished, so they must
|
||||
# not flip ``has_incomplete_items``. Only the response-level status
|
||||
# and genuine model output items (message/reasoning/function_call)
|
||||
# govern the incomplete verdict. Without this guard, any turn where
|
||||
# grok-composer invokes server-side search is misclassified as
|
||||
# ``finish_reason="incomplete"`` and burns 3 fruitless continuation
|
||||
# retries before failing with "Codex response remained incomplete
|
||||
# after 3 continuation attempts". client-side function/custom tool
|
||||
# calls keep their own in_progress handling below (they are skipped,
|
||||
# not awaited).
|
||||
_SERVER_SIDE_TOOL_CALL_TYPES = {
|
||||
"web_search_call",
|
||||
"file_search_call",
|
||||
"code_interpreter_call",
|
||||
"image_generation_call",
|
||||
"computer_call",
|
||||
"local_shell_call",
|
||||
"mcp_call",
|
||||
}
|
||||
|
||||
for item in output:
|
||||
item_type = getattr(item, "type", None)
|
||||
item_status = getattr(item, "status", None)
|
||||
@@ -1094,7 +1156,10 @@ def _normalize_codex_response(
|
||||
else:
|
||||
item_status = None
|
||||
|
||||
if item_status in {"queued", "in_progress", "incomplete"}:
|
||||
if (
|
||||
item_status in {"queued", "in_progress", "incomplete"}
|
||||
and item_type not in _SERVER_SIDE_TOOL_CALL_TYPES
|
||||
):
|
||||
has_incomplete_items = True
|
||||
saw_streaming_or_item_incomplete = True
|
||||
|
||||
|
||||
@@ -512,6 +512,16 @@ def compress_context(
|
||||
old_title = agent._session_db.get_session_title(agent.session_id)
|
||||
# Trigger memory extraction on the old session before it rotates.
|
||||
agent.commit_memory_session(messages)
|
||||
# Flush any un-persisted messages from the current turn to the
|
||||
# old session *before* rotating. compress_context() can be
|
||||
# called mid-turn (auto-compress when context exceeds threshold)
|
||||
# at a point when _flush_messages_to_session_db() has not yet
|
||||
# run. Without this, messages generated during the current turn
|
||||
# are silently lost on session rotation (#47202).
|
||||
try:
|
||||
agent._flush_messages_to_session_db(messages)
|
||||
except Exception:
|
||||
pass # best-effort — don't block compression on a flush error
|
||||
agent._session_db.end_session(agent.session_id, "compression")
|
||||
old_session_id = agent.session_id
|
||||
agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
@@ -603,6 +613,20 @@ def compress_context(
|
||||
force=True,
|
||||
)
|
||||
|
||||
# Emit session:compress event so hooks (e.g. MemPalace sync) can ingest
|
||||
# the completed old session before its details are lost.
|
||||
_old_sid_for_event = locals().get("old_session_id")
|
||||
if getattr(agent, "event_callback", None):
|
||||
try:
|
||||
agent.event_callback("session:compress", {
|
||||
"platform": agent.platform or "",
|
||||
"session_id": agent.session_id,
|
||||
"old_session_id": _old_sid_for_event or "",
|
||||
"compression_count": agent.context_compressor.compression_count,
|
||||
})
|
||||
except Exception as e:
|
||||
logger.debug("event_callback error on session:compress: %s", e)
|
||||
|
||||
# Keep the post-compression rough estimate for diagnostics, but do not
|
||||
# treat it as provider-reported prompt usage. Schema-heavy rough estimates
|
||||
# can remain above threshold even after the next real API request fits.
|
||||
|
||||
@@ -300,11 +300,20 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
|
||||
agent.session_id, exc,
|
||||
)
|
||||
|
||||
if stored_prompt:
|
||||
if stored_prompt and _stored_prompt_matches_runtime(agent, stored_prompt):
|
||||
# Continuing session — reuse the exact system prompt from the
|
||||
# previous turn so the Anthropic cache prefix matches.
|
||||
agent._cached_system_prompt = stored_prompt
|
||||
return
|
||||
if stored_prompt:
|
||||
stored_state = "stale_runtime"
|
||||
logger.info(
|
||||
"Stored system prompt for session %s has stale runtime identity; "
|
||||
"rebuilding for model=%s provider=%s.",
|
||||
agent.session_id,
|
||||
getattr(agent, "model", "") or "",
|
||||
getattr(agent, "provider", "") or "",
|
||||
)
|
||||
|
||||
if conversation_history and stored_state in ("null", "empty"):
|
||||
# Continuing session whose stored prompt is unusable. The
|
||||
@@ -366,6 +375,30 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
|
||||
)
|
||||
|
||||
|
||||
def _stored_prompt_matches_runtime(agent, prompt: str) -> bool:
|
||||
"""Return False when the persisted Model/Provider lines are stale."""
|
||||
|
||||
def line_value(label: str) -> str:
|
||||
prefix = f"{label}:"
|
||||
value = ""
|
||||
for line in prompt.splitlines():
|
||||
if line.startswith(prefix):
|
||||
value = line[len(prefix):].strip()
|
||||
return value
|
||||
|
||||
stored_model = line_value("Model")
|
||||
current_model = str(getattr(agent, "model", "") or "").strip()
|
||||
if stored_model and current_model and stored_model != current_model:
|
||||
return False
|
||||
|
||||
stored_provider = line_value("Provider")
|
||||
current_provider = str(getattr(agent, "provider", "") or "").strip()
|
||||
if stored_provider and current_provider and stored_provider != current_provider:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List[str]] = None) -> str:
|
||||
if is_partial_stub and dropped_tools:
|
||||
tool_list = ", ".join(dropped_tools[:3])
|
||||
@@ -441,6 +474,7 @@ def run_conversation(
|
||||
task_id: str = None,
|
||||
stream_callback: Optional[callable] = None,
|
||||
persist_user_message: Optional[str] = None,
|
||||
persist_user_timestamp: Optional[float] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Run a complete conversation with tool calling until completion.
|
||||
@@ -456,6 +490,8 @@ def run_conversation(
|
||||
persist_user_message: Optional clean user message to store in
|
||||
transcripts/history when user_message contains API-only
|
||||
synthetic prefixes.
|
||||
persist_user_timestamp: Optional platform event timestamp to store
|
||||
as metadata on that persisted user message.
|
||||
or queuing follow-up prefetch work.
|
||||
|
||||
Returns:
|
||||
@@ -477,6 +513,7 @@ def run_conversation(
|
||||
task_id,
|
||||
stream_callback,
|
||||
persist_user_message,
|
||||
persist_user_timestamp,
|
||||
restore_or_build_system_prompt=_restore_or_build_system_prompt,
|
||||
install_safe_stdio=_install_safe_stdio,
|
||||
sanitize_surrogates=_sanitize_surrogates,
|
||||
@@ -3719,8 +3756,30 @@ def run_conversation(
|
||||
assistant_msg = agent._build_assistant_message(assistant_message, finish_reason)
|
||||
messages.append(assistant_msg)
|
||||
for tc in assistant_message.tool_calls:
|
||||
if tc.function.name not in agent.valid_tool_names:
|
||||
content = f"Tool '{tc.function.name}' does not exist. Available tools: {available}"
|
||||
_tc_name = tc.function.name
|
||||
if _tc_name not in agent.valid_tool_names:
|
||||
# A blank/whitespace-only name is not a typo the
|
||||
# model can fuzzy-correct toward a real tool — it is
|
||||
# almost always a weak open model echoing tool-call
|
||||
# XML/JSON it saw in file or tool output (#47967:
|
||||
# <tool_call>/<invoke name=...> payloads in a file
|
||||
# prime mimo/nemotron-class models to emit empty
|
||||
# structured calls). Dumping the full tool catalog
|
||||
# in that case feeds the priming loop more names to
|
||||
# mimic and inflates context 3-4x across retries, so
|
||||
# send a terse error that tells the model in-context
|
||||
# tool-call syntax is DATA, not a call to make.
|
||||
if not (_tc_name or "").strip():
|
||||
content = (
|
||||
"Tool call rejected: the tool name was empty. "
|
||||
"If tool-call XML or JSON appeared in file "
|
||||
"contents or tool output, that is data — do "
|
||||
"not re-emit it as a tool call. To call a "
|
||||
"tool, use a valid name from your tool list; "
|
||||
"otherwise reply in plain text."
|
||||
)
|
||||
else:
|
||||
content = f"Tool '{_tc_name}' does not exist. Available tools: {available}"
|
||||
else:
|
||||
content = "Skipped: another tool call in this turn used an invalid name. Please retry this tool call."
|
||||
messages.append({
|
||||
|
||||
@@ -57,6 +57,11 @@ DEFAULT_INTERVAL_HOURS = 24 * 7 # 7 days
|
||||
DEFAULT_MIN_IDLE_HOURS = 2
|
||||
DEFAULT_STALE_AFTER_DAYS = 30
|
||||
DEFAULT_ARCHIVE_AFTER_DAYS = 90
|
||||
# Consolidation (the LLM umbrella-building fork) is OFF by default. The
|
||||
# deterministic inactivity prune (apply_automatic_transitions) still runs
|
||||
# whenever the curator is enabled; only the opinionated, aux-model-cost
|
||||
# consolidation pass is opt-in.
|
||||
DEFAULT_CONSOLIDATE = False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -182,6 +187,22 @@ def get_prune_builtins() -> bool:
|
||||
return bool(cfg.get("prune_builtins", True))
|
||||
|
||||
|
||||
def get_consolidate() -> bool:
|
||||
"""Whether the curator runs its LLM consolidation (umbrella-building) pass.
|
||||
|
||||
OFF by default. When off, a curator run does ONLY the deterministic
|
||||
inactivity prune (mark stale / archive long-unused skills) and skips the
|
||||
forked aux-model review entirely — no consolidation, no umbrella-building,
|
||||
no aux-model cost. Set ``curator.consolidate: true`` to opt back into the
|
||||
LLM pass that merges overlapping skills into class-level umbrellas.
|
||||
|
||||
The explicit ``hermes curator run --consolidate`` flag overrides this for
|
||||
a single invocation regardless of the config value.
|
||||
"""
|
||||
cfg = _load_config()
|
||||
return bool(cfg.get("consolidate", DEFAULT_CONSOLIDATE))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Idle / interval check
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1408,25 +1429,38 @@ def run_curator_review(
|
||||
on_summary: Optional[Callable[[str], None]] = None,
|
||||
synchronous: bool = False,
|
||||
dry_run: bool = False,
|
||||
consolidate: Optional[bool] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Execute a single curator review pass.
|
||||
|
||||
Steps:
|
||||
1. Apply automatic state transitions (pure, no LLM).
|
||||
2. If there are agent-created skills, spawn a forked AIAgent that runs
|
||||
the LLM review prompt against the current candidate list.
|
||||
2. If consolidation is enabled AND there are agent-created skills, spawn
|
||||
a forked AIAgent that runs the LLM review prompt against the current
|
||||
candidate list.
|
||||
3. Update .curator_state with last_run_at and a one-line summary.
|
||||
4. Invoke *on_summary* with a user-visible description.
|
||||
|
||||
If *synchronous* is True, the LLM review runs in the calling thread; the
|
||||
default is to spawn a daemon thread so the caller returns immediately.
|
||||
|
||||
*consolidate* gates the LLM umbrella-building pass. ``None`` (the default)
|
||||
reads ``curator.consolidate`` from config (OFF by default). Passing
|
||||
``True``/``False`` overrides the config for this invocation — used by the
|
||||
``hermes curator run --consolidate`` flag. When consolidation is off, only
|
||||
the deterministic inactivity prune runs and the forked aux-model review is
|
||||
skipped entirely (no aux-model cost).
|
||||
|
||||
If *dry_run* is True, the automatic stale/archive transitions are SKIPPED
|
||||
and the LLM review pass is instructed to produce a report only — no
|
||||
skill_manage mutations, no terminal archive moves. The REPORT.md still
|
||||
gets written and ``state.last_report_path`` still records it so users
|
||||
can read what the curator WOULD have done.
|
||||
can read what the curator WOULD have done. A dry-run also honors
|
||||
*consolidate*: when consolidation is off, the preview only reports the
|
||||
deterministic prune candidates.
|
||||
"""
|
||||
if consolidate is None:
|
||||
consolidate = get_consolidate()
|
||||
start = datetime.now(timezone.utc)
|
||||
if dry_run:
|
||||
# Count candidates without mutating state.
|
||||
@@ -1489,6 +1523,53 @@ def run_curator_review(
|
||||
before_report = []
|
||||
before_names = {r.get("name") for r in before_report if isinstance(r, dict)}
|
||||
|
||||
# Consolidation gate. When off (the default), the curator does ONLY the
|
||||
# deterministic inactivity prune above — no forked aux-model review, no
|
||||
# umbrella-building, no aux-model cost. Record the run, write a report
|
||||
# reflecting the prune-only outcome, and return without spawning a fork.
|
||||
if not consolidate:
|
||||
final_summary = (
|
||||
f"{prefix}{auto_summary}; llm: skipped (consolidation off)"
|
||||
)
|
||||
llm_meta = {
|
||||
"final": "",
|
||||
"summary": "skipped (consolidation off)",
|
||||
"model": "",
|
||||
"provider": "",
|
||||
"tool_calls": [],
|
||||
"error": None,
|
||||
}
|
||||
elapsed = (datetime.now(timezone.utc) - start).total_seconds()
|
||||
state2 = load_state()
|
||||
state2["last_run_duration_seconds"] = elapsed
|
||||
state2["last_run_summary"] = final_summary
|
||||
try:
|
||||
after_report = skill_usage.agent_created_report()
|
||||
except Exception:
|
||||
after_report = []
|
||||
try:
|
||||
report_path = _write_run_report(
|
||||
started_at=start,
|
||||
elapsed_seconds=elapsed,
|
||||
auto_counts=counts,
|
||||
auto_summary=auto_summary,
|
||||
before_report=before_report,
|
||||
before_names=before_names,
|
||||
after_report=after_report,
|
||||
llm_meta=llm_meta,
|
||||
)
|
||||
if report_path is not None:
|
||||
state2["last_report_path"] = str(report_path)
|
||||
except Exception as e:
|
||||
logger.debug("Curator report write failed: %s", e, exc_info=True)
|
||||
save_state(state2)
|
||||
if on_summary:
|
||||
try:
|
||||
on_summary(f"curator: {final_summary}")
|
||||
except Exception:
|
||||
pass
|
||||
return
|
||||
|
||||
llm_meta: Dict[str, Any] = {}
|
||||
try:
|
||||
candidate_list = _render_candidate_list()
|
||||
|
||||
@@ -46,7 +46,7 @@ import shutil
|
||||
import tarfile
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
@@ -208,13 +208,17 @@ def _write_manifest(dest: Path, reason: str, archive_path: Path,
|
||||
)
|
||||
|
||||
|
||||
def snapshot_skills(reason: str = "manual") -> Optional[Path]:
|
||||
def snapshot_skills(reason: str = "manual", *, protect_ids: Optional[Set[str]] = None) -> Optional[Path]:
|
||||
"""Create a tar.gz snapshot of ``~/.hermes/skills/`` and prune old ones.
|
||||
|
||||
Returns the snapshot directory path, or ``None`` if the snapshot was
|
||||
skipped (backup disabled, skills dir missing, or an IO error occurred —
|
||||
in which case we log at debug and return None so the curator never
|
||||
aborts a pass because of a backup failure).
|
||||
|
||||
``protect_ids`` is forwarded to the prune step so callers can guarantee
|
||||
specific snapshot ids survive even when they fall outside the keep
|
||||
window (rollback passes the id it is about to restore from).
|
||||
"""
|
||||
if not is_enabled():
|
||||
logger.debug("Curator backup disabled by config; skipping snapshot")
|
||||
@@ -276,15 +280,19 @@ def snapshot_skills(reason: str = "manual") -> Optional[Path]:
|
||||
pass
|
||||
return None
|
||||
|
||||
_prune_old(keep=get_keep())
|
||||
_prune_old(keep=get_keep(), protect=protect_ids)
|
||||
logger.info("Curator snapshot created: %s (%s)", snap_id, reason)
|
||||
return dest
|
||||
|
||||
|
||||
def _prune_old(keep: int) -> List[str]:
|
||||
def _prune_old(keep: int, protect: Optional[Set[str]] = None) -> List[str]:
|
||||
"""Delete regular snapshots beyond the newest *keep*. Returns deleted
|
||||
ids. Staging dirs (``.rollback-staging-*``) are implementation detail
|
||||
and pruned independently on every call."""
|
||||
ids. Snapshot ids in *protect* are never deleted even when they fall
|
||||
outside the keep window — rollback() uses this so the mandatory
|
||||
pre-rollback safety snapshot can never evict the very snapshot being
|
||||
restored. Staging dirs (``.rollback-staging-*``) are implementation
|
||||
detail and pruned independently on every call."""
|
||||
protect = protect or set()
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return []
|
||||
@@ -305,6 +313,8 @@ def _prune_old(keep: int) -> List[str]:
|
||||
entries.sort(key=lambda t: t[0], reverse=True)
|
||||
deleted: List[str] = []
|
||||
for _, path in entries[keep:]:
|
||||
if path.name in protect:
|
||||
continue
|
||||
try:
|
||||
shutil.rmtree(path)
|
||||
deleted.append(path.name)
|
||||
@@ -564,7 +574,13 @@ def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]
|
||||
# out before touching anything — otherwise a failed extract could leave
|
||||
# the user with no skills.
|
||||
try:
|
||||
snapshot_skills(reason=f"pre-rollback to {target.name}")
|
||||
# Protect the target from this snapshot's prune step: at the steady
|
||||
# keep limit, pruning the oldest snapshot would otherwise delete the
|
||||
# very snapshot we are about to extract from.
|
||||
snapshot_skills(
|
||||
reason=f"pre-rollback to {target.name}",
|
||||
protect_ids={target.name},
|
||||
)
|
||||
except Exception as e:
|
||||
return (False, f"pre-rollback safety snapshot failed: {e}", None)
|
||||
|
||||
|
||||
@@ -11,6 +11,18 @@ Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
|
||||
as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
|
||||
via ``plugins.enabled``).
|
||||
|
||||
Unified surface
|
||||
---------------
|
||||
One tool — ``image_generate`` — covers **text-to-image** and
|
||||
**image-to-image / image editing**. The router is the presence of
|
||||
``image_url`` (and/or ``reference_image_urls``): if any source image is
|
||||
provided, the provider routes to its image-to-image / edit endpoint; if
|
||||
omitted, the provider routes to text-to-image. Users pick one **model**
|
||||
(e.g. nano-banana-pro, gpt-image-2, grok-imagine-image); the provider
|
||||
handles which underlying endpoint to hit. This mirrors the ``video_gen``
|
||||
provider design (``agent/video_gen_provider.py``) so the two surfaces
|
||||
stay learnable together.
|
||||
|
||||
Response shape
|
||||
--------------
|
||||
All providers return a dict that :func:`success_response` / :func:`error_response`
|
||||
@@ -21,6 +33,7 @@ produce. The tool wrapper JSON-serializes it. Keys:
|
||||
model str provider-specific model identifier
|
||||
prompt str echoed prompt
|
||||
aspect_ratio str "landscape" | "square" | "portrait"
|
||||
modality str "text" | "image" (which mode was used)
|
||||
provider str provider name (for diagnostics)
|
||||
error str only when success=False
|
||||
error_type str only when success=False
|
||||
@@ -127,19 +140,51 @@ class ImageGenProvider(abc.ABC):
|
||||
return models[0].get("id")
|
||||
return None
|
||||
|
||||
def capabilities(self) -> Dict[str, Any]:
|
||||
"""Return what this provider supports.
|
||||
|
||||
Returned dict (all keys optional)::
|
||||
|
||||
{
|
||||
"modalities": ["text", "image"], # which inputs the backend accepts
|
||||
"max_reference_images": 9, # cap for reference_image_urls
|
||||
}
|
||||
|
||||
``modalities`` declares whether the active backend/model supports
|
||||
text-to-image (``"text"``), image-to-image / editing (``"image"``),
|
||||
or both. The tool layer surfaces this in the dynamic schema so the
|
||||
model knows when ``image_url`` is honored. Used by ``hermes tools``
|
||||
for the picker too. Default: text-only (backward compatible — a
|
||||
provider that doesn't override this advertises text-to-image only).
|
||||
"""
|
||||
return {
|
||||
"modalities": ["text"],
|
||||
"max_reference_images": 0,
|
||||
}
|
||||
|
||||
@abc.abstractmethod
|
||||
def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
|
||||
*,
|
||||
image_url: Optional[str] = None,
|
||||
reference_image_urls: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate an image.
|
||||
"""Generate an image from a text prompt, or edit/transform a source image.
|
||||
|
||||
Routing: if ``image_url`` (or any ``reference_image_urls``) is
|
||||
provided, the provider should route to its image-to-image / edit
|
||||
endpoint; otherwise text-to-image. ``image_url`` is the primary
|
||||
source image to edit; ``reference_image_urls`` are additional
|
||||
style/composition references (provider clamps to its declared
|
||||
``max_reference_images``).
|
||||
|
||||
Implementations should return the dict from :func:`success_response`
|
||||
or :func:`error_response`. ``kwargs`` may contain forward-compat
|
||||
parameters future versions of the schema will expose — implementations
|
||||
should ignore unknown keys.
|
||||
parameters future versions of the schema will expose —
|
||||
implementations MUST ignore unknown keys (no TypeError).
|
||||
"""
|
||||
|
||||
|
||||
@@ -162,6 +207,26 @@ def resolve_aspect_ratio(value: Optional[str]) -> str:
|
||||
return DEFAULT_ASPECT_RATIO
|
||||
|
||||
|
||||
def normalize_reference_images(value: Any) -> Optional[List[str]]:
|
||||
"""Coerce a reference-image argument into a clean list of URL/path strings.
|
||||
|
||||
Accepts a single string or a list; strips blanks and whitespace. Returns
|
||||
``None`` when nothing usable remains so providers can treat "no refs" as a
|
||||
single sentinel.
|
||||
"""
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, str):
|
||||
value = [value]
|
||||
if not isinstance(value, (list, tuple)):
|
||||
return None
|
||||
out: List[str] = []
|
||||
for item in value:
|
||||
if isinstance(item, str) and item.strip():
|
||||
out.append(item.strip())
|
||||
return out or None
|
||||
|
||||
|
||||
def _images_cache_dir() -> Path:
|
||||
"""Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
|
||||
from hermes_constants import get_hermes_home
|
||||
@@ -280,13 +345,16 @@ def success_response(
|
||||
prompt: str,
|
||||
aspect_ratio: str,
|
||||
provider: str,
|
||||
modality: str = "text",
|
||||
extra: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build a uniform success response dict.
|
||||
|
||||
``image`` may be an HTTP URL or an absolute filesystem path (for b64
|
||||
providers like OpenAI). Callers that need to pass through additional
|
||||
backend-specific fields can supply ``extra``.
|
||||
providers like OpenAI). ``modality`` is ``"text"`` (text-to-image) or
|
||||
``"image"`` (image-to-image / editing) — indicates which endpoint was
|
||||
actually hit, useful for diagnostics. Callers that need to pass through
|
||||
additional backend-specific fields can supply ``extra``.
|
||||
"""
|
||||
payload: Dict[str, Any] = {
|
||||
"success": True,
|
||||
@@ -294,6 +362,7 @@ def success_response(
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"aspect_ratio": aspect_ratio,
|
||||
"modality": modality,
|
||||
"provider": provider,
|
||||
}
|
||||
if extra:
|
||||
|
||||
@@ -33,6 +33,7 @@ from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.memory_provider import MemoryProvider
|
||||
from agent.skill_commands import extract_user_instruction_from_skill_message
|
||||
from tools.registry import tool_error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -430,16 +431,37 @@ class MemoryManager:
|
||||
|
||||
# -- Prefetch / recall ---------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _strip_skill_scaffolding(text: str) -> Optional[str]:
|
||||
"""Return memory-worthy user text, or None to skip the turn.
|
||||
|
||||
When a user invokes a /skill or /bundle, Hermes expands the turn into
|
||||
a model-facing message that embeds the entire skill body. Feeding that
|
||||
verbatim to memory providers pollutes their stores/embeddings with
|
||||
prompt scaffolding instead of what the user actually asked. We recover
|
||||
just the user's instruction here, once, for every provider — so this
|
||||
is fixed for the whole provider fan-out, not per backend.
|
||||
|
||||
- Non-skill messages pass through unchanged.
|
||||
- Skill turns with a user instruction return that instruction.
|
||||
- Bare skill invocations (no instruction) return None → callers skip
|
||||
the turn, since there is no user content worth remembering.
|
||||
"""
|
||||
return extract_user_instruction_from_skill_message(text)
|
||||
|
||||
def prefetch_all(self, query: str, *, session_id: str = "") -> str:
|
||||
"""Collect prefetch context from all providers.
|
||||
|
||||
Returns merged context text labeled by provider. Empty providers
|
||||
are skipped. Failures in one provider don't block others.
|
||||
"""
|
||||
clean_query = self._strip_skill_scaffolding(query)
|
||||
if not clean_query:
|
||||
return ""
|
||||
parts = []
|
||||
for provider in self._providers:
|
||||
try:
|
||||
result = provider.prefetch(query, session_id=session_id)
|
||||
result = provider.prefetch(clean_query, session_id=session_id)
|
||||
if result and result.strip():
|
||||
parts.append(result)
|
||||
except Exception as e:
|
||||
@@ -460,10 +482,14 @@ class MemoryManager:
|
||||
if not providers:
|
||||
return
|
||||
|
||||
clean_query = self._strip_skill_scaffolding(query)
|
||||
if not clean_query:
|
||||
return
|
||||
|
||||
def _run() -> None:
|
||||
for provider in providers:
|
||||
try:
|
||||
provider.queue_prefetch(query, session_id=session_id)
|
||||
provider.queue_prefetch(clean_query, session_id=session_id)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Memory provider '%s' queue_prefetch failed (non-fatal): %s",
|
||||
@@ -515,6 +541,11 @@ class MemoryManager:
|
||||
if not providers:
|
||||
return
|
||||
|
||||
clean_user_content = self._strip_skill_scaffolding(user_content)
|
||||
if not clean_user_content:
|
||||
return
|
||||
user_content = clean_user_content
|
||||
|
||||
def _run() -> None:
|
||||
for provider in providers:
|
||||
try:
|
||||
|
||||
@@ -275,6 +275,11 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
# via a custom provider. Values sourced from models.dev (2026-04).
|
||||
# Keys use substring matching (longest-first), so e.g. "grok-4.20"
|
||||
# matches "grok-4.20-0309-reasoning" / "-non-reasoning" / "-multi-agent-0309".
|
||||
# OAuth-only slug; absent from GET /v1/models. xAI publishes a 200k
|
||||
# usable context window for Composer 2.5 on Grok Build (SuperGrok /
|
||||
# Premium+); /v1/responses additionally enforces a ~262144 input+output
|
||||
# budget, but the usable context (what we track here) is 200k.
|
||||
"grok-composer": 200000, # grok-composer-2.5-fast (Grok Build CLI)
|
||||
"grok-build": 256000, # grok-build-0.1
|
||||
"grok-code-fast": 256000, # grok-code-fast-1
|
||||
"grok-2-vision": 8192, # grok-2-vision, -1212, -latest
|
||||
|
||||
@@ -8,6 +8,7 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import contextvars
|
||||
from collections import OrderedDict
|
||||
from pathlib import Path
|
||||
|
||||
@@ -304,6 +305,47 @@ TASK_COMPLETION_GUIDANCE = (
|
||||
"is always better than inventing a result."
|
||||
)
|
||||
|
||||
# Universal parallel-tool-call guidance — applied to ALL models.
|
||||
#
|
||||
# Why this matters for cost: every assistant turn resends the entire
|
||||
# accumulated conversation (and, on cache-friendly providers, re-reads the
|
||||
# cached prefix and pays for the newly-appended turn). A model that issues
|
||||
# one tool call per turn multiplies the number of round-trips — and therefore
|
||||
# the resent context — for any task that needs several independent reads,
|
||||
# searches, or safe lookups. Batching independent calls into a single
|
||||
# assistant response collapses N turns into one, cutting both latency and the
|
||||
# resent-context cost that compounds over a long conversation.
|
||||
#
|
||||
# The hermes-agent runtime already executes a batch of tool calls
|
||||
# concurrently when they are independent (read-only tools always; path-scoped
|
||||
# file ops when their targets don't overlap — see
|
||||
# run_agent._execute_tool_calls / tool_dispatch_helpers). The missing piece
|
||||
# was telling the *model* to emit those calls together in the first place.
|
||||
# Until now the only batching steer in the prompt lived in
|
||||
# GOOGLE_MODEL_OPERATIONAL_GUIDANCE — Gemini/Gemma got it, every other model
|
||||
# got nothing. This block makes the steer universal; the now-redundant
|
||||
# Google-only bullet has been dropped so no model receives it twice.
|
||||
#
|
||||
# Short on purpose — shipped in the cached system prompt to every user, every
|
||||
# session. Token cost is paid once at install and amortised across all
|
||||
# sessions via prefix caching. Keep it tight.
|
||||
#
|
||||
# Ported from cline/cline#11514 ("encourage parallel tool calls"), adapted
|
||||
# from Cline's TypeScript tool-surface guidance to hermes-agent's Python
|
||||
# prompt-assembly architecture.
|
||||
PARALLEL_TOOL_CALL_GUIDANCE = (
|
||||
"# Parallel tool calls\n"
|
||||
"When you need several pieces of information that don't depend on each "
|
||||
"other, request them together in a single response instead of one tool "
|
||||
"call per turn. Independent reads, searches, web fetches, and read-only "
|
||||
"commands should be batched into the same assistant turn — the runtime "
|
||||
"executes independent calls concurrently, and batching avoids resending "
|
||||
"the whole conversation on every extra round-trip.\n"
|
||||
"Only serialize calls when a later call genuinely depends on an earlier "
|
||||
"call's result (e.g. you must read a file before you can patch it). When "
|
||||
"in doubt and the calls are independent, batch them."
|
||||
)
|
||||
|
||||
# OpenAI GPT/Codex-specific execution guidance. Addresses known failure modes
|
||||
# where GPT models abandon work on partial results, skip prerequisite lookups,
|
||||
# hallucinate instead of using tools, and declare "done" without verification.
|
||||
@@ -385,9 +427,10 @@ GOOGLE_MODEL_OPERATIONAL_GUIDANCE = (
|
||||
"package.json, requirements.txt, Cargo.toml, etc. before importing.\n"
|
||||
"- **Conciseness:** Keep explanatory text brief — a few sentences, not "
|
||||
"paragraphs. Focus on actions and results over narration.\n"
|
||||
"- **Parallel tool calls:** When you need to perform multiple independent "
|
||||
"operations (e.g. reading several files), make all the tool calls in a "
|
||||
"single response rather than sequentially.\n"
|
||||
# Parallel-tool-call steering now lives in the universal
|
||||
# PARALLEL_TOOL_CALL_GUIDANCE block (injected for all models), so it is no
|
||||
# longer duplicated here — keeping it would send Gemini/Gemma the same
|
||||
# instruction twice.
|
||||
"- **Non-interactive commands:** Use flags like -y, --yes, --non-interactive "
|
||||
"to prevent CLI tools from hanging on prompts.\n"
|
||||
"- **Keep going:** Work autonomously until the task is fully resolved. "
|
||||
@@ -957,6 +1000,80 @@ CONTEXT_FILE_MAX_CHARS = 20_000
|
||||
CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
|
||||
CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
|
||||
|
||||
# Dynamic-cap parameters (used when no explicit context_file_max_chars is set).
|
||||
# The cap scales with the model's context window so large-context models rarely
|
||||
# truncate a project doc, while small-context models stay at the historical
|
||||
# 20K floor. ~4 chars/token is the usual English heuristic; we spend a small
|
||||
# slice of the window on context files since they share the cached prefix with
|
||||
# the system prompt, tools, memory, and the whole conversation.
|
||||
_CONTEXT_FILE_CHARS_PER_TOKEN = 4
|
||||
_CONTEXT_FILE_WINDOW_FRACTION = 0.06
|
||||
_CONTEXT_FILE_DYNAMIC_CEILING = 500_000
|
||||
|
||||
|
||||
def _dynamic_context_file_max_chars(context_length: Optional[int]) -> int:
|
||||
"""Derive a char cap from the model's context window.
|
||||
|
||||
Returns at least ``CONTEXT_FILE_MAX_CHARS`` (the historical 20K floor) and
|
||||
at most ``_CONTEXT_FILE_DYNAMIC_CEILING``. When ``context_length`` is
|
||||
unknown/invalid, returns the flat default so behavior is unchanged.
|
||||
"""
|
||||
if not isinstance(context_length, int) or context_length <= 0:
|
||||
return CONTEXT_FILE_MAX_CHARS
|
||||
budget = int(
|
||||
context_length * _CONTEXT_FILE_CHARS_PER_TOKEN * _CONTEXT_FILE_WINDOW_FRACTION
|
||||
)
|
||||
return max(CONTEXT_FILE_MAX_CHARS, min(budget, _CONTEXT_FILE_DYNAMIC_CEILING))
|
||||
|
||||
|
||||
def _get_context_file_max_chars(context_length: Optional[int] = None) -> int:
|
||||
"""Return the context-file truncation limit.
|
||||
|
||||
Resolution order:
|
||||
1. Explicit ``context_file_max_chars`` in config.yaml — user knows best,
|
||||
always wins (including over the dynamic cap).
|
||||
2. Dynamic cap derived from the model's ``context_length`` when provided
|
||||
(scales the budget to the window; floor 20K, ceiling 500K).
|
||||
3. ``CONTEXT_FILE_MAX_CHARS`` (20K) as the upstream-compatible fallback.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
val = load_config().get("context_file_max_chars")
|
||||
if isinstance(val, (int, float)) and val > 0:
|
||||
return int(val)
|
||||
except Exception as e:
|
||||
logger.debug("Could not read context_file_max_chars from config: %s", e)
|
||||
return _dynamic_context_file_max_chars(context_length)
|
||||
|
||||
# Collect truncation warnings so the caller (run_agent) can surface them.
|
||||
# A ContextVar (not a module-global list) isolates accumulation per thread /
|
||||
# per async task, so concurrent gateway-session prompt builds can't drain or
|
||||
# clear each other's pending warnings (cross-session leak). Each build runs in
|
||||
# its own context, collects its own warnings, and drains them synchronously.
|
||||
_truncation_warnings: "contextvars.ContextVar[Optional[list]]" = contextvars.ContextVar(
|
||||
"context_file_truncation_warnings", default=None
|
||||
)
|
||||
|
||||
|
||||
def _record_truncation_warning(msg: str) -> None:
|
||||
"""Append a truncation warning to the current context's accumulator."""
|
||||
warnings = _truncation_warnings.get()
|
||||
if warnings is None:
|
||||
warnings = []
|
||||
_truncation_warnings.set(warnings)
|
||||
warnings.append(msg)
|
||||
|
||||
|
||||
def drain_truncation_warnings() -> list:
|
||||
"""Return and clear any truncation warnings accumulated in this context."""
|
||||
warnings = _truncation_warnings.get()
|
||||
if not warnings:
|
||||
return []
|
||||
drained = list(warnings)
|
||||
warnings.clear()
|
||||
return drained
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Skills prompt cache
|
||||
@@ -1463,19 +1580,47 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
# Context files (SOUL.md, AGENTS.md, .cursorrules)
|
||||
# =========================================================================
|
||||
|
||||
def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
|
||||
"""Head/tail truncation with a marker in the middle."""
|
||||
def _truncate_content(
|
||||
content: str,
|
||||
filename: str,
|
||||
max_chars: Optional[int] = None,
|
||||
context_length: Optional[int] = None,
|
||||
read_path: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Head/tail truncation with a marker in the middle.
|
||||
|
||||
``filename`` is the human label used in warnings. ``read_path`` is the
|
||||
concrete path the agent should ``read_file`` to recover the full content
|
||||
(defaults to ``filename`` when not supplied). ``context_length`` lets the
|
||||
cap scale to the model's window when no explicit config override is set.
|
||||
"""
|
||||
if max_chars is None:
|
||||
max_chars = _get_context_file_max_chars(context_length)
|
||||
if len(content) <= max_chars:
|
||||
return content
|
||||
target = read_path or filename
|
||||
msg = (
|
||||
f"⚠️ Context file {filename} TRUNCATED: "
|
||||
f"{len(content)} chars exceeds limit of {max_chars} — "
|
||||
f"trim the file, pin a larger context_file_max_chars, or use a "
|
||||
f"larger-context model!"
|
||||
)
|
||||
logger.warning(msg)
|
||||
_record_truncation_warning(msg)
|
||||
head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
|
||||
tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
|
||||
head = content[:head_chars]
|
||||
tail = content[-tail_chars:]
|
||||
marker = f"\n\n[...truncated {filename}: kept {head_chars}+{tail_chars} of {len(content)} chars. Use file tools to read the full file.]\n\n"
|
||||
marker = (
|
||||
f"\n\n[...truncated {filename}: kept {head_chars}+{tail_chars} of "
|
||||
f"{len(content)} chars. The middle is omitted — if you need the full "
|
||||
f"instructions, read the complete file with the read_file tool: "
|
||||
f"{target}]\n\n"
|
||||
)
|
||||
return head + marker + tail
|
||||
|
||||
|
||||
def load_soul_md() -> Optional[str]:
|
||||
def load_soul_md(context_length: Optional[int] = None) -> Optional[str]:
|
||||
"""Load SOUL.md from HERMES_HOME and return its content, or None.
|
||||
|
||||
Used as the agent identity (slot #1 in the system prompt). When this
|
||||
@@ -1496,14 +1641,17 @@ def load_soul_md() -> Optional[str]:
|
||||
if not content:
|
||||
return None
|
||||
content = _scan_context_content(content, "SOUL.md")
|
||||
content = _truncate_content(content, "SOUL.md")
|
||||
content = _truncate_content(
|
||||
content, "SOUL.md", context_length=context_length,
|
||||
read_path=str(soul_path),
|
||||
)
|
||||
return content
|
||||
except Exception as e:
|
||||
logger.debug("Could not read SOUL.md from %s: %s", soul_path, e)
|
||||
return None
|
||||
|
||||
|
||||
def _load_hermes_md(cwd_path: Path) -> str:
|
||||
def _load_hermes_md(cwd_path: Path, context_length: Optional[int] = None) -> str:
|
||||
""".hermes.md / HERMES.md — walk to git root."""
|
||||
hermes_md_path = _find_hermes_md(cwd_path)
|
||||
if not hermes_md_path:
|
||||
@@ -1520,13 +1668,16 @@ def _load_hermes_md(cwd_path: Path) -> str:
|
||||
pass
|
||||
content = _scan_context_content(content, rel)
|
||||
result = f"## {rel}\n\n{content}"
|
||||
return _truncate_content(result, ".hermes.md")
|
||||
return _truncate_content(
|
||||
result, ".hermes.md", context_length=context_length,
|
||||
read_path=str(hermes_md_path),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Could not read %s: %s", hermes_md_path, e)
|
||||
return ""
|
||||
|
||||
|
||||
def _load_agents_md(cwd_path: Path) -> str:
|
||||
def _load_agents_md(cwd_path: Path, context_length: Optional[int] = None) -> str:
|
||||
"""AGENTS.md — top-level only (no recursive walk)."""
|
||||
for name in ["AGENTS.md", "agents.md"]:
|
||||
candidate = cwd_path / name
|
||||
@@ -1536,13 +1687,16 @@ def _load_agents_md(cwd_path: Path) -> str:
|
||||
if content:
|
||||
content = _scan_context_content(content, name)
|
||||
result = f"## {name}\n\n{content}"
|
||||
return _truncate_content(result, "AGENTS.md")
|
||||
return _truncate_content(
|
||||
result, "AGENTS.md", context_length=context_length,
|
||||
read_path=str(candidate),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Could not read %s: %s", candidate, e)
|
||||
return ""
|
||||
|
||||
|
||||
def _load_claude_md(cwd_path: Path) -> str:
|
||||
def _load_claude_md(cwd_path: Path, context_length: Optional[int] = None) -> str:
|
||||
"""CLAUDE.md / claude.md — cwd only."""
|
||||
for name in ["CLAUDE.md", "claude.md"]:
|
||||
candidate = cwd_path / name
|
||||
@@ -1552,13 +1706,16 @@ def _load_claude_md(cwd_path: Path) -> str:
|
||||
if content:
|
||||
content = _scan_context_content(content, name)
|
||||
result = f"## {name}\n\n{content}"
|
||||
return _truncate_content(result, "CLAUDE.md")
|
||||
return _truncate_content(
|
||||
result, "CLAUDE.md", context_length=context_length,
|
||||
read_path=str(candidate),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Could not read %s: %s", candidate, e)
|
||||
return ""
|
||||
|
||||
|
||||
def _load_cursorrules(cwd_path: Path) -> str:
|
||||
def _load_cursorrules(cwd_path: Path, context_length: Optional[int] = None) -> str:
|
||||
""".cursorrules + .cursor/rules/*.mdc — cwd only."""
|
||||
cursorrules_content = ""
|
||||
cursorrules_file = cwd_path / ".cursorrules"
|
||||
@@ -1585,10 +1742,17 @@ def _load_cursorrules(cwd_path: Path) -> str:
|
||||
|
||||
if not cursorrules_content:
|
||||
return ""
|
||||
return _truncate_content(cursorrules_content, ".cursorrules")
|
||||
return _truncate_content(
|
||||
cursorrules_content, ".cursorrules", context_length=context_length,
|
||||
read_path=str(cwd_path / ".cursorrules"),
|
||||
)
|
||||
|
||||
|
||||
def build_context_files_prompt(cwd: Optional[str] = None, skip_soul: bool = False) -> str:
|
||||
def build_context_files_prompt(
|
||||
cwd: Optional[str] = None,
|
||||
skip_soul: bool = False,
|
||||
context_length: Optional[int] = None,
|
||||
) -> str:
|
||||
"""Discover and load context files for the system prompt.
|
||||
|
||||
Priority (first found wins — only ONE project context type is loaded):
|
||||
@@ -1598,7 +1762,11 @@ def build_context_files_prompt(cwd: Optional[str] = None, skip_soul: bool = Fals
|
||||
4. .cursorrules / .cursor/rules/*.mdc (cwd only)
|
||||
|
||||
SOUL.md from HERMES_HOME is independent and always included when present.
|
||||
Each context source is capped at 20,000 chars.
|
||||
|
||||
Each context source is capped before injection. The cap defaults to the
|
||||
model's context window (scaled — see ``_dynamic_context_file_max_chars``)
|
||||
when *context_length* is provided, falling back to 20,000 chars otherwise.
|
||||
An explicit ``context_file_max_chars`` in config.yaml always wins.
|
||||
|
||||
When *skip_soul* is True, SOUL.md is not included here (it was already
|
||||
loaded via ``load_soul_md()`` for the identity slot).
|
||||
@@ -1611,17 +1779,17 @@ def build_context_files_prompt(cwd: Optional[str] = None, skip_soul: bool = Fals
|
||||
|
||||
# Priority-based project context: first match wins
|
||||
project_context = (
|
||||
_load_hermes_md(cwd_path)
|
||||
or _load_agents_md(cwd_path)
|
||||
or _load_claude_md(cwd_path)
|
||||
or _load_cursorrules(cwd_path)
|
||||
_load_hermes_md(cwd_path, context_length)
|
||||
or _load_agents_md(cwd_path, context_length)
|
||||
or _load_claude_md(cwd_path, context_length)
|
||||
or _load_cursorrules(cwd_path, context_length)
|
||||
)
|
||||
if project_context:
|
||||
sections.append(project_context)
|
||||
|
||||
# SOUL.md from HERMES_HOME only — skip when already loaded as identity
|
||||
if not skip_soul:
|
||||
soul_content = load_soul_md()
|
||||
soul_content = load_soul_md(context_length)
|
||||
if soul_content:
|
||||
sections.append(soul_content)
|
||||
|
||||
|
||||
@@ -26,6 +26,91 @@ _skill_commands_platform: Optional[str] = None
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Skill-scaffolding markers and the canonical extractor.
|
||||
#
|
||||
# When a user invokes a /skill (or /bundle), Hermes expands the turn into a
|
||||
# model-facing message that embeds the full skill body plus scaffolding. That
|
||||
# expanded text is what flows into the agent loop — and into memory providers
|
||||
# via MemoryManager. Providers that store or embed the raw user turn (mem0,
|
||||
# openviking, hindsight, retaindb, byterover, honcho, supermemory) would
|
||||
# otherwise capture the entire skill body instead of what the user actually
|
||||
# asked. ``extract_user_instruction_from_skill_message`` recovers just the
|
||||
# user's instruction so memory stays clean.
|
||||
#
|
||||
# These markers MUST stay byte-identical to the builders below
|
||||
# (``_build_skill_message`` here, ``build_bundle_invocation_message`` in
|
||||
# agent/skill_bundles.py). They are co-located with the single-skill builder
|
||||
# on purpose, and the bundle markers are asserted against the bundle builder in
|
||||
# tests/openviking_plugin/test_openviking.py::test_skill_markers_match_hermes_scaffolding.
|
||||
# ---------------------------------------------------------------------------
|
||||
_SKILL_INVOCATION_PREFIX = "[IMPORTANT: The user has invoked the "
|
||||
_SINGLE_SKILL_MARKER = "The full skill content is loaded below.]"
|
||||
_SINGLE_SKILL_INSTRUCTION = (
|
||||
"The user has provided the following instruction alongside the skill invocation: "
|
||||
)
|
||||
_RUNTIME_NOTE = "\n\n[Runtime note:"
|
||||
_BUNDLE_MARKER = " skill bundle,"
|
||||
_BUNDLE_USER_INSTRUCTION = "\nUser instruction: "
|
||||
_BUNDLE_FIRST_SKILL_BLOCK = "\n\n[Loaded as part of the "
|
||||
|
||||
|
||||
def extract_user_instruction_from_skill_message(content: Any) -> Optional[str]:
|
||||
"""Recover the user's instruction from a slash-skill-expanded turn.
|
||||
|
||||
Returns:
|
||||
- The original string unchanged when it is NOT skill scaffolding
|
||||
(a normal user message passes straight through).
|
||||
- The extracted user instruction when the scaffolding carried one.
|
||||
- ``None`` when the content is skill scaffolding with no user
|
||||
instruction (i.e. a bare ``/skill`` invocation). Callers that feed
|
||||
memory providers should skip the turn in that case — there is no
|
||||
user content worth storing.
|
||||
"""
|
||||
if not isinstance(content, str):
|
||||
return None
|
||||
|
||||
if not content.startswith(_SKILL_INVOCATION_PREFIX):
|
||||
return content
|
||||
|
||||
if _BUNDLE_MARKER in content:
|
||||
return _extract_bundle_user_instruction(content)
|
||||
|
||||
if _SINGLE_SKILL_MARKER in content:
|
||||
return _extract_single_skill_user_instruction(content)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _extract_single_skill_user_instruction(message: str) -> Optional[str]:
|
||||
# Single-skill format appends the user instruction after the skill body, so
|
||||
# the last occurrence is the user-provided one; the body may quote this text.
|
||||
marker_idx = message.rfind(_SINGLE_SKILL_INSTRUCTION)
|
||||
if marker_idx < 0:
|
||||
return None
|
||||
|
||||
instruction = message[marker_idx + len(_SINGLE_SKILL_INSTRUCTION):]
|
||||
runtime_idx = instruction.find(_RUNTIME_NOTE)
|
||||
if runtime_idx >= 0:
|
||||
instruction = instruction[:runtime_idx]
|
||||
instruction = instruction.strip()
|
||||
return instruction or None
|
||||
|
||||
|
||||
def _extract_bundle_user_instruction(message: str) -> Optional[str]:
|
||||
# Bundle format puts the user instruction before the loaded skills, so the
|
||||
# first occurrence is the user-provided one.
|
||||
marker_idx = message.find(_BUNDLE_USER_INSTRUCTION)
|
||||
if marker_idx < 0:
|
||||
return None
|
||||
|
||||
instruction = message[marker_idx + len(_BUNDLE_USER_INSTRUCTION):]
|
||||
first_skill_idx = instruction.find(_BUNDLE_FIRST_SKILL_BLOCK)
|
||||
if first_skill_idx >= 0:
|
||||
instruction = instruction[:first_skill_idx]
|
||||
instruction = instruction.strip()
|
||||
return instruction or None
|
||||
|
||||
|
||||
def _resolve_skill_commands_platform() -> Optional[str]:
|
||||
"""Return the current platform scope used for disabled-skill filtering.
|
||||
|
||||
@@ -43,14 +43,20 @@ EXCLUDED_SKILL_DIRS = frozenset(
|
||||
)
|
||||
)
|
||||
|
||||
# Supporting files live inside a skill package and are loaded explicitly via
|
||||
# skill_view(skill, file_path=...). They are not standalone skills and must not
|
||||
# be scanned for active SKILL.md/DESCRIPTION.md entries, even if a Curator or
|
||||
# archive workflow preserves a complete old skill package under references/.
|
||||
SKILL_SUPPORT_DIRS = frozenset(("references", "templates", "assets", "scripts"))
|
||||
|
||||
|
||||
def is_excluded_skill_path(path) -> bool:
|
||||
"""True if any component of *path* is in EXCLUDED_SKILL_DIRS.
|
||||
"""True if *path* should be skipped by active skill scanners.
|
||||
|
||||
Use this on every SKILL.md path produced by ``rglob`` to prune
|
||||
dependency, virtualenv, VCS, and cache directories. Centralising the
|
||||
check here keeps every skill-scanning site in sync with the shared
|
||||
exclusion set.
|
||||
Use this on every ``SKILL.md`` path produced by direct ``rglob`` scans to
|
||||
prune dependency, virtualenv, VCS, cache, and progressive-disclosure
|
||||
support-package paths. Centralising the check here keeps every
|
||||
skill-scanning site in sync with the shared exclusion set.
|
||||
|
||||
Accepts a Path or string.
|
||||
"""
|
||||
@@ -59,7 +65,36 @@ def is_excluded_skill_path(path) -> bool:
|
||||
except AttributeError:
|
||||
from pathlib import PurePath
|
||||
parts = PurePath(str(path)).parts
|
||||
return any(part in EXCLUDED_SKILL_DIRS for part in parts)
|
||||
return any(part in EXCLUDED_SKILL_DIRS for part in parts) or is_skill_support_path(
|
||||
path
|
||||
)
|
||||
|
||||
|
||||
def is_skill_support_path(path) -> bool:
|
||||
"""True if *path* is under a support dir of an actual skill root.
|
||||
|
||||
``references/``, ``templates/``, ``assets/``, and ``scripts/`` are
|
||||
progressive-disclosure support areas when they sit directly inside a skill
|
||||
directory containing ``SKILL.md``. They are not active discovery roots for
|
||||
standalone skills. A preserved package such as
|
||||
``some-skill/references/old-skill-package/SKILL.md`` is documentation data
|
||||
unless the caller explicitly loads it via ``file_path``.
|
||||
|
||||
Legitimate categories or skill names such as ``skills/scripts/foo`` remain
|
||||
discoverable because their ``scripts`` component is not directly under a
|
||||
directory that contains ``SKILL.md``.
|
||||
"""
|
||||
path_obj = path if isinstance(path, Path) else Path(str(path))
|
||||
parts = path_obj.parts
|
||||
# Last component may be a file or candidate skill directory name. Only
|
||||
# components before the leaf can be containing support directories.
|
||||
for idx, part in enumerate(parts[:-1]):
|
||||
if part not in SKILL_SUPPORT_DIRS or idx == 0:
|
||||
continue
|
||||
skill_root = Path(*parts[:idx])
|
||||
if (skill_root / "SKILL.md").exists():
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
# ── Lazy YAML loader ─────────────────────────────────────────────────────
|
||||
@@ -661,12 +696,21 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
|
||||
def iter_skill_index_files(skills_dir: Path, filename: str):
|
||||
"""Walk skills_dir yielding sorted paths matching *filename*.
|
||||
|
||||
Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
|
||||
directories so dependencies cannot register nested skills.
|
||||
Excludes Hermes metadata, VCS, virtualenv/dependency, cache, and skill
|
||||
support directories. Support directories (references/templates/assets/
|
||||
scripts) can contain arbitrary markdown and even archived package
|
||||
``SKILL.md`` files, but they are progressive-disclosure data loaded through
|
||||
``skill_view(..., file_path=...)`` rather than active skill roots.
|
||||
"""
|
||||
matches = []
|
||||
for root, dirs, files in os.walk(skills_dir, followlinks=True):
|
||||
dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
|
||||
has_skill_md = "SKILL.md" in files
|
||||
dirs[:] = [
|
||||
d
|
||||
for d in dirs
|
||||
if d not in EXCLUDED_SKILL_DIRS
|
||||
and not (has_skill_md and d in SKILL_SUPPORT_DIRS)
|
||||
]
|
||||
if filename in files:
|
||||
matches.append(Path(root) / filename)
|
||||
for path in sorted(matches, key=lambda p: str(p.relative_to(skills_dir))):
|
||||
|
||||
@@ -33,6 +33,7 @@ from agent.prompt_builder import (
|
||||
KANBAN_GUIDANCE,
|
||||
MEMORY_GUIDANCE,
|
||||
OPENAI_MODEL_EXECUTION_GUIDANCE,
|
||||
PARALLEL_TOOL_CALL_GUIDANCE,
|
||||
PLATFORM_HINTS,
|
||||
SESSION_SEARCH_GUIDANCE,
|
||||
SKILLS_GUIDANCE,
|
||||
@@ -40,6 +41,7 @@ from agent.prompt_builder import (
|
||||
TASK_COMPLETION_GUIDANCE,
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE,
|
||||
TOOL_USE_ENFORCEMENT_MODELS,
|
||||
drain_truncation_warnings,
|
||||
)
|
||||
from agent.runtime_cwd import resolve_context_cwd
|
||||
|
||||
@@ -59,6 +61,55 @@ def _ra():
|
||||
return run_agent
|
||||
|
||||
|
||||
def _resolve_platform_hint(agent: Any, platform_key: str, default_hint: str) -> str:
|
||||
"""Apply a per-platform prompt-hint override to the default hint.
|
||||
|
||||
Reads ``agent._platform_hint_overrides`` (populated from
|
||||
``config.yaml`` ``platform_hints`` by ``agent_init``) and resolves the
|
||||
effective hint for *platform_key*:
|
||||
|
||||
* ``replace`` — substitute the default hint entirely.
|
||||
* ``append`` — keep the default and append the extra text.
|
||||
* a bare string value — treated as ``append`` (convenience shorthand).
|
||||
|
||||
Precedence: ``replace`` wins over ``append`` if both are present.
|
||||
Override text is added on top of (not instead of) the SOUL/context/
|
||||
memory tiers — it only affects the platform-hint segment, so other
|
||||
platforms are unaffected and general system instructions still apply.
|
||||
|
||||
Defensive: any malformed entry falls back to the unmodified default so
|
||||
a bad config value can never break prompt assembly or leak across
|
||||
platforms.
|
||||
"""
|
||||
if not platform_key:
|
||||
return default_hint
|
||||
overrides = getattr(agent, "_platform_hint_overrides", None)
|
||||
if not isinstance(overrides, dict) or not overrides:
|
||||
return default_hint
|
||||
spec = overrides.get(platform_key)
|
||||
if spec is None:
|
||||
return default_hint
|
||||
|
||||
# Shorthand: a bare string is treated as append text.
|
||||
if isinstance(spec, str):
|
||||
extra = spec.strip()
|
||||
return f"{default_hint}\n\n{extra}".strip() if extra else default_hint
|
||||
|
||||
if not isinstance(spec, dict):
|
||||
return default_hint
|
||||
|
||||
replace_text = spec.get("replace")
|
||||
if isinstance(replace_text, str) and replace_text.strip():
|
||||
base = replace_text.strip()
|
||||
else:
|
||||
base = default_hint
|
||||
|
||||
append_text = spec.get("append")
|
||||
if isinstance(append_text, str) and append_text.strip():
|
||||
return f"{base}\n\n{append_text.strip()}".strip()
|
||||
return base
|
||||
|
||||
|
||||
def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) -> Dict[str, str]:
|
||||
"""Assemble the system prompt as three ordered parts.
|
||||
|
||||
@@ -82,6 +133,17 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
||||
# we resolve through ``_ra()`` to honor those patches.
|
||||
_r = _ra()
|
||||
|
||||
# Resolve the model's context window once so context-file caps can scale
|
||||
# to it (dynamic cap — see prompt_builder._dynamic_context_file_max_chars).
|
||||
# None falls back to the historical flat default. This value is stable for
|
||||
# the life of the conversation, so it does not threaten prompt caching.
|
||||
_ctx_len: Optional[int] = None
|
||||
_cc = getattr(agent, "context_compressor", None)
|
||||
if _cc is not None:
|
||||
_cc_len = getattr(_cc, "context_length", None)
|
||||
if isinstance(_cc_len, int) and _cc_len > 0:
|
||||
_ctx_len = _cc_len
|
||||
|
||||
# ── Stable tier ────────────────────────────────────────────────
|
||||
stable_parts: List[str] = []
|
||||
|
||||
@@ -90,7 +152,7 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
||||
# cwd project instructions disabled.
|
||||
_soul_loaded = False
|
||||
if agent.load_soul_identity or not agent.skip_context_files:
|
||||
_soul_content = _r.load_soul_md()
|
||||
_soul_content = _r.load_soul_md(_ctx_len)
|
||||
if _soul_content:
|
||||
stable_parts.append(_soul_content)
|
||||
_soul_loaded = True
|
||||
@@ -111,6 +173,17 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
||||
if getattr(agent, "_task_completion_guidance", True) and agent.valid_tool_names:
|
||||
stable_parts.append(TASK_COMPLETION_GUIDANCE)
|
||||
|
||||
# Universal parallel-tool-call guidance. Tells the model to batch
|
||||
# independent tool calls into one assistant turn rather than emitting one
|
||||
# call per turn — the runtime already runs independent calls concurrently
|
||||
# (read-only tools always; non-overlapping path-scoped file ops), so the
|
||||
# only thing missing was steering the model to produce the batch. Cuts
|
||||
# round-trips and the resent-context cost that compounds over a long
|
||||
# conversation. Gated by config.yaml ``agent.parallel_tool_call_guidance``
|
||||
# (default True) and only injected when tools are actually loaded.
|
||||
if getattr(agent, "_parallel_tool_call_guidance", True) and agent.valid_tool_names:
|
||||
stable_parts.append(PARALLEL_TOOL_CALL_GUIDANCE)
|
||||
|
||||
# Tool-aware behavioral guidance: only inject when the tools are loaded
|
||||
tool_guidance = []
|
||||
if "memory" in agent.valid_tool_names:
|
||||
@@ -307,18 +380,25 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
||||
)
|
||||
|
||||
platform_key = (agent.platform or "").lower().strip()
|
||||
# Resolve the built-in/plugin default hint for this platform, then apply
|
||||
# any per-platform override from config (platform_hints.<platform>).
|
||||
_default_hint = ""
|
||||
if platform_key in PLATFORM_HINTS:
|
||||
stable_parts.append(PLATFORM_HINTS[platform_key])
|
||||
_default_hint = PLATFORM_HINTS[platform_key]
|
||||
elif platform_key:
|
||||
# Check plugin registry for platform-specific LLM guidance
|
||||
try:
|
||||
from gateway.platform_registry import platform_registry
|
||||
_entry = platform_registry.get(platform_key)
|
||||
if _entry and _entry.platform_hint:
|
||||
stable_parts.append(_entry.platform_hint)
|
||||
_default_hint = _entry.platform_hint
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_effective_hint = _resolve_platform_hint(agent, platform_key, _default_hint)
|
||||
if _effective_hint:
|
||||
stable_parts.append(_effective_hint)
|
||||
|
||||
# ── Context tier (cwd-dependent, may change between sessions) ─
|
||||
context_parts: List[str] = []
|
||||
|
||||
@@ -333,7 +413,8 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
||||
# dir — the user's real cwd there, but the install dir for the gateway
|
||||
# daemon, which is why the gateway sets TERMINAL_CWD.
|
||||
context_files_prompt = _r.build_context_files_prompt(
|
||||
cwd=resolve_context_cwd(), skip_soul=_soul_loaded)
|
||||
cwd=resolve_context_cwd(), skip_soul=_soul_loaded,
|
||||
context_length=_ctx_len)
|
||||
if context_files_prompt:
|
||||
context_parts.append(context_files_prompt)
|
||||
|
||||
@@ -400,7 +481,14 @@ def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str
|
||||
warm across turns.
|
||||
"""
|
||||
parts = build_system_prompt_parts(agent, system_message=system_message)
|
||||
return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
|
||||
joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
|
||||
|
||||
# Surface context-file truncation warnings through the normal agent status
|
||||
# channel so gateway/CLI users see them in chat instead of only in logs.
|
||||
for warning in drain_truncation_warnings():
|
||||
agent._emit_status(warning)
|
||||
|
||||
return joined
|
||||
|
||||
|
||||
def invalidate_system_prompt(agent: Any) -> None:
|
||||
|
||||
@@ -1012,28 +1012,42 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
elif function_name == "memory":
|
||||
def _execute(next_args: dict) -> Any:
|
||||
target = next_args.get("target", "memory")
|
||||
operations = next_args.get("operations")
|
||||
from tools.memory_tool import memory_tool as _memory_tool
|
||||
result = _memory_tool(
|
||||
action=next_args.get("action"),
|
||||
target=target,
|
||||
content=next_args.get("content"),
|
||||
old_text=next_args.get("old_text"),
|
||||
operations=operations,
|
||||
store=agent._memory_store,
|
||||
)
|
||||
# Bridge: notify external memory provider of built-in memory writes
|
||||
if agent._memory_manager and next_args.get("action") in {"add", "replace"}:
|
||||
try:
|
||||
agent._memory_manager.on_memory_write(
|
||||
next_args.get("action", ""),
|
||||
target,
|
||||
next_args.get("content", ""),
|
||||
metadata=agent._build_memory_write_metadata(
|
||||
task_id=effective_task_id,
|
||||
tool_call_id=getattr(tool_call, "id", None),
|
||||
),
|
||||
# Bridge: notify external memory provider of built-in memory writes.
|
||||
# Covers both the single-op shape and each add/replace inside a batch.
|
||||
if agent._memory_manager:
|
||||
if operations:
|
||||
_mem_ops = [
|
||||
op for op in operations
|
||||
if isinstance(op, dict) and op.get("action") in {"add", "replace"}
|
||||
]
|
||||
else:
|
||||
_mem_ops = (
|
||||
[{"action": next_args.get("action"), "content": next_args.get("content")}]
|
||||
if next_args.get("action") in {"add", "replace"} else []
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
for _op in _mem_ops:
|
||||
try:
|
||||
agent._memory_manager.on_memory_write(
|
||||
_op.get("action", ""),
|
||||
target,
|
||||
_op.get("content", "") or "",
|
||||
metadata=agent._build_memory_write_metadata(
|
||||
task_id=effective_task_id,
|
||||
tool_call_id=getattr(tool_call, "id", None),
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
function_result, function_args = _run_agent_tool_execution_middleware(
|
||||
agent,
|
||||
|
||||
@@ -88,7 +88,7 @@ class AnthropicTransport(ProviderTransport):
|
||||
from agent.transports.types import ToolCall
|
||||
|
||||
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
|
||||
_MCP_PREFIX = "mcp_"
|
||||
_MCP_PREFIX = "mcp__"
|
||||
|
||||
text_parts = []
|
||||
reasoning_parts = []
|
||||
@@ -132,17 +132,25 @@ class AnthropicTransport(ProviderTransport):
|
||||
elif block.type == "tool_use":
|
||||
name = block.name
|
||||
if strip_tool_prefix and name.startswith(_MCP_PREFIX):
|
||||
stripped = name[len(_MCP_PREFIX):]
|
||||
# Only strip the mcp_ prefix for OAuth-injected tools
|
||||
# (where Hermes adds the prefix when sending to Anthropic
|
||||
# and must remove it on the way back). Native MCP server
|
||||
# tools (from mcp_servers: in config.yaml) are registered
|
||||
# in the tool registry under their FULL mcp_<server>_<tool>
|
||||
# name and must NOT be stripped. GH-25255.
|
||||
# On the OAuth wire every tool carries a double-underscore
|
||||
# ``mcp__`` prefix (added in build_anthropic_kwargs to avoid
|
||||
# Anthropic's single-underscore third-party classifier).
|
||||
# Reverse it back to the name the registry/dispatcher knows.
|
||||
# Two original forms map onto the same ``mcp__`` wire name:
|
||||
# ``mcp__read_file`` <- bare native tool ``read_file``
|
||||
# ``mcp__linear_get_issue`` <- MCP server tool
|
||||
# ``mcp_linear_get_issue``
|
||||
# Resolve by registry lookup, preferring whichever original
|
||||
# is actually registered; never rewrite a name the LLM used
|
||||
# that already resolves natively. GH-25255.
|
||||
from tools.registry import registry as _tool_registry
|
||||
if (_tool_registry.get_entry(stripped)
|
||||
and not _tool_registry.get_entry(name)):
|
||||
name = stripped
|
||||
if not _tool_registry.get_entry(name):
|
||||
bare = name[len(_MCP_PREFIX):] # read_file
|
||||
single = "mcp_" + bare # mcp_read_file / mcp_linear_get_issue
|
||||
if _tool_registry.get_entry(single):
|
||||
name = single
|
||||
elif _tool_registry.get_entry(bare):
|
||||
name = bare
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=block.id,
|
||||
|
||||
@@ -128,6 +128,65 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
|
||||
|
||||
response_tools = _responses_tools(tools)
|
||||
|
||||
# xAI server-side web search.
|
||||
#
|
||||
# grok models on xAI's /v1/responses surface (notably
|
||||
# grok-composer-2.5-fast on SuperGrok OAuth) have a *native*,
|
||||
# server-executed web search. When the model is handed a
|
||||
# client-side function literally named ``web_search``, it routes
|
||||
# the intent to that native engine — but because the tool is
|
||||
# declared as a plain ``function`` rather than xAI's first-class
|
||||
# ``{"type": "web_search"}`` built-in, the server-side search is
|
||||
# dispatched but never reconciled: the response streams reasoning
|
||||
# + ``web_search_call`` progress items, the searches never reach
|
||||
# ``status="completed"`` in the assembled output, no final
|
||||
# message is emitted, and ``_normalize_codex_response`` correctly
|
||||
# sees reasoning-with-no-answer and reports ``incomplete``. The
|
||||
# turn then burns 3 continuation retries and fails with "Codex
|
||||
# response remained incomplete after 3 continuation attempts".
|
||||
# Verified live against grok-composer-2.5-fast (2026-06).
|
||||
#
|
||||
# Fix: when the agent HAS a client-side ``web_search`` function (i.e.
|
||||
# the user enabled the web toolset), declare xAI's native
|
||||
# ``web_search`` built-in instead so the search actually runs to
|
||||
# completion server-side and the model streams a real answer. The
|
||||
# Responses API rejects two tools sharing the name ``web_search``
|
||||
# (HTTP 400 "Duplicate tool names"), so we drop the client-side
|
||||
# ``web_search`` function for the xAI path and let the native tool
|
||||
# satisfy it. All other client-side tools (read_file, terminal,
|
||||
# web_extract, MCP tools, …) are untouched and continue to dispatch
|
||||
# through Hermes's agent loop.
|
||||
#
|
||||
# Scope: we ONLY swap in the native built-in when the client
|
||||
# ``web_search`` was actually present. We do NOT force-enable Grok
|
||||
# server-side search on turns where the user never had web enabled —
|
||||
# that would silently route around Hermes's web-provider config and
|
||||
# tool-trace/citation plumbing for every xai-oauth turn. The swap is
|
||||
# a 1:1 replacement of an already-requested capability, not an
|
||||
# additive grant.
|
||||
#
|
||||
# NOTE: for the swapped case this routes ``web_search`` to Grok's
|
||||
# native search engine for xAI sessions instead of Hermes's
|
||||
# configured web provider (Tavily/etc.), and those results bypass
|
||||
# Hermes's tool-trace / citation plumbing (they arrive baked into the
|
||||
# model's answer rather than as a tool result the loop observes).
|
||||
# Scoped to ``is_xai_responses`` deliberately; narrow to specific
|
||||
# models if a future grok variant should keep the client-side
|
||||
# function.
|
||||
if is_xai_responses and response_tools:
|
||||
has_client_web_search = any(
|
||||
isinstance(t, dict) and t.get("name") == "web_search"
|
||||
for t in response_tools
|
||||
)
|
||||
if has_client_web_search:
|
||||
filtered = [
|
||||
t for t in response_tools
|
||||
if not (isinstance(t, dict) and t.get("name") == "web_search")
|
||||
]
|
||||
filtered.append({"type": "web_search"})
|
||||
response_tools = filtered
|
||||
|
||||
# ``tools`` MUST be omitted entirely when there are no functions to
|
||||
# expose: the openai SDK's ``responses.stream()`` / ``responses.parse()``
|
||||
# eagerly call ``_make_tools(tools)`` which does ``for tool in tools``
|
||||
@@ -218,10 +277,28 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
kwargs.pop("timeout", None)
|
||||
|
||||
if is_codex_backend:
|
||||
# chatgpt.com/backend-api/codex rejects body-level
|
||||
# ``extra_headers`` with HTTP 400. Correlation/cache routing for
|
||||
# this backend must not be sent through the Responses payload.
|
||||
kwargs.pop("extra_headers", None)
|
||||
# The Codex backend rejects body-level ``extra_headers`` with
|
||||
# HTTP 400, but the OpenAI SDK's ``extra_headers`` kwarg maps
|
||||
# to actual HTTP request headers (not body fields). We need
|
||||
# these headers for cache-scope routing so prompt cache hits
|
||||
# remain high. Send session_id / x-client-request-id as HTTP
|
||||
# headers while keeping ``prompt_cache_key`` in the body for
|
||||
# standard OpenAI routing as a belt-and-braces fallback.
|
||||
cache_scope_id = str(session_id or "").strip()
|
||||
if cache_scope_id:
|
||||
existing_extra_headers = kwargs.get("extra_headers")
|
||||
merged_extra_headers: Dict[str, str] = {}
|
||||
if isinstance(existing_extra_headers, dict):
|
||||
merged_extra_headers.update(
|
||||
{
|
||||
str(key): str(value)
|
||||
for key, value in existing_extra_headers.items()
|
||||
if key and value is not None
|
||||
}
|
||||
)
|
||||
merged_extra_headers["session_id"] = cache_scope_id
|
||||
merged_extra_headers["x-client-request-id"] = cache_scope_id
|
||||
kwargs["extra_headers"] = merged_extra_headers
|
||||
|
||||
max_tokens = params.get("max_tokens")
|
||||
if max_tokens is not None and not is_codex_backend:
|
||||
|
||||
@@ -69,6 +69,7 @@ def build_turn_context(
|
||||
task_id: Optional[str],
|
||||
stream_callback,
|
||||
persist_user_message: Optional[str],
|
||||
persist_user_timestamp: Optional[float] = None,
|
||||
*,
|
||||
restore_or_build_system_prompt,
|
||||
install_safe_stdio,
|
||||
@@ -121,6 +122,7 @@ def build_turn_context(
|
||||
agent._stream_callback = stream_callback
|
||||
agent._persist_user_message_idx = None
|
||||
agent._persist_user_message_override = persist_user_message
|
||||
agent._persist_user_message_timestamp = persist_user_timestamp
|
||||
# Generate unique task_id if not provided to isolate VMs between tasks.
|
||||
effective_task_id = task_id or str(uuid.uuid4())
|
||||
agent._current_task_id = effective_task_id
|
||||
|
||||
@@ -286,7 +286,7 @@ async fn run_update(app: AppHandle) -> Result<()> {
|
||||
emit_stage(&app, "rebuild", StageState::Running, None, None);
|
||||
let started = Instant::now();
|
||||
let rebuild_args: Vec<String> = vec!["desktop".into(), "--build-only".into()];
|
||||
let rebuild = run_streamed(
|
||||
let mut rebuild = run_streamed(
|
||||
&app,
|
||||
&hermes,
|
||||
&rebuild_args,
|
||||
@@ -295,6 +295,33 @@ async fn run_update(app: AppHandle) -> Result<()> {
|
||||
Some("rebuild"),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Retry-once: the first `--build-only` can return nonzero on a still-settling
|
||||
// post-update tree or a network-blocked Electron fetch that our self-heal
|
||||
// repaired mid-run. A second attempt then builds clean off the healed dist
|
||||
// (the content-hash stamp makes it a near-no-op when the first actually
|
||||
// succeeded). Without this the updater bails here and never reaches the
|
||||
// relaunch below — the app updates but doesn't restart. Matches the
|
||||
// retry-once `hermes update` already does above, and `hermes update`'s own
|
||||
// desktop rebuild in cmd_update.
|
||||
if rebuild_needs_retry(rebuild.exit_code) {
|
||||
emit_log(
|
||||
&app,
|
||||
Some("rebuild"),
|
||||
LogStream::Stdout,
|
||||
"[rebuild] first desktop rebuild failed; retrying once (a self-healed \
|
||||
Electron download builds clean on the second run)…",
|
||||
);
|
||||
rebuild = run_streamed(
|
||||
&app,
|
||||
&hermes,
|
||||
&rebuild_args,
|
||||
&install_root,
|
||||
&child_env,
|
||||
Some("rebuild"),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
let rebuild_ms = started.elapsed().as_millis() as u64;
|
||||
|
||||
if rebuild.exit_code != Some(0) {
|
||||
@@ -533,6 +560,14 @@ fn is_locked(path: &Path) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether the `desktop --build-only` rebuild should be retried once. Any
|
||||
/// non-success exit qualifies: the common cause is a transient first-attempt
|
||||
/// failure (still-settling tree / self-healed Electron download) that a clean
|
||||
/// second run resolves.
|
||||
fn rebuild_needs_retry(exit_code: Option<i32>) -> bool {
|
||||
exit_code != Some(0)
|
||||
}
|
||||
|
||||
/// Spawn `hermes <args>` from `cwd`, stream stdout/stderr as Log events on the
|
||||
/// bootstrap channel, and return the exit code. Mirrors powershell::run_script
|
||||
/// but for an arbitrary command (no install.ps1 -File wrapping).
|
||||
@@ -970,6 +1005,16 @@ mod tests {
|
||||
assert_eq!(update_branch_from_args(["--update"]), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rebuild_retries_only_on_failure() {
|
||||
assert!(!rebuild_needs_retry(Some(0)), "a clean rebuild must not retry");
|
||||
assert!(rebuild_needs_retry(Some(1)), "a failed rebuild retries once");
|
||||
assert!(
|
||||
rebuild_needs_retry(None),
|
||||
"a killed/signalled rebuild (no exit code) retries once"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_only_app_targets() {
|
||||
assert_eq!(
|
||||
|
||||
@@ -28,6 +28,7 @@ const { detectRemoteDisplay, isWindowsBinaryPathInWsl, isWslEnvironment } = requ
|
||||
const { runBootstrap } = require('./bootstrap-runner.cjs')
|
||||
const {
|
||||
buildSessionWindowUrl,
|
||||
chatWindowWebPreferences,
|
||||
createSessionWindowRegistry,
|
||||
SESSION_WINDOW_MIN_HEIGHT,
|
||||
SESSION_WINDOW_MIN_WIDTH
|
||||
@@ -44,6 +45,7 @@ const { readDirForIpc } = require('./fs-read-dir.cjs')
|
||||
const { gitRootForIpc } = require('./git-root.cjs')
|
||||
const { worktreesForIpc } = require('./git-worktrees.cjs')
|
||||
const { OFFICIAL_REPO_HTTPS_URL, isOfficialSshRemote } = require('./update-remote.cjs')
|
||||
const { runRebuildWithRetry } = require('./update-rebuild.cjs')
|
||||
const {
|
||||
buildPosixCleanupScript,
|
||||
buildWindowsCleanupScript,
|
||||
@@ -2008,10 +2010,14 @@ async function applyUpdatesPosixInApp() {
|
||||
}
|
||||
|
||||
emitUpdateProgress({ stage: 'rebuild', message: 'Rebuilding the desktop app…', percent: 60 })
|
||||
const rebuilt = await runStreamedUpdate(hermes, ['desktop', '--build-only'], {
|
||||
cwd: updateRoot,
|
||||
env,
|
||||
stage: 'rebuild'
|
||||
// Retry-once: a first rebuild can fail on a still-settling tree or a
|
||||
// self-healed (network-blocked) Electron download; a second run builds clean
|
||||
// off the healed dist so we reach the swap+relaunch below instead of bailing.
|
||||
const rebuilt = await runRebuildWithRetry(attempt => {
|
||||
if (attempt > 0) {
|
||||
emitUpdateProgress({ stage: 'rebuild', message: 'Retrying the desktop rebuild…', percent: 60 })
|
||||
}
|
||||
return runStreamedUpdate(hermes, ['desktop', '--build-only'], { cwd: updateRoot, env, stage: 'rebuild' })
|
||||
})
|
||||
if (rebuilt.code !== 0) {
|
||||
emitUpdateProgress({
|
||||
@@ -5106,14 +5112,7 @@ function spawnSecondaryWindow({ sessionId, watch, newSession } = {}) {
|
||||
// themes/context.tsx, so the window appears already themed.
|
||||
show: false,
|
||||
backgroundColor: getWindowBackgroundColor(),
|
||||
webPreferences: {
|
||||
preload: path.join(__dirname, 'preload.cjs'),
|
||||
contextIsolation: true,
|
||||
webviewTag: true,
|
||||
sandbox: true,
|
||||
nodeIntegration: false,
|
||||
devTools: true
|
||||
}
|
||||
webPreferences: chatWindowWebPreferences(path.join(__dirname, 'preload.cjs'))
|
||||
})
|
||||
|
||||
if (IS_MAC) {
|
||||
@@ -5180,23 +5179,11 @@ function createWindow() {
|
||||
// material before the renderer paints the app theme. See createSessionWindow.
|
||||
show: false,
|
||||
backgroundColor: getWindowBackgroundColor(),
|
||||
webPreferences: {
|
||||
preload: path.join(__dirname, 'preload.cjs'),
|
||||
contextIsolation: true,
|
||||
webviewTag: true,
|
||||
sandbox: true,
|
||||
nodeIntegration: false,
|
||||
devTools: true,
|
||||
// Keep timers + requestAnimationFrame running at full speed when the
|
||||
// window is blurred/occluded. The chat transcript streams to the screen
|
||||
// through a requestAnimationFrame-gated flush (useSessionStateCache),
|
||||
// so with Chromium's default background throttling the live answer
|
||||
// stalls whenever this window isn't focused (e.g. you switch to your
|
||||
// editor mid-turn, or open detached devtools) and only appears once you
|
||||
// refocus or refresh. A streaming chat app must render in the
|
||||
// background, so opt out — matching the secondary windows above.
|
||||
backgroundThrottling: false
|
||||
}
|
||||
// Shared with the secondary session windows (chatWindowWebPreferences) so
|
||||
// both keep `backgroundThrottling: false` — the chat transcript streams via
|
||||
// a requestAnimationFrame-gated flush that Chromium pauses for blurred
|
||||
// windows, stalling the live answer until refocus. See session-windows.cjs.
|
||||
webPreferences: chatWindowWebPreferences(path.join(__dirname, 'preload.cjs'))
|
||||
})
|
||||
|
||||
if (IS_MAC) {
|
||||
@@ -6564,6 +6551,12 @@ app.on('before-quit', () => {
|
||||
flushDesktopLogBufferSync()
|
||||
closePreviewWatchers()
|
||||
|
||||
// Kill open PTYs before environment teardown to avoid the node-pty#904
|
||||
// ThreadSafeFunction SIGABRT race.
|
||||
for (const id of [...terminalSessions.keys()]) {
|
||||
disposeTerminalSession(id)
|
||||
}
|
||||
|
||||
if (hermesProcess && !hermesProcess.killed) {
|
||||
hermesProcess.kill('SIGTERM')
|
||||
}
|
||||
|
||||
@@ -10,6 +10,29 @@ const { pathToFileURL } = require('node:url')
|
||||
const SESSION_WINDOW_MIN_WIDTH = 420
|
||||
const SESSION_WINDOW_MIN_HEIGHT = 620
|
||||
|
||||
// Shared webPreferences for every window that renders the chat transcript — the
|
||||
// primary window AND the secondary session windows. Keeping it in one place is
|
||||
// the whole point: the two BrowserWindow definitions in main.cjs used to be
|
||||
// hand-copied, and the secondary windows silently lost `backgroundThrottling:
|
||||
// false`, so a streamed answer stalled until the window regained focus.
|
||||
//
|
||||
// `backgroundThrottling: false` is load-bearing: the transcript streams to the
|
||||
// screen through a requestAnimationFrame-gated flush, which Chromium pauses for
|
||||
// blurred/occluded windows. A streaming chat app must keep painting in the
|
||||
// background, so every chat window opts out. The preload path is injected
|
||||
// because it depends on the Electron entry's __dirname.
|
||||
function chatWindowWebPreferences(preloadPath) {
|
||||
return {
|
||||
preload: preloadPath,
|
||||
contextIsolation: true,
|
||||
webviewTag: true,
|
||||
sandbox: true,
|
||||
nodeIntegration: false,
|
||||
devTools: true,
|
||||
backgroundThrottling: false
|
||||
}
|
||||
}
|
||||
|
||||
// Build the renderer URL for a secondary window. The renderer uses a
|
||||
// HashRouter, so the session route lives after the '#'. The `?win=secondary`
|
||||
// flag MUST sit in the query string BEFORE the '#': anything after the '#' is
|
||||
@@ -94,6 +117,7 @@ function createSessionWindowRegistry() {
|
||||
|
||||
module.exports = {
|
||||
buildSessionWindowUrl,
|
||||
chatWindowWebPreferences,
|
||||
createSessionWindowRegistry,
|
||||
SESSION_WINDOW_MIN_HEIGHT,
|
||||
SESSION_WINDOW_MIN_WIDTH
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
const assert = require('node:assert/strict')
|
||||
const test = require('node:test')
|
||||
|
||||
const { buildSessionWindowUrl, createSessionWindowRegistry } = require('./session-windows.cjs')
|
||||
const {
|
||||
buildSessionWindowUrl,
|
||||
chatWindowWebPreferences,
|
||||
createSessionWindowRegistry
|
||||
} = require('./session-windows.cjs')
|
||||
|
||||
// A minimal fake BrowserWindow: tracks listeners + destroyed state and lets a
|
||||
// test fire the 'closed' event, mirroring the slice of the Electron API the
|
||||
@@ -175,3 +179,21 @@ test('registry trims the session id before keying', () => {
|
||||
|
||||
assert.equal(registry.has('s1'), true)
|
||||
})
|
||||
|
||||
test('chatWindowWebPreferences disables background throttling so streaming paints while blurred', () => {
|
||||
// Regression: secondary session windows used to omit this flag, so a streamed
|
||||
// answer stalled until the window regained focus (Chromium pauses the
|
||||
// requestAnimationFrame-gated transcript flush for backgrounded windows).
|
||||
const prefs = chatWindowWebPreferences('/tmp/preload.cjs')
|
||||
|
||||
assert.equal(prefs.backgroundThrottling, false)
|
||||
})
|
||||
|
||||
test('chatWindowWebPreferences passes the preload path through and keeps the hardened defaults', () => {
|
||||
const prefs = chatWindowWebPreferences('/some/preload.cjs')
|
||||
|
||||
assert.equal(prefs.preload, '/some/preload.cjs')
|
||||
assert.equal(prefs.contextIsolation, true)
|
||||
assert.equal(prefs.sandbox, true)
|
||||
assert.equal(prefs.nodeIntegration, false)
|
||||
})
|
||||
|
||||
29
apps/desktop/electron/update-rebuild.cjs
Normal file
29
apps/desktop/electron/update-rebuild.cjs
Normal file
@@ -0,0 +1,29 @@
|
||||
'use strict'
|
||||
|
||||
/**
|
||||
* Retry-once policy for the desktop `--build-only` rebuild during self-update.
|
||||
*
|
||||
* The first rebuild can return nonzero on a still-settling post-update tree or a
|
||||
* network-blocked Electron fetch that the installer's self-heal repaired mid-run.
|
||||
* A second attempt then builds clean off the healed dist (the content-hash stamp
|
||||
* makes it a near-no-op when the first actually succeeded). Without the retry the
|
||||
* updater bails before the relaunch step — the app updates but doesn't restart.
|
||||
*/
|
||||
|
||||
function shouldRetryRebuild(code) {
|
||||
return code !== 0
|
||||
}
|
||||
|
||||
/**
|
||||
* Run `rebuild()` (async, resolves `{ code, ... }`), retrying once on failure.
|
||||
* Returns the final result.
|
||||
*/
|
||||
async function runRebuildWithRetry(rebuild) {
|
||||
let result = await rebuild(0)
|
||||
if (shouldRetryRebuild(result.code)) {
|
||||
result = await rebuild(1)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
module.exports = { shouldRetryRebuild, runRebuildWithRetry }
|
||||
55
apps/desktop/electron/update-rebuild.test.cjs
Normal file
55
apps/desktop/electron/update-rebuild.test.cjs
Normal file
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
* Tests for electron/update-rebuild.cjs — the retry-once policy for the desktop
|
||||
* `--build-only` rebuild during self-update.
|
||||
*
|
||||
* Run with: node --test electron/update-rebuild.test.cjs
|
||||
* (Wired into npm test:desktop:platforms in package.json.)
|
||||
*
|
||||
* Why this matters: a first rebuild can return nonzero on a still-settling tree
|
||||
* or a self-healed (network-blocked) Electron download. Without a second attempt
|
||||
* the updater bails before the relaunch step — the app updates but never restarts
|
||||
* (the field report behind this fix). The retry must fire on failure, not on
|
||||
* success, and must run at most twice.
|
||||
*/
|
||||
|
||||
const test = require('node:test')
|
||||
const assert = require('node:assert/strict')
|
||||
|
||||
const { shouldRetryRebuild, runRebuildWithRetry } = require('./update-rebuild.cjs')
|
||||
|
||||
test('shouldRetryRebuild retries only on a non-success exit', () => {
|
||||
assert.equal(shouldRetryRebuild(0), false)
|
||||
assert.equal(shouldRetryRebuild(1), true)
|
||||
assert.equal(shouldRetryRebuild(null), true)
|
||||
})
|
||||
|
||||
test('a clean first rebuild runs once and does not retry', async () => {
|
||||
const codes = []
|
||||
const result = await runRebuildWithRetry(attempt => {
|
||||
codes.push(attempt)
|
||||
return Promise.resolve({ code: 0 })
|
||||
})
|
||||
assert.deepEqual(codes, [0])
|
||||
assert.equal(result.code, 0)
|
||||
})
|
||||
|
||||
test('a failed first rebuild retries once and succeeds', async () => {
|
||||
const codes = []
|
||||
const result = await runRebuildWithRetry(attempt => {
|
||||
codes.push(attempt)
|
||||
return Promise.resolve({ code: attempt === 0 ? 1 : 0 })
|
||||
})
|
||||
assert.deepEqual(codes, [0, 1])
|
||||
assert.equal(result.code, 0)
|
||||
})
|
||||
|
||||
test('a rebuild that keeps failing runs at most twice and reports the failure', async () => {
|
||||
const codes = []
|
||||
const result = await runRebuildWithRetry(attempt => {
|
||||
codes.push(attempt)
|
||||
return Promise.resolve({ code: 1, error: 'rebuild-failed' })
|
||||
})
|
||||
assert.deepEqual(codes, [0, 1])
|
||||
assert.equal(result.code, 1)
|
||||
assert.equal(result.error, 'rebuild-failed')
|
||||
})
|
||||
@@ -21,7 +21,7 @@
|
||||
"build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && npm run postbuild",
|
||||
"postbuild": "node scripts/assert-dist-built.cjs",
|
||||
"prebuilder": "node scripts/patch-electron-builder-mac-binary.cjs",
|
||||
"builder": "cross-env NODE_OPTIONS=--max-old-space-size=16384 electron-builder",
|
||||
"builder": "cross-env NODE_OPTIONS=--max-old-space-size=16384 node scripts/run-electron-builder.cjs",
|
||||
"pack": "npm run build && npm run builder -- --dir",
|
||||
"dist": "npm run build && npm run builder",
|
||||
"dist:mac": "npm run build && npm run builder -- --mac",
|
||||
@@ -37,7 +37,7 @@
|
||||
"test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
|
||||
"test:desktop:existing": "node scripts/test-desktop.mjs existing",
|
||||
"test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
|
||||
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/windows-user-env.test.cjs",
|
||||
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/update-rebuild.test.cjs electron/windows-user-env.test.cjs",
|
||||
"typecheck": "tsc -p . --noEmit",
|
||||
"lint": "eslint src/ electron/",
|
||||
"lint:fix": "eslint src/ electron/ --fix",
|
||||
@@ -55,7 +55,7 @@
|
||||
"@dnd-kit/sortable": "^10.0.0",
|
||||
"@dnd-kit/utilities": "^3.2.2",
|
||||
"@hermes/shared": "file:../shared",
|
||||
"@icons-pack/react-simple-icons": "^13.13.0",
|
||||
"@icons-pack/react-simple-icons": "=13.11.1",
|
||||
"@nanostores/react": "^1.1.0",
|
||||
"@nous-research/ui": "^0.13.0",
|
||||
"@radix-ui/react-slot": "^1.2.4",
|
||||
@@ -117,7 +117,7 @@
|
||||
"@vitejs/plugin-react": "^6.0.1",
|
||||
"concurrently": "^10.0.3",
|
||||
"cross-env": "^10.1.0",
|
||||
"electron": "^40.9.3",
|
||||
"electron": "40.10.2",
|
||||
"electron-builder": "^26.8.1",
|
||||
"eslint": "^9.39.4",
|
||||
"eslint-plugin-perfectionist": "^5.9.0",
|
||||
@@ -134,8 +134,7 @@
|
||||
"wait-on": "^9.0.5"
|
||||
},
|
||||
"build": {
|
||||
"electronVersion": "40.9.3",
|
||||
"electronDist": "../../node_modules/electron/dist",
|
||||
"electronVersion": "40.10.2",
|
||||
"appId": "com.nousresearch.hermes",
|
||||
"productName": "Hermes",
|
||||
"executableName": "Hermes",
|
||||
|
||||
@@ -24,6 +24,11 @@ const replacement = ` // ${marker}: electron-builder 26.8.x can sometimes cop
|
||||
if (!fs.existsSync(bundledElectronBinary)) {
|
||||
const candidates = [
|
||||
path.join(packager.info.framework.distMacOsAppName, "Contents", "MacOS", electronBranding.productName),
|
||||
// npm may nest the workspace-only electron devDep under
|
||||
// apps/desktop/node_modules (process.cwd() during pack), or hoist
|
||||
// it to the repo root. Try the workspace-local install first, then
|
||||
// the root hoist, so the fallback works under either layout.
|
||||
path.join(process.cwd(), "node_modules", "electron", "dist", "Electron.app", "Contents", "MacOS", electronBranding.productName),
|
||||
path.join(process.cwd(), "..", "..", "node_modules", "electron", "dist", "Electron.app", "Contents", "MacOS", electronBranding.productName),
|
||||
];
|
||||
const sourceBinary = candidates.find(candidate => fs.existsSync(candidate));
|
||||
|
||||
57
apps/desktop/scripts/run-electron-builder.cjs
Normal file
57
apps/desktop/scripts/run-electron-builder.cjs
Normal file
@@ -0,0 +1,57 @@
|
||||
"use strict"
|
||||
|
||||
// Resolve electronDist at runtime (#38673, #47917): electron-builder 26.8.x can
|
||||
// re-unpack a broken Electron.app; reusing the installed dist dodges that.
|
||||
// npm workspace hoisting is non-deterministic — require.resolve finds electron
|
||||
// wherever it landed. Dist present → -c.electronDist=<abs>/dist; absent → let
|
||||
// electron-builder fetch via @electron/get (electronVersion + ELECTRON_MIRROR).
|
||||
|
||||
const fs = require("node:fs")
|
||||
const path = require("node:path")
|
||||
const { spawnSync } = require("node:child_process")
|
||||
|
||||
function electronDistDir() {
|
||||
try {
|
||||
return path.join(path.dirname(require.resolve("electron/package.json")), "dist")
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
function distBinary(dist) {
|
||||
if (process.platform === "darwin") {
|
||||
return path.join(dist, "Electron.app", "Contents", "MacOS", "Electron")
|
||||
}
|
||||
if (process.platform === "win32") {
|
||||
return path.join(dist, "electron.exe")
|
||||
}
|
||||
return path.join(dist, "electron")
|
||||
}
|
||||
|
||||
function electronBuilderCli() {
|
||||
const pkgJson = require.resolve("electron-builder/package.json")
|
||||
const bin = require(pkgJson).bin
|
||||
const rel = typeof bin === "string" ? bin : bin["electron-builder"]
|
||||
return path.join(path.dirname(pkgJson), rel)
|
||||
}
|
||||
|
||||
const dist = electronDistDir()
|
||||
const args = []
|
||||
if (dist && fs.existsSync(distBinary(dist))) {
|
||||
args.push(`-c.electronDist=${dist}`)
|
||||
} else {
|
||||
console.warn(
|
||||
"[run-electron-builder] no local electron dist; electron-builder will fetch " +
|
||||
"via @electron/get (electronVersion + ELECTRON_MIRROR)."
|
||||
)
|
||||
}
|
||||
args.push(...process.argv.slice(2))
|
||||
|
||||
const result = spawnSync(process.execPath, [electronBuilderCli(), ...args], {
|
||||
stdio: "inherit",
|
||||
})
|
||||
if (result.error) {
|
||||
console.error(`[run-electron-builder] spawn failed: ${result.error.message}`)
|
||||
process.exit(1)
|
||||
}
|
||||
process.exit(result.status == null ? 1 : result.status)
|
||||
@@ -9,6 +9,7 @@ import { formatCombo } from '@/lib/keybinds/combo'
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
import type { ConversationStatus } from './hooks/use-voice-conversation'
|
||||
import { ModelPill } from './model-pill'
|
||||
import type { ChatBarState, VoiceStatus } from './types'
|
||||
|
||||
export const ICON_BTN = 'size-(--composer-control-size) shrink-0 rounded-md'
|
||||
@@ -66,6 +67,7 @@ export function ComposerControls({
|
||||
const c = t.composer
|
||||
const steerCombo = formatCombo('mod+enter')
|
||||
const steerLabel = `${c.steer} (${steerCombo})`
|
||||
|
||||
const steerTip = (
|
||||
<span className="inline-flex items-center gap-1.5">
|
||||
{c.steer}
|
||||
@@ -81,8 +83,10 @@ export function ComposerControls({
|
||||
|
||||
return (
|
||||
<div className="ml-auto flex shrink-0 items-center gap-(--composer-control-gap)">
|
||||
<DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
|
||||
{canSteer && (
|
||||
<ModelPill disabled={disabled} model={state.model} />
|
||||
{/* While the agent runs and the user is typing, steer takes over the mic's
|
||||
slot rather than crowding the row with an extra button. */}
|
||||
{canSteer ? (
|
||||
<Tip label={steerTip}>
|
||||
<Button
|
||||
aria-label={steerLabel}
|
||||
@@ -96,6 +100,8 @@ export function ComposerControls({
|
||||
<SteeringWheel size={16} />
|
||||
</Button>
|
||||
</Tip>
|
||||
) : (
|
||||
<DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
|
||||
)}
|
||||
{showVoicePrimary ? (
|
||||
<Tip label={c.startVoice}>
|
||||
|
||||
86
apps/desktop/src/app/chat/composer/model-pill.tsx
Normal file
86
apps/desktop/src/app/chat/composer/model-pill.tsx
Normal file
@@ -0,0 +1,86 @@
|
||||
import { useStore } from '@nanostores/react'
|
||||
import { useState } from 'react'
|
||||
|
||||
import { ModelMenuCloseContext } from '@/app/shell/model-menu-panel'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { DropdownMenu, DropdownMenuContent, DropdownMenuTrigger } from '@/components/ui/dropdown-menu'
|
||||
import { GlyphSpinner } from '@/components/ui/glyph-spinner'
|
||||
import { useI18n } from '@/i18n'
|
||||
import { ChevronDown } from '@/lib/icons'
|
||||
import { formatModelStatusLabel } from '@/lib/model-status-label'
|
||||
import { cn } from '@/lib/utils'
|
||||
import {
|
||||
$currentFastMode,
|
||||
$currentModel,
|
||||
$currentProvider,
|
||||
$currentReasoningEffort,
|
||||
setModelPickerOpen
|
||||
} from '@/store/session'
|
||||
|
||||
import type { ChatBarState } from './types'
|
||||
|
||||
const PILL = cn(
|
||||
'h-(--composer-control-size) max-w-40 shrink-0 gap-1 rounded-md px-2 text-xs font-normal',
|
||||
'text-(--ui-text-tertiary) hover:bg-(--chrome-action-hover) hover:text-foreground'
|
||||
)
|
||||
|
||||
/**
|
||||
* Composer model selector — the relocated status-bar pill. Reuses the live
|
||||
* `model.options` dropdown (`modelMenuContent`) verbatim; falls back to the
|
||||
* full picker when the gateway is closed and no live menu exists.
|
||||
*/
|
||||
export function ModelPill({ disabled, model }: { disabled: boolean; model: ChatBarState['model'] }) {
|
||||
const copy = useI18n().t.shell.statusbar
|
||||
const currentModel = useStore($currentModel)
|
||||
const currentProvider = useStore($currentProvider)
|
||||
const fastMode = useStore($currentFastMode)
|
||||
const reasoningEffort = useStore($currentReasoningEffort)
|
||||
const [open, setOpen] = useState(false)
|
||||
|
||||
// The model resolves a beat after the gateway/session comes up. Rather than
|
||||
// flash a literal "No model", show a quiet loader (inherits the pill text
|
||||
// color at half opacity) until a model lands.
|
||||
const label = (
|
||||
<>
|
||||
{currentModel.trim() ? (
|
||||
<span className="truncate">{formatModelStatusLabel(currentModel, { fastMode, reasoningEffort })}</span>
|
||||
) : (
|
||||
<GlyphSpinner className="opacity-50" spinner="braille" />
|
||||
)}
|
||||
<ChevronDown className="size-2.5 shrink-0 opacity-50" />
|
||||
</>
|
||||
)
|
||||
|
||||
const title = currentProvider ? copy.modelTitle(currentProvider, currentModel || copy.modelNone) : copy.switchModel
|
||||
|
||||
if (!model.modelMenuContent) {
|
||||
return (
|
||||
<Button
|
||||
aria-label={copy.openModelPicker}
|
||||
className={PILL}
|
||||
disabled={disabled}
|
||||
onClick={() => setModelPickerOpen(true)}
|
||||
title={copy.openModelPicker}
|
||||
type="button"
|
||||
variant="ghost"
|
||||
>
|
||||
{label}
|
||||
</Button>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<DropdownMenu onOpenChange={setOpen} open={open}>
|
||||
<DropdownMenuTrigger asChild>
|
||||
<Button aria-label={title} className={PILL} disabled={disabled} title={title} type="button" variant="ghost">
|
||||
{label}
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
<DropdownMenuContent align="end" className="w-64 p-0" side="top" sideOffset={8}>
|
||||
<ModelMenuCloseContext.Provider value={() => setOpen(false)}>
|
||||
{model.modelMenuContent}
|
||||
</ModelMenuCloseContext.Provider>
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
)
|
||||
}
|
||||
@@ -1,3 +1,5 @@
|
||||
import type { ReactNode } from 'react'
|
||||
|
||||
import type { HermesGateway } from '@/hermes'
|
||||
import type { ComposerAttachment } from '@/store/composer'
|
||||
|
||||
@@ -22,6 +24,8 @@ export interface ChatBarState {
|
||||
canSwitch: boolean
|
||||
loading?: boolean
|
||||
quickModels?: QuickModelOption[]
|
||||
/** Reused status-bar dropdown (built with gateway + selectModel upstream). */
|
||||
modelMenuContent?: ReactNode
|
||||
}
|
||||
tools: { enabled: boolean; label: string; suggestions?: ContextSuggestion[] }
|
||||
voice: { enabled: boolean; active: boolean }
|
||||
|
||||
@@ -15,7 +15,9 @@ import { Backdrop } from '@/components/Backdrop'
|
||||
import { PromptOverlays } from '@/components/prompt-overlays'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import { ErrorState } from '@/components/ui/error-state'
|
||||
import { getGlobalModelOptions, type HermesGateway } from '@/hermes'
|
||||
import { useI18n } from '@/i18n'
|
||||
import type { ChatMessage } from '@/lib/chat-messages'
|
||||
import { quickModelOptions, sessionTitle, toRuntimeMessage } from '@/lib/chat-runtime'
|
||||
import { useIncrementalExternalStoreRuntime } from '@/lib/incremental-external-store-runtime'
|
||||
@@ -38,11 +40,12 @@ import {
|
||||
$lastVisibleMessageIsUser,
|
||||
$messages,
|
||||
$messagesEmpty,
|
||||
$resumeExhaustedSessionId,
|
||||
$selectedStoredSessionId,
|
||||
$sessions,
|
||||
sessionPinId
|
||||
} from '@/store/session'
|
||||
import { isNewSessionWindow, isSecondaryWindow } from '@/store/windows'
|
||||
import { isSecondaryWindow } from '@/store/windows'
|
||||
import type { ModelOptionsResponse } from '@/types/hermes'
|
||||
|
||||
import { routeSessionId } from '../routes'
|
||||
@@ -62,6 +65,7 @@ import { threadLoadingState } from './thread-loading'
|
||||
|
||||
interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
|
||||
gateway: HermesGateway | null
|
||||
modelMenuContent?: React.ReactNode
|
||||
onToggleSelectedPin: () => void
|
||||
onDeleteSelectedSession: () => void
|
||||
onCancel: () => Promise<void> | void
|
||||
@@ -85,7 +89,9 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
|
||||
onEdit: (message: AppendMessage) => Promise<void>
|
||||
onReload: (parentId: string | null) => Promise<void>
|
||||
onRestoreToMessage?: (messageId: string) => Promise<void>
|
||||
onRetryResume: (sessionId: string) => void
|
||||
onTranscribeAudio?: (audio: Blob) => Promise<string>
|
||||
onDismissError?: (messageId: string) => void
|
||||
}
|
||||
|
||||
interface ChatHeaderProps {
|
||||
@@ -120,10 +126,10 @@ function ChatHeader({
|
||||
? pinnedSessionIds.includes(selectedSessionId)
|
||||
: false
|
||||
|
||||
// A brand-new session has no session to pin/delete/rename, so the header is
|
||||
// just a dead "New session" label + chevron. Drop it (and its border)
|
||||
// entirely until there's a real session to act on.
|
||||
if (isNewSessionWindow() || (!selectedSessionId && !activeSessionId && !isRoutedSessionView)) {
|
||||
// Secondary windows (new-session scratch, subagent watch, cmd-click pop-out)
|
||||
// are compact side panels — they drop the session-actions header + border
|
||||
// entirely. A brand-new draft has nothing to pin/delete/rename either.
|
||||
if (isSecondaryWindow() || (!selectedSessionId && !activeSessionId && !isRoutedSessionView)) {
|
||||
return null
|
||||
}
|
||||
|
||||
@@ -250,6 +256,7 @@ function ChatRuntimeBoundary({
|
||||
export function ChatView({
|
||||
className,
|
||||
gateway,
|
||||
modelMenuContent,
|
||||
onToggleSelectedPin,
|
||||
onDeleteSelectedSession,
|
||||
onCancel,
|
||||
@@ -270,9 +277,12 @@ export function ChatView({
|
||||
onEdit,
|
||||
onReload,
|
||||
onRestoreToMessage,
|
||||
onTranscribeAudio
|
||||
onRetryResume,
|
||||
onTranscribeAudio,
|
||||
onDismissError
|
||||
}: ChatViewProps) {
|
||||
const location = useLocation()
|
||||
const { t } = useI18n()
|
||||
const activeSessionId = useStore($activeSessionId)
|
||||
const awaitingResponse = useStore($awaitingResponse)
|
||||
const busy = useStore($busy)
|
||||
@@ -294,6 +304,7 @@ export function ChatView({
|
||||
const messagesEmpty = useStore($messagesEmpty)
|
||||
const lastVisibleIsUser = useStore($lastVisibleMessageIsUser)
|
||||
const selectedSessionId = useStore($selectedStoredSessionId)
|
||||
const resumeExhaustedSessionId = useStore($resumeExhaustedSessionId)
|
||||
const routedSessionId = routeSessionId(location.pathname)
|
||||
const isRoutedSessionView = Boolean(routedSessionId)
|
||||
|
||||
@@ -313,9 +324,21 @@ export function ChatView({
|
||||
// session exists — even if it has zero messages (a brand-new routed
|
||||
// session). The flicker where `busy` flips true briefly during hydrate
|
||||
// is handled by `threadLoadingState`'s last-visible-user gate.
|
||||
const loadingSession = isRoutedSessionView && (routeSessionMismatch || (messagesEmpty && !activeSessionId))
|
||||
//
|
||||
// resumeExhausted: the bounded auto-retry in use-route-resume gave up on this
|
||||
// routed session (gateway RPC + REST fallback failed through every attempt).
|
||||
// Suppress the loader and show an explicit error + manual Retry instead of
|
||||
// spinning forever. Gated on the route matching so a stale latch from another
|
||||
// session can't blank the current one.
|
||||
const resumeExhausted = isRoutedSessionView && resumeExhaustedSessionId === routedSessionId
|
||||
|
||||
const loadingSession =
|
||||
!resumeExhausted && isRoutedSessionView && (routeSessionMismatch || (messagesEmpty && !activeSessionId))
|
||||
|
||||
const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser)
|
||||
const showChatBar = !loadingSession
|
||||
// Hide the composer in the exhausted error state too: there's no live runtime
|
||||
// to send to until a retry rebinds one.
|
||||
const showChatBar = !loadingSession && !resumeExhausted
|
||||
const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
|
||||
|
||||
const modelOptionsQuery = useQuery<ModelOptionsResponse>({
|
||||
@@ -346,6 +369,7 @@ export function ChatView({
|
||||
provider: currentProvider,
|
||||
canSwitch: gatewayOpen,
|
||||
loading: !gatewayOpen || (!currentModel && !currentProvider),
|
||||
modelMenuContent,
|
||||
quickModels
|
||||
},
|
||||
tools: {
|
||||
@@ -358,7 +382,7 @@ export function ChatView({
|
||||
active: false
|
||||
}
|
||||
}),
|
||||
[contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
|
||||
[contextSuggestions, currentModel, currentProvider, gatewayOpen, modelMenuContent, quickModels]
|
||||
)
|
||||
|
||||
// Drop files anywhere in the conversation area, not just on the composer
|
||||
@@ -429,6 +453,7 @@ export function ChatView({
|
||||
loading={threadLoading}
|
||||
onBranchInNewChat={onBranchInNewChat}
|
||||
onCancel={onCancel}
|
||||
onDismissError={onDismissError}
|
||||
onRestoreToMessage={onRestoreToMessage}
|
||||
sessionId={activeSessionId}
|
||||
sessionKey={threadKey}
|
||||
@@ -462,6 +487,21 @@ export function ChatView({
|
||||
</Suspense>
|
||||
)}
|
||||
</ChatRuntimeBoundary>
|
||||
{resumeExhausted && routedSessionId && (
|
||||
<div className="absolute inset-0 z-10 grid place-items-center bg-(--ui-chat-surface-background) px-8 py-10">
|
||||
<ErrorState
|
||||
className="max-w-sm"
|
||||
description={t.desktop.resumeStrandedBody}
|
||||
title={t.desktop.resumeStrandedTitle}
|
||||
>
|
||||
<div className="grid justify-items-center">
|
||||
<Button onClick={() => onRetryResume(routedSessionId)} size="sm" variant="outline">
|
||||
{t.desktop.resumeRetry}
|
||||
</Button>
|
||||
</div>
|
||||
</ErrorState>
|
||||
</div>
|
||||
)}
|
||||
{showChatBar && <ScrollToBottomButton />}
|
||||
<ChatDropOverlay kind={dragKind} />
|
||||
<ChatSwapOverlay profile={gatewaySwapTarget} />
|
||||
|
||||
@@ -13,7 +13,7 @@ import { useSkinCommand } from '@/themes/use-skin-command'
|
||||
|
||||
import { formatRefValue } from '../components/assistant-ui/directive-text'
|
||||
import { getCronJobs, getSessionMessages, listAllProfileSessions, type SessionInfo, triggerCronJob } from '../hermes'
|
||||
import { preserveLocalAssistantErrors, toChatMessages } from '../lib/chat-messages'
|
||||
import { type ChatMessage, chatMessageText, preserveLocalAssistantErrors, toChatMessages } from '../lib/chat-messages'
|
||||
import {
|
||||
isMessagingSource,
|
||||
LOCAL_SESSION_SOURCE_IDS,
|
||||
@@ -52,7 +52,10 @@ import {
|
||||
$currentCwd,
|
||||
$freshDraftReady,
|
||||
$gatewayState,
|
||||
$messages,
|
||||
$messagingSessions,
|
||||
$resumeFailedSessionId,
|
||||
$resumeExhaustedSessionId,
|
||||
$selectedStoredSessionId,
|
||||
$sessions,
|
||||
$workingSessionIds,
|
||||
@@ -199,6 +202,8 @@ export function DesktopController() {
|
||||
const activeSessionId = useStore($activeSessionId)
|
||||
const currentCwd = useStore($currentCwd)
|
||||
const freshDraftReady = useStore($freshDraftReady)
|
||||
const resumeFailedSessionId = useStore($resumeFailedSessionId)
|
||||
const resumeExhaustedSessionId = useStore($resumeExhaustedSessionId)
|
||||
const filePreviewTarget = useStore($filePreviewTarget)
|
||||
const previewTarget = useStore($previewTarget)
|
||||
const selectedStoredSessionId = useStore($selectedStoredSessionId)
|
||||
@@ -711,7 +716,9 @@ export function DesktopController() {
|
||||
}
|
||||
|
||||
lastGatewayProfileRef.current = activeGatewayProfile
|
||||
void refreshCurrentModel()
|
||||
// Force: the new profile has its own default, so reseed even if the composer
|
||||
// already shows the previous profile's model.
|
||||
void refreshCurrentModel(true)
|
||||
void refreshActiveProfile()
|
||||
}, [activeGatewayProfile, refreshCurrentModel])
|
||||
|
||||
@@ -734,6 +741,49 @@ export function DesktopController() {
|
||||
[branchCurrentSession, refreshSessions]
|
||||
)
|
||||
|
||||
// Clear a failed turn's red error banner from the transcript. Errors are
|
||||
// renderer-local state (never persisted), so dismissing is purely a view +
|
||||
// session-cache edit. A message that errored before emitting any visible
|
||||
// text is a bare error placeholder → drop it entirely; one that streamed
|
||||
// partial output then failed keeps its content and just sheds the error.
|
||||
// Both the per-runtime cache AND the live $messages view must be updated:
|
||||
// `preserveLocalAssistantErrors` re-grafts any still-errored message it
|
||||
// finds in the view onto the next session.info flush, so clearing only the
|
||||
// cache would let the heartbeat resurrect the banner.
|
||||
const dismissError = useCallback(
|
||||
(messageId: string) => {
|
||||
const runtimeSessionId = activeSessionIdRef.current
|
||||
|
||||
if (!runtimeSessionId) {
|
||||
return
|
||||
}
|
||||
|
||||
const clearErrorIn = (messages: ChatMessage[]): ChatMessage[] =>
|
||||
messages.flatMap(message => {
|
||||
if (message.id !== messageId || !message.error) {
|
||||
return [message]
|
||||
}
|
||||
|
||||
if (!chatMessageText(message).trim() && !message.parts.some(part => part.type !== 'text')) {
|
||||
return []
|
||||
}
|
||||
|
||||
return [{ ...message, error: undefined, pending: false }]
|
||||
})
|
||||
|
||||
// View first: the flush below reads $messages as the "current" baseline
|
||||
// for error preservation, so the banner must be gone from it before the
|
||||
// cache update triggers a re-sync.
|
||||
setMessages(clearErrorIn($messages.get()))
|
||||
|
||||
updateSessionState(runtimeSessionId, state => ({
|
||||
...state,
|
||||
messages: clearErrorIn(state.messages)
|
||||
}))
|
||||
},
|
||||
[activeSessionIdRef, updateSessionState]
|
||||
)
|
||||
|
||||
const startSessionInWorkspace = useCallback(
|
||||
(path: null | string) => {
|
||||
startFreshSessionDraft()
|
||||
@@ -843,6 +893,8 @@ export function DesktopController() {
|
||||
gatewayState,
|
||||
locationPathname: location.pathname,
|
||||
resumeSession,
|
||||
resumeFailedSessionId,
|
||||
resumeExhaustedSessionId,
|
||||
routedSessionId,
|
||||
runtimeIdByStoredSessionIdRef,
|
||||
selectedStoredSessionId,
|
||||
@@ -859,7 +911,6 @@ export function DesktopController() {
|
||||
gatewayLogLines,
|
||||
gatewayState,
|
||||
inferenceStatus,
|
||||
modelMenuContent,
|
||||
openAgents,
|
||||
freshDraftReady,
|
||||
openCommandCenterSection,
|
||||
@@ -981,6 +1032,7 @@ export function DesktopController() {
|
||||
<ChatView
|
||||
gateway={gatewayRef.current}
|
||||
maxVoiceRecordingSeconds={voiceMaxRecordingSeconds}
|
||||
modelMenuContent={modelMenuContent}
|
||||
onAddContextRef={composer.addContextRefAttachment}
|
||||
onAddUrl={url => composer.addContextRefAttachment(`@url:${formatRefValue(url)}`, url)}
|
||||
onAttachDroppedItems={composer.attachDroppedItems}
|
||||
@@ -992,6 +1044,7 @@ export function DesktopController() {
|
||||
void removeSession(selectedStoredSessionId)
|
||||
}
|
||||
}}
|
||||
onDismissError={dismissError}
|
||||
onEdit={editMessage}
|
||||
onPasteClipboardImage={() => void composer.pasteClipboardImage()}
|
||||
onPickFiles={() => void composer.pickContextPaths('file')}
|
||||
@@ -1000,6 +1053,7 @@ export function DesktopController() {
|
||||
onReload={reloadFromMessage}
|
||||
onRemoveAttachment={id => void composer.removeAttachment(id)}
|
||||
onRestoreToMessage={restoreToMessage}
|
||||
onRetryResume={sessionId => void resumeSession(sessionId, true)}
|
||||
onSteer={steerPrompt}
|
||||
onSubmit={submitText}
|
||||
onThreadMessagesChange={handleThreadMessagesChange}
|
||||
|
||||
@@ -9,3 +9,22 @@ export const $terminalTakeover = atom(storedBoolean(TAKEOVER_KEY, false))
|
||||
$terminalTakeover.subscribe(active => persistBoolean(TAKEOVER_KEY, active))
|
||||
|
||||
export const setTerminalTakeover = (active: boolean) => $terminalTakeover.set(active)
|
||||
|
||||
/** A command queued to run in the embedded terminal. The terminal pane flushes
|
||||
* (and clears) it once its session is live, so a value set before the pane
|
||||
* mounts still runs. Cleared after flush so a later remount can't replay it. */
|
||||
export const $terminalInjection = atom<null | string>(null)
|
||||
|
||||
/** Open the terminal pane and run a command in it. Used to disconnect external
|
||||
* (CLI-managed) providers, which Hermes can't clear via the API — the user
|
||||
* sees exactly what runs instead of Hermes silently deleting their creds. */
|
||||
export const runInTerminal = (command: string) => {
|
||||
const trimmed = command.trim()
|
||||
|
||||
if (!trimmed) {
|
||||
return
|
||||
}
|
||||
|
||||
setTerminalTakeover(true)
|
||||
$terminalInjection.set(trimmed)
|
||||
}
|
||||
|
||||
@@ -10,6 +10,8 @@ import { triggerHaptic } from '@/lib/haptics'
|
||||
import { $filePreviewTarget, $previewTarget } from '@/store/preview'
|
||||
import { useTheme } from '@/themes/context'
|
||||
|
||||
import { $terminalInjection } from '../store'
|
||||
|
||||
import { makeTerminalReader, setActiveTerminalReader } from './buffer'
|
||||
import {
|
||||
isAddSelectionShortcut,
|
||||
@@ -675,6 +677,28 @@ export function useTerminalSession({ cwd, onAddSelectionToChat }: UseTerminalSes
|
||||
return () => cancelAnimationFrame(raf)
|
||||
}, [activeTheme, themeName])
|
||||
|
||||
// Flush a queued command (e.g. a provider-disconnect) into the live session.
|
||||
// Only active while open; the subscribe fires immediately, so a command set
|
||||
// before this pane mounted runs as soon as the session is ready. Clearing the
|
||||
// atom after writing stops a later remount from replaying a stale command.
|
||||
useEffect(() => {
|
||||
if (status !== 'open') {
|
||||
return
|
||||
}
|
||||
|
||||
return $terminalInjection.subscribe(command => {
|
||||
const id = sessionIdRef.current
|
||||
|
||||
if (!command || !id) {
|
||||
return
|
||||
}
|
||||
|
||||
void window.hermesDesktop?.terminal?.write(id, `${command}\r`)
|
||||
$terminalInjection.set(null)
|
||||
termRef.current?.focus()
|
||||
})
|
||||
}, [status])
|
||||
|
||||
return {
|
||||
addSelectionToChat,
|
||||
hostRef,
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
type GatewayEventPayload,
|
||||
reasoningPart,
|
||||
renderMediaTags,
|
||||
textPart,
|
||||
upsertToolPart
|
||||
} from '@/lib/chat-messages'
|
||||
import { coerceGatewayText, coerceThinkingText, normalizePersonalityValue } from '@/lib/chat-runtime'
|
||||
@@ -1080,6 +1081,32 @@ export function useMessageStream({
|
||||
// completions / watch matches here — re-sync the status stack.
|
||||
void refreshBackgroundProcesses(sessionId)
|
||||
}
|
||||
} else if (event.type === 'review.summary') {
|
||||
// Self-improvement background review saved something to memory/skills
|
||||
// and emitted a persistent summary (Python formats it as
|
||||
// "💾 Self-improvement review: …"). The CLI prints this via
|
||||
// prompt_toolkit and the Ink TUI renders it as a system line; the
|
||||
// desktop has neither, so without this handler the skill/memory
|
||||
// change happens silently. Surface it as a persistent system message
|
||||
// in the transcript so the user is always informed — it must not be a
|
||||
// transient toast that can be missed.
|
||||
const text = coerceGatewayText(payload?.text).trim()
|
||||
|
||||
if (text && sessionId) {
|
||||
flushQueuedDeltas(sessionId)
|
||||
updateSessionState(sessionId, state => ({
|
||||
...state,
|
||||
messages: [
|
||||
...state.messages,
|
||||
{
|
||||
id: `review-summary-${Date.now()}`,
|
||||
role: 'system',
|
||||
parts: [textPart(text)],
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
}
|
||||
]
|
||||
}))
|
||||
}
|
||||
} else if (event.type === 'error') {
|
||||
const errorMessage = payload?.message || 'Hermes reported an error'
|
||||
const looksLikeProviderSetup = isProviderSetupErrorMessage(errorMessage)
|
||||
@@ -1102,8 +1129,13 @@ export function useMessageStream({
|
||||
|
||||
if (looksLikeProviderSetup) {
|
||||
requestDesktopOnboarding(errorMessage)
|
||||
} else if (isActiveEvent) {
|
||||
} else {
|
||||
// Toast globally, not just when the failing thread is focused: a
|
||||
// turn-ending error (e.g. out of funds) blocks every thread, so the
|
||||
// inline error alone is too easy to miss. The stable id collapses the
|
||||
// same error from multiple blocked threads into one toast.
|
||||
notify({
|
||||
id: `gateway-error:${errorMessage}`,
|
||||
kind: 'error',
|
||||
title: 'Hermes error',
|
||||
message: errorMessage
|
||||
|
||||
@@ -130,7 +130,6 @@ describe('useModelControls', () => {
|
||||
await expect(
|
||||
controls.selectModel({
|
||||
model: 'claude-sonnet-4.6',
|
||||
persistGlobal: false,
|
||||
provider: 'anthropic'
|
||||
})
|
||||
).resolves.toBe(true)
|
||||
@@ -143,26 +142,57 @@ describe('useModelControls', () => {
|
||||
expect(requestGateway).not.toHaveBeenCalledWith('slash.exec', expect.anything())
|
||||
})
|
||||
|
||||
it('keeps the global path on setGlobalModel when there is no active session', async () => {
|
||||
setGlobalModel.mockResolvedValue(undefined)
|
||||
it('stores a no-session pick as UI state with no gateway or global write', async () => {
|
||||
const requestGateway = vi.fn()
|
||||
let controls!: Controls
|
||||
|
||||
render(
|
||||
<Harness
|
||||
activeSessionId={null}
|
||||
onReady={value => (controls = value)}
|
||||
requestGateway={vi.fn()}
|
||||
requestGateway={requestGateway}
|
||||
/>
|
||||
)
|
||||
|
||||
await expect(
|
||||
controls.selectModel({
|
||||
model: 'claude-sonnet-4.6',
|
||||
persistGlobal: false,
|
||||
provider: 'anthropic'
|
||||
})
|
||||
).resolves.toBe(true)
|
||||
|
||||
expect(setGlobalModel).toHaveBeenCalledWith('anthropic', 'claude-sonnet-4.6')
|
||||
// The pick is plain UI state; session.create ships it later. Nothing touches
|
||||
// the gateway or the profile default here.
|
||||
expect($currentModel.get()).toBe('claude-sonnet-4.6')
|
||||
expect($currentProvider.get()).toBe('anthropic')
|
||||
expect(requestGateway).not.toHaveBeenCalled()
|
||||
expect(setGlobalModel).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('seeds an empty composer model from global but never clobbers a pick', async () => {
|
||||
vi.mocked(getGlobalModelInfo).mockResolvedValue({ model: 'openai/gpt-5.5', provider: 'openai-codex' })
|
||||
|
||||
const { result } = renderHook(() =>
|
||||
useModelControls({
|
||||
activeSessionId: null,
|
||||
queryClient: new QueryClient(),
|
||||
requestGateway: vi.fn()
|
||||
})
|
||||
)
|
||||
|
||||
// Empty → seeds the default.
|
||||
await result.current.refreshCurrentModel()
|
||||
expect($currentModel.get()).toBe('openai/gpt-5.5')
|
||||
|
||||
// A user pick must survive the lifecycle refreshes that fire on boot / fresh
|
||||
// draft / session events.
|
||||
setCurrentModel('anthropic/claude-sonnet-4.6')
|
||||
setCurrentProvider('anthropic')
|
||||
await result.current.refreshCurrentModel()
|
||||
expect($currentModel.get()).toBe('anthropic/claude-sonnet-4.6')
|
||||
|
||||
// A profile swap forces a reseed to the new profile's default.
|
||||
await result.current.refreshCurrentModel(true)
|
||||
expect($currentModel.get()).toBe('openai/gpt-5.5')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { type QueryClient } from '@tanstack/react-query'
|
||||
import { useCallback } from 'react'
|
||||
|
||||
import { getGlobalModelInfo, setGlobalModel } from '@/hermes'
|
||||
import { getGlobalModelInfo } from '@/hermes'
|
||||
import { useI18n } from '@/i18n'
|
||||
import { notifyError } from '@/store/notifications'
|
||||
import {
|
||||
@@ -15,7 +15,6 @@ import type { ModelOptionsResponse } from '@/types/hermes'
|
||||
|
||||
interface ModelSelection {
|
||||
model: string
|
||||
persistGlobal: boolean
|
||||
provider: string
|
||||
}
|
||||
|
||||
@@ -28,6 +27,7 @@ interface ModelControlsOptions {
|
||||
export function useModelControls({ activeSessionId, queryClient, requestGateway }: ModelControlsOptions) {
|
||||
const { t } = useI18n()
|
||||
const copy = t.desktop
|
||||
|
||||
const updateModelOptionsCache = useCallback(
|
||||
(provider: string, model: string, includeGlobal: boolean) => {
|
||||
const patch = (prev: ModelOptionsResponse | undefined) => ({ ...(prev ?? {}), provider, model })
|
||||
@@ -41,14 +41,24 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway
|
||||
[activeSessionId, queryClient]
|
||||
)
|
||||
|
||||
const refreshCurrentModel = useCallback(async () => {
|
||||
// Seed the composer's model state from the profile default. `force` reseeds
|
||||
// for a profile swap (the new profile has its own default); otherwise this
|
||||
// only fills an EMPTY selection so a user's pick (plain UI state in
|
||||
// $currentModel) survives the lifecycle refreshes that fire on boot / fresh
|
||||
// draft / session events. A live session owns the footer, so skip entirely.
|
||||
const refreshCurrentModel = useCallback(async (force = false) => {
|
||||
try {
|
||||
if ($activeSessionId.get()) {
|
||||
return
|
||||
}
|
||||
|
||||
if (!force && $currentModel.get()) {
|
||||
return
|
||||
}
|
||||
|
||||
const result = await getGlobalModelInfo()
|
||||
|
||||
// A resumed/live session owns the footer model state. Global config
|
||||
// refreshes (gateway boot, profile swap, settings save) must not clobber
|
||||
// the active chat's runtime model/provider in the status bar.
|
||||
if ($activeSessionId.get()) {
|
||||
if ($activeSessionId.get() || (!force && $currentModel.get())) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -64,12 +74,14 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway
|
||||
}
|
||||
}, [])
|
||||
|
||||
// Returns whether the switch succeeded so callers can await it before
|
||||
// applying follow-up changes (e.g. editing a model's reasoning/fast must land
|
||||
// on the right active model — bail rather than write to the previous one).
|
||||
// Returns whether the switch succeeded so callers can await it before applying
|
||||
// follow-up changes. The composer model is plain UI state: with no live
|
||||
// session it's just stored (and shipped on the next session.create); with one
|
||||
// it's scoped to that session via config.set. It NEVER writes the profile
|
||||
// default — that lives in Settings → Model — so picking a model here can't
|
||||
// silently mutate global config.
|
||||
const selectModel = useCallback(
|
||||
async (selection: ModelSelection): Promise<boolean> => {
|
||||
const includeGlobal = selection.persistGlobal || !activeSessionId
|
||||
// Snapshot for rollback: the switch is applied optimistically, so a
|
||||
// failure must restore the prior model/provider (store + query cache)
|
||||
// rather than leave the UI showing a model the backend never selected.
|
||||
@@ -78,42 +90,34 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway
|
||||
|
||||
setCurrentModel(selection.model)
|
||||
setCurrentProvider(selection.provider)
|
||||
updateModelOptionsCache(selection.provider, selection.model, includeGlobal)
|
||||
updateModelOptionsCache(selection.provider, selection.model, !activeSessionId)
|
||||
|
||||
// No live session yet: the pick is pure UI state. session.create reads
|
||||
// $currentModel/$currentProvider and applies it as that session's override.
|
||||
if (!activeSessionId) {
|
||||
return true
|
||||
}
|
||||
|
||||
try {
|
||||
if (activeSessionId) {
|
||||
await requestGateway('config.set', {
|
||||
session_id: activeSessionId,
|
||||
key: 'model',
|
||||
value: `${selection.model} --provider ${selection.provider}${selection.persistGlobal ? ' --global' : ''}`
|
||||
})
|
||||
await requestGateway('config.set', {
|
||||
session_id: activeSessionId,
|
||||
key: 'model',
|
||||
value: `${selection.model} --provider ${selection.provider}`
|
||||
})
|
||||
|
||||
if (selection.persistGlobal) {
|
||||
void refreshCurrentModel()
|
||||
}
|
||||
|
||||
void queryClient.invalidateQueries({
|
||||
queryKey: selection.persistGlobal ? ['model-options'] : ['model-options', activeSessionId]
|
||||
})
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
await setGlobalModel(selection.provider, selection.model)
|
||||
void refreshCurrentModel()
|
||||
void queryClient.invalidateQueries({ queryKey: ['model-options'] })
|
||||
void queryClient.invalidateQueries({ queryKey: ['model-options', activeSessionId] })
|
||||
|
||||
return true
|
||||
} catch (err) {
|
||||
setCurrentModel(prevModel)
|
||||
setCurrentProvider(prevProvider)
|
||||
updateModelOptionsCache(prevProvider, prevModel, includeGlobal)
|
||||
updateModelOptionsCache(prevProvider, prevModel, !activeSessionId)
|
||||
notifyError(err, copy.modelSwitchFailed)
|
||||
|
||||
return false
|
||||
}
|
||||
},
|
||||
[activeSessionId, copy.modelSwitchFailed, queryClient, refreshCurrentModel, requestGateway, updateModelOptionsCache]
|
||||
[activeSessionId, copy.modelSwitchFailed, queryClient, requestGateway, updateModelOptionsCache]
|
||||
)
|
||||
|
||||
return { refreshCurrentModel, selectModel, updateModelOptionsCache }
|
||||
|
||||
@@ -58,6 +58,7 @@ import { clearSessionTodos } from '@/store/todos'
|
||||
|
||||
import type {
|
||||
ClientSessionState,
|
||||
BrowserManageResponse,
|
||||
FileAttachResponse,
|
||||
HandoffFailResponse,
|
||||
HandoffRequestResponse,
|
||||
@@ -1141,6 +1142,81 @@ export function usePromptActions({
|
||||
} catch (err) {
|
||||
renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`)
|
||||
}
|
||||
},
|
||||
// /browser connect|disconnect|status manages the live CDP connection on
|
||||
// the gateway host, mirroring the TUI's browser.manage RPC. It mutates
|
||||
// BROWSER_CDP_URL (and may launch Chrome) in the gateway process — only
|
||||
// meaningful when that process runs on this machine, so it's gated to
|
||||
// local connections. A remote gateway would act on the wrong host.
|
||||
browser: async ctx => {
|
||||
const resolved = await withSlashOutput(ctx)
|
||||
|
||||
if (!resolved) {
|
||||
return
|
||||
}
|
||||
|
||||
const { render: renderSlashOutput, sessionId } = resolved
|
||||
|
||||
if ($connection.get()?.mode === 'remote') {
|
||||
renderSlashOutput(
|
||||
'/browser manages a Chromium-family browser on the gateway host — only available when connected to a local gateway.'
|
||||
)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
const [rawAction = 'status', ...rest] = ctx.arg.trim().split(/\s+/).filter(Boolean)
|
||||
const cmdAction = rawAction.toLowerCase()
|
||||
|
||||
if (!['connect', 'disconnect', 'status'].includes(cmdAction)) {
|
||||
renderSlashOutput(
|
||||
'usage: /browser [connect|disconnect|status] [url] · persistent: set browser.cdp_url in config.yaml'
|
||||
)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
const url = cmdAction === 'connect' ? rest.join(' ').trim() || 'http://127.0.0.1:9222' : undefined
|
||||
|
||||
if (url) {
|
||||
renderSlashOutput(`checking Chromium-family browser remote debugging at ${url}...`)
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await requestGateway<BrowserManageResponse>('browser.manage', {
|
||||
action: cmdAction,
|
||||
session_id: sessionId,
|
||||
...(url && { url })
|
||||
})
|
||||
|
||||
// Without a streamed session subscription, the gateway bundles its
|
||||
// progress lines into `messages` — flush them inline.
|
||||
result?.messages?.forEach(message => renderSlashOutput(message))
|
||||
|
||||
if (cmdAction === 'status') {
|
||||
renderSlashOutput(
|
||||
result?.connected
|
||||
? `browser connected: ${result.url || '(url unavailable)'}`
|
||||
: 'browser not connected (try /browser connect <url> or set browser.cdp_url in config.yaml)'
|
||||
)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if (cmdAction === 'disconnect') {
|
||||
renderSlashOutput('browser disconnected')
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if (result?.connected) {
|
||||
renderSlashOutput('Browser connected to live Chromium-family browser via CDP')
|
||||
renderSlashOutput(`Endpoint: ${result.url || '(url unavailable)'}`)
|
||||
renderSlashOutput('next browser tool call will use this CDP endpoint')
|
||||
}
|
||||
} catch (err) {
|
||||
renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,8 @@ import { cleanup, render } from '@testing-library/react'
|
||||
import type { MutableRefObject } from 'react'
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import { $resumeExhaustedSessionId, setResumeExhaustedSessionId } from '@/store/session'
|
||||
|
||||
import { useRouteResume } from './use-route-resume'
|
||||
|
||||
interface HarnessProps {
|
||||
@@ -13,6 +15,8 @@ interface HarnessProps {
|
||||
gatewayState: string
|
||||
locationPathname: string
|
||||
resumeSession: (sessionId: string, focus: boolean) => Promise<unknown>
|
||||
resumeFailedSessionId?: null | string
|
||||
resumeExhaustedSessionId?: null | string
|
||||
routedSessionId: null | string
|
||||
runtimeIdByStoredSessionIdRef: MutableRefObject<Map<string, string>>
|
||||
selectedStoredSessionId: null | string
|
||||
@@ -20,8 +24,12 @@ interface HarnessProps {
|
||||
startFreshSessionDraft: (focus: boolean) => unknown
|
||||
}
|
||||
|
||||
function RouteResumeHarness(props: HarnessProps) {
|
||||
useRouteResume(props)
|
||||
function RouteResumeHarness({
|
||||
resumeFailedSessionId = null,
|
||||
resumeExhaustedSessionId = null,
|
||||
...props
|
||||
}: HarnessProps) {
|
||||
useRouteResume({ ...props, resumeExhaustedSessionId, resumeFailedSessionId })
|
||||
|
||||
return null
|
||||
}
|
||||
@@ -256,3 +264,212 @@ describe('useRouteResume', () => {
|
||||
expect(resumeSession).toHaveBeenCalledWith('session-1', true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('useRouteResume bounded auto-retry after a failed resume', () => {
|
||||
afterEach(() => {
|
||||
cleanup()
|
||||
vi.useRealTimers()
|
||||
vi.restoreAllMocks()
|
||||
setResumeExhaustedSessionId(null)
|
||||
})
|
||||
|
||||
// Common stranded-window props: gateway open, route on the session, no runtime
|
||||
// yet, and the ref already synced to the route (resumeSession sets it at entry
|
||||
// before failing) — the exact state that defeats the main effect's self-heal.
|
||||
function strandedProps(resumeSession: (sid: string, focus: boolean) => Promise<unknown>) {
|
||||
return {
|
||||
activeSessionId: null,
|
||||
activeSessionIdRef: { current: null } as MutableRefObject<null | string>,
|
||||
creatingSessionRef: { current: false },
|
||||
currentView: 'chat',
|
||||
freshDraftReady: false,
|
||||
gatewayState: 'open',
|
||||
locationPathname: '/session-1',
|
||||
resumeSession,
|
||||
routedSessionId: 'session-1',
|
||||
runtimeIdByStoredSessionIdRef: { current: new Map<string, string>() },
|
||||
selectedStoredSessionId: 'session-1',
|
||||
// Synced to the route by the failed resume's synchronous entry-write.
|
||||
selectedStoredSessionIdRef: { current: 'session-1' } as MutableRefObject<null | string>,
|
||||
startFreshSessionDraft: vi.fn()
|
||||
}
|
||||
}
|
||||
|
||||
it('retries the resume on backoff when the routed session is flagged as failed', () => {
|
||||
vi.useFakeTimers()
|
||||
const resumeSession = vi.fn(async () => undefined)
|
||||
|
||||
render(<RouteResumeHarness {...strandedProps(resumeSession)} resumeFailedSessionId="session-1" />)
|
||||
|
||||
// The main effect fires one resume on mount (pathname-changed). Clear it so
|
||||
// we assert purely the bounded-retry effect's scheduled retry below.
|
||||
resumeSession.mockClear()
|
||||
|
||||
// No immediate fire — the retry is scheduled behind the backoff timer.
|
||||
expect(resumeSession).not.toHaveBeenCalled()
|
||||
|
||||
// First backoff window (1s) elapses → one retry.
|
||||
vi.advanceTimersByTime(1_000)
|
||||
expect(resumeSession).toHaveBeenCalledTimes(1)
|
||||
expect(resumeSession).toHaveBeenCalledWith('session-1', true)
|
||||
})
|
||||
|
||||
it('does NOT retry a failed session that is not the routed one', () => {
|
||||
vi.useFakeTimers()
|
||||
const resumeSession = vi.fn(async () => undefined)
|
||||
|
||||
// The failure flag points at a different session than the route.
|
||||
render(<RouteResumeHarness {...strandedProps(resumeSession)} resumeFailedSessionId="other-session" />)
|
||||
resumeSession.mockClear() // drop the mount resume
|
||||
|
||||
vi.advanceTimersByTime(10_000)
|
||||
expect(resumeSession).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('skips the scheduled retry if the session already recovered when the timer fires', () => {
|
||||
vi.useFakeTimers()
|
||||
const resumeSession = vi.fn(async () => undefined)
|
||||
const props = strandedProps(resumeSession)
|
||||
|
||||
render(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
|
||||
resumeSession.mockClear() // drop the mount resume
|
||||
|
||||
// A resume landed while we waited: runtime is now bound.
|
||||
props.activeSessionIdRef.current = 'runtime-1'
|
||||
|
||||
vi.advanceTimersByTime(8_000)
|
||||
expect(resumeSession).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('stops retrying after MAX_RESUME_RETRIES consecutive failures', () => {
|
||||
vi.useFakeTimers()
|
||||
const resumeSession = vi.fn(async () => undefined)
|
||||
const props = strandedProps(resumeSession)
|
||||
|
||||
// Model the real re-arm loop: resumeSession clears $resumeFailedSessionId at
|
||||
// entry (null) and a repeat failure re-sets it ('session-1'). That null->id
|
||||
// toggle is what re-runs the effect and advances the bounded counter. The
|
||||
// routed session never changes, so the counter is NOT reset between cycles.
|
||||
const { rerender } = render(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
|
||||
resumeSession.mockClear() // drop the mount resume; count only the retries
|
||||
|
||||
for (let i = 0; i < 8; i += 1) {
|
||||
vi.advanceTimersByTime(8_000) // fire the scheduled retry (if any)
|
||||
rerender(<RouteResumeHarness {...props} resumeFailedSessionId={null} />) // cleared at entry
|
||||
rerender(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />) // re-armed on failure
|
||||
}
|
||||
|
||||
// Capped at MAX_RESUME_RETRIES (4): a persistently dead backend can't
|
||||
// hot-loop the resume forever.
|
||||
expect(resumeSession.mock.calls.length).toBe(4)
|
||||
|
||||
// Once auto-retry gives up, the exhausted latch is armed for the routed
|
||||
// session so the chat view can swap the perpetual loader for an explicit
|
||||
// error + manual Retry instead of spinning forever.
|
||||
expect($resumeExhaustedSessionId.get()).toBe('session-1')
|
||||
})
|
||||
|
||||
it('does not arm the exhausted latch while retries remain', () => {
|
||||
vi.useFakeTimers()
|
||||
const resumeSession = vi.fn(async () => undefined)
|
||||
const props = strandedProps(resumeSession)
|
||||
|
||||
const { rerender } = render(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
|
||||
resumeSession.mockClear()
|
||||
|
||||
// Two failure cycles — still under the 4-retry cap, so the latch must stay
|
||||
// clear and the loader keeps spinning (auto-recovery hasn't given up yet).
|
||||
for (let i = 0; i < 2; i += 1) {
|
||||
vi.advanceTimersByTime(8_000)
|
||||
rerender(<RouteResumeHarness {...props} resumeFailedSessionId={null} />)
|
||||
rerender(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
|
||||
}
|
||||
|
||||
expect($resumeExhaustedSessionId.get()).toBeNull()
|
||||
})
|
||||
|
||||
it('clears a stale exhausted latch when the route moves off the stranded session', () => {
|
||||
vi.useFakeTimers()
|
||||
const resumeSession = vi.fn(async () => undefined)
|
||||
const props = strandedProps(resumeSession)
|
||||
|
||||
// Pre-arm the latch as if this session had exhausted its retries.
|
||||
setResumeExhaustedSessionId('session-1')
|
||||
|
||||
// Route is now on a different, healthy session that is not flagged as
|
||||
// failed — the retry effect's "route moved off" branch clears the latch.
|
||||
render(
|
||||
<RouteResumeHarness
|
||||
{...props}
|
||||
activeSessionId="runtime-2"
|
||||
activeSessionIdRef={{ current: 'runtime-2' }}
|
||||
locationPathname="/session-2"
|
||||
resumeFailedSessionId={null}
|
||||
routedSessionId="session-2"
|
||||
selectedStoredSessionId="session-2"
|
||||
selectedStoredSessionIdRef={{ current: 'session-2' }}
|
||||
/>
|
||||
)
|
||||
|
||||
expect($resumeExhaustedSessionId.get()).toBeNull()
|
||||
})
|
||||
|
||||
it('resets the retry counter for a fresh backoff cycle when the exhausted latch clears (manual retry, same session)', () => {
|
||||
vi.useFakeTimers()
|
||||
const resumeSession = vi.fn(async () => undefined)
|
||||
const props = strandedProps(resumeSession)
|
||||
|
||||
// Phase A — exhaust the bounded auto-retry (counter → MAX) like a dead
|
||||
// backend. The resumeExhaustedSessionId prop stays null here: the hook sets
|
||||
// the store, which doesn't feed back into the prop in this harness.
|
||||
const { rerender } = render(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
|
||||
resumeSession.mockClear()
|
||||
for (let i = 0; i < 8; i += 1) {
|
||||
vi.advanceTimersByTime(8_000)
|
||||
rerender(<RouteResumeHarness {...props} resumeFailedSessionId={null} />)
|
||||
rerender(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
|
||||
}
|
||||
expect(resumeSession.mock.calls.length).toBe(4) // capped
|
||||
expect($resumeExhaustedSessionId.get()).toBe('session-1')
|
||||
|
||||
// Phase B — user clicks Retry on the SAME stranded session. resumeSession
|
||||
// clears both latches at entry; the exhausted latch's armed->cleared edge
|
||||
// must reset the attempt counter so a fresh bounded cycle runs, not a single
|
||||
// one-shot attempt that immediately re-arms the error. Model the prop
|
||||
// transitions: reflect the armed latch, then clear it (retry), then re-arm
|
||||
// the failure latch on the fresh failure.
|
||||
resumeSession.mockClear()
|
||||
rerender(<RouteResumeHarness {...props} resumeExhaustedSessionId="session-1" resumeFailedSessionId="session-1" />)
|
||||
rerender(<RouteResumeHarness {...props} resumeExhaustedSessionId={null} resumeFailedSessionId={null} />)
|
||||
rerender(<RouteResumeHarness {...props} resumeExhaustedSessionId={null} resumeFailedSessionId="session-1" />)
|
||||
|
||||
// A real retry fires again instead of staying pinned at MAX (which would
|
||||
// dispatch nothing). Without the reset the counter stays >= MAX and this
|
||||
// advance dispatches zero resumes.
|
||||
vi.advanceTimersByTime(8_000)
|
||||
expect(resumeSession.mock.calls.length).toBeGreaterThan(0)
|
||||
})
|
||||
|
||||
it('does not burn retry attempts on unrelated re-renders during the backoff window', () => {
|
||||
vi.useFakeTimers()
|
||||
const props = strandedProps(vi.fn())
|
||||
|
||||
// Mount schedules the first backoff timer. Then re-render repeatedly with a
|
||||
// fresh resumeSession identity (referential instability — a real dep change
|
||||
// for the retry effect) WITHOUT ever letting the timer fire. The old code
|
||||
// incremented the attempt counter at schedule time, so >= MAX re-renders
|
||||
// armed the exhausted error with zero resumes actually dispatched. The fix
|
||||
// only advances the counter when a timer truly fires, so the latch stays
|
||||
// clear no matter how many spurious re-renders happen mid-backoff.
|
||||
const { rerender } = render(
|
||||
<RouteResumeHarness {...props} resumeFailedSessionId="session-1" resumeSession={vi.fn(async () => undefined)} />
|
||||
)
|
||||
for (let j = 0; j < 8; j += 1) {
|
||||
rerender(
|
||||
<RouteResumeHarness {...props} resumeFailedSessionId="session-1" resumeSession={vi.fn(async () => undefined)} />
|
||||
)
|
||||
}
|
||||
|
||||
expect($resumeExhaustedSessionId.get()).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { type MutableRefObject, useEffect, useRef } from 'react'
|
||||
|
||||
import { isNewChatRoute } from '@/app/routes'
|
||||
import { setResumeExhaustedSessionId } from '@/store/session'
|
||||
|
||||
interface RouteResumeOptions {
|
||||
activeSessionId: string | null
|
||||
@@ -11,6 +12,17 @@ interface RouteResumeOptions {
|
||||
gatewayState: string | undefined
|
||||
locationPathname: string
|
||||
resumeSession: (sessionId: string, focus: boolean) => Promise<unknown>
|
||||
// Stored-session id whose most recent resume failed terminally (set by
|
||||
// useSessionActions, mirrored from $resumeFailedSessionId). While this equals
|
||||
// routedSessionId the window would otherwise latch on the loader forever, so
|
||||
// the bounded-retry effect below re-attempts the resume.
|
||||
resumeFailedSessionId: string | null
|
||||
// Stored-session id whose bounded auto-retry has EXHAUSTED (mirrored from
|
||||
// $resumeExhaustedSessionId). Only resumeSession clears this latch (manual
|
||||
// Retry / reconnect / reselect) — the auto-retry loop never does — so its
|
||||
// armed->cleared edge is an unambiguous "give me a fresh backoff cycle"
|
||||
// signal the effect below uses to reset the attempt counter.
|
||||
resumeExhaustedSessionId: string | null
|
||||
routedSessionId: string | null
|
||||
runtimeIdByStoredSessionIdRef: MutableRefObject<Map<string, string>>
|
||||
selectedStoredSessionId: string | null
|
||||
@@ -18,6 +30,19 @@ interface RouteResumeOptions {
|
||||
startFreshSessionDraft: (focus: boolean) => unknown
|
||||
}
|
||||
|
||||
// Bounded auto-retry for a stranded session window. A resume can fail terminally
|
||||
// (gateway RPC reject + REST fallback failure) on a transiently wedged backend —
|
||||
// dead provider key, a runaway turn hogging the dispatcher, flaky DNS. Without a
|
||||
// retry the loader latches forever. We retry with backoff, capped, so a
|
||||
// genuinely dead backend doesn't hot-loop the resume.
|
||||
const MAX_RESUME_RETRIES = 4
|
||||
const RESUME_RETRY_BASE_MS = 1_000
|
||||
const RESUME_RETRY_MAX_MS = 8_000
|
||||
|
||||
function resumeRetryDelayMs(attempt: number): number {
|
||||
return Math.min(RESUME_RETRY_MAX_MS, RESUME_RETRY_BASE_MS * 2 ** attempt)
|
||||
}
|
||||
|
||||
// HashRouter boot edge case: pathname briefly reads `/` before the hash is
|
||||
// parsed. If the hash references a real session, defer; resume picks it up
|
||||
// next tick. Without this, ctrl+R on `#/:sessionId` flashes 5 loading states.
|
||||
@@ -49,6 +74,8 @@ export function useRouteResume({
|
||||
gatewayState,
|
||||
locationPathname,
|
||||
resumeSession,
|
||||
resumeFailedSessionId,
|
||||
resumeExhaustedSessionId,
|
||||
routedSessionId,
|
||||
runtimeIdByStoredSessionIdRef,
|
||||
selectedStoredSessionId,
|
||||
@@ -58,6 +85,16 @@ export function useRouteResume({
|
||||
const lastPathnameRef = useRef<string | null>(null)
|
||||
const seenGatewayStateRef = useRef(false)
|
||||
const wasGatewayOpenRef = useRef(false)
|
||||
// Per-session retry bookkeeping for the bounded auto-retry effect below. Keyed
|
||||
// by the session id we're retrying so switching chats resets the counter.
|
||||
const retrySessionIdRef = useRef<string | null>(null)
|
||||
const retryAttemptRef = useRef(0)
|
||||
// Tracks the previous exhausted-latch value so we can detect its armed->cleared
|
||||
// edge. resumeSession clears $resumeExhaustedSessionId on a manual Retry /
|
||||
// reconnect / reselect; that transition is our cue to reset the attempt counter
|
||||
// for a fresh backoff cycle on the SAME session (the auto-retry loop itself
|
||||
// never touches this latch, so it can't spuriously trigger the reset).
|
||||
const prevResumeExhaustedRef = useRef<string | null>(null)
|
||||
|
||||
useEffect(() => {
|
||||
const gatewayOpen = gatewayState === 'open'
|
||||
@@ -139,4 +176,111 @@ export function useRouteResume({
|
||||
selectedStoredSessionIdRef,
|
||||
startFreshSessionDraft
|
||||
])
|
||||
|
||||
// Bounded auto-retry: when the routed session's resume failed terminally
|
||||
// (resumeFailedSessionId matches the route), schedule a backoff retry so the
|
||||
// window recovers on its own instead of latching the loader forever. This is
|
||||
// the safety net the main effect above can't provide: after a failed resume,
|
||||
// selectedStoredSessionIdRef.current already equals the route (resumeSession
|
||||
// sets it synchronously at entry) and the pathname/gateway are unchanged, so
|
||||
// none of stuckOnRoutedSession / pathnameChanged / gatewayBecameOpen fire
|
||||
// again. resumeSession clears resumeFailedSessionId on its next attempt; a
|
||||
// success keeps it clear (the effect's guard then no-ops), a repeat failure
|
||||
// re-arms it and we back off further, capped at MAX_RESUME_RETRIES.
|
||||
useEffect(() => {
|
||||
// Detect the exhausted-latch armed->cleared edge for the current route. Only
|
||||
// resumeSession clears $resumeExhaustedSessionId (manual Retry / reconnect /
|
||||
// reselect) — the auto-retry loop never touches it — so this transition
|
||||
// uniquely means "the user asked for another go." Reset the attempt counter
|
||||
// for a fresh bounded backoff cycle on the SAME session. Without this,
|
||||
// retryAttemptRef stays pinned at MAX after exhaustion (the !stranded reset
|
||||
// below only fires on a route CHANGE to a different session), so a manual
|
||||
// retry on the same stranded session would get exactly ONE attempt and then
|
||||
// immediately re-arm the exhausted error — never the renewed backoff cycle
|
||||
// the store/session.ts + use-session-actions.ts comments promise. (Point 2)
|
||||
const wasExhausted = prevResumeExhaustedRef.current
|
||||
prevResumeExhaustedRef.current = resumeExhaustedSessionId
|
||||
if (wasExhausted && wasExhausted === routedSessionId && resumeExhaustedSessionId !== wasExhausted) {
|
||||
retrySessionIdRef.current = routedSessionId
|
||||
retryAttemptRef.current = 0
|
||||
}
|
||||
|
||||
if (currentView !== 'chat' || gatewayState !== 'open') {
|
||||
return
|
||||
}
|
||||
|
||||
const stranded =
|
||||
Boolean(routedSessionId) &&
|
||||
resumeFailedSessionId === routedSessionId &&
|
||||
!creatingSessionRef.current
|
||||
|
||||
if (!stranded) {
|
||||
// Route moved off the stranded session (or it recovered) — reset the
|
||||
// counter so a future failure on another session starts fresh, and clear
|
||||
// any exhausted-latch armed for a session we're no longer viewing (never
|
||||
// the current route: that's the error state we want to keep showing).
|
||||
// resumeSession also clears it on a fresh attempt; this covers a plain
|
||||
// route-change away from the stranded window.
|
||||
if (retrySessionIdRef.current !== routedSessionId) {
|
||||
retrySessionIdRef.current = null
|
||||
retryAttemptRef.current = 0
|
||||
setResumeExhaustedSessionId(current => (current && current !== routedSessionId ? null : current))
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// New stranded session id → reset the attempt counter.
|
||||
if (retrySessionIdRef.current !== routedSessionId) {
|
||||
retrySessionIdRef.current = routedSessionId
|
||||
retryAttemptRef.current = 0
|
||||
}
|
||||
|
||||
if (retryAttemptRef.current >= MAX_RESUME_RETRIES) {
|
||||
// Give up auto-retrying a persistently dead backend; the user can still
|
||||
// reconnect / reselect (which resets the counter via the branch above).
|
||||
// Surface an explicit error + manual Retry in the chat view instead of
|
||||
// spinning the loader forever — resumeSession (manual Retry / reconnect /
|
||||
// reselect) clears this latch and resets the counter for a fresh cycle.
|
||||
setResumeExhaustedSessionId(routedSessionId)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
const attempt = retryAttemptRef.current
|
||||
const sessionId = routedSessionId as string
|
||||
|
||||
const timer = setTimeout(() => {
|
||||
// Re-check liveness at fire time: a resume may have landed while we waited.
|
||||
if (
|
||||
creatingSessionRef.current ||
|
||||
selectedStoredSessionIdRef.current !== sessionId ||
|
||||
activeSessionIdRef.current !== null
|
||||
) {
|
||||
return
|
||||
}
|
||||
|
||||
// Consume an attempt ONLY now that a resume is actually dispatching.
|
||||
// Incrementing at schedule time (the old behavior) let unrelated dep
|
||||
// changes during the 1s–8s backoff window — a transient gatewayState
|
||||
// flip, a non-referentially-stable resumeSession — clear the pending
|
||||
// timer and re-run the effect, burning an attempt without any resume
|
||||
// having fired. A flapping backend could then hit MAX in a couple of
|
||||
// re-renders with far fewer than MAX real attempts. (Point 3)
|
||||
retryAttemptRef.current += 1
|
||||
void resumeSession(sessionId, true)
|
||||
}, resumeRetryDelayMs(attempt))
|
||||
|
||||
return () => clearTimeout(timer)
|
||||
}, [
|
||||
activeSessionIdRef,
|
||||
creatingSessionRef,
|
||||
currentView,
|
||||
gatewayState,
|
||||
resumeSession,
|
||||
resumeFailedSessionId,
|
||||
resumeExhaustedSessionId,
|
||||
routedSessionId,
|
||||
selectedStoredSessionIdRef
|
||||
])
|
||||
}
|
||||
|
||||
@@ -3,8 +3,9 @@ import type { MutableRefObject } from 'react'
|
||||
import { useEffect } from 'react'
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import { getSessionMessages } from '@/hermes'
|
||||
import { $activeGatewayProfile, $newChatProfile } from '@/store/profile'
|
||||
import { $currentCwd } from '@/store/session'
|
||||
import { $currentCwd, $messages, $resumeFailedSessionId, setMessages, setResumeFailedSessionId } from '@/store/session'
|
||||
|
||||
import type { ClientSessionState } from '../../types'
|
||||
|
||||
@@ -117,3 +118,142 @@ describe('createBackendSessionForSend profile routing', () => {
|
||||
expect(params).toMatchObject({ profile: 'default' })
|
||||
})
|
||||
})
|
||||
|
||||
// ── Resume failure recovery (the "stuck loading session window" bug) ──────────
|
||||
// When session.resume rejects AND the REST transcript fallback ALSO fails, the
|
||||
// hook must (a) not throw out of the fallback (which stranded the loader), and
|
||||
// (b) arm $resumeFailedSessionId so use-route-resume can retry. A resume that
|
||||
// succeeds must NOT leave the flag armed.
|
||||
function ResumeHarness({
|
||||
onReady,
|
||||
requestGateway
|
||||
}: {
|
||||
onReady: (resume: (storedSessionId: string, replaceRoute?: boolean) => Promise<unknown>) => void
|
||||
requestGateway: <T>(method: string, params?: Record<string, unknown>) => Promise<T>
|
||||
}) {
|
||||
const ref = <T,>(value: T): MutableRefObject<T> => ({ current: value })
|
||||
|
||||
const actions = useSessionActions({
|
||||
activeSessionId: null,
|
||||
activeSessionIdRef: ref<string | null>(null),
|
||||
busyRef: ref(false),
|
||||
creatingSessionRef: ref(false),
|
||||
ensureSessionState: () => ({}) as ClientSessionState,
|
||||
getRouteToken: () => 'token',
|
||||
navigate: vi.fn() as never,
|
||||
requestGateway,
|
||||
runtimeIdByStoredSessionIdRef: ref(new Map<string, string>()),
|
||||
selectedStoredSessionId: null,
|
||||
selectedStoredSessionIdRef: ref<string | null>(null),
|
||||
sessionStateByRuntimeIdRef: ref(new Map<string, ClientSessionState>()),
|
||||
syncSessionStateToView: vi.fn(),
|
||||
updateSessionState: (_sessionId, updater) => updater({} as ClientSessionState)
|
||||
})
|
||||
|
||||
useEffect(() => {
|
||||
onReady(actions.resumeSession)
|
||||
}, [actions.resumeSession, onReady])
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
describe('resumeSession failure recovery', () => {
|
||||
afterEach(() => {
|
||||
cleanup()
|
||||
setResumeFailedSessionId(null)
|
||||
setMessages([])
|
||||
vi.restoreAllMocks()
|
||||
})
|
||||
|
||||
async function runResume(
|
||||
requestGateway: <T>(method: string, params?: Record<string, unknown>) => Promise<T>
|
||||
): Promise<void> {
|
||||
let resume: ((storedSessionId: string, replaceRoute?: boolean) => Promise<unknown>) | null = null
|
||||
render(<ResumeHarness onReady={r => (resume = r)} requestGateway={requestGateway} />)
|
||||
await waitFor(() => expect(resume).not.toBeNull())
|
||||
await resume!('stored-1', true)
|
||||
}
|
||||
|
||||
it('arms $resumeFailedSessionId when resume RPC and REST fallback both fail', async () => {
|
||||
// session.resume rejects (e.g. timeout against a wedged backend)...
|
||||
const requestGateway = vi.fn(async (method: string) => {
|
||||
if (method === 'session.resume') {
|
||||
throw new Error('request timed out: session.resume')
|
||||
}
|
||||
|
||||
return {} as never
|
||||
})
|
||||
|
||||
// ...and the REST transcript fallback also rejects (backend unreachable).
|
||||
vi.mocked(getSessionMessages).mockRejectedValue(new Error('network down'))
|
||||
|
||||
await runResume(requestGateway)
|
||||
|
||||
// The window is no longer silently stranded: the failure latch is armed for
|
||||
// the stored session, which use-route-resume consumes to retry.
|
||||
expect($resumeFailedSessionId.get()).toBe('stored-1')
|
||||
})
|
||||
|
||||
it('does NOT arm the failure latch when the resume RPC fails but the REST fallback paints history', async () => {
|
||||
// session.resume rejects, but the REST transcript fallback succeeds and
|
||||
// hydrates a readable transcript — the window is NOT stranded.
|
||||
const requestGateway = vi.fn(async (method: string) => {
|
||||
if (method === 'session.resume') {
|
||||
throw new Error('request timed out: session.resume')
|
||||
}
|
||||
|
||||
return {} as never
|
||||
})
|
||||
|
||||
vi.mocked(getSessionMessages).mockResolvedValue({
|
||||
messages: [
|
||||
{ content: 'hello', role: 'user', timestamp: 1 },
|
||||
{ content: 'hi there', role: 'assistant', timestamp: 2 }
|
||||
],
|
||||
session_id: 'stored-1'
|
||||
} as never)
|
||||
|
||||
await runResume(requestGateway)
|
||||
|
||||
// Arming here would auto-retry a window that already shows history and,
|
||||
// on exhaustion, blank that transcript behind the error overlay — a
|
||||
// regression vs. plain fallback-success. The latch must stay clear.
|
||||
expect($resumeFailedSessionId.get()).toBeNull()
|
||||
// The fallback transcript is visible.
|
||||
expect($messages.get().length).toBeGreaterThan(0)
|
||||
})
|
||||
|
||||
it('does NOT throw out of the fallback when REST also fails (no unhandled rejection)', async () => {
|
||||
const requestGateway = vi.fn(async (method: string) => {
|
||||
if (method === 'session.resume') {
|
||||
throw new Error('request timed out: session.resume')
|
||||
}
|
||||
|
||||
return {} as never
|
||||
})
|
||||
|
||||
vi.mocked(getSessionMessages).mockRejectedValue(new Error('network down'))
|
||||
|
||||
// resumeSession must resolve (swallow the fallback failure), not reject.
|
||||
await expect(runResume(requestGateway)).resolves.toBeUndefined()
|
||||
})
|
||||
|
||||
it('leaves the failure latch clear when resume succeeds', async () => {
|
||||
// Pre-arm to prove a successful resume clears it (entry-clear path).
|
||||
setResumeFailedSessionId('stored-1')
|
||||
|
||||
const requestGateway = vi.fn(async (method: string, params?: Record<string, unknown>) => {
|
||||
if (method === 'session.resume') {
|
||||
return { session_id: 'runtime-1', resumed: params?.session_id, messages: [], info: {} } as never
|
||||
}
|
||||
|
||||
return {} as never
|
||||
})
|
||||
|
||||
vi.mocked(getSessionMessages).mockResolvedValue({ messages: [] } as never)
|
||||
|
||||
await runResume(requestGateway)
|
||||
|
||||
expect($resumeFailedSessionId.get()).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -15,6 +15,10 @@ import { requestDesktopOnboarding } from '@/store/onboarding'
|
||||
import { $activeGatewayProfile, $newChatProfile, $profiles, ensureGatewayProfile, normalizeProfileKey } from '@/store/profile'
|
||||
import {
|
||||
$currentCwd,
|
||||
$currentFastMode,
|
||||
$currentModel,
|
||||
$currentProvider,
|
||||
$currentReasoningEffort,
|
||||
$messages,
|
||||
$sessions,
|
||||
$yoloActive,
|
||||
@@ -34,6 +38,8 @@ import {
|
||||
setFreshDraftReady,
|
||||
setIntroSeed,
|
||||
setMessages,
|
||||
setResumeExhaustedSessionId,
|
||||
setResumeFailedSessionId,
|
||||
setSelectedStoredSessionId,
|
||||
setSessions,
|
||||
setSessionStartedAt,
|
||||
@@ -407,13 +413,13 @@ export function useSessionActions({
|
||||
})
|
||||
setSessionStartedAt(null)
|
||||
setTurnStartedAt(null)
|
||||
// New chats start in the configured default project dir when set,
|
||||
// otherwise the sticky last-used workspace (PR #37586).
|
||||
setCurrentModel('')
|
||||
setCurrentProvider('')
|
||||
setCurrentReasoningEffort('')
|
||||
// The composer's model/effort/fast is sticky UI state (persisted in
|
||||
// localStorage) — a new chat FOLLOWS your last pick instead of snapping
|
||||
// back to the profile default, so we deliberately don't reset it here. The
|
||||
// profile default still owns first-run seeding and profile switches (see
|
||||
// refreshCurrentModel). Only $currentServiceTier (a live-session mirror)
|
||||
// is cleared.
|
||||
setCurrentServiceTier('')
|
||||
setCurrentFastMode(false)
|
||||
setYoloActive(false)
|
||||
setCurrentCwd(workspaceCwdForNewSession())
|
||||
setCurrentBranch('')
|
||||
@@ -443,11 +449,23 @@ export function useSessionActions({
|
||||
const newChatProfile = $newChatProfile.get() ?? normalizeProfileKey($activeGatewayProfile.get())
|
||||
await ensureGatewayProfile(newChatProfile)
|
||||
const cwd = $currentCwd.get().trim() || workspaceCwdForNewSession()
|
||||
// The composer's model/effort/fast is sticky UI state ($currentModel,
|
||||
// $currentProvider, $currentReasoningEffort, $currentFastMode). Ship it
|
||||
// with every session.create so the new chat opens on whatever the picker
|
||||
// shows — applied as per-session overrides, never written to the profile
|
||||
// default (that lives in Settings → Model).
|
||||
const uiModel = $currentModel.get().trim()
|
||||
const uiProvider = $currentProvider.get().trim()
|
||||
const uiEffort = $currentReasoningEffort.get().trim()
|
||||
const uiFast = $currentFastMode.get()
|
||||
|
||||
const created = await requestGateway<SessionCreateResponse>('session.create', {
|
||||
cols: 96,
|
||||
...(cwd && { cwd }),
|
||||
...(newChatProfile ? { profile: newChatProfile } : {})
|
||||
...(newChatProfile ? { profile: newChatProfile } : {}),
|
||||
...(uiModel ? { model: uiModel, ...(uiProvider ? { provider: uiProvider } : {}) } : {}),
|
||||
...(uiEffort ? { reasoning_effort: uiEffort } : {}),
|
||||
...(uiFast ? { fast: true } : {})
|
||||
})
|
||||
|
||||
const stored = created.stored_session_id ?? null
|
||||
@@ -563,6 +581,15 @@ export function useSessionActions({
|
||||
clearNotifications()
|
||||
setSelectedStoredSessionId(storedSessionId)
|
||||
selectedStoredSessionIdRef.current = storedSessionId
|
||||
// Optimistically clear any prior resume-failure latch for this session:
|
||||
// we're attempting a fresh resume, so the self-heal in use-route-resume
|
||||
// must not keep treating it as stranded. It's re-armed below only if THIS
|
||||
// attempt fails terminally (RPC reject + REST fallback failure).
|
||||
setResumeFailedSessionId(current => (current === storedSessionId ? null : current))
|
||||
// Also clear the exhausted-latch: a fresh attempt (manual Retry, reconnect,
|
||||
// reselect) gives the bounded auto-retry counter a clean cycle, so the
|
||||
// chat view drops the error state and shows the loader again.
|
||||
setResumeExhaustedSessionId(current => (current === storedSessionId ? null : current))
|
||||
|
||||
const warmRuntimeId = runtimeIdByStoredSessionIdRef.current.get(storedSessionId)
|
||||
|
||||
@@ -753,13 +780,41 @@ export function useSessionActions({
|
||||
return
|
||||
}
|
||||
|
||||
const fallback = await getSessionMessages(storedSessionId, sessionProfile)
|
||||
// The gateway resume RPC failed. Try the REST transcript as a fallback
|
||||
// so the window at least shows history. CRITICAL: this fallback must be
|
||||
// wrapped in its own try — if it ALSO throws (wedged/unreachable backend,
|
||||
// the common case when resume failed in the first place), an unguarded
|
||||
// throw here skips setMessages AND leaves activeSessionId null with an
|
||||
// empty transcript. That is the exact state the thread loader latches on
|
||||
// forever (messagesEmpty && !activeSessionId) with no recovery path —
|
||||
// the "open in new window stays stuck loading, even after a nap" bug.
|
||||
try {
|
||||
const fallback = await getSessionMessages(storedSessionId, sessionProfile)
|
||||
|
||||
if (!isCurrentResume()) {
|
||||
return
|
||||
if (!isCurrentResume()) {
|
||||
return
|
||||
}
|
||||
|
||||
setMessages(preserveLocalAssistantErrors(toChatMessages(fallback.messages), $messages.get()))
|
||||
} catch {
|
||||
// Fallback also failed: nothing to paint. Leave whatever messages are
|
||||
// already shown and fall through to arm the resume-failure latch so
|
||||
// use-route-resume re-attempts the resume on the next render / window
|
||||
// focus / gateway reconnect instead of stranding the loader.
|
||||
}
|
||||
|
||||
if (isCurrentResume() && $messages.get().length === 0) {
|
||||
// Arm the self-heal ONLY when the window is still empty: the gateway
|
||||
// resume rejected AND the REST fallback failed to paint a transcript.
|
||||
// That is the exact stranded state the loader latches on
|
||||
// (messagesEmpty && !activeSessionId), and matches $resumeFailedSessionId's
|
||||
// documented contract. If the REST fallback DID paint history, the
|
||||
// window is readable — arming here would needlessly auto-retry and,
|
||||
// once retries exhaust, blank that visible transcript behind the
|
||||
// exhausted-state error overlay (a regression vs. plain fallback success).
|
||||
setResumeFailedSessionId(storedSessionId)
|
||||
}
|
||||
|
||||
setMessages(preserveLocalAssistantErrors(toChatMessages(fallback.messages), $messages.get()))
|
||||
notifyError(err, copy.resumeFailed)
|
||||
} finally {
|
||||
if (isCurrentResume()) {
|
||||
|
||||
@@ -2,12 +2,14 @@ import { act, cleanup, render } from '@testing-library/react'
|
||||
import type { MutableRefObject } from 'react'
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import type { ChatMessage } from '@/lib/chat-messages'
|
||||
import {
|
||||
$currentFastMode,
|
||||
$currentModel,
|
||||
$currentProvider,
|
||||
$currentReasoningEffort,
|
||||
$currentServiceTier,
|
||||
$messages,
|
||||
$turnStartedAt,
|
||||
setCurrentFastMode,
|
||||
setCurrentModel,
|
||||
@@ -213,3 +215,113 @@ describe('useSessionStateCache — per-session turn timer', () => {
|
||||
expect($currentFastMode.get()).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
function userMessage(id: string, text: string): ChatMessage {
|
||||
return { id, role: 'user', parts: [{ type: 'text', text }] }
|
||||
}
|
||||
|
||||
function assistantText(id: string, text: string): ChatMessage {
|
||||
return { id, role: 'assistant', parts: [{ type: 'text', text }] }
|
||||
}
|
||||
|
||||
function assistantError(id: string, error: string): ChatMessage {
|
||||
return { id, role: 'assistant', parts: [], error, pending: false }
|
||||
}
|
||||
|
||||
interface ViewHarnessProps {
|
||||
activeSessionId: string | null
|
||||
onReady: (cache: Cache) => void
|
||||
}
|
||||
|
||||
function ViewHarness({ activeSessionId, onReady }: ViewHarnessProps) {
|
||||
const busyRef: MutableRefObject<boolean> = { current: false }
|
||||
const cache = useSessionStateCache({
|
||||
activeSessionId,
|
||||
busyRef,
|
||||
selectedStoredSessionId: null,
|
||||
setAwaitingResponse: () => undefined,
|
||||
setBusy: () => undefined,
|
||||
// Wire the published view back into the real $messages atom the flush
|
||||
// reads from, so the round-trip matches production.
|
||||
setMessages: messages => $messages.set(messages)
|
||||
})
|
||||
|
||||
onReady(cache)
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
describe('useSessionStateCache — cross-thread error isolation', () => {
|
||||
afterEach(() => {
|
||||
cleanup()
|
||||
$messages.set([])
|
||||
})
|
||||
|
||||
it('does not leak a failed turn into another thread on switch', () => {
|
||||
$messages.set([])
|
||||
let cache!: Cache
|
||||
const { rerender } = render(<ViewHarness activeSessionId="thread-A" onReady={c => (cache = c)} />)
|
||||
|
||||
// Thread A ends its turn with an out-of-funds error and is on screen.
|
||||
act(() => {
|
||||
cache.updateSessionState(
|
||||
'thread-A',
|
||||
state => ({
|
||||
...state,
|
||||
busy: false,
|
||||
messages: [userMessage('user-a', 'do the thing'), assistantError('assistant-a-error', 'Out of funds')]
|
||||
}),
|
||||
'stored-A'
|
||||
)
|
||||
})
|
||||
|
||||
expect($messages.get().some(message => message.error === 'Out of funds')).toBe(true)
|
||||
|
||||
// Switch to thread B (which completed cleanly). Its cached state syncs to
|
||||
// the view while $messages still holds thread A's transcript.
|
||||
rerender(<ViewHarness activeSessionId="thread-B" onReady={c => (cache = c)} />)
|
||||
act(() => {
|
||||
cache.updateSessionState(
|
||||
'thread-B',
|
||||
state => ({
|
||||
...state,
|
||||
busy: false,
|
||||
messages: [userMessage('user-b', 'hello'), assistantText('assistant-b', 'hi there')]
|
||||
}),
|
||||
'stored-B'
|
||||
)
|
||||
})
|
||||
|
||||
expect($messages.get().map(message => message.id)).toEqual(['user-b', 'assistant-b'])
|
||||
expect($messages.get().some(message => message.error === 'Out of funds')).toBe(false)
|
||||
})
|
||||
|
||||
it('still preserves a same-session local error a heartbeat dropped', () => {
|
||||
$messages.set([])
|
||||
let cache!: Cache
|
||||
render(<ViewHarness activeSessionId="thread-A" onReady={c => (cache = c)} />)
|
||||
|
||||
// First paint establishes thread A as the on-screen session.
|
||||
act(() => {
|
||||
cache.updateSessionState(
|
||||
'thread-A',
|
||||
state => ({ ...state, busy: false, messages: [userMessage('user-a', 'do the thing')] }),
|
||||
'stored-A'
|
||||
)
|
||||
})
|
||||
|
||||
// A local error lands in the view (e.g. failAssistantMessage wrote it).
|
||||
$messages.set([userMessage('user-a', 'do the thing'), assistantError('assistant-a-error', 'OpenRouter 403')])
|
||||
|
||||
// A later same-session heartbeat carries cached state that lost the error.
|
||||
act(() => {
|
||||
cache.updateSessionState('thread-A', state => ({
|
||||
...state,
|
||||
busy: false,
|
||||
messages: [userMessage('user-a', 'do the thing')]
|
||||
}))
|
||||
})
|
||||
|
||||
expect($messages.get().some(message => message.error === 'OpenRouter 403')).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -79,6 +79,9 @@ export function useSessionStateCache({
|
||||
const runtimeIdByStoredSessionIdRef = useRef(new Map<string, string>())
|
||||
const pendingViewStateRef = useRef<{ sessionId: string; state: ClientSessionState } | null>(null)
|
||||
const viewSyncRafRef = useRef<number | null>(null)
|
||||
// Runtime id whose transcript currently occupies `$messages` — lets the
|
||||
// flush below tell a same-session refresh from a thread switch.
|
||||
const viewSessionIdRef = useRef<string | null>(null)
|
||||
|
||||
useEffect(() => {
|
||||
activeSessionIdRef.current = activeSessionId
|
||||
@@ -142,12 +145,22 @@ export function useSessionStateCache({
|
||||
// jerks the scroll position while the user is reading. Skip the publish when
|
||||
// the merged result is content-identical to what's already on screen.
|
||||
const currentMessages = $messages.get()
|
||||
const nextMessages = preserveLocalAssistantErrors(pending.state.messages, currentMessages)
|
||||
// On a thread switch `$messages` still holds the *previous* thread, so
|
||||
// preserving its local errors would graft that thread's failed turn (e.g.
|
||||
// an out-of-funds error) onto this one — then cascade it everywhere as the
|
||||
// polluted view becomes the next switch's baseline. Only carry errors
|
||||
// across a same-session refresh; our cached state already keeps its own.
|
||||
const nextMessages =
|
||||
viewSessionIdRef.current === pending.sessionId
|
||||
? preserveLocalAssistantErrors(pending.state.messages, currentMessages)
|
||||
: pending.state.messages
|
||||
|
||||
if (!sameMessageList(nextMessages, currentMessages)) {
|
||||
setMessages(nextMessages)
|
||||
}
|
||||
|
||||
viewSessionIdRef.current = pending.sessionId
|
||||
|
||||
syncRuntimeMetadataToView(pending.state)
|
||||
setBusy(pending.state.busy)
|
||||
setMutableRef(busyRef, pending.state.busy)
|
||||
|
||||
@@ -23,6 +23,7 @@ import { fieldCopyForSchemaKey } from './field-copy'
|
||||
import { enumOptionsFor, getNested, prettyName, setNested } from './helpers'
|
||||
import { ModelSettings } from './model-settings'
|
||||
import { EmptyState, ListRow, LoadingState, SettingsContent } from './primitives'
|
||||
import { ProviderConfigPanel } from './provider-config-panel'
|
||||
|
||||
function ConfigField({
|
||||
schemaKey,
|
||||
@@ -368,6 +369,9 @@ export function ConfigSettings({
|
||||
schemaKey={key}
|
||||
value={getNested(config, key)}
|
||||
/>
|
||||
{key === 'memory.provider' && typeof getNested(config, key) === 'string' && getNested(config, key) ? (
|
||||
<ProviderConfigPanel provider={String(getNested(config, key))} />
|
||||
) : null}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
@@ -239,7 +239,7 @@ export const ENUM_OPTIONS: Record<string, string[]> = {
|
||||
'code_execution.mode': ['project', 'strict'],
|
||||
'context.engine': ['compressor', 'default', 'custom'],
|
||||
'delegation.reasoning_effort': ['', 'minimal', 'low', 'medium', 'high', 'xhigh'],
|
||||
'memory.provider': ['', 'builtin', 'honcho'],
|
||||
'memory.provider': ['', 'builtin', 'hindsight', 'honcho'],
|
||||
// Terminal execution backends — kept in sync with the dispatch ladder in
|
||||
// tools/terminal_tool.py::_create_environment (local/docker/singularity/
|
||||
// modal/daytona/ssh). Remote backends need extra env (image, tokens, host).
|
||||
|
||||
@@ -6,6 +6,12 @@ import { defineFieldCopy, fieldCopyForSchemaKey, schemaKeyToFieldCopyKey } from
|
||||
import { enumOptionsFor, getNested, providerGroup, setNested, stripToolsetLabel, toolsetDisplayLabel } from './helpers'
|
||||
|
||||
describe('settings helpers', () => {
|
||||
it('lists Hindsight as a built-in desktop memory provider option', () => {
|
||||
const options = enumOptionsFor('memory.provider', '', {})
|
||||
|
||||
expect(options).toContain('hindsight')
|
||||
})
|
||||
|
||||
describe('defineFieldCopy', () => {
|
||||
it('flattens nested field copy paths', () => {
|
||||
const copy = defineFieldCopy({
|
||||
|
||||
@@ -228,7 +228,7 @@ export function SettingsView({ gateway, onClose, onConfigSaved, onMainModelChang
|
||||
onMainModelChanged={onMainModelChanged}
|
||||
/>
|
||||
) : activeView === 'providers' ? (
|
||||
<ProvidersSettings onViewChange={setProviderView} view={providerView} />
|
||||
<ProvidersSettings onClose={onClose} onViewChange={setProviderView} view={providerView} />
|
||||
) : activeView === 'keys' ? (
|
||||
<KeysSettings view={keysView} />
|
||||
) : activeView === 'mcp' ? (
|
||||
|
||||
@@ -16,6 +16,8 @@ const getAuxiliaryModels = vi.fn()
|
||||
const setModelAssignment = vi.fn()
|
||||
const getRecommendedDefaultModel = vi.fn()
|
||||
const setEnvVar = vi.fn()
|
||||
const getHermesConfigRecord = vi.fn()
|
||||
const saveHermesConfig = vi.fn()
|
||||
const startManualProviderOAuth = vi.fn()
|
||||
|
||||
vi.mock('@/hermes', () => ({
|
||||
@@ -24,7 +26,9 @@ vi.mock('@/hermes', () => ({
|
||||
getAuxiliaryModels: () => getAuxiliaryModels(),
|
||||
setModelAssignment: (body: unknown) => setModelAssignment(body),
|
||||
getRecommendedDefaultModel: (slug: string) => getRecommendedDefaultModel(slug),
|
||||
setEnvVar: (key: string, value: string) => setEnvVar(key, value)
|
||||
setEnvVar: (key: string, value: string) => setEnvVar(key, value),
|
||||
getHermesConfigRecord: () => getHermesConfigRecord(),
|
||||
saveHermesConfig: (config: unknown) => saveHermesConfig(config)
|
||||
}))
|
||||
|
||||
vi.mock('@/store/onboarding', () => ({
|
||||
@@ -35,7 +39,13 @@ beforeEach(() => {
|
||||
getGlobalModelInfo.mockResolvedValue({ provider: 'nous', model: 'hermes-4' })
|
||||
getGlobalModelOptions.mockResolvedValue({
|
||||
providers: [
|
||||
{ name: 'Nous', slug: 'nous', models: ['hermes-4', 'hermes-4-mini'], authenticated: true },
|
||||
{
|
||||
name: 'Nous',
|
||||
slug: 'nous',
|
||||
models: ['hermes-4', 'hermes-4-mini'],
|
||||
authenticated: true,
|
||||
capabilities: { 'hermes-4': { reasoning: true, fast: true } }
|
||||
},
|
||||
// An unconfigured api_key provider — surfaced by the full-universe payload.
|
||||
{ name: 'DeepSeek', slug: 'deepseek', models: [], authenticated: false, auth_type: 'api_key', key_env: 'DEEPSEEK_API_KEY' }
|
||||
]
|
||||
@@ -47,6 +57,8 @@ beforeEach(() => {
|
||||
setModelAssignment.mockResolvedValue({ provider: 'nous', model: 'hermes-4', gateway_tools: [] })
|
||||
getRecommendedDefaultModel.mockResolvedValue({ provider: 'deepseek', model: 'deepseek-chat', free_tier: null })
|
||||
setEnvVar.mockResolvedValue({ ok: true })
|
||||
getHermesConfigRecord.mockResolvedValue({ agent: { reasoning_effort: 'medium', service_tier: 'normal' } })
|
||||
saveHermesConfig.mockResolvedValue({ ok: true })
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
@@ -100,6 +112,31 @@ describe('ModelSettings', () => {
|
||||
await waitFor(() => expect(setEnvVar).toHaveBeenCalledWith('DEEPSEEK_API_KEY', 'sk-test-123'))
|
||||
})
|
||||
|
||||
it('writes the profile default speed (service_tier) when the fast switch is toggled', async () => {
|
||||
await renderModelSettings()
|
||||
await waitFor(() => expect(getHermesConfigRecord).toHaveBeenCalled())
|
||||
|
||||
const fastSwitch = await screen.findByRole('switch')
|
||||
fireEvent.click(fastSwitch)
|
||||
|
||||
await waitFor(() =>
|
||||
expect(saveHermesConfig).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ agent: expect.objectContaining({ service_tier: 'fast' }) })
|
||||
)
|
||||
)
|
||||
})
|
||||
|
||||
it('hides the reasoning/speed defaults when the main model reports no capabilities', async () => {
|
||||
getGlobalModelOptions.mockResolvedValueOnce({
|
||||
providers: [{ name: 'Nous', slug: 'nous', models: ['hermes-4'], authenticated: true, capabilities: { 'hermes-4': { reasoning: false, fast: false } } }]
|
||||
})
|
||||
|
||||
await renderModelSettings()
|
||||
await waitFor(() => expect(getHermesConfigRecord).toHaveBeenCalled())
|
||||
|
||||
expect(screen.queryByRole('switch')).toBeNull()
|
||||
})
|
||||
|
||||
it('renders the auxiliary task rows', async () => {
|
||||
await renderModelSettings()
|
||||
|
||||
|
||||
@@ -3,11 +3,14 @@ import { useCallback, useEffect, useMemo, useState } from 'react'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Input } from '@/components/ui/input'
|
||||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
|
||||
import { Switch } from '@/components/ui/switch'
|
||||
import {
|
||||
getAuxiliaryModels,
|
||||
getGlobalModelInfo,
|
||||
getGlobalModelOptions,
|
||||
getHermesConfigRecord,
|
||||
getRecommendedDefaultModel,
|
||||
saveHermesConfig,
|
||||
setEnvVar,
|
||||
setModelAssignment
|
||||
} from '@/hermes'
|
||||
@@ -15,11 +18,26 @@ import type { AuxiliaryModelsResponse, ModelOptionProvider, StaleAuxAssignment }
|
||||
import { useI18n } from '@/i18n'
|
||||
import { AlertTriangle, Cpu, Loader2 } from '@/lib/icons'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { notifyError } from '@/store/notifications'
|
||||
import { startManualLocalEndpoint, startManualProviderOAuth } from '@/store/onboarding'
|
||||
import type { HermesConfigRecord } from '@/types/hermes'
|
||||
|
||||
import { CONTROL_TEXT } from './constants'
|
||||
import { getNested, setNested } from './helpers'
|
||||
import { ListRow, LoadingState, Pill, SectionHeading } from './primitives'
|
||||
|
||||
// Hermes' reasoning levels (VALID_REASONING_EFFORTS); `none` = thinking off.
|
||||
// Empty config = Hermes default (medium), shown as Medium.
|
||||
const EFFORT_VALUES = ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'] as const
|
||||
|
||||
// agent.service_tier stores "fast"/"priority"/"on" for fast; anything else is
|
||||
// normal (mirrors tui_gateway _load_service_tier).
|
||||
const isFastTier = (tier: unknown): boolean =>
|
||||
['fast', 'priority', 'on'].includes(String(tier ?? '').trim().toLowerCase())
|
||||
|
||||
// Reuse the composer's effort labels (`xhigh` shows as "Max", else 1:1).
|
||||
const effortLabelKey = (v: string) => (v === 'xhigh' ? 'max' : v) as 'high' | 'low' | 'max' | 'medium' | 'minimal'
|
||||
|
||||
// A provider row is "ready" to pick a model from when it reports models. The
|
||||
// backend now surfaces the full `hermes model` universe (every canonical
|
||||
// provider), so unconfigured providers come back with `authenticated:false`
|
||||
@@ -97,6 +115,9 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
|
||||
const [selectedProvider, setSelectedProvider] = useState('')
|
||||
const [selectedModel, setSelectedModel] = useState('')
|
||||
const [auxiliary, setAuxiliary] = useState<AuxiliaryModelsResponse | null>(null)
|
||||
// Full profile config, kept so the reasoning/speed defaults round-trip
|
||||
// (read agent.* → write back the whole record) like the generic config page.
|
||||
const [config, setConfig] = useState<HermesConfigRecord | null>(null)
|
||||
const [applying, setApplying] = useState(false)
|
||||
const [editingAuxTask, setEditingAuxTask] = useState<null | string>(null)
|
||||
const [auxDraft, setAuxDraft] = useState<{ model: string; provider: string }>({ model: '', provider: '' })
|
||||
@@ -113,10 +134,11 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
|
||||
setError('')
|
||||
|
||||
try {
|
||||
const [modelInfo, modelOptions, auxiliaryModels] = await Promise.all([
|
||||
const [modelInfo, modelOptions, auxiliaryModels, cfg] = await Promise.all([
|
||||
getGlobalModelInfo(),
|
||||
getGlobalModelOptions(),
|
||||
getAuxiliaryModels()
|
||||
getAuxiliaryModels(),
|
||||
getHermesConfigRecord()
|
||||
])
|
||||
|
||||
setMainModel({ model: modelInfo.model, provider: modelInfo.provider })
|
||||
@@ -124,6 +146,7 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
|
||||
setSelectedProvider(prev => prev || modelInfo.provider)
|
||||
setSelectedModel(prev => prev || modelInfo.model)
|
||||
setAuxiliary(auxiliaryModels)
|
||||
setConfig(cfg)
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : String(err))
|
||||
} finally {
|
||||
@@ -181,6 +204,42 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
|
||||
.map(entry => ({ task: entry.task, provider: entry.provider, model: entry.model }))
|
||||
}, [auxiliary, mainModel])
|
||||
|
||||
// Capabilities of the APPLIED main model — gates the profile-default
|
||||
// reasoning/speed controls the same way the composer picker gates per-model
|
||||
// edits (reasoning defaults on, fast defaults off when unreported).
|
||||
const mainCaps = useMemo(() => {
|
||||
const row = providers.find(provider => provider.slug === mainModel?.provider)
|
||||
|
||||
return mainModel ? row?.capabilities?.[mainModel.model] : undefined
|
||||
}, [providers, mainModel])
|
||||
|
||||
const reasoningSupported = mainCaps?.reasoning ?? true
|
||||
const fastSupported = mainCaps?.fast ?? false
|
||||
const effortValue = String(getNested(config ?? {}, 'agent.reasoning_effort') ?? '').trim().toLowerCase() || 'medium'
|
||||
const fastOn = isFastTier(getNested(config ?? {}, 'agent.service_tier'))
|
||||
|
||||
// Persist a single agent.* default by round-tripping the whole config record
|
||||
// (PUT /api/config replaces it) — optimistic, with rollback on failure.
|
||||
const writeAgentDefault = useCallback(
|
||||
async (key: string, value: string) => {
|
||||
if (!config) {
|
||||
return
|
||||
}
|
||||
|
||||
const prev = config
|
||||
const next = setNested(config, key, value)
|
||||
setConfig(next)
|
||||
|
||||
try {
|
||||
await saveHermesConfig(next)
|
||||
} catch (err) {
|
||||
setConfig(prev)
|
||||
notifyError(err, m.defaultsFailed)
|
||||
}
|
||||
},
|
||||
[config, m.defaultsFailed]
|
||||
)
|
||||
|
||||
// Paste an API key for the selected `api_key` provider, persist it, then
|
||||
// refresh so the now-authenticated provider's models populate. Auto-selects
|
||||
// the recommended default model so the user can Apply in one more click.
|
||||
@@ -433,6 +492,38 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
|
||||
: `${selectedProviderRow?.name} signs in through your browser — Hermes runs the flow for you.`}
|
||||
</p>
|
||||
)}
|
||||
{config && mainModel && (reasoningSupported || fastSupported) && (
|
||||
<div className="mt-3 flex flex-wrap items-center gap-x-6 gap-y-3">
|
||||
<span className="text-xs text-muted-foreground">{m.defaultsLabel}</span>
|
||||
{reasoningSupported && (
|
||||
<div className="flex items-center gap-2 text-xs">
|
||||
{m.reasoning}
|
||||
<Select onValueChange={value => void writeAgentDefault('agent.reasoning_effort', value)} value={effortValue}>
|
||||
<SelectTrigger className={cn('min-w-28', CONTROL_TEXT)}>
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{EFFORT_VALUES.map(value => (
|
||||
<SelectItem key={value} value={value}>
|
||||
{value === 'none' ? m.reasoningOff : t.shell.modelOptions[effortLabelKey(value)]}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
)}
|
||||
{fastSupported && (
|
||||
<label className="flex items-center gap-2 text-xs">
|
||||
{t.shell.modelOptions.fast}
|
||||
<Switch
|
||||
checked={fastOn}
|
||||
onCheckedChange={checked => void writeAgentDefault('agent.service_tier', checked ? 'fast' : 'normal')}
|
||||
size="xs"
|
||||
/>
|
||||
</label>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
{error && <div className="mt-2 text-xs text-destructive">{error}</div>}
|
||||
{switchStaleAux.length > 0 && (
|
||||
<div className="mt-2">
|
||||
|
||||
142
apps/desktop/src/app/settings/provider-config-panel.test.tsx
Normal file
142
apps/desktop/src/app/settings/provider-config-panel.test.tsx
Normal file
@@ -0,0 +1,142 @@
|
||||
import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/react'
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import type { MemoryProviderConfig } from '@/types/hermes'
|
||||
|
||||
const getMemoryProviderConfig = vi.fn()
|
||||
const saveMemoryProviderConfig = vi.fn()
|
||||
|
||||
vi.mock('@/hermes', () => ({
|
||||
getMemoryProviderConfig: (provider: string) => getMemoryProviderConfig(provider),
|
||||
saveMemoryProviderConfig: (provider: string, values: unknown) => saveMemoryProviderConfig(provider, values)
|
||||
}))
|
||||
|
||||
vi.mock('@/store/notifications', () => ({
|
||||
notify: vi.fn(),
|
||||
notifyError: vi.fn()
|
||||
}))
|
||||
|
||||
function hindsightSchema(overrides: Partial<MemoryProviderConfig['fields'][number]>[] = []): MemoryProviderConfig {
|
||||
const fields: MemoryProviderConfig['fields'] = [
|
||||
{
|
||||
key: 'mode',
|
||||
label: 'Mode',
|
||||
kind: 'select',
|
||||
value: 'cloud',
|
||||
description: 'How Hermes connects to Hindsight.',
|
||||
placeholder: '',
|
||||
is_set: true,
|
||||
options: [
|
||||
{ value: 'cloud', label: 'Cloud', description: 'Hindsight Cloud API (lightweight, just needs an API key)' },
|
||||
{ value: 'local_external', label: 'Local External', description: 'Connect to an existing Hindsight instance' }
|
||||
]
|
||||
},
|
||||
{
|
||||
key: 'api_key',
|
||||
label: 'API key',
|
||||
kind: 'secret',
|
||||
value: '',
|
||||
description: 'Used to authenticate with the Hindsight API.',
|
||||
placeholder: 'Enter Hindsight API key',
|
||||
is_set: false,
|
||||
options: []
|
||||
},
|
||||
{
|
||||
key: 'api_url',
|
||||
label: 'API URL',
|
||||
kind: 'text',
|
||||
value: 'https://api.hindsight.vectorize.io',
|
||||
description: '',
|
||||
placeholder: '',
|
||||
is_set: true,
|
||||
options: []
|
||||
},
|
||||
{ key: 'bank_id', label: 'Bank ID', kind: 'text', value: 'hermes', description: '', placeholder: '', is_set: true, options: [] },
|
||||
{
|
||||
key: 'recall_budget',
|
||||
label: 'Recall budget',
|
||||
kind: 'select',
|
||||
value: 'mid',
|
||||
description: '',
|
||||
placeholder: '',
|
||||
is_set: true,
|
||||
options: [
|
||||
{ value: 'low', label: 'low', description: '' },
|
||||
{ value: 'mid', label: 'mid', description: '' },
|
||||
{ value: 'high', label: 'high', description: '' }
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
return {
|
||||
name: 'hindsight',
|
||||
label: 'Hindsight',
|
||||
fields: fields.map((field, index) => ({ ...field, ...overrides[index] }))
|
||||
}
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
getMemoryProviderConfig.mockResolvedValue(hindsightSchema())
|
||||
saveMemoryProviderConfig.mockResolvedValue({ ok: true })
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
cleanup()
|
||||
vi.clearAllMocks()
|
||||
})
|
||||
|
||||
async function renderPanel(provider = 'hindsight') {
|
||||
const { ProviderConfigPanel } = await import('./provider-config-panel')
|
||||
|
||||
return render(<ProviderConfigPanel provider={provider} />)
|
||||
}
|
||||
|
||||
describe('ProviderConfigPanel', () => {
|
||||
it('renders the declared provider fields generically', async () => {
|
||||
await renderPanel()
|
||||
|
||||
expect(await screen.findByDisplayValue('https://api.hindsight.vectorize.io')).toBeTruthy()
|
||||
expect(screen.getByDisplayValue('hermes')).toBeTruthy()
|
||||
expect(screen.getByText('Cloud')).toBeTruthy()
|
||||
expect(screen.getAllByText('Hindsight Cloud API (lightweight, just needs an API key)').length).toBeGreaterThan(0)
|
||||
expect(screen.getByText('mid')).toBeTruthy()
|
||||
})
|
||||
|
||||
it('collapses and expands the fields', async () => {
|
||||
await renderPanel()
|
||||
|
||||
expect(await screen.findByLabelText('API URL')).toBeTruthy()
|
||||
fireEvent.click(screen.getByRole('button', { name: /Hindsight settings/ }))
|
||||
expect(screen.queryByLabelText('API URL')).toBeNull()
|
||||
fireEvent.click(screen.getByRole('button', { name: /Hindsight settings/ }))
|
||||
expect(await screen.findByLabelText('API URL')).toBeTruthy()
|
||||
})
|
||||
|
||||
it('saves edited values without requiring a secret replacement', async () => {
|
||||
await renderPanel()
|
||||
|
||||
const apiUrl = await screen.findByLabelText('API URL')
|
||||
fireEvent.change(apiUrl, { target: { value: 'http://localhost:8888' } })
|
||||
fireEvent.change(screen.getByLabelText('Bank ID'), { target: { value: 'ben-bank' } })
|
||||
fireEvent.click(screen.getByRole('button', { name: 'Save' }))
|
||||
|
||||
await waitFor(() =>
|
||||
expect(saveMemoryProviderConfig).toHaveBeenCalledWith('hindsight', {
|
||||
mode: 'cloud',
|
||||
api_key: '',
|
||||
api_url: 'http://localhost:8888',
|
||||
bank_id: 'ben-bank',
|
||||
recall_budget: 'mid'
|
||||
})
|
||||
)
|
||||
})
|
||||
|
||||
it('renders nothing for a provider with no declared config surface', async () => {
|
||||
getMemoryProviderConfig.mockResolvedValue({ name: 'builtin', label: 'builtin', fields: [] })
|
||||
|
||||
const { container } = await renderPanel('builtin')
|
||||
|
||||
await waitFor(() => expect(getMemoryProviderConfig).toHaveBeenCalledWith('builtin'))
|
||||
expect(container.querySelector('section')).toBeNull()
|
||||
})
|
||||
})
|
||||
182
apps/desktop/src/app/settings/provider-config-panel.tsx
Normal file
182
apps/desktop/src/app/settings/provider-config-panel.tsx
Normal file
@@ -0,0 +1,182 @@
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { DisclosureCaret } from '@/components/ui/disclosure-caret'
|
||||
import { Input } from '@/components/ui/input'
|
||||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
|
||||
import { getMemoryProviderConfig, saveMemoryProviderConfig } from '@/hermes'
|
||||
import { Check, Loader2, Save } from '@/lib/icons'
|
||||
import { notify, notifyError } from '@/store/notifications'
|
||||
import type { MemoryProviderConfig, MemoryProviderField } from '@/types/hermes'
|
||||
|
||||
import { CONTROL_TEXT } from './constants'
|
||||
import { LoadingState, Pill } from './primitives'
|
||||
|
||||
/** Seed editable values from the schema: non-secret fields keep their current
|
||||
* value, secret fields start blank (their value is never returned). */
|
||||
function seedValues(config: MemoryProviderConfig): Record<string, string> {
|
||||
return Object.fromEntries(
|
||||
config.fields.map(field => [field.key, field.kind === 'secret' ? '' : field.value])
|
||||
)
|
||||
}
|
||||
|
||||
function FieldControl({
|
||||
field,
|
||||
value,
|
||||
onChange
|
||||
}: {
|
||||
field: MemoryProviderField
|
||||
value: string
|
||||
onChange: (value: string) => void
|
||||
}) {
|
||||
if (field.kind === 'select') {
|
||||
const selected = field.options.find(option => option.value === value)
|
||||
|
||||
return (
|
||||
<>
|
||||
<Select onValueChange={onChange} value={value}>
|
||||
<SelectTrigger className={CONTROL_TEXT}>
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{field.options.map(option => (
|
||||
<SelectItem key={option.value} value={option.value}>
|
||||
{option.label}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
{(selected?.description || field.description) && (
|
||||
<span className="text-xs text-muted-foreground">{selected?.description || field.description}</span>
|
||||
)}
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
if (field.kind === 'secret') {
|
||||
return (
|
||||
<div className="flex flex-wrap items-center gap-2">
|
||||
<Input
|
||||
className="min-w-64 flex-1 font-mono"
|
||||
onChange={event => onChange(event.target.value)}
|
||||
placeholder={field.is_set ? 'Leave blank to keep current value' : field.placeholder}
|
||||
type="password"
|
||||
value={value}
|
||||
/>
|
||||
{field.is_set && (
|
||||
<Pill tone="primary">
|
||||
<Check className="size-3" />
|
||||
Set
|
||||
</Pill>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<Input
|
||||
className="font-mono"
|
||||
onChange={event => onChange(event.target.value)}
|
||||
placeholder={field.placeholder}
|
||||
value={value}
|
||||
/>
|
||||
)
|
||||
}
|
||||
|
||||
export function ProviderConfigPanel({ provider }: { provider: string }) {
|
||||
const [config, setConfig] = useState<MemoryProviderConfig | null>(null)
|
||||
const [values, setValues] = useState<Record<string, string>>({})
|
||||
const [expanded, setExpanded] = useState(true)
|
||||
const [saving, setSaving] = useState(false)
|
||||
|
||||
const refresh = useCallback(async () => {
|
||||
try {
|
||||
const next = await getMemoryProviderConfig(provider)
|
||||
setConfig(next)
|
||||
setValues(seedValues(next))
|
||||
} catch (err) {
|
||||
notifyError(err, 'Memory provider settings failed to load')
|
||||
setConfig(null)
|
||||
}
|
||||
}, [provider])
|
||||
|
||||
useEffect(() => {
|
||||
setConfig(null)
|
||||
void refresh()
|
||||
}, [refresh])
|
||||
|
||||
const save = useCallback(async () => {
|
||||
if (!config) {
|
||||
return
|
||||
}
|
||||
|
||||
setSaving(true)
|
||||
|
||||
try {
|
||||
await saveMemoryProviderConfig(provider, values)
|
||||
notify({ kind: 'success', title: `${config.label} saved`, message: 'Memory provider configuration updated.' })
|
||||
await refresh()
|
||||
} catch (err) {
|
||||
notifyError(err, `Failed to save ${config.label} settings`)
|
||||
} finally {
|
||||
setSaving(false)
|
||||
}
|
||||
}, [config, provider, refresh, values])
|
||||
|
||||
// Providers without a declared config surface (e.g. builtin) render nothing.
|
||||
if (config && config.fields.length === 0) {
|
||||
return null
|
||||
}
|
||||
|
||||
if (!config) {
|
||||
return <LoadingState label="Loading memory provider settings..." />
|
||||
}
|
||||
|
||||
const secretFields = config.fields.filter(field => field.kind === 'secret')
|
||||
|
||||
return (
|
||||
<section className="py-3">
|
||||
<button
|
||||
aria-expanded={expanded}
|
||||
className="flex w-full items-center justify-between gap-3 rounded-lg bg-background/60 px-3 py-2 text-left hover:bg-accent/50"
|
||||
onClick={() => setExpanded(open => !open)}
|
||||
type="button"
|
||||
>
|
||||
<span className="flex min-w-0 items-center gap-2">
|
||||
<DisclosureCaret open={expanded} />
|
||||
<span className="text-[length:var(--conversation-text-font-size)] font-medium text-foreground">
|
||||
{config.label} settings
|
||||
</span>
|
||||
{secretFields.map(field => (
|
||||
<Pill key={field.key}>{field.is_set ? `${field.label} set` : `${field.label} not set`}</Pill>
|
||||
))}
|
||||
</span>
|
||||
</button>
|
||||
|
||||
{expanded && (
|
||||
<div className="mt-3 grid gap-4 rounded-xl bg-background/60 p-4">
|
||||
{config.fields.map(field => (
|
||||
<label className="grid gap-1.5" key={field.key}>
|
||||
<span className="text-xs font-medium text-muted-foreground">{field.label}</span>
|
||||
<FieldControl
|
||||
field={field}
|
||||
onChange={value => setValues(current => ({ ...current, [field.key]: value }))}
|
||||
value={values[field.key] ?? ''}
|
||||
/>
|
||||
{field.kind !== 'select' && field.description && (
|
||||
<span className="text-xs text-muted-foreground">{field.description}</span>
|
||||
)}
|
||||
</label>
|
||||
))}
|
||||
|
||||
<div className="flex justify-end">
|
||||
<Button disabled={saving} onClick={() => void save()} size="sm">
|
||||
{saving ? <Loader2 className="size-3.5 animate-spin" /> : <Save />}
|
||||
Save
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</section>
|
||||
)
|
||||
}
|
||||
@@ -55,7 +55,7 @@ afterEach(() => {
|
||||
async function renderProvidersSettings() {
|
||||
const { ProvidersSettings } = await import('./providers-settings')
|
||||
|
||||
return render(<ProvidersSettings onViewChange={vi.fn()} view="accounts" />)
|
||||
return render(<ProvidersSettings onClose={vi.fn()} onViewChange={vi.fn()} view="accounts" />)
|
||||
}
|
||||
|
||||
describe('ProvidersSettings', () => {
|
||||
@@ -95,6 +95,6 @@ describe('ProvidersSettings', () => {
|
||||
|
||||
expect(await screen.findByText('Qwen Code')).toBeTruthy()
|
||||
expect(screen.queryByRole('button', { name: 'Remove Qwen Code' })).toBeNull()
|
||||
expect(screen.getByText(/managed outside Hermes/)).toBeTruthy()
|
||||
expect(screen.getByText(/managed by its own CLI/)).toBeTruthy()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import { useStore } from '@nanostores/react'
|
||||
import type { ReactNode } from 'react'
|
||||
import { useCallback, useEffect, useMemo, useState } from 'react'
|
||||
|
||||
import { runInTerminal } from '@/app/right-sidebar/store'
|
||||
import {
|
||||
FEATURED_ID,
|
||||
FeaturedProviderRow,
|
||||
@@ -23,6 +25,20 @@ import { SettingsCategoryHeading, useEnvCredentials } from './env-credentials'
|
||||
import { providerGroup, providerMeta, providerPriority } from './helpers'
|
||||
import { LoadingState, SettingsContent } from './primitives'
|
||||
|
||||
// The embedded terminal (and thus the "run disconnect command" path) only
|
||||
// exists in the Electron desktop shell, not the web dashboard.
|
||||
const canRunInTerminal = () => typeof window !== 'undefined' && Boolean(window.hermesDesktop?.terminal)
|
||||
|
||||
// Parallel group headers ("Connected", "Other providers") so the expanded list
|
||||
// reads as its own section instead of bleeding into the connected group.
|
||||
function GroupLabel({ children }: { children: ReactNode }) {
|
||||
return (
|
||||
<p className="mt-3 px-0.5 text-[length:var(--conversation-caption-font-size)] font-medium text-(--ui-text-tertiary)">
|
||||
{children}
|
||||
</p>
|
||||
)
|
||||
}
|
||||
|
||||
// Sub-views surfaced as a sidebar subnav: account sign-in vs raw API keys.
|
||||
export const PROVIDER_VIEWS = ['accounts', 'keys'] as const
|
||||
|
||||
@@ -90,11 +106,13 @@ function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGr
|
||||
function OAuthPicker({
|
||||
disconnecting,
|
||||
onDisconnect,
|
||||
onTerminalDisconnect,
|
||||
onWantApiKey,
|
||||
providers
|
||||
}: {
|
||||
disconnecting: null | string
|
||||
onDisconnect: (provider: OAuthProvider) => void
|
||||
onTerminalDisconnect: (provider: OAuthProvider) => void
|
||||
onWantApiKey: () => void
|
||||
providers: OAuthProvider[]
|
||||
}) {
|
||||
@@ -138,15 +156,14 @@ function OAuthPicker({
|
||||
{featured && <FeaturedProviderRow onSelect={select} provider={featured} />}
|
||||
{connected.length > 0 && (
|
||||
<>
|
||||
<p className="mt-1 px-0.5 text-[length:var(--conversation-caption-font-size)] font-medium text-(--ui-text-tertiary)">
|
||||
{p.connected}
|
||||
</p>
|
||||
<GroupLabel>{p.connected}</GroupLabel>
|
||||
{connected.map(p => (
|
||||
<ConnectedProviderRow
|
||||
disconnecting={disconnecting === p.id}
|
||||
key={p.id}
|
||||
onDisconnect={onDisconnect}
|
||||
onSelect={select}
|
||||
onTerminalDisconnect={onTerminalDisconnect}
|
||||
provider={p}
|
||||
/>
|
||||
))}
|
||||
@@ -154,6 +171,7 @@ function OAuthPicker({
|
||||
)}
|
||||
{showOthers && (
|
||||
<>
|
||||
{connected.length > 0 && <GroupLabel>{p.otherProviders}</GroupLabel>}
|
||||
{others.map(p => (
|
||||
<ProviderRow key={p.id} onSelect={select} provider={p} />
|
||||
))}
|
||||
@@ -180,21 +198,26 @@ function ConnectedProviderRow({
|
||||
disconnecting,
|
||||
onDisconnect,
|
||||
onSelect,
|
||||
onTerminalDisconnect,
|
||||
provider
|
||||
}: {
|
||||
disconnecting: boolean
|
||||
onDisconnect: (provider: OAuthProvider) => void
|
||||
onSelect: (provider: OAuthProvider) => void
|
||||
onTerminalDisconnect: (provider: OAuthProvider) => void
|
||||
provider: OAuthProvider
|
||||
}) {
|
||||
const { t } = useI18n()
|
||||
const copy = t.settings.providers
|
||||
const title = providerTitle(provider)
|
||||
const Trail = provider.flow === 'external' ? Terminal : ChevronRight
|
||||
// Hermes can clear this provider's creds via the API.
|
||||
const canDisconnect = provider.disconnectable ?? provider.flow !== 'external'
|
||||
|
||||
const disconnectHint = provider.flow === 'external'
|
||||
? t.settings.providers.removeExternal(title, provider.cli_command)
|
||||
: t.settings.providers.removeKeyManaged(title)
|
||||
// External (CLI-managed) provider Hermes can't clear via the API, but ships a
|
||||
// command we can run in the embedded terminal (Electron shell only).
|
||||
const terminalDisconnect = !canDisconnect && Boolean(provider.disconnect_command) && canRunInTerminal()
|
||||
// Only fall back to a static "remove it elsewhere" hint when we offer no button.
|
||||
const showHint = !canDisconnect && !terminalDisconnect
|
||||
|
||||
return (
|
||||
<div className="group grid grid-cols-[minmax(0,1fr)_auto] items-center gap-1 rounded-[6px] transition-colors hover:bg-(--ui-control-hover-background)">
|
||||
@@ -203,13 +226,13 @@ function ConnectedProviderRow({
|
||||
<span className="truncate text-[length:var(--conversation-text-font-size)] font-semibold">{title}</span>
|
||||
<span className="inline-flex shrink-0 items-center gap-1 bg-primary/10 px-2 py-0.5 text-xs font-medium text-primary">
|
||||
<Check className="size-3" />
|
||||
{t.settings.providers.connected}
|
||||
{copy.connected}
|
||||
</span>
|
||||
</div>
|
||||
<p className="mt-1 text-xs leading-5 text-muted-foreground">{t.onboarding.flowSubtitles[provider.flow]}</p>
|
||||
{!canDisconnect && (
|
||||
{showHint && (
|
||||
<p className="mt-0.5 truncate text-[0.68rem] leading-5 text-muted-foreground/70">
|
||||
{disconnectHint}
|
||||
{provider.flow === 'external' ? copy.removeExternalGeneric(title) : copy.removeKeyManaged(title)}
|
||||
</p>
|
||||
)}
|
||||
</button>
|
||||
@@ -228,6 +251,18 @@ function ConnectedProviderRow({
|
||||
{disconnecting ? <Loader2 className="size-3 animate-spin" /> : <Trash2 className="size-3" />}
|
||||
</Button>
|
||||
)}
|
||||
{terminalDisconnect && (
|
||||
<Button
|
||||
aria-label={`${copy.disconnect} ${title}`}
|
||||
onClick={() => onTerminalDisconnect(provider)}
|
||||
size="icon-xs"
|
||||
title={copy.disconnectInTerminal}
|
||||
type="button"
|
||||
variant="ghost"
|
||||
>
|
||||
<Trash2 className="size-3" />
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
@@ -243,7 +278,7 @@ function NoProviderKeys() {
|
||||
)
|
||||
}
|
||||
|
||||
export function ProvidersSettings({ onViewChange, view }: ProvidersSettingsProps) {
|
||||
export function ProvidersSettings({ onClose, onViewChange, view }: ProvidersSettingsProps) {
|
||||
const { t } = useI18n()
|
||||
const { rowProps, vars } = useEnvCredentials()
|
||||
const [oauthProviders, setOauthProviders] = useState<OAuthProvider[]>([])
|
||||
@@ -282,6 +317,29 @@ export function ProvidersSettings({ onViewChange, view }: ProvidersSettingsProps
|
||||
return () => void (cancelled = true)
|
||||
}, [onboardingActive])
|
||||
|
||||
// External (CLI-managed) providers can't be cleared via the API by design —
|
||||
// Hermes never deletes creds another tool owns behind a silent API call.
|
||||
// Instead we run the documented removal command in the embedded terminal so
|
||||
// the user sees exactly what executes, then return them to chat to watch it.
|
||||
function handleTerminalDisconnect(provider: OAuthProvider) {
|
||||
const command = provider.disconnect_command
|
||||
|
||||
if (!command) {
|
||||
return
|
||||
}
|
||||
|
||||
const name = providerTitle(provider)
|
||||
|
||||
if (!window.confirm(t.settings.providers.removeTerminalConfirm(name, command))) {
|
||||
return
|
||||
}
|
||||
|
||||
// Leave the settings overlay so the terminal pane (chat-only) is visible.
|
||||
onClose()
|
||||
runInTerminal(command)
|
||||
notify({ kind: 'info', title: t.settings.providers.removedTitle, message: t.settings.providers.removeTerminalRunning(name) })
|
||||
}
|
||||
|
||||
async function handleDisconnect(provider: OAuthProvider) {
|
||||
const name = providerTitle(provider)
|
||||
|
||||
@@ -341,6 +399,7 @@ export function ProvidersSettings({ onViewChange, view }: ProvidersSettingsProps
|
||||
<OAuthPicker
|
||||
disconnecting={disconnecting}
|
||||
onDisconnect={provider => void handleDisconnect(provider)}
|
||||
onTerminalDisconnect={handleTerminalDisconnect}
|
||||
onWantApiKey={() => onViewChange('keys')}
|
||||
providers={oauthProviders}
|
||||
/>
|
||||
@@ -359,6 +418,7 @@ interface ProviderKeyGroup {
|
||||
}
|
||||
|
||||
interface ProvidersSettingsProps {
|
||||
onClose: () => void
|
||||
onViewChange: (view: ProviderView) => void
|
||||
view: ProviderView
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ import {
|
||||
} from '@/store/layout'
|
||||
import { $paneWidthOverride } from '@/store/panes'
|
||||
import { $connection } from '@/store/session'
|
||||
import { isNewSessionWindow, isSecondaryWindow } from '@/store/windows'
|
||||
import { isSecondaryWindow } from '@/store/windows'
|
||||
|
||||
import { SIDEBAR_COLLAPSE_MEDIA_QUERY } from '../layout-constants'
|
||||
|
||||
@@ -80,7 +80,10 @@ export function AppShell({
|
||||
const connection = useStore($connection)
|
||||
const viewportFullscreen = useSyncExternalStore(subscribeWindowSize, viewportIsFullscreen, () => false)
|
||||
const isFullscreen = Boolean(connection?.isFullscreen) || viewportFullscreen
|
||||
const hideTitlebarControls = isNewSessionWindow()
|
||||
// Every secondary window (new-session scratch, subagent watch, cmd-click
|
||||
// pop-out) is a compact side panel — none of them carry the full titlebar
|
||||
// tool cluster. Gate on isSecondaryWindow, never the narrower new-session flag.
|
||||
const hideTitlebarControls = isSecondaryWindow()
|
||||
const titlebarControls = titlebarControlsPosition(connection?.windowButtonPosition, isFullscreen)
|
||||
// Width Windows/Linux reserve for the OS-painted min/max/close overlay (zero
|
||||
// on macOS, where window controls sit on the left and are reported via
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import { useStore } from '@nanostores/react'
|
||||
import type { ReactNode } from 'react'
|
||||
import { useCallback, useMemo } from 'react'
|
||||
|
||||
import type { CommandCenterSection } from '@/app/command-center'
|
||||
@@ -9,7 +8,6 @@ import { useI18n } from '@/i18n'
|
||||
import {
|
||||
Activity,
|
||||
AlertCircle,
|
||||
ChevronDown,
|
||||
Clock,
|
||||
Command,
|
||||
Hash,
|
||||
@@ -19,7 +17,6 @@ import {
|
||||
Zap,
|
||||
ZapFilled
|
||||
} from '@/lib/icons'
|
||||
import { formatModelStatusLabel } from '@/lib/model-status-label'
|
||||
import type { RuntimeReadinessResult } from '@/lib/runtime-readiness'
|
||||
import { contextBarLabel, LiveDuration, usageContextLabel } from '@/lib/statusbar'
|
||||
import { cn } from '@/lib/utils'
|
||||
@@ -30,16 +27,11 @@ import {
|
||||
$activeSessionId,
|
||||
$busy,
|
||||
$connection,
|
||||
$currentFastMode,
|
||||
$currentModel,
|
||||
$currentProvider,
|
||||
$currentReasoningEffort,
|
||||
$currentUsage,
|
||||
$sessionStartedAt,
|
||||
$turnStartedAt,
|
||||
$workingSessionIds,
|
||||
$yoloActive,
|
||||
setModelPickerOpen,
|
||||
setYoloActive
|
||||
} from '@/store/session'
|
||||
import { $subagentsBySession, activeSubagentCount } from '@/store/subagents'
|
||||
@@ -65,7 +57,6 @@ interface StatusbarItemsOptions {
|
||||
gatewayLogLines: readonly string[]
|
||||
gatewayState: string
|
||||
inferenceStatus: RuntimeReadinessResult | null
|
||||
modelMenuContent?: ReactNode
|
||||
openAgents: () => void
|
||||
openCommandCenterSection: (section: CommandCenterSection) => void
|
||||
freshDraftReady: boolean
|
||||
@@ -83,7 +74,6 @@ export function useStatusbarItems({
|
||||
gatewayLogLines,
|
||||
gatewayState,
|
||||
inferenceStatus,
|
||||
modelMenuContent,
|
||||
openAgents,
|
||||
openCommandCenterSection,
|
||||
freshDraftReady,
|
||||
@@ -97,10 +87,6 @@ export function useStatusbarItems({
|
||||
const terminalTakeover = useStore($terminalTakeover)
|
||||
const yoloActive = useStore($yoloActive)
|
||||
const busy = useStore($busy)
|
||||
const currentFastMode = useStore($currentFastMode)
|
||||
const currentModel = useStore($currentModel)
|
||||
const currentProvider = useStore($currentProvider)
|
||||
const currentReasoningEffort = useStore($currentReasoningEffort)
|
||||
const currentUsage = useStore($currentUsage)
|
||||
const desktopActionTasks = useStore($desktopActionTasks)
|
||||
const previewServerRestartStatus = useStore($previewServerRestartStatus)
|
||||
@@ -416,37 +402,6 @@ export function useStatusbarItems({
|
||||
title: yoloActive ? copy.yoloOn : copy.yoloOff,
|
||||
variant: 'action'
|
||||
},
|
||||
{
|
||||
id: 'model-summary',
|
||||
label: (
|
||||
<span className="inline-flex min-w-0 items-center gap-0.5">
|
||||
<span className="truncate">
|
||||
{formatModelStatusLabel(currentModel, {
|
||||
fastMode: currentFastMode,
|
||||
reasoningEffort: currentReasoningEffort
|
||||
})}
|
||||
</span>
|
||||
<ChevronDown className="size-2.5 shrink-0 opacity-50" />
|
||||
</span>
|
||||
),
|
||||
...(modelMenuContent
|
||||
? {
|
||||
menuAlign: 'end' as const,
|
||||
menuClassName: 'w-64',
|
||||
menuContent: modelMenuContent,
|
||||
title: currentProvider
|
||||
? copy.modelTitle(currentProvider, currentModel || copy.modelNone)
|
||||
: copy.switchModel,
|
||||
variant: 'menu' as const
|
||||
}
|
||||
: {
|
||||
onSelect: () => setModelPickerOpen(true),
|
||||
title: currentProvider
|
||||
? copy.providerModelTitle(currentProvider, currentModel || copy.noModel)
|
||||
: copy.openModelPicker,
|
||||
variant: 'action' as const
|
||||
})
|
||||
},
|
||||
{
|
||||
className: `w-7 justify-center px-0${terminalTakeover ? ' bg-accent/55 text-foreground' : ''}`,
|
||||
hidden: !chatOpen,
|
||||
@@ -465,11 +420,6 @@ export function useStatusbarItems({
|
||||
contextBar,
|
||||
contextUsage,
|
||||
copy,
|
||||
currentFastMode,
|
||||
currentModel,
|
||||
currentProvider,
|
||||
currentReasoningEffort,
|
||||
modelMenuContent,
|
||||
sessionStartedAt,
|
||||
showYoloToggle,
|
||||
terminalTakeover,
|
||||
|
||||
84
apps/desktop/src/app/shell/model-edit-submenu.test.tsx
Normal file
84
apps/desktop/src/app/shell/model-edit-submenu.test.tsx
Normal file
@@ -0,0 +1,84 @@
|
||||
import { cleanup, fireEvent, render, screen } from '@testing-library/react'
|
||||
import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import { DropdownMenu, DropdownMenuContent, DropdownMenuSub, DropdownMenuSubTrigger } from '@/components/ui/dropdown-menu'
|
||||
import { $modelPresets, getModelPreset } from '@/store/model-presets'
|
||||
import { $activeSessionId } from '@/store/session'
|
||||
|
||||
import { type FastControl, ModelEditSubmenu } from './model-edit-submenu'
|
||||
|
||||
// Radix calls these on open; jsdom doesn't implement them.
|
||||
beforeAll(() => {
|
||||
Element.prototype.scrollIntoView = vi.fn()
|
||||
Element.prototype.hasPointerCapture = vi.fn(() => false)
|
||||
Element.prototype.releasePointerCapture = vi.fn()
|
||||
})
|
||||
|
||||
beforeEach(() => {
|
||||
$modelPresets.set({})
|
||||
$activeSessionId.set(null)
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
cleanup()
|
||||
vi.clearAllMocks()
|
||||
})
|
||||
|
||||
// Render the submenu inside an open menu/sub so its content (switches) mounts.
|
||||
function renderSubmenu(opts: { fastControl: FastControl; reasoning: boolean; requestGateway: () => Promise<unknown> }) {
|
||||
return render(
|
||||
<DropdownMenu open>
|
||||
<DropdownMenuContent>
|
||||
<DropdownMenuSub open>
|
||||
<DropdownMenuSubTrigger>edit</DropdownMenuSubTrigger>
|
||||
<ModelEditSubmenu
|
||||
effort="medium"
|
||||
fastControl={opts.fastControl}
|
||||
isActive
|
||||
model="m1"
|
||||
onSelectModel={vi.fn()}
|
||||
provider="p1"
|
||||
reasoning={opts.reasoning}
|
||||
requestGateway={opts.requestGateway as never}
|
||||
/>
|
||||
</DropdownMenuSub>
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
)
|
||||
}
|
||||
|
||||
// Regression: editing the active row before a live session exists must stay
|
||||
// preset-only — the gateway's config.set falls back to global config when no
|
||||
// session matches, so it must not be called. (Caught in the second review.)
|
||||
describe('ModelEditSubmenu no-session guard', () => {
|
||||
it('param fast: records the preset but skips the gateway without a session', () => {
|
||||
const requestGateway = vi.fn().mockResolvedValue({})
|
||||
renderSubmenu({ fastControl: { kind: 'param', on: false }, reasoning: false, requestGateway })
|
||||
|
||||
fireEvent.click(screen.getByRole('switch'))
|
||||
|
||||
expect(getModelPreset('p1', 'm1').fast).toBe(true)
|
||||
expect(requestGateway).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('reasoning: records the preset but skips the gateway without a session', () => {
|
||||
const requestGateway = vi.fn().mockResolvedValue({})
|
||||
renderSubmenu({ fastControl: { kind: 'none' }, reasoning: true, requestGateway })
|
||||
|
||||
// Thinking starts on (medium); toggling it off routes through patchReasoning.
|
||||
fireEvent.click(screen.getByRole('switch'))
|
||||
|
||||
expect(getModelPreset('p1', 'm1').effort).toBe('none')
|
||||
expect(requestGateway).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('param fast: pushes to the gateway once a session is active', async () => {
|
||||
const requestGateway = vi.fn().mockResolvedValue({})
|
||||
$activeSessionId.set('sess1')
|
||||
renderSubmenu({ fastControl: { kind: 'param', on: false }, reasoning: false, requestGateway })
|
||||
|
||||
fireEvent.click(screen.getByRole('switch'))
|
||||
|
||||
expect(requestGateway).toHaveBeenCalledWith('config.set', { key: 'fast', session_id: 'sess1', value: 'fast' })
|
||||
})
|
||||
})
|
||||
@@ -12,13 +12,9 @@ import {
|
||||
} from '@/components/ui/dropdown-menu'
|
||||
import { Switch } from '@/components/ui/switch'
|
||||
import { useI18n } from '@/i18n'
|
||||
import { setModelPreset } from '@/store/model-presets'
|
||||
import { notifyError } from '@/store/notifications'
|
||||
import {
|
||||
$activeSessionId,
|
||||
$currentReasoningEffort,
|
||||
setCurrentFastMode,
|
||||
setCurrentReasoningEffort
|
||||
} from '@/store/session'
|
||||
import { $activeSessionId, setCurrentFastMode, setCurrentReasoningEffort } from '@/store/session'
|
||||
|
||||
// Hermes' real reasoning levels (see VALID_REASONING_EFFORTS); `none` is owned
|
||||
// by the Thinking toggle, not the radio.
|
||||
@@ -76,96 +72,104 @@ export function resolveFastControl(
|
||||
}
|
||||
|
||||
interface ModelEditSubmenuProps {
|
||||
/** This row's effective reasoning effort (live for the active model, else its
|
||||
* preset) — the submenu shows and edits from this, never the raw session. */
|
||||
effort: string
|
||||
/** How fast mode is offered for this model (param toggle vs. variant swap). */
|
||||
fastControl: FastControl
|
||||
/** Whether this row's model is the active one. */
|
||||
isActive: boolean
|
||||
/** Switch to this model (resolves false on failure). Awaited before applying
|
||||
* edits when not active so a failed switch doesn't write to the old model. */
|
||||
onActivate: () => Promise<boolean> | void
|
||||
/** This row's model id — edits persist as its global preset. */
|
||||
model: string
|
||||
/** Switch to a specific model id (used to swap base ⇄ -fast variant). */
|
||||
onSelectModel: (model: string) => Promise<boolean> | void
|
||||
/** This row's provider slug — edits persist as its global preset. */
|
||||
provider: string
|
||||
/** Whether this model supports reasoning effort. */
|
||||
reasoning: boolean
|
||||
requestGateway: <T>(method: string, params?: Record<string, unknown>) => Promise<T>
|
||||
}
|
||||
|
||||
export function ModelEditSubmenu({
|
||||
effort,
|
||||
fastControl,
|
||||
isActive,
|
||||
onActivate,
|
||||
model,
|
||||
onSelectModel,
|
||||
provider,
|
||||
reasoning,
|
||||
requestGateway
|
||||
}: ModelEditSubmenuProps) {
|
||||
const { t } = useI18n()
|
||||
const copy = t.shell.modelOptions
|
||||
// Reactive session state comes straight from the stores rather than being
|
||||
// drilled through the panel, so editing it re-renders only this submenu.
|
||||
const activeSessionId = useStore($activeSessionId)
|
||||
const currentReasoningEffort = useStore($currentReasoningEffort)
|
||||
|
||||
const effort = normalizeEffort(currentReasoningEffort)
|
||||
const thinkingOn = isThinkingEnabled(currentReasoningEffort)
|
||||
const effortValue = normalizeEffort(effort)
|
||||
const thinkingOn = isThinkingEnabled(effort)
|
||||
|
||||
// Reasoning/fast are session-scoped (they apply to the active model), so
|
||||
// editing a non-active model first switches to it. Returns false if the
|
||||
// switch failed, so callers skip applying to the wrong (previous) model.
|
||||
const ensureActive = async (): Promise<boolean> => {
|
||||
if (isActive) {
|
||||
return true
|
||||
// Editing always records the model's global preset; the active model also gets
|
||||
// it pushed onto the live session. Non-active edits stay preset-only — they do
|
||||
// not switch you to that model.
|
||||
const patchReasoning = async (next: string) => {
|
||||
setModelPreset(provider, model, { effort: next })
|
||||
|
||||
if (!isActive) {
|
||||
return
|
||||
}
|
||||
|
||||
return (await onActivate()) !== false
|
||||
}
|
||||
|
||||
const patchReasoning = async (next: string, rollback: string) => {
|
||||
setCurrentReasoningEffort(next)
|
||||
|
||||
// Preset-only without a session: `isActive` holds for the global/default
|
||||
// row pre-session, and the gateway's `config.set` falls back to global
|
||||
// config when none matches — so don't reach it (preset + optimistic store
|
||||
// are the whole effect). Same guard in applyModelPreset / toggleFast.
|
||||
if (!activeSessionId) {
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
if (!(await ensureActive())) {
|
||||
setCurrentReasoningEffort(rollback)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
await requestGateway('config.set', {
|
||||
key: 'reasoning',
|
||||
session_id: activeSessionId ?? '',
|
||||
value: next
|
||||
})
|
||||
await requestGateway('config.set', { key: 'reasoning', session_id: activeSessionId, value: next })
|
||||
} catch (err) {
|
||||
setCurrentReasoningEffort(rollback)
|
||||
setCurrentReasoningEffort(effort)
|
||||
setModelPreset(provider, model, { effort })
|
||||
notifyError(err, copy.updateFailed)
|
||||
}
|
||||
}
|
||||
|
||||
const toggleFast = (enabled: boolean) => {
|
||||
if (fastControl.kind === 'variant') {
|
||||
// Fast is a separate model id — swap to it (or back to the base).
|
||||
void onSelectModel(enabled ? fastControl.fastId : fastControl.baseId)
|
||||
// Fast is a separate model id. Record the choice on the base model's
|
||||
// preset (selectFamily picks the `-fast` sibling later when set), and
|
||||
// only swap models now if this is the active row — inactive edits must
|
||||
// stay preset-only, same as the param path below.
|
||||
setModelPreset(provider, fastControl.baseId, { fast: enabled })
|
||||
|
||||
if (isActive) {
|
||||
void onSelectModel(enabled ? fastControl.fastId : fastControl.baseId)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if (fastControl.kind === 'param') {
|
||||
setModelPreset(provider, model, { fast: enabled })
|
||||
|
||||
if (!isActive) {
|
||||
return
|
||||
}
|
||||
|
||||
setCurrentFastMode(enabled)
|
||||
|
||||
// Preset-only without a session (see patchReasoning).
|
||||
if (!activeSessionId) {
|
||||
return
|
||||
}
|
||||
void (async () => {
|
||||
try {
|
||||
if (!(await ensureActive())) {
|
||||
setCurrentFastMode(!enabled)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
await requestGateway('config.set', {
|
||||
key: 'fast',
|
||||
session_id: activeSessionId ?? '',
|
||||
value: enabled ? 'fast' : 'normal'
|
||||
})
|
||||
await requestGateway('config.set', { key: 'fast', session_id: activeSessionId, value: enabled ? 'fast' : 'normal' })
|
||||
} catch (err) {
|
||||
setCurrentFastMode(!enabled)
|
||||
setModelPreset(provider, model, { fast: !enabled })
|
||||
notifyError(err, copy.fastFailed)
|
||||
}
|
||||
})()
|
||||
@@ -188,9 +192,7 @@ export function ModelEditSubmenu({
|
||||
<Switch
|
||||
checked={thinkingOn}
|
||||
className="ml-auto"
|
||||
onCheckedChange={checked =>
|
||||
void patchReasoning(checked ? effort || 'medium' : 'none', currentReasoningEffort)
|
||||
}
|
||||
onCheckedChange={checked => void patchReasoning(checked ? effortValue || 'medium' : 'none')}
|
||||
size="xs"
|
||||
/>
|
||||
</DropdownMenuItem>
|
||||
@@ -205,10 +207,7 @@ export function ModelEditSubmenu({
|
||||
<>
|
||||
<DropdownMenuSeparator className="mx-0" />
|
||||
<DropdownMenuLabel className={dropdownMenuSectionLabel}>{copy.effort}</DropdownMenuLabel>
|
||||
<DropdownMenuRadioGroup
|
||||
onValueChange={value => void patchReasoning(value, currentReasoningEffort)}
|
||||
value={effort}
|
||||
>
|
||||
<DropdownMenuRadioGroup onValueChange={value => void patchReasoning(value)} value={effortValue}>
|
||||
{EFFORT_OPTIONS.map(option => (
|
||||
<DropdownMenuRadioItem
|
||||
className={dropdownMenuRow}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { useStore } from '@nanostores/react'
|
||||
import { useQuery } from '@tanstack/react-query'
|
||||
import { useMemo, useState } from 'react'
|
||||
import { useQuery, useQueryClient } from '@tanstack/react-query'
|
||||
import { createContext, useContext, useMemo, useState } from 'react'
|
||||
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import {
|
||||
@@ -18,8 +18,9 @@ import { Skeleton } from '@/components/ui/skeleton'
|
||||
import type { HermesGateway } from '@/hermes'
|
||||
import { getGlobalModelOptions } from '@/hermes'
|
||||
import { useI18n } from '@/i18n'
|
||||
import { displayModelName, modelDisplayParts, reasoningEffortLabel } from '@/lib/model-status-label'
|
||||
import { currentPickerSelection, displayModelName, modelDisplayParts, reasoningEffortLabel } from '@/lib/model-status-label'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { $modelPresets, applyModelPreset, modelPresetKey } from '@/store/model-presets'
|
||||
import {
|
||||
$visibleModels,
|
||||
collapseModelFamilies,
|
||||
@@ -40,9 +41,14 @@ import type { ModelOptionProvider, ModelOptionsResponse } from '@/types/hermes'
|
||||
|
||||
import { ModelEditSubmenu, resolveFastControl } from './model-edit-submenu'
|
||||
|
||||
// Lets the host dropdown (model-pill) hand the panel a way to dismiss itself so
|
||||
// clicking a model row commits + closes, while the hover-revealed edit submenu
|
||||
// (reasoning/fast) stays open to play with (its items preventDefault on select).
|
||||
export const ModelMenuCloseContext = createContext<() => void>(() => {})
|
||||
|
||||
interface ModelMenuPanelProps {
|
||||
gateway?: HermesGateway
|
||||
onSelectModel: (selection: { model: string; persistGlobal: boolean; provider: string }) => Promise<boolean> | void
|
||||
onSelectModel: (selection: { model: string; provider: string }) => Promise<boolean> | void
|
||||
requestGateway: <T>(method: string, params?: Record<string, unknown>) => Promise<T>
|
||||
}
|
||||
|
||||
@@ -54,7 +60,10 @@ interface ProviderGroup {
|
||||
export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: ModelMenuPanelProps) {
|
||||
const { t } = useI18n()
|
||||
const copy = t.shell.modelMenu
|
||||
const closeMenu = useContext(ModelMenuCloseContext)
|
||||
const [search, setSearch] = useState('')
|
||||
const [refreshing, setRefreshing] = useState(false)
|
||||
const queryClient = useQueryClient()
|
||||
// Reactive session state is read from the stores here (not drilled in), so
|
||||
// toggling effort/fast/model re-renders this panel in place without forcing
|
||||
// the parent to rebuild the menu content (which would close the dropdown).
|
||||
@@ -63,6 +72,7 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
|
||||
const currentModel = useStore($currentModel)
|
||||
const currentProvider = useStore($currentProvider)
|
||||
const currentReasoningEffort = useStore($currentReasoningEffort)
|
||||
const modelPresets = useStore($modelPresets)
|
||||
const visibleModels = useStore($visibleModels)
|
||||
|
||||
const modelOptions = useQuery({
|
||||
@@ -76,8 +86,12 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
|
||||
}
|
||||
})
|
||||
|
||||
const optionsModel = String(modelOptions.data?.model ?? currentModel ?? '')
|
||||
const optionsProvider = String(modelOptions.data?.provider ?? currentProvider ?? '')
|
||||
const { model: optionsModel, provider: optionsProvider } = currentPickerSelection(
|
||||
!!activeSessionId,
|
||||
{ model: currentModel, provider: currentProvider },
|
||||
modelOptions.data
|
||||
)
|
||||
|
||||
const loading = modelOptions.isPending && !modelOptions.data
|
||||
|
||||
const error = modelOptions.error
|
||||
@@ -87,13 +101,73 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
|
||||
: null
|
||||
|
||||
const providers = modelOptions.data?.providers
|
||||
|
||||
const effectiveVisibleModels = useMemo(
|
||||
() => effectiveVisibleKeys(visibleModels, providers ?? []),
|
||||
[visibleModels, providers]
|
||||
)
|
||||
|
||||
const switchTo = (model: string, provider: string) =>
|
||||
onSelectModel({ model, persistGlobal: !activeSessionId, provider })
|
||||
// The composer picker never persists the profile default. With a session it
|
||||
// scopes the switch to that session; with none it's UI state shipped on the
|
||||
// next session.create (see selectModel). The default lives in Settings → Model.
|
||||
const switchTo = (model: string, provider: string) => onSelectModel({ model, provider })
|
||||
|
||||
// Explicit "Refresh Models": re-fetch the catalog with refresh:true so the
|
||||
// backend busts its 1h provider-model disk cache and re-pulls each provider's
|
||||
// live list. Fixes live-only models (e.g. OpenCode Zen free tier) vanishing
|
||||
// when the cache expires and falls back to the curated static list.
|
||||
const refreshModels = async () => {
|
||||
if (refreshing) {
|
||||
return
|
||||
}
|
||||
|
||||
setRefreshing(true)
|
||||
|
||||
try {
|
||||
const queryKey = ['model-options', activeSessionId || 'global']
|
||||
|
||||
const next =
|
||||
gateway && activeSessionId
|
||||
? await gateway.request<ModelOptionsResponse>('model.options', {
|
||||
session_id: activeSessionId,
|
||||
refresh: true
|
||||
})
|
||||
: await getGlobalModelOptions({ refresh: true })
|
||||
|
||||
queryClient.setQueryData<ModelOptionsResponse>(queryKey, next)
|
||||
} catch {
|
||||
// Network/backend hiccup — fall back to a plain invalidate so the next
|
||||
// open re-fetches (still cached, but no worse than before).
|
||||
void queryClient.invalidateQueries({ queryKey: ['model-options'] })
|
||||
} finally {
|
||||
setRefreshing(false)
|
||||
}
|
||||
}
|
||||
|
||||
// Selecting a model row restores that model's remembered preset onto the
|
||||
// session (effort/fast), gated by capability. Unset → Hermes defaults.
|
||||
const selectFamily = async (family: ModelFamily, provider: ModelOptionProvider) => {
|
||||
const caps = provider.capabilities?.[family.id]
|
||||
const preset = modelPresets[modelPresetKey(provider.slug, family.id)] ?? {}
|
||||
|
||||
// Variant-fast models (no speed param) express "fast" as a separate `-fast`
|
||||
// id, so honor the saved preset by selecting that sibling. Param-fast is
|
||||
// applied via applyModelPreset below instead.
|
||||
const variantFast = !(caps?.fast ?? false) && !!family.fastId
|
||||
const targetId = variantFast && preset.fast === true ? family.fastId! : family.id
|
||||
|
||||
if ((await switchTo(targetId, provider.slug)) === false) {
|
||||
return
|
||||
}
|
||||
|
||||
await applyModelPreset(
|
||||
{
|
||||
effort: (caps?.reasoning ?? true) ? (preset.effort ?? 'medium') : undefined,
|
||||
fast: (caps?.fast ?? false) ? (preset.fast ?? false) : undefined
|
||||
},
|
||||
{ failMessage: t.shell.modelOptions.updateFailed, request: requestGateway, sessionId: activeSessionId }
|
||||
)
|
||||
}
|
||||
|
||||
const groups = useMemo(
|
||||
() => groupModels(providers ?? [], search, { model: optionsModel, provider: optionsProvider }, effectiveVisibleModels),
|
||||
@@ -152,37 +226,42 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
|
||||
// -fast variant carries the same param support as its base.
|
||||
const caps = group.provider.capabilities?.[family.id]
|
||||
|
||||
// Single source of truth for the active row's fast state — keeps
|
||||
// the row label in lock-step with the submenu's Fast toggle and
|
||||
// handles the standalone `-fast` id case.
|
||||
// Effective settings for this row: live session state when it's
|
||||
// the active model, otherwise its remembered preset (Hermes
|
||||
// defaults when unset). Row label AND submenu read from these so
|
||||
// they never disagree.
|
||||
const preset = modelPresets[modelPresetKey(group.provider.slug, family.id)] ?? {}
|
||||
const effEffort = isCurrent ? currentReasoningEffort : preset.effort ?? ''
|
||||
const effFast = isCurrent ? currentFastMode : preset.fast ?? false
|
||||
|
||||
const fastControl = resolveFastControl(
|
||||
activeId ?? family.id,
|
||||
group.provider.models ?? [],
|
||||
caps?.fast ?? false,
|
||||
currentFastMode
|
||||
effFast
|
||||
)
|
||||
|
||||
// Grayed text is live session state only. Do not label inactive
|
||||
// rows as "Fast" just because they have a fast-capable sibling:
|
||||
// that makes an off Fast toggle look like it is already on.
|
||||
const meta = isCurrent
|
||||
? [
|
||||
fastControl.kind !== 'none' && fastControl.on ? copy.fast : null,
|
||||
reasoningEffortLabel(currentReasoningEffort) || copy.medium
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(' ')
|
||||
: ''
|
||||
const meta = [
|
||||
fastControl.kind !== 'none' && fastControl.on ? copy.fast : null,
|
||||
(caps?.reasoning ?? true) ? reasoningEffortLabel(effEffort) || copy.medium : null
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(' ')
|
||||
|
||||
// Every row is a hover-Edit submenu trigger. Activating it
|
||||
// (pointer or keyboard) switches to the family's base model;
|
||||
// the Fast toggle inside swaps to the -fast sibling (or flips
|
||||
// the speed param). The sub-trigger has no `onSelect`, so wire
|
||||
// both click and Enter/Space for keyboard parity.
|
||||
// (pointer or keyboard) switches to the family's base model and
|
||||
// restores its preset; the Fast toggle inside swaps to the -fast
|
||||
// sibling (or flips the speed param). The sub-trigger has no
|
||||
// `onSelect`, so wire both click and Enter/Space for keyboard parity.
|
||||
// Clicking the row commits the model and closes the picker; the
|
||||
// edit submenu (reasoning/fast) is reached by HOVER, so you can
|
||||
// still tweak those without the click dismissing everything.
|
||||
const activate = () => {
|
||||
if (!isCurrent) {
|
||||
void switchTo(family.id, group.provider.slug)
|
||||
void selectFamily(family, group.provider)
|
||||
}
|
||||
|
||||
closeMenu()
|
||||
}
|
||||
|
||||
return (
|
||||
@@ -204,10 +283,12 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
|
||||
{isCurrent ? <Codicon className="ml-auto text-foreground" name="check" size="0.75rem" /> : null}
|
||||
</DropdownMenuSubTrigger>
|
||||
<ModelEditSubmenu
|
||||
effort={effEffort}
|
||||
fastControl={fastControl}
|
||||
isActive={isCurrent}
|
||||
onActivate={() => switchTo(family.id, group.provider.slug)}
|
||||
model={family.id}
|
||||
onSelectModel={nextModel => switchTo(nextModel, group.provider.slug)}
|
||||
provider={group.provider.slug}
|
||||
reasoning={caps?.reasoning ?? true}
|
||||
requestGateway={requestGateway}
|
||||
/>
|
||||
@@ -221,6 +302,18 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
|
||||
|
||||
<DropdownMenuSeparator className="mx-0" />
|
||||
|
||||
<DropdownMenuItem
|
||||
className={cn(dropdownMenuRow, 'text-(--ui-text-tertiary)')}
|
||||
disabled={refreshing}
|
||||
onSelect={event => {
|
||||
event.preventDefault()
|
||||
void refreshModels()
|
||||
}}
|
||||
>
|
||||
<Codicon className={cn('mr-1.5', refreshing && 'animate-spin')} name="sync" size="0.75rem" />
|
||||
{copy.refreshModels}
|
||||
</DropdownMenuItem>
|
||||
|
||||
<DropdownMenuItem
|
||||
className={cn(dropdownMenuRow, 'text-(--ui-text-tertiary)')}
|
||||
onSelect={() => setModelVisibilityOpen(true)}
|
||||
|
||||
@@ -46,6 +46,12 @@ export interface SlashExecResponse {
|
||||
warning?: string
|
||||
}
|
||||
|
||||
export interface BrowserManageResponse {
|
||||
connected?: boolean
|
||||
url?: string
|
||||
messages?: string[]
|
||||
}
|
||||
|
||||
export interface SessionSteerResponse {
|
||||
// 'queued' == accepted into the live turn's steer slot (injected at the next
|
||||
// tool-result boundary); 'rejected' == no live tool window, caller queues.
|
||||
|
||||
@@ -0,0 +1,129 @@
|
||||
// Lists and blockquotes have chrome beside the text (markers, the quote
|
||||
// border) whose side is driven by the box's CSS direction, which the
|
||||
// unicode-bidi:plaintext rules never touch. These tests pin the split of
|
||||
// responsibilities: ul/ol/blockquote carry dir="auto" so the browser
|
||||
// resolves their box direction from content, inline code carries dir="ltr"
|
||||
// so it neither votes in that resolution nor reorders, and plain prose
|
||||
// blocks stay attribute-free (the plaintext CSS owns them). jsdom does not
|
||||
// resolve dir="auto", so the contract is asserted at the attribute level.
|
||||
import { AssistantRuntimeProvider, type ThreadMessage, useExternalStoreRuntime } from '@assistant-ui/react'
|
||||
import { render, screen } from '@testing-library/react'
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import { Thread } from './thread'
|
||||
|
||||
const createdAt = new Date('2026-06-01T00:00:00.000Z')
|
||||
|
||||
class TestResizeObserver {
|
||||
observe() {}
|
||||
unobserve() {}
|
||||
disconnect() {}
|
||||
}
|
||||
|
||||
vi.stubGlobal('ResizeObserver', TestResizeObserver)
|
||||
vi.stubGlobal('requestAnimationFrame', (callback: FrameRequestCallback) =>
|
||||
window.setTimeout(() => callback(performance.now()), 0)
|
||||
)
|
||||
vi.stubGlobal('cancelAnimationFrame', (id: number) => window.clearTimeout(id))
|
||||
|
||||
Element.prototype.scrollTo = function scrollTo() {}
|
||||
|
||||
function stubOffsetDimension(
|
||||
prop: 'offsetHeight' | 'offsetWidth',
|
||||
clientProp: 'clientHeight' | 'clientWidth',
|
||||
fallback: number
|
||||
) {
|
||||
const previous = Object.getOwnPropertyDescriptor(HTMLElement.prototype, prop)
|
||||
|
||||
Object.defineProperty(HTMLElement.prototype, prop, {
|
||||
configurable: true,
|
||||
get() {
|
||||
return previous?.get?.call(this) || (this as HTMLElement)[clientProp] || fallback
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
stubOffsetDimension('offsetWidth', 'clientWidth', 800)
|
||||
stubOffsetDimension('offsetHeight', 'clientHeight', 600)
|
||||
|
||||
function userMessage(): ThreadMessage {
|
||||
return {
|
||||
id: 'user-1',
|
||||
role: 'user',
|
||||
content: [{ type: 'text', text: 'hi' }],
|
||||
attachments: [],
|
||||
createdAt,
|
||||
metadata: { custom: {} }
|
||||
} as ThreadMessage
|
||||
}
|
||||
|
||||
function assistantMessage(text: string): ThreadMessage {
|
||||
return {
|
||||
id: 'assistant-1',
|
||||
role: 'assistant',
|
||||
content: [{ type: 'text', text }],
|
||||
status: { type: 'complete', reason: 'stop' },
|
||||
createdAt,
|
||||
metadata: {
|
||||
unstable_state: null,
|
||||
unstable_annotations: [],
|
||||
unstable_data: [],
|
||||
steps: [],
|
||||
custom: {}
|
||||
}
|
||||
} as ThreadMessage
|
||||
}
|
||||
|
||||
function Harness({ text }: { text: string }) {
|
||||
const runtime = useExternalStoreRuntime<ThreadMessage>({
|
||||
messages: [userMessage(), assistantMessage(text)],
|
||||
isRunning: false,
|
||||
onNew: async () => {}
|
||||
})
|
||||
|
||||
return (
|
||||
<AssistantRuntimeProvider runtime={runtime}>
|
||||
<Thread />
|
||||
</AssistantRuntimeProvider>
|
||||
)
|
||||
}
|
||||
|
||||
describe('block-level direction chrome', () => {
|
||||
it('lists carry dir="auto" so markers follow the resolved direction', async () => {
|
||||
render(<Harness text={'מקומות:\n\n1. חוף גורדון\n2. שוק הכרמל\n\n- פריט\n- item'} />)
|
||||
|
||||
const item = await screen.findByText(/חוף גורדון/)
|
||||
|
||||
expect(item.closest('ol')?.getAttribute('dir')).toBe('auto')
|
||||
|
||||
const bullet = await screen.findByText(/פריט/)
|
||||
|
||||
expect(bullet.closest('ul')?.getAttribute('dir')).toBe('auto')
|
||||
})
|
||||
|
||||
it('blockquotes carry dir="auto" so the border follows the resolved direction', async () => {
|
||||
render(<Harness text={'> ציטוט קצר בעברית'} />)
|
||||
|
||||
const quote = await screen.findByText(/ציטוט קצר/)
|
||||
|
||||
expect(quote.closest('blockquote')?.getAttribute('dir')).toBe('auto')
|
||||
})
|
||||
|
||||
it('inline code carries dir="ltr" so it does not vote in dir="auto" resolution', async () => {
|
||||
render(<Harness text={'1. `npm install` מתקין תלויות'} />)
|
||||
|
||||
const code = await screen.findByText('npm install')
|
||||
|
||||
expect(code.tagName).toBe('CODE')
|
||||
expect(code.getAttribute('dir')).toBe('ltr')
|
||||
expect(code.closest('ol')?.getAttribute('dir')).toBe('auto')
|
||||
})
|
||||
|
||||
it('plain prose blocks stay attribute-free (plaintext CSS owns them)', async () => {
|
||||
render(<Harness text={'שלום לכולם'} />)
|
||||
|
||||
const paragraph = await screen.findByText(/שלום לכולם/)
|
||||
|
||||
expect(paragraph.closest('p')?.hasAttribute('dir')).toBe(false)
|
||||
})
|
||||
})
|
||||
@@ -322,13 +322,29 @@ function shortLabel(type: HermesRefType, id: string): string {
|
||||
return tail || id
|
||||
}
|
||||
|
||||
function safeEmbeddedImages(text: string) {
|
||||
try {
|
||||
return extractEmbeddedImages(text)
|
||||
} catch {
|
||||
return { cleanedText: text, images: [] as string[] }
|
||||
}
|
||||
}
|
||||
|
||||
function safeDirectiveSegments(text: string): Unstable_DirectiveSegment[] {
|
||||
try {
|
||||
return [...hermesDirectiveFormatter.parse(text)]
|
||||
} catch {
|
||||
return [{ kind: 'text', text }]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Renders text containing Hermes directives (`@file:...`, `@image:...`) as
|
||||
* inline chips. Embedded MEDIA images render below as a thumbnail row.
|
||||
*/
|
||||
export function DirectiveContent({ text }: { text: string }) {
|
||||
const { cleanedText, images } = useMemo(() => extractEmbeddedImages(text ?? ''), [text])
|
||||
const segments = useMemo(() => hermesDirectiveFormatter.parse(cleanedText), [cleanedText])
|
||||
const { cleanedText, images } = useMemo(() => safeEmbeddedImages(text ?? ''), [text])
|
||||
const segments = useMemo(() => safeDirectiveSegments(cleanedText), [cleanedText])
|
||||
|
||||
return (
|
||||
<span className="whitespace-pre-line" data-slot="aui_directive-text">
|
||||
|
||||
@@ -201,4 +201,13 @@ describe('preprocessMarkdown', () => {
|
||||
|
||||
expect(output).toContain('<https://example.com/a_b/c~d/page>')
|
||||
})
|
||||
|
||||
it('handles a fenced block larger than V8 spread-argument limit', () => {
|
||||
// A single huge code block (e.g. a logged minified bundle) used to throw
|
||||
// `RangeError: Maximum call stack size exceeded` via `out.push(...lines)`.
|
||||
const body = Array.from({ length: 200_000 }, (_, i) => `line ${i}`).join('\n')
|
||||
const input = `\`\`\`js\n${body}\n\`\`\``
|
||||
|
||||
expect(() => preprocessMarkdown(input)).not.toThrow()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -19,8 +19,9 @@ import {
|
||||
useState
|
||||
} from 'react'
|
||||
|
||||
import { ExpandableBlock } from '@/components/chat/expandable-block'
|
||||
import { PreviewAttachment } from '@/components/chat/preview-attachment'
|
||||
import { SyntaxHighlighter } from '@/components/chat/shiki-highlighter'
|
||||
import { chunkByLines, SyntaxHighlighter } from '@/components/chat/shiki-highlighter'
|
||||
import { ZoomableImage } from '@/components/chat/zoomable-image'
|
||||
import { normalizeExternalUrl, openExternalLink, PrettyLink } from '@/lib/external-link'
|
||||
import { createMemoizedMathPlugin } from '@/lib/katex-memo'
|
||||
@@ -57,7 +58,11 @@ const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true })
|
||||
// flush) with a tail-bounded repair — see lib/remend-tail.ts. Must stay
|
||||
// module-scope so the prop identity is stable across renders.
|
||||
function preprocessWithTailRepair(text: string): string {
|
||||
return tailBoundedRemend(preprocessMarkdown(text))
|
||||
try {
|
||||
return tailBoundedRemend(preprocessMarkdown(text))
|
||||
} catch {
|
||||
return text
|
||||
}
|
||||
}
|
||||
|
||||
// Memoized block splitter. Streamdown calls `parseMarkdownIntoBlocks` (a full
|
||||
@@ -453,8 +458,35 @@ const MARKDOWN_CONTAINER_CLASS_NAME = cn(
|
||||
'[&>*:first-child]:mt-0 [&>*:last-child]:mb-0 [&>*+*]:mt-(--paragraph-gap)'
|
||||
)
|
||||
|
||||
const MAX_MARKDOWN_CHARS = 200_000
|
||||
|
||||
function HugeTextFallback({ containerClassName, text }: { containerClassName?: string; text: string }) {
|
||||
const chunks = useMemo(() => chunkByLines(text, 200), [text])
|
||||
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
'aui-md w-full max-w-none overflow-hidden rounded-[0.625rem] border border-border font-mono text-[0.7rem] leading-relaxed text-foreground/90',
|
||||
containerClassName
|
||||
)}
|
||||
>
|
||||
<ExpandableBlock className="p-2">
|
||||
{chunks.map((chunk, index) => (
|
||||
<div
|
||||
className="[content-visibility:auto]"
|
||||
key={index}
|
||||
style={{ containIntrinsicSize: `auto ${chunk.lines * 16}px` }}
|
||||
>
|
||||
{chunk.text}
|
||||
</div>
|
||||
))}
|
||||
</ExpandableBlock>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTextSurfaceProps) {
|
||||
const { status } = useMessagePartText()
|
||||
const { status, text } = useMessagePartText()
|
||||
const isStreaming = status.type === 'running'
|
||||
|
||||
// Keep code parsing enabled while streaming so incomplete fenced blocks still
|
||||
@@ -484,19 +516,37 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex
|
||||
<p className={cn('wrap-anywhere leading-(--dt-line-height)', className)} {...props} />
|
||||
),
|
||||
a: MarkdownLink,
|
||||
// Inline code must not vote when an ancestor resolves `dir="auto"`
|
||||
// (HTML's algorithm skips descendants that carry their own dir),
|
||||
// mirroring the CSS isolate that already keeps it out of the
|
||||
// plaintext scan. Fenced code never reaches this override; it goes
|
||||
// through the code plugin's CodeCard path.
|
||||
inlineCode: ({ className, ...props }: ComponentProps<'code'>) => (
|
||||
<code className={className} dir="ltr" {...props} />
|
||||
),
|
||||
// `---` as quiet spacing, not a heavy full-width rule.
|
||||
hr: (_props: ComponentProps<'hr'>) => <div aria-hidden className="my-3" />,
|
||||
// Lists and blockquotes have chrome that sits *beside* the text
|
||||
// (markers, the quote border), and that side is driven by the CSS
|
||||
// `direction` of the box, which `unicode-bidi: plaintext` never
|
||||
// touches — an RTL list otherwise renders its numbers stranded at
|
||||
// the far left. `dir="auto"` lets the browser resolve the box
|
||||
// direction from content; the plaintext rules in styles.css keep
|
||||
// owning per-line text direction. Inline code carries `dir="ltr"`
|
||||
// (see the `code` override) so it doesn't vote here either, same
|
||||
// contract as the CSS isolate.
|
||||
blockquote: ({ className, ...props }: ComponentProps<'blockquote'>) => (
|
||||
<blockquote
|
||||
className={cn('border-l-2 border-border pl-3 text-muted-foreground italic', className)}
|
||||
className={cn('border-s-2 border-border ps-3 text-muted-foreground italic', className)}
|
||||
dir="auto"
|
||||
{...props}
|
||||
/>
|
||||
),
|
||||
ul: ({ className, ...props }: ComponentProps<'ul'>) => (
|
||||
<ul className={cn('my-1 gap-0', className)} {...props} />
|
||||
<ul className={cn('my-1 gap-0', className)} dir="auto" {...props} />
|
||||
),
|
||||
ol: ({ className, ...props }: ComponentProps<'ol'>) => (
|
||||
<ol className={cn('my-1 gap-0', className)} {...props} />
|
||||
<ol className={cn('my-1 gap-0', className)} dir="auto" {...props} />
|
||||
),
|
||||
li: ({ className, ...props }: ComponentProps<'li'>) => (
|
||||
<li className={cn('leading-(--dt-line-height)', className)} {...props} />
|
||||
@@ -533,6 +583,10 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex
|
||||
[isStreaming]
|
||||
)
|
||||
|
||||
if (text.length > MAX_MARKDOWN_CHARS) {
|
||||
return <HugeTextFallback containerClassName={containerClassName} text={text} />
|
||||
}
|
||||
|
||||
return (
|
||||
<StreamdownTextPrimitive
|
||||
components={components}
|
||||
|
||||
@@ -378,6 +378,20 @@ function IntroHarness() {
|
||||
)
|
||||
}
|
||||
|
||||
function DismissibleErrorHarness({ onDismissError }: { onDismissError: (messageId: string) => void }) {
|
||||
const runtime = useExternalStoreRuntime<ThreadMessage>({
|
||||
messages: [assistantErrorMessage('OpenRouter rejected the request (403).')],
|
||||
isRunning: false,
|
||||
onNew: async () => {}
|
||||
})
|
||||
|
||||
return (
|
||||
<AssistantRuntimeProvider runtime={runtime}>
|
||||
<Thread onDismissError={onDismissError} />
|
||||
</AssistantRuntimeProvider>
|
||||
)
|
||||
}
|
||||
|
||||
describe('assistant-ui streaming renderer', () => {
|
||||
beforeEach(() => {
|
||||
resizeObservers.clear()
|
||||
@@ -421,6 +435,23 @@ describe('assistant-ui streaming renderer', () => {
|
||||
expect(screen.getByRole('alert').textContent).toContain('OpenRouter rejected the request (403).')
|
||||
})
|
||||
|
||||
it('omits the dismiss control when no onDismissError handler is supplied', () => {
|
||||
render(<MessageHarness message={assistantErrorMessage('OpenRouter rejected the request (403).')} />)
|
||||
|
||||
expect(screen.queryByRole('button', { name: 'Dismiss error' })).toBeNull()
|
||||
})
|
||||
|
||||
it('invokes onDismissError with the errored message id when the dismiss control is clicked', () => {
|
||||
const onDismissError = vi.fn()
|
||||
render(<DismissibleErrorHarness onDismissError={onDismissError} />)
|
||||
|
||||
const dismiss = screen.getByRole('button', { name: 'Dismiss error' })
|
||||
fireEvent.click(dismiss)
|
||||
|
||||
expect(onDismissError).toHaveBeenCalledTimes(1)
|
||||
expect(onDismissError).toHaveBeenCalledWith('assistant-error-1')
|
||||
})
|
||||
|
||||
// Scroll behavior (follow-at-bottom, escape-on-scroll-up, re-engage) is owned
|
||||
// by the use-stick-to-bottom library and covered by its own test suite. We
|
||||
// don't re-assert its scrollTop mechanics here — doing so in jsdom (no real
|
||||
|
||||
@@ -22,7 +22,7 @@ import {
|
||||
resetThreadScroll,
|
||||
setThreadAtBottom
|
||||
} from '@/store/thread-scroll'
|
||||
import { isNewSessionWindow, isSecondaryWindow } from '@/store/windows'
|
||||
import { isSecondaryWindow } from '@/store/windows'
|
||||
|
||||
import { MessageRenderBoundary } from './message-render-boundary'
|
||||
|
||||
@@ -134,13 +134,20 @@ const ThreadMessageListInner: FC<ThreadMessageListProps> = ({
|
||||
const hiddenCount = firstVisible
|
||||
const visibleGroups = hiddenCount > 0 ? groups.slice(hiddenCount) : groups
|
||||
const restoreFromBottomRef = useRef<number | null>(null)
|
||||
const newSessionWindow = isNewSessionWindow()
|
||||
const newSessionTitlebarGap = 'calc(var(--titlebar-height)+0.75rem)'
|
||||
const threadContentTopPad = newSessionWindow
|
||||
// Secondary windows (new-session scratch, subagent watch, cmd-click pop-out)
|
||||
// hide the titlebar tool cluster + session header, but the OS traffic lights
|
||||
// still sit in the top-left, so reserve the titlebar gap above the transcript.
|
||||
const secondaryWindow = isSecondaryWindow()
|
||||
// NB: CSS calc() requires whitespace around the +/- operator. This string is
|
||||
// assigned verbatim to the --sticky-human-top inline style below (it does not
|
||||
// go through Tailwind, which would auto-space it), so the spaces are load-
|
||||
// bearing — without them the declaration is invalid, gets dropped, and the
|
||||
// sticky user bubble falls back to its ~4px default and slides under the OS
|
||||
// traffic lights.
|
||||
const secondaryTitlebarGap = 'calc(var(--titlebar-height) + 0.75rem)'
|
||||
const threadContentTopPad = secondaryWindow
|
||||
? 'pt-[calc(var(--titlebar-height)+0.75rem)]'
|
||||
: isSecondaryWindow()
|
||||
? 'pt-6'
|
||||
: 'pt-[calc(var(--titlebar-height)+1.5rem)]'
|
||||
: 'pt-[calc(var(--titlebar-height)-0.5rem)]'
|
||||
|
||||
useEffect(() => setThreadAtBottom(isAtBottom), [isAtBottom])
|
||||
useEffect(() => () => resetThreadScroll(), [])
|
||||
@@ -247,10 +254,21 @@ const ThreadMessageListInner: FC<ThreadMessageListProps> = ({
|
||||
style={
|
||||
{
|
||||
height: clampToComposer ? 'var(--thread-viewport-height)' : '100%',
|
||||
...(newSessionWindow ? { '--sticky-human-top': newSessionTitlebarGap } : {})
|
||||
...(secondaryWindow ? { '--sticky-human-top': secondaryTitlebarGap } : {})
|
||||
} as CSSProperties
|
||||
}
|
||||
>
|
||||
{secondaryWindow && (
|
||||
// Secondary windows hide the titlebar chrome, so the scroller runs to
|
||||
// the window's top edge and streamed text slides up under the OS
|
||||
// traffic lights. Content padding alone scrolls away with the text — a
|
||||
// fixed opaque strip (the titlebar's drag region) masks anything behind
|
||||
// it and keeps the window draggable, matching the main window's header.
|
||||
<div
|
||||
aria-hidden="true"
|
||||
className="absolute inset-x-0 top-0 z-10 h-(--titlebar-height) bg-background [-webkit-app-region:drag]"
|
||||
/>
|
||||
)}
|
||||
<div
|
||||
className="size-full overflow-x-hidden overflow-y-auto overscroll-contain"
|
||||
data-following={isAtBottom ? 'true' : 'false'}
|
||||
|
||||
@@ -91,7 +91,7 @@ import { attachmentDisplayText, attachmentId, pathLabel } from '@/lib/chat-runti
|
||||
import { DATA_IMAGE_URL_RE } from '@/lib/embedded-images'
|
||||
import { LinkifiedText } from '@/lib/external-link'
|
||||
import { triggerHaptic } from '@/lib/haptics'
|
||||
import { GitBranchIcon, Loader2Icon, Volume2Icon, VolumeXIcon } from '@/lib/icons'
|
||||
import { GitBranchIcon, Loader2Icon, Volume2Icon, VolumeXIcon, XIcon } from '@/lib/icons'
|
||||
import { extractPreviewTargets } from '@/lib/preview-targets'
|
||||
import { useEnterAnimation } from '@/lib/use-enter-animation'
|
||||
import { cn } from '@/lib/utils'
|
||||
@@ -169,6 +169,7 @@ export const Thread: FC<{
|
||||
loading?: ThreadLoadingState
|
||||
onBranchInNewChat?: (messageId: string) => void
|
||||
onCancel?: () => Promise<void> | void
|
||||
onDismissError?: (messageId: string) => void
|
||||
onRestoreToMessage?: (messageId: string) => Promise<void> | void
|
||||
sessionId?: string | null
|
||||
sessionKey?: string | null
|
||||
@@ -180,18 +181,19 @@ export const Thread: FC<{
|
||||
loading,
|
||||
onBranchInNewChat,
|
||||
onCancel,
|
||||
onDismissError,
|
||||
onRestoreToMessage,
|
||||
sessionId = null,
|
||||
sessionKey
|
||||
}) => {
|
||||
const messageComponents = useMemo(
|
||||
() => ({
|
||||
AssistantMessage: () => <AssistantMessage onBranchInNewChat={onBranchInNewChat} />,
|
||||
AssistantMessage: () => <AssistantMessage onBranchInNewChat={onBranchInNewChat} onDismissError={onDismissError} />,
|
||||
SystemMessage,
|
||||
UserEditComposer: () => <UserEditComposer cwd={cwd} gateway={gateway} sessionId={sessionId} />,
|
||||
UserMessage: () => <UserMessage onCancel={onCancel} onRestoreToMessage={onRestoreToMessage} />
|
||||
}),
|
||||
[cwd, gateway, onBranchInNewChat, onCancel, onRestoreToMessage, sessionId]
|
||||
[cwd, gateway, onBranchInNewChat, onCancel, onDismissError, onRestoreToMessage, sessionId]
|
||||
)
|
||||
|
||||
const emptyPlaceholder = intro ? (
|
||||
@@ -245,9 +247,13 @@ const CenteredThreadSpinner: FC = () => {
|
||||
)
|
||||
}
|
||||
|
||||
const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }> = ({ onBranchInNewChat }) => {
|
||||
const AssistantMessage: FC<{
|
||||
onBranchInNewChat?: (messageId: string) => void
|
||||
onDismissError?: (messageId: string) => void
|
||||
}> = ({ onBranchInNewChat, onDismissError }) => {
|
||||
const messageId = useAuiState(s => s.message.id)
|
||||
const messageRuntime = useMessageRuntime()
|
||||
const { t } = useI18n()
|
||||
|
||||
// PERF: this component must NOT subscribe to the streaming text. Every
|
||||
// selector here returns a value that stays referentially stable across
|
||||
@@ -306,10 +312,20 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
|
||||
)}
|
||||
<MessagePrimitive.Error>
|
||||
<ErrorPrimitive.Root
|
||||
className="mt-1.5 text-[0.78rem] leading-5 text-[color-mix(in_srgb,var(--dt-destructive)_78%,var(--ui-text-secondary))]"
|
||||
className="mt-1.5 flex items-start gap-1.5 text-[0.78rem] leading-5 text-[color-mix(in_srgb,var(--dt-destructive)_78%,var(--ui-text-secondary))]"
|
||||
role="alert"
|
||||
>
|
||||
<ErrorPrimitive.Message />
|
||||
<ErrorPrimitive.Message className="min-w-0 flex-1" />
|
||||
{onDismissError && (
|
||||
<TooltipIconButton
|
||||
className="-my-0.5 shrink-0 text-current opacity-70 hover:opacity-100"
|
||||
onClick={() => onDismissError(messageId)}
|
||||
side="top"
|
||||
tooltip={t.assistant.thread.dismissError}
|
||||
>
|
||||
<XIcon className="size-3.5" />
|
||||
</TooltipIconButton>
|
||||
)}
|
||||
</ErrorPrimitive.Root>
|
||||
</MessagePrimitive.Error>
|
||||
</div>
|
||||
@@ -811,7 +827,7 @@ function StickyHumanMessageContainer({ attachments, children }: { attachments?:
|
||||
// so without the carve-out, clicking a stuck bubble drags the window instead of
|
||||
// opening the edit composer.
|
||||
const USER_BUBBLE_BASE_CLASS =
|
||||
'composer-human-message standalone-glass relative flex w-full min-w-0 max-w-full flex-col gap-1.5 overflow-hidden rounded-xl border bg-(--dt-user-bubble) px-3 py-2 text-left [-webkit-app-region:no-drag]'
|
||||
'composer-human-message standalone-glass relative flex w-full min-w-0 max-w-full flex-col gap-1.5 overflow-y-auto rounded-xl border bg-(--dt-user-bubble) px-3 py-2 text-left [-webkit-app-region:no-drag]'
|
||||
|
||||
const USER_ACTION_ICON_BUTTON_CLASS =
|
||||
'grid place-items-center rounded-md bg-transparent text-(--ui-text-secondary) transition-colors hover:bg-(--ui-control-active-background) hover:text-foreground disabled:cursor-default disabled:text-(--ui-text-quaternary) disabled:opacity-70'
|
||||
|
||||
@@ -66,7 +66,7 @@ function CodeCardBody({ className, ...props }: React.ComponentProps<'div'>) {
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
'p-1.5 font-mono text-[0.7rem] leading-relaxed text-foreground/90 [&_pre]:m-0 [&_pre]:overflow-x-auto [&_pre]:bg-transparent! [&_pre]:px-2 [&_pre]:py-1.5 [&_pre]:font-mono [&_pre]:leading-relaxed',
|
||||
'font-mono text-[0.7rem] leading-relaxed text-foreground/90 [&_pre]:m-0 [&_pre]:overflow-x-auto [&_pre]:bg-transparent! [&_pre]:px-2 [&_pre]:py-1.5 [&_pre]:font-mono [&_pre]:leading-relaxed',
|
||||
className
|
||||
)}
|
||||
data-slot="code-card-body"
|
||||
|
||||
52
apps/desktop/src/components/chat/expandable-block.tsx
Normal file
52
apps/desktop/src/components/chat/expandable-block.tsx
Normal file
@@ -0,0 +1,52 @@
|
||||
'use client'
|
||||
|
||||
import { type ReactNode, useLayoutEffect, useRef, useState } from 'react'
|
||||
|
||||
import { ChevronDown } from '@/lib/icons'
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
interface ExpandableBlockProps {
|
||||
children: ReactNode
|
||||
className?: string
|
||||
}
|
||||
|
||||
export function ExpandableBlock({ children, className }: ExpandableBlockProps) {
|
||||
const innerRef = useRef<HTMLDivElement>(null)
|
||||
const [expanded, setExpanded] = useState(false)
|
||||
const [overflowing, setOverflowing] = useState(false)
|
||||
|
||||
useLayoutEffect(() => {
|
||||
const el = innerRef.current
|
||||
|
||||
if (!el) {return}
|
||||
|
||||
const measure = () => setOverflowing(el.scrollHeight > 121)
|
||||
measure()
|
||||
const observer = new ResizeObserver(measure)
|
||||
observer.observe(el)
|
||||
|
||||
return () => observer.disconnect()
|
||||
}, [])
|
||||
|
||||
return (
|
||||
<div className="relative">
|
||||
<div
|
||||
className={cn('overflow-y-auto', expanded ? 'max-h-[40dvh]' : 'max-h-[7.5rem]', className)}
|
||||
ref={innerRef}
|
||||
>
|
||||
{children}
|
||||
</div>
|
||||
{overflowing && (
|
||||
<button
|
||||
aria-expanded={expanded}
|
||||
aria-label={expanded ? 'Collapse' : 'Expand'}
|
||||
className="absolute inset-x-0 bottom-0 flex h-7 cursor-pointer items-end justify-center bg-linear-to-t from-(--ui-chat-surface-background) to-transparent pb-1 text-muted-foreground/70 transition-colors hover:text-foreground"
|
||||
onClick={() => setExpanded(v => !v)}
|
||||
type="button"
|
||||
>
|
||||
<ChevronDown className={cn('size-3.5 transition-transform', expanded && 'rotate-180')} />
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
37
apps/desktop/src/components/chat/shiki-highlighter.test.ts
Normal file
37
apps/desktop/src/components/chat/shiki-highlighter.test.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { chunkByLines, exceedsHighlightBudget } from '@/components/chat/shiki-highlighter'
|
||||
|
||||
describe('exceedsHighlightBudget', () => {
|
||||
it('highlights normal-sized blocks', () => {
|
||||
expect(exceedsHighlightBudget('const x = 1\n'.repeat(100))).toBe(false)
|
||||
})
|
||||
|
||||
it('skips highlighting past the line budget', () => {
|
||||
expect(exceedsHighlightBudget('x\n'.repeat(5_000))).toBe(true)
|
||||
})
|
||||
|
||||
it('skips highlighting past the char budget on few lines', () => {
|
||||
expect(exceedsHighlightBudget('a'.repeat(200_000))).toBe(true)
|
||||
})
|
||||
|
||||
it('short-circuits on char budget before line loop', () => {
|
||||
expect(exceedsHighlightBudget('y\n'.repeat(250_000))).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('chunkByLines', () => {
|
||||
it('keeps a small block as a single chunk', () => {
|
||||
const code = 'a\nb\nc'
|
||||
expect(chunkByLines(code, 200)).toEqual([{ text: code, lines: 3 }])
|
||||
})
|
||||
|
||||
it('splits a large block and reconstructs it losslessly', () => {
|
||||
const code = Array.from({ length: 1000 }, (_, i) => `line ${i}`).join('\n')
|
||||
const chunks = chunkByLines(code, 200)
|
||||
|
||||
expect(chunks).toHaveLength(5)
|
||||
expect(chunks.map(chunk => chunk.text).join('\n')).toBe(code)
|
||||
expect(chunks.reduce((sum, chunk) => sum + chunk.lines, 0)).toBe(1000)
|
||||
})
|
||||
})
|
||||
@@ -1,7 +1,7 @@
|
||||
'use client'
|
||||
|
||||
import type { SyntaxHighlighterProps } from '@assistant-ui/react-streamdown'
|
||||
import type { FC } from 'react'
|
||||
import { type FC, useMemo } from 'react'
|
||||
import ShikiHighlighter from 'react-shiki'
|
||||
|
||||
import {
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
CodeCardSubtitle,
|
||||
CodeCardTitle
|
||||
} from '@/components/chat/code-card'
|
||||
import { ExpandableBlock } from '@/components/chat/expandable-block'
|
||||
import { CopyButton } from '@/components/ui/copy-button'
|
||||
import { useI18n } from '@/i18n'
|
||||
import { codiconForLanguage, isLikelyProseCodeBlock, sanitizeLanguageTag } from '@/lib/markdown-code'
|
||||
@@ -43,6 +44,74 @@ const SHIKI_COLOR_REPLACEMENTS: Record<string, Record<string, string>> = {
|
||||
'github-light-default': { '#6e7781': '#57606a' }
|
||||
}
|
||||
|
||||
const MAX_HIGHLIGHT_CHARS = 150_000
|
||||
const MAX_HIGHLIGHT_LINES = 3_000
|
||||
const CHUNK_LINES = 200
|
||||
const EST_LINE_PX = 16
|
||||
|
||||
export function exceedsHighlightBudget(code: string): boolean {
|
||||
if (code.length > MAX_HIGHLIGHT_CHARS) {
|
||||
return true
|
||||
}
|
||||
|
||||
let lines = 1
|
||||
let idx = code.indexOf('\n')
|
||||
|
||||
while (idx !== -1) {
|
||||
if ((lines += 1) > MAX_HIGHLIGHT_LINES) {
|
||||
return true
|
||||
}
|
||||
|
||||
idx = code.indexOf('\n', idx + 1)
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
interface CodeChunk {
|
||||
text: string
|
||||
lines: number
|
||||
}
|
||||
|
||||
export function chunkByLines(code: string, perChunk: number): CodeChunk[] {
|
||||
const lines = code.split('\n')
|
||||
|
||||
if (lines.length <= perChunk) {
|
||||
return [{ text: code, lines: lines.length }]
|
||||
}
|
||||
|
||||
const chunks: CodeChunk[] = []
|
||||
|
||||
for (let i = 0; i < lines.length; i += perChunk) {
|
||||
const slice = lines.slice(i, i + perChunk)
|
||||
chunks.push({ text: slice.join('\n'), lines: slice.length })
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
const PlainCode: FC<{ code: string }> = ({ code }) => {
|
||||
const chunks = useMemo(() => chunkByLines(code, CHUNK_LINES), [code])
|
||||
|
||||
if (chunks.length === 1) {
|
||||
return <code className="block whitespace-pre">{code}</code>
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
{chunks.map((chunk, index) => (
|
||||
<code
|
||||
className="block whitespace-pre [content-visibility:auto]"
|
||||
key={index}
|
||||
style={{ containIntrinsicSize: `auto ${chunk.lines * EST_LINE_PX}px` }}
|
||||
>
|
||||
{chunk.text}
|
||||
</code>
|
||||
))}
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
export const SyntaxHighlighter: FC<HermesSyntaxHighlighterProps> = ({
|
||||
components: { Pre },
|
||||
language,
|
||||
@@ -64,6 +133,7 @@ export const SyntaxHighlighter: FC<HermesSyntaxHighlighterProps> = ({
|
||||
|
||||
const cleanLanguage = sanitizeLanguageTag(language || '')
|
||||
const label = cleanLanguage && cleanLanguage !== 'unknown' ? cleanLanguage : ''
|
||||
const plain = defer || exceedsHighlightBudget(trimmed)
|
||||
|
||||
return (
|
||||
<CodeCard data-streaming={defer ? 'true' : undefined}>
|
||||
@@ -83,24 +153,26 @@ export const SyntaxHighlighter: FC<HermesSyntaxHighlighterProps> = ({
|
||||
/>
|
||||
</CodeCardHeader>
|
||||
<CodeCardBody>
|
||||
<Pre className="aui-shiki m-0 overflow-hidden bg-transparent p-0">
|
||||
{defer ? (
|
||||
<code className="block whitespace-pre">{trimmed}</code>
|
||||
) : (
|
||||
<ShikiHighlighter
|
||||
addDefaultStyles={false}
|
||||
as="div"
|
||||
colorReplacements={SHIKI_COLOR_REPLACEMENTS}
|
||||
defaultColor="light-dark()"
|
||||
delay={120}
|
||||
language={language || 'text'}
|
||||
showLanguage={false}
|
||||
theme={SHIKI_THEME}
|
||||
>
|
||||
{trimmed}
|
||||
</ShikiHighlighter>
|
||||
)}
|
||||
</Pre>
|
||||
<ExpandableBlock>
|
||||
<Pre className="aui-shiki m-0 overflow-hidden bg-transparent p-0">
|
||||
{plain ? (
|
||||
<PlainCode code={trimmed} />
|
||||
) : (
|
||||
<ShikiHighlighter
|
||||
addDefaultStyles={false}
|
||||
as="div"
|
||||
colorReplacements={SHIKI_COLOR_REPLACEMENTS}
|
||||
defaultColor="light-dark()"
|
||||
delay={120}
|
||||
language={language || 'text'}
|
||||
showLanguage={false}
|
||||
theme={SHIKI_THEME}
|
||||
>
|
||||
{trimmed}
|
||||
</ShikiHighlighter>
|
||||
)}
|
||||
</Pre>
|
||||
</ExpandableBlock>
|
||||
</CodeCardBody>
|
||||
</CodeCard>
|
||||
)
|
||||
|
||||
@@ -2,6 +2,7 @@ import { useQuery } from '@tanstack/react-query'
|
||||
import { useState } from 'react'
|
||||
|
||||
import { useI18n } from '@/i18n'
|
||||
import { currentPickerSelection } from '@/lib/model-status-label'
|
||||
import type { ModelOptionProvider, ModelOptionsResponse, ModelPricing } from '@/types/hermes'
|
||||
|
||||
import type { HermesGateway } from '../hermes'
|
||||
@@ -11,7 +12,6 @@ import { startManualOnboarding } from '../store/onboarding'
|
||||
|
||||
import { InlineNotice } from './notifications'
|
||||
import { Button } from './ui/button'
|
||||
import { Checkbox } from './ui/checkbox'
|
||||
import { Command, CommandEmpty, CommandGroup, CommandInput, CommandItem, CommandList } from './ui/command'
|
||||
import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from './ui/dialog'
|
||||
import { Skeleton } from './ui/skeleton'
|
||||
@@ -23,7 +23,7 @@ interface ModelPickerDialogProps {
|
||||
sessionId?: string | null
|
||||
currentModel: string
|
||||
currentProvider: string
|
||||
onSelect: (selection: { provider: string; model: string; persistGlobal: boolean }) => void
|
||||
onSelect: (selection: { provider: string; model: string }) => void
|
||||
/**
|
||||
* Optional class to apply to DialogContent. Use to override z-index when
|
||||
* stacking the picker on top of another fixed overlay (e.g. the desktop
|
||||
@@ -45,7 +45,6 @@ export function ModelPickerDialog({
|
||||
}: ModelPickerDialogProps) {
|
||||
const { t } = useI18n()
|
||||
const copy = t.modelPicker
|
||||
const [persistGlobal, setPersistGlobal] = useState(!sessionId)
|
||||
// Own the search term so we can filter manually. cmdk's built-in
|
||||
// shouldFilter reorders items by its fuzzy-match score (≈alphabetical with
|
||||
// an empty query), which destroys the backend's curated order. We disable
|
||||
@@ -68,8 +67,13 @@ export function ModelPickerDialog({
|
||||
})
|
||||
|
||||
const providers = modelOptions.data?.providers ?? []
|
||||
const optionsModel = String(modelOptions.data?.model ?? currentModel ?? '')
|
||||
const optionsProvider = String(modelOptions.data?.provider ?? currentProvider ?? '')
|
||||
|
||||
const { model: optionsModel, provider: optionsProvider } = currentPickerSelection(
|
||||
!!sessionId,
|
||||
{ model: currentModel, provider: currentProvider },
|
||||
modelOptions.data
|
||||
)
|
||||
|
||||
const loading = modelOptions.isPending && !modelOptions.data
|
||||
|
||||
const error = modelOptions.error
|
||||
@@ -79,11 +83,7 @@ export function ModelPickerDialog({
|
||||
: null
|
||||
|
||||
const selectModel = (provider: ModelOptionProvider, model: string) => {
|
||||
onSelect({
|
||||
provider: provider.slug,
|
||||
model,
|
||||
persistGlobal: persistGlobal || !sessionId
|
||||
})
|
||||
onSelect({ provider: provider.slug, model })
|
||||
onOpenChange(false)
|
||||
}
|
||||
|
||||
@@ -128,24 +128,13 @@ export function ModelPickerDialog({
|
||||
</CommandList>
|
||||
</Command>
|
||||
|
||||
<DialogFooter className="flex-row items-center justify-between gap-3 bg-card p-3 sm:justify-between">
|
||||
<label className="flex cursor-pointer select-none items-center gap-2 text-xs text-muted-foreground">
|
||||
<Checkbox
|
||||
checked={persistGlobal || !sessionId}
|
||||
disabled={!sessionId}
|
||||
onCheckedChange={checked => setPersistGlobal(checked === true)}
|
||||
/>
|
||||
{sessionId ? copy.persistGlobalSession : copy.persistGlobal}
|
||||
</label>
|
||||
|
||||
<div className="flex items-center gap-2">
|
||||
<Button onClick={addProvider} variant="ghost">
|
||||
{copy.addProvider}
|
||||
</Button>
|
||||
<Button onClick={() => onOpenChange(false)} variant="outline">
|
||||
{t.common.cancel}
|
||||
</Button>
|
||||
</div>
|
||||
<DialogFooter className="flex-row items-center justify-end gap-2 bg-card p-3">
|
||||
<Button onClick={addProvider} variant="ghost">
|
||||
{copy.addProvider}
|
||||
</Button>
|
||||
<Button onClick={() => onOpenChange(false)} variant="outline">
|
||||
{t.common.cancel}
|
||||
</Button>
|
||||
</DialogFooter>
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
|
||||
@@ -17,6 +17,7 @@ import type {
|
||||
HermesConfig,
|
||||
HermesConfigRecord,
|
||||
LogsResponse,
|
||||
MemoryProviderConfig,
|
||||
MessagingPlatformsResponse,
|
||||
MessagingPlatformTestResponse,
|
||||
MessagingPlatformUpdate,
|
||||
@@ -71,6 +72,7 @@ export type {
|
||||
HermesConfig,
|
||||
HermesConfigRecord,
|
||||
LogsResponse,
|
||||
MemoryProviderConfig,
|
||||
MessagingEnvVarInfo,
|
||||
MessagingHomeChannel,
|
||||
MessagingPlatformInfo,
|
||||
@@ -339,6 +341,23 @@ export function saveHermesConfig(config: HermesConfigRecord): Promise<{ ok: bool
|
||||
})
|
||||
}
|
||||
|
||||
export function getMemoryProviderConfig(provider: string): Promise<MemoryProviderConfig> {
|
||||
return window.hermesDesktop.api<MemoryProviderConfig>({
|
||||
path: `/api/memory/providers/${encodeURIComponent(provider)}/config`
|
||||
})
|
||||
}
|
||||
|
||||
export function saveMemoryProviderConfig(
|
||||
provider: string,
|
||||
values: Record<string, string>
|
||||
): Promise<{ ok: boolean }> {
|
||||
return window.hermesDesktop.api<{ ok: boolean }>({
|
||||
path: `/api/memory/providers/${encodeURIComponent(provider)}/config`,
|
||||
method: 'PUT',
|
||||
body: { values }
|
||||
})
|
||||
}
|
||||
|
||||
export function getEnvVars(): Promise<Record<string, EnvVarInfo>> {
|
||||
return window.hermesDesktop.api<Record<string, EnvVarInfo>>({
|
||||
...profileScoped(),
|
||||
@@ -641,10 +660,10 @@ export function getUsageAnalytics(days = 30): Promise<AnalyticsResponse> {
|
||||
})
|
||||
}
|
||||
|
||||
export function getGlobalModelOptions(): Promise<ModelOptionsResponse> {
|
||||
export function getGlobalModelOptions(opts?: { refresh?: boolean }): Promise<ModelOptionsResponse> {
|
||||
return window.hermesDesktop.api<ModelOptionsResponse>({
|
||||
...profileScoped(),
|
||||
path: '/api/model/options'
|
||||
path: opts?.refresh ? '/api/model/options?refresh=1' : '/api/model/options'
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -538,6 +538,10 @@ export const en: Translations = {
|
||||
provider: 'Provider',
|
||||
model: 'Model',
|
||||
applying: 'Applying...',
|
||||
defaultsLabel: 'Defaults',
|
||||
reasoning: 'Reasoning',
|
||||
reasoningOff: 'Off',
|
||||
defaultsFailed: 'Failed to save model defaults',
|
||||
auxiliaryTitle: 'Auxiliary models',
|
||||
resetAllToMain: 'Reset all to main',
|
||||
auxiliaryDesc: 'Helper tasks run on the main model by default. Assign a dedicated model to any task to override.',
|
||||
@@ -565,9 +569,14 @@ export const en: Translations = {
|
||||
collapse: 'Collapse',
|
||||
connectAnother: 'Connect another provider',
|
||||
otherProviders: 'Other providers',
|
||||
disconnect: 'Disconnect',
|
||||
disconnectInTerminal: 'Disconnect (runs the removal command in the terminal)',
|
||||
removeConfirm: provider => `Remove ${provider}?`,
|
||||
removeExternal: (provider, command) => `${provider} is managed outside Hermes. Remove it with ${command}.`,
|
||||
removeExternalGeneric: provider => `${provider} is managed by its own CLI — remove it there.`,
|
||||
removeKeyManaged: provider => `${provider} is configured from an API key. Remove it from API Keys.`,
|
||||
removeTerminalConfirm: (provider, command) =>
|
||||
`Disconnect ${provider}? This runs "${command}" in the terminal to clear the credential.`,
|
||||
removeTerminalRunning: provider => `Running ${provider} disconnect in the terminal…`,
|
||||
removedTitle: 'Account removed',
|
||||
removedMessage: provider => `${provider} was removed.`,
|
||||
failedRemove: provider => `Could not remove ${provider}`,
|
||||
@@ -1498,8 +1507,6 @@ export const en: Translations = {
|
||||
unknown: '(unknown)',
|
||||
search: 'Filter providers and models...',
|
||||
noModels: 'No models found.',
|
||||
persistGlobalSession: 'Persist globally (otherwise this session only)',
|
||||
persistGlobal: 'Persist globally',
|
||||
addProvider: 'Add provider',
|
||||
loadFailed: 'Could not load models',
|
||||
noAuthenticatedProviders: 'No authenticated providers.',
|
||||
@@ -1525,6 +1532,7 @@ export const en: Translations = {
|
||||
search: 'Search models',
|
||||
noModels: 'No models found',
|
||||
editModels: 'Edit Models…',
|
||||
refreshModels: 'Refresh Models',
|
||||
fast: 'Fast',
|
||||
medium: 'Med'
|
||||
},
|
||||
@@ -1726,6 +1734,7 @@ export const en: Translations = {
|
||||
refresh: 'Refresh',
|
||||
moreActions: 'More actions',
|
||||
branchNewChat: 'Branch in new chat',
|
||||
dismissError: 'Dismiss error',
|
||||
readAloudFailed: 'Read aloud failed',
|
||||
preparingAudio: 'Preparing audio...',
|
||||
stopReading: 'Stop reading',
|
||||
@@ -1835,6 +1844,9 @@ export const en: Translations = {
|
||||
regenerateFailed: 'Regenerate failed',
|
||||
editFailed: 'Edit failed',
|
||||
resumeFailed: 'Resume failed',
|
||||
resumeStrandedTitle: "Couldn't load this session",
|
||||
resumeStrandedBody: 'The connection to this session failed and automatic retries gave up. Check that the gateway is running, then try again.',
|
||||
resumeRetry: 'Retry',
|
||||
nothingToBranch: 'Nothing to branch',
|
||||
branchNeedsChat: 'Start or resume a chat before branching.',
|
||||
sessionBusy: 'Session busy',
|
||||
|
||||
@@ -695,7 +695,6 @@ export const ja = defineLocale({
|
||||
connectAnother: '別のプロバイダーを接続',
|
||||
otherProviders: 'その他のプロバイダー',
|
||||
removeConfirm: provider => `${provider} を削除しますか?`,
|
||||
removeExternal: (provider, command) => `${provider} は Hermes の外部で管理されています。${command} で削除してください。`,
|
||||
removeKeyManaged: provider => `${provider} は API キーで設定されています。API Keys から削除してください。`,
|
||||
removedTitle: 'アカウントを削除しました',
|
||||
removedMessage: provider => `${provider} を削除しました。`,
|
||||
@@ -1638,8 +1637,6 @@ export const ja = defineLocale({
|
||||
unknown: '(不明)',
|
||||
search: 'プロバイダーとモデルをフィルター...',
|
||||
noModels: 'モデルが見つかりません。',
|
||||
persistGlobalSession: 'グローバルに保持(それ以外はこのセッションのみ)',
|
||||
persistGlobal: 'グローバルに保持',
|
||||
addProvider: 'プロバイダーを追加',
|
||||
loadFailed: 'モデルを読み込めませんでした',
|
||||
noAuthenticatedProviders: '認証済みプロバイダーがありません。',
|
||||
@@ -1665,6 +1662,7 @@ export const ja = defineLocale({
|
||||
search: 'モデルを検索',
|
||||
noModels: 'モデルが見つかりません',
|
||||
editModels: 'モデルを編集…',
|
||||
refreshModels: 'モデルを更新',
|
||||
fast: '高速',
|
||||
medium: '中'
|
||||
},
|
||||
@@ -1867,6 +1865,7 @@ export const ja = defineLocale({
|
||||
refresh: '更新',
|
||||
moreActions: 'その他のアクション',
|
||||
branchNewChat: '新しいチャットでブランチ',
|
||||
dismissError: 'エラーを閉じる',
|
||||
readAloudFailed: '読み上げに失敗しました',
|
||||
preparingAudio: '音声を準備中...',
|
||||
stopReading: '読み上げを停止',
|
||||
@@ -1976,6 +1975,9 @@ export const ja = defineLocale({
|
||||
regenerateFailed: '再生成に失敗しました',
|
||||
editFailed: '編集に失敗しました',
|
||||
resumeFailed: '再開に失敗しました',
|
||||
resumeStrandedTitle: 'このセッションを読み込めませんでした',
|
||||
resumeStrandedBody: 'このセッションへの接続に失敗し、自動再試行も停止しました。ゲートウェイが実行中か確認してから、もう一度お試しください。',
|
||||
resumeRetry: '再試行',
|
||||
nothingToBranch: 'ブランチするものがありません',
|
||||
branchNeedsChat: 'ブランチする前にチャットを開始または再開してください。',
|
||||
sessionBusy: 'セッションが使用中',
|
||||
|
||||
@@ -430,6 +430,10 @@ export interface Translations {
|
||||
provider: string
|
||||
model: string
|
||||
applying: string
|
||||
defaultsLabel: string
|
||||
reasoning: string
|
||||
reasoningOff: string
|
||||
defaultsFailed: string
|
||||
auxiliaryTitle: string
|
||||
resetAllToMain: string
|
||||
auxiliaryDesc: string
|
||||
@@ -447,9 +451,13 @@ export interface Translations {
|
||||
collapse: string
|
||||
connectAnother: string
|
||||
otherProviders: string
|
||||
disconnect: string
|
||||
disconnectInTerminal: string
|
||||
removeConfirm: (provider: string) => string
|
||||
removeExternal: (provider: string, command: string) => string
|
||||
removeExternalGeneric: (provider: string) => string
|
||||
removeKeyManaged: (provider: string) => string
|
||||
removeTerminalConfirm: (provider: string, command: string) => string
|
||||
removeTerminalRunning: (provider: string) => string
|
||||
removedTitle: string
|
||||
removedMessage: (provider: string) => string
|
||||
failedRemove: (provider: string) => string
|
||||
@@ -1141,8 +1149,6 @@ export interface Translations {
|
||||
unknown: string
|
||||
search: string
|
||||
noModels: string
|
||||
persistGlobalSession: string
|
||||
persistGlobal: string
|
||||
addProvider: string
|
||||
loadFailed: string
|
||||
noAuthenticatedProviders: string
|
||||
@@ -1168,6 +1174,7 @@ export interface Translations {
|
||||
search: string
|
||||
noModels: string
|
||||
editModels: string
|
||||
refreshModels: string
|
||||
fast: string
|
||||
medium: string
|
||||
}
|
||||
@@ -1367,6 +1374,7 @@ export interface Translations {
|
||||
refresh: string
|
||||
moreActions: string
|
||||
branchNewChat: string
|
||||
dismissError: string
|
||||
readAloudFailed: string
|
||||
preparingAudio: string
|
||||
stopReading: string
|
||||
@@ -1474,6 +1482,9 @@ export interface Translations {
|
||||
regenerateFailed: string
|
||||
editFailed: string
|
||||
resumeFailed: string
|
||||
resumeStrandedTitle: string
|
||||
resumeStrandedBody: string
|
||||
resumeRetry: string
|
||||
nothingToBranch: string
|
||||
branchNeedsChat: string
|
||||
sessionBusy: string
|
||||
|
||||
@@ -672,7 +672,6 @@ export const zhHant = defineLocale({
|
||||
connectAnother: '連結其他提供方',
|
||||
otherProviders: '其他提供方',
|
||||
removeConfirm: provider => `移除 ${provider}?`,
|
||||
removeExternal: (provider, command) => `${provider} 由 Hermes 外部管理。請使用 ${command} 移除。`,
|
||||
removeKeyManaged: provider => `${provider} 由 API 金鑰設定。請從 API Keys 中移除。`,
|
||||
removedTitle: '帳號已移除',
|
||||
removedMessage: provider => `${provider} 已移除。`,
|
||||
@@ -1582,8 +1581,6 @@ export const zhHant = defineLocale({
|
||||
unknown: '(未知)',
|
||||
search: '篩選提供方和模型...',
|
||||
noModels: '找不到模型。',
|
||||
persistGlobalSession: '全域儲存(否則僅限此工作階段)',
|
||||
persistGlobal: '全域儲存',
|
||||
addProvider: '新增提供方',
|
||||
loadFailed: '無法載入模型',
|
||||
noAuthenticatedProviders: '沒有已驗證的提供方。',
|
||||
@@ -1609,6 +1606,7 @@ export const zhHant = defineLocale({
|
||||
search: '搜尋模型',
|
||||
noModels: '找不到模型',
|
||||
editModels: '編輯模型…',
|
||||
refreshModels: '重新整理模型',
|
||||
fast: '快速',
|
||||
medium: '中'
|
||||
},
|
||||
@@ -1809,6 +1807,7 @@ export const zhHant = defineLocale({
|
||||
refresh: '重新整理',
|
||||
moreActions: '更多動作',
|
||||
branchNewChat: '在新聊天中分支',
|
||||
dismissError: '关闭错误',
|
||||
readAloudFailed: '朗讀失敗',
|
||||
preparingAudio: '正在準備音訊...',
|
||||
stopReading: '停止朗讀',
|
||||
@@ -1916,6 +1915,9 @@ export const zhHant = defineLocale({
|
||||
regenerateFailed: '重新生成失敗',
|
||||
editFailed: '編輯失敗',
|
||||
resumeFailed: '繼續失敗',
|
||||
resumeStrandedTitle: '無法載入此工作階段',
|
||||
resumeStrandedBody: '與此工作階段的連線失敗,自動重試已停止。請確認閘道正在執行,然後重試。',
|
||||
resumeRetry: '重試',
|
||||
nothingToBranch: '沒有可分支的內容',
|
||||
branchNeedsChat: '分支前請先開始或繼續一個聊天。',
|
||||
sessionBusy: '工作階段忙碌中',
|
||||
|
||||
@@ -733,6 +733,10 @@ export const zh: Translations = {
|
||||
provider: '提供方',
|
||||
model: '模型',
|
||||
applying: '应用中...',
|
||||
defaultsLabel: '默认值',
|
||||
reasoning: '推理',
|
||||
reasoningOff: '关闭',
|
||||
defaultsFailed: '保存模型默认值失败',
|
||||
auxiliaryTitle: '辅助模型',
|
||||
resetAllToMain: '全部重置为主模型',
|
||||
auxiliaryDesc: '辅助任务默认使用主模型。你可以为任意任务指定专用模型。',
|
||||
@@ -759,9 +763,13 @@ export const zh: Translations = {
|
||||
collapse: '收起',
|
||||
connectAnother: '连接其他提供方',
|
||||
otherProviders: '其他提供方',
|
||||
disconnect: '断开连接',
|
||||
disconnectInTerminal: '断开连接(在终端中运行移除命令)',
|
||||
removeConfirm: provider => `移除 ${provider}?`,
|
||||
removeExternal: (provider, command) => `${provider} 由 Hermes 外部管理。请使用 ${command} 移除。`,
|
||||
removeExternalGeneric: provider => `${provider} 由其自身的 CLI 管理 — 请在那里移除。`,
|
||||
removeKeyManaged: provider => `${provider} 由 API 密钥配置。请从 API Keys 中移除。`,
|
||||
removeTerminalConfirm: (provider, command) => `断开 ${provider}?这将在终端中运行 "${command}" 以清除凭据。`,
|
||||
removeTerminalRunning: provider => `正在终端中断开 ${provider}…`,
|
||||
removedTitle: '账号已移除',
|
||||
removedMessage: provider => `${provider} 已移除。`,
|
||||
failedRemove: provider => `无法移除 ${provider}`,
|
||||
@@ -1679,8 +1687,6 @@ export const zh: Translations = {
|
||||
unknown: '(未知)',
|
||||
search: '筛选提供方和模型...',
|
||||
noModels: '未找到模型。',
|
||||
persistGlobalSession: '全局保存 (否则仅当前会话)',
|
||||
persistGlobal: '全局保存',
|
||||
addProvider: '添加提供方',
|
||||
loadFailed: '无法加载模型',
|
||||
noAuthenticatedProviders: '没有已认证的提供方。',
|
||||
@@ -1706,6 +1712,7 @@ export const zh: Translations = {
|
||||
search: '搜索模型',
|
||||
noModels: '未找到模型',
|
||||
editModels: '编辑模型…',
|
||||
refreshModels: '刷新模型',
|
||||
fast: '快速',
|
||||
medium: '中'
|
||||
},
|
||||
@@ -1906,6 +1913,7 @@ export const zh: Translations = {
|
||||
refresh: '刷新',
|
||||
moreActions: '更多操作',
|
||||
branchNewChat: '在新对话中分支',
|
||||
dismissError: '关闭错误',
|
||||
readAloudFailed: '朗读失败',
|
||||
preparingAudio: '正在准备音频...',
|
||||
stopReading: '停止朗读',
|
||||
@@ -2014,6 +2022,9 @@ export const zh: Translations = {
|
||||
regenerateFailed: '重新生成失败',
|
||||
editFailed: '编辑失败',
|
||||
resumeFailed: '恢复失败',
|
||||
resumeStrandedTitle: '无法加载此会话',
|
||||
resumeStrandedBody: '与此会话的连接失败,自动重试已停止。请确认网关正在运行,然后重试。',
|
||||
resumeRetry: '重试',
|
||||
nothingToBranch: '没有可分支的内容',
|
||||
branchNeedsChat: '分支前请先开始或恢复一个对话。',
|
||||
sessionBusy: '会话忙碌中',
|
||||
|
||||
@@ -52,6 +52,17 @@ describe('desktop slash command curation', () => {
|
||||
expect(desktopSlashUnavailableMessage('/personality')).toBeNull()
|
||||
})
|
||||
|
||||
it('treats /browser as an executable action command (local-gateway connect)', () => {
|
||||
// /browser used to be terminal-only; it now resolves to a desktop action
|
||||
// handler that routes browser.manage RPC when the gateway is local.
|
||||
expect(isDesktopSlashCommand('/browser')).toBe(true)
|
||||
expect(isDesktopSlashSuggestion('/browser')).toBe(true)
|
||||
expect(desktopSlashUnavailableMessage('/browser')).toBeNull()
|
||||
expect(resolveDesktopCommand('/browser')?.surface).toEqual({ kind: 'action', action: 'browser' })
|
||||
// Bare /browser expands to its sub-action options in the popover.
|
||||
expect(resolveDesktopCommand('/browser')?.args).toBe(true)
|
||||
})
|
||||
|
||||
it('allows aliases to execute without cluttering the popover', () => {
|
||||
expect(isDesktopSlashSuggestion('/reset')).toBe(false)
|
||||
expect(isDesktopSlashCommand('/reset')).toBe(true)
|
||||
|
||||
@@ -30,6 +30,7 @@ export interface DesktopThemeCommandOption {
|
||||
*/
|
||||
export type DesktopActionId =
|
||||
| 'branch'
|
||||
| 'browser'
|
||||
| 'handoff'
|
||||
| 'help'
|
||||
| 'new'
|
||||
@@ -103,6 +104,12 @@ const DESKTOP_COMMAND_SPECS: readonly DesktopCommandSpec[] = [
|
||||
{ name: '/skin', description: 'Switch desktop theme or cycle to the next one', surface: action('skin'), args: true },
|
||||
{ name: '/title', description: 'Rename the current session', surface: action('title') },
|
||||
{ name: '/help', description: 'Show desktop slash commands', aliases: ['/commands'], surface: action('help') },
|
||||
{
|
||||
name: '/browser',
|
||||
description: 'Manage browser CDP connection [connect|disconnect|status] (local gateway only)',
|
||||
surface: action('browser'),
|
||||
args: true
|
||||
},
|
||||
|
||||
// Overlay pickers
|
||||
{ name: '/model', description: 'Switch the model for this session', surface: picker('model'), hidden: true },
|
||||
@@ -142,7 +149,7 @@ const DESKTOP_COMMAND_SPECS: readonly DesktopCommandSpec[] = [
|
||||
// per reason beats 40 identical object literals.
|
||||
const NO_DESKTOP_SURFACE: Record<DesktopUnavailableReason, readonly string[]> = {
|
||||
terminal: [
|
||||
'/browser', '/busy', '/clear', '/compact', '/config', '/copy', '/cron', '/details',
|
||||
'/busy', '/clear', '/compact', '/config', '/copy', '/cron', '/details',
|
||||
'/exit', '/footer', '/gateway', '/gquota', '/history', '/image', '/indicator', '/logs',
|
||||
'/mouse', '/paste', '/platforms', '/plugins', '/quit', '/redraw', '/reload', '/restart',
|
||||
'/sb', '/set-home', '/sethome', '/snap', '/snapshot', '/statusbar', '/toolsets', '/update', '/verbose'
|
||||
|
||||
@@ -151,12 +151,18 @@ function normalizeVisibleProse(text: string): string {
|
||||
.join('')
|
||||
}
|
||||
|
||||
function extend(out: string[], lines: string[]) {
|
||||
for (const line of lines) {
|
||||
out.push(line)
|
||||
}
|
||||
}
|
||||
|
||||
function pushProseFence(out: string[], indent: string, info: string, lines: string[]) {
|
||||
if (info) {
|
||||
out.push(`${indent}${info}`.trimEnd())
|
||||
}
|
||||
|
||||
out.push(...lines)
|
||||
extend(out, lines)
|
||||
}
|
||||
|
||||
function findClosingFence(lines: string[], start: number, marker: string): number {
|
||||
@@ -241,7 +247,7 @@ function normalizeFenceBlocks(text: string): string {
|
||||
}
|
||||
|
||||
if (closeIndex !== -1 && isUrlOnlyBlock(bodyLines)) {
|
||||
out.push(...bodyLines)
|
||||
extend(out, bodyLines)
|
||||
index = closeIndex + 1
|
||||
|
||||
continue
|
||||
@@ -264,10 +270,10 @@ function normalizeFenceBlocks(text: string): string {
|
||||
// any literal `$$` characters in the body don't collide with
|
||||
// an outer math wrapper. No close emitted yet — streaming.
|
||||
out.push(`${indent}${marker}math`)
|
||||
out.push(...bodyLines)
|
||||
extend(out, bodyLines)
|
||||
} else {
|
||||
out.push(`${indent}${marker}${language}`)
|
||||
out.push(...bodyLines)
|
||||
extend(out, bodyLines)
|
||||
}
|
||||
|
||||
break
|
||||
@@ -288,7 +294,7 @@ function normalizeFenceBlocks(text: string): string {
|
||||
// colliding with our wrapper. Without this rewrite the block
|
||||
// would render as a syntax-highlighted "latex" code listing.
|
||||
out.push(`${indent}${marker}math`)
|
||||
out.push(...bodyLines)
|
||||
extend(out, bodyLines)
|
||||
out.push(`${indent}${marker}`)
|
||||
index = closeIndex + 1
|
||||
|
||||
@@ -296,7 +302,7 @@ function normalizeFenceBlocks(text: string): string {
|
||||
}
|
||||
|
||||
out.push(`${indent}${marker}${language}`)
|
||||
out.push(...bodyLines)
|
||||
extend(out, bodyLines)
|
||||
out.push(`${indent}${marker}`)
|
||||
index = closeIndex + 1
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { displayModelName, formatModelStatusLabel, reasoningEffortLabel } from './model-status-label'
|
||||
import { currentPickerSelection, displayModelName, formatModelStatusLabel, reasoningEffortLabel } from './model-status-label'
|
||||
|
||||
describe('model-status-label', () => {
|
||||
it('formats display names consistently', () => {
|
||||
@@ -10,6 +10,11 @@ describe('model-status-label', () => {
|
||||
expect(displayModelName('openai/gpt-5.5')).toBe('GPT-5.5')
|
||||
})
|
||||
|
||||
it('strips trailing date-pin snapshots from the display name', () => {
|
||||
expect(displayModelName('claude-opus-4-5-20251101')).toBe('Opus 4 5')
|
||||
expect(displayModelName('anthropic/claude-haiku-4-5-20251001')).toBe('Haiku 4 5')
|
||||
})
|
||||
|
||||
it('maps reasoning effort to compact labels', () => {
|
||||
expect(reasoningEffortLabel('high')).toBe('High')
|
||||
expect(reasoningEffortLabel('xhigh')).toBe('Max')
|
||||
@@ -30,4 +35,25 @@ describe('model-status-label', () => {
|
||||
it('returns just the placeholder name when there is no model', () => {
|
||||
expect(formatModelStatusLabel('')).toBe('No model')
|
||||
})
|
||||
|
||||
describe('currentPickerSelection', () => {
|
||||
const store = { model: 'opus', provider: 'anthropic' }
|
||||
const options = { model: 'hermes-4', provider: 'nous' }
|
||||
|
||||
it('prefers the sticky composer pick over the profile default pre-session', () => {
|
||||
expect(currentPickerSelection(false, store, options)).toEqual(store)
|
||||
})
|
||||
|
||||
it('lets the live session model.options win when a session exists', () => {
|
||||
expect(currentPickerSelection(true, store, options)).toEqual(options)
|
||||
})
|
||||
|
||||
it('falls back to options when the store is empty', () => {
|
||||
expect(currentPickerSelection(false, { model: '', provider: '' }, options)).toEqual(options)
|
||||
})
|
||||
|
||||
it('falls back to the store while options are still loading', () => {
|
||||
expect(currentPickerSelection(true, store, undefined)).toEqual(store)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -17,6 +17,22 @@ export function reasoningEffortLabel(effort: string): string {
|
||||
return REASONING_LABELS[key] ?? effort
|
||||
}
|
||||
|
||||
/** Which model/provider a picker should mark "current". With a live session the
|
||||
* gateway's `model.options` is authoritative; pre-session there is no server
|
||||
* "current", so the sticky composer pick wins over the profile default the
|
||||
* global options query returns — else the checkmark snaps back to the default
|
||||
* and the pick looks ignored. */
|
||||
export function currentPickerSelection(
|
||||
hasSession: boolean,
|
||||
store: { model: string; provider: string },
|
||||
options?: { model?: string; provider?: string }
|
||||
): { model: string; provider: string } {
|
||||
return {
|
||||
model: String((hasSession && options?.model) || store.model || options?.model || ''),
|
||||
provider: String((hasSession && options?.provider) || store.provider || options?.provider || '')
|
||||
}
|
||||
}
|
||||
|
||||
/** Strip provider prefix and normalize for display. */
|
||||
export function modelBaseId(model: string): string {
|
||||
const trimmed = model.trim()
|
||||
@@ -68,6 +84,9 @@ export function modelDisplayParts(model: string): { name: string; tag: string }
|
||||
}
|
||||
}
|
||||
|
||||
// Drop a trailing date-pin (`…-20251101`) — snapshot noise, not a name.
|
||||
base = base.replace(/-\d{8}$/, '')
|
||||
|
||||
return { name: prettifyBase(base) || model.trim() || 'No model', tag }
|
||||
}
|
||||
|
||||
|
||||
51
apps/desktop/src/store/model-presets.test.ts
Normal file
51
apps/desktop/src/store/model-presets.test.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import { beforeEach, describe, expect, it } from 'vitest'
|
||||
|
||||
import { $modelPresets, applyModelPreset, getModelPreset, modelPresetKey, setModelPreset } from './model-presets'
|
||||
|
||||
describe('model presets', () => {
|
||||
beforeEach(() => $modelPresets.set({}))
|
||||
|
||||
it('round-trips a preset and merges patches without dropping prior fields', () => {
|
||||
setModelPreset('anthropic', 'claude-opus-4-8', { effort: 'high' })
|
||||
setModelPreset('anthropic', 'claude-opus-4-8', { fast: true })
|
||||
|
||||
expect(getModelPreset('anthropic', 'claude-opus-4-8')).toEqual({ effort: 'high', fast: true })
|
||||
})
|
||||
|
||||
it('returns an empty preset for unknown models', () => {
|
||||
expect(getModelPreset('x', 'y')).toEqual({})
|
||||
})
|
||||
|
||||
it('keys by provider::model', () => {
|
||||
expect(modelPresetKey('openai', 'gpt-5.5')).toBe('openai::gpt-5.5')
|
||||
})
|
||||
|
||||
it('pushes only the provided dimensions to the gateway', async () => {
|
||||
const calls: { method: string; params?: Record<string, unknown> }[] = []
|
||||
|
||||
const request = async <T>(method: string, params?: Record<string, unknown>) => {
|
||||
calls.push({ method, params })
|
||||
|
||||
return {} as T
|
||||
}
|
||||
|
||||
await applyModelPreset({ effort: 'high' }, { failMessage: 'x', request, sessionId: 's1' })
|
||||
await applyModelPreset({}, { failMessage: 'x', request, sessionId: 's1' })
|
||||
|
||||
expect(calls).toEqual([{ method: 'config.set', params: { key: 'reasoning', session_id: 's1', value: 'high' } }])
|
||||
})
|
||||
|
||||
it('no-ops without a session so selecting a model cannot mutate global config', async () => {
|
||||
const calls: { method: string; params?: Record<string, unknown> }[] = []
|
||||
|
||||
const request = async <T>(method: string, params?: Record<string, unknown>) => {
|
||||
calls.push({ method, params })
|
||||
|
||||
return {} as T
|
||||
}
|
||||
|
||||
await applyModelPreset({ effort: 'high', fast: true }, { failMessage: 'x', request, sessionId: null })
|
||||
|
||||
expect(calls).toEqual([])
|
||||
})
|
||||
})
|
||||
86
apps/desktop/src/store/model-presets.ts
Normal file
86
apps/desktop/src/store/model-presets.ts
Normal file
@@ -0,0 +1,86 @@
|
||||
import { atom } from 'nanostores'
|
||||
|
||||
import { persistString, storedString } from '@/lib/storage'
|
||||
|
||||
import { notifyError } from './notifications'
|
||||
import { setCurrentFastMode, setCurrentReasoningEffort } from './session'
|
||||
|
||||
const STORAGE_KEY = 'hermes.desktop.model-presets'
|
||||
|
||||
/** Per-model reasoning/fast preset, remembered globally across sessions and
|
||||
* re-applied to the session whenever that model is selected. Unset dimensions
|
||||
* fall back to the Hermes default (medium effort, no fast). */
|
||||
export interface ModelPreset {
|
||||
effort?: string
|
||||
fast?: boolean
|
||||
}
|
||||
|
||||
type RequestGateway = <T>(method: string, params?: Record<string, unknown>) => Promise<T>
|
||||
|
||||
/** Stable `provider::model` key (matches the visibility-store format). */
|
||||
export const modelPresetKey = (provider: string, model: string): string => `${provider}::${model}`
|
||||
|
||||
function load(): Record<string, ModelPreset> {
|
||||
const raw = storedString(STORAGE_KEY)
|
||||
|
||||
if (!raw) {
|
||||
return {}
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(raw)
|
||||
|
||||
return parsed && typeof parsed === 'object' && !Array.isArray(parsed) ? (parsed as Record<string, ModelPreset>) : {}
|
||||
} catch {
|
||||
return {}
|
||||
}
|
||||
}
|
||||
|
||||
export const $modelPresets = atom<Record<string, ModelPreset>>(load())
|
||||
|
||||
export function getModelPreset(provider: string, model: string): ModelPreset {
|
||||
return $modelPresets.get()[modelPresetKey(provider, model)] ?? {}
|
||||
}
|
||||
|
||||
/** Merge a partial preset for one model and persist. */
|
||||
export function setModelPreset(provider: string, model: string, patch: ModelPreset): void {
|
||||
const key = modelPresetKey(provider, model)
|
||||
const next = { ...$modelPresets.get(), [key]: { ...$modelPresets.get()[key], ...patch } }
|
||||
|
||||
$modelPresets.set(next)
|
||||
persistString(STORAGE_KEY, JSON.stringify(next))
|
||||
}
|
||||
|
||||
/** Push a model's preset onto the active session (optimistic + gateway).
|
||||
* `undefined` skips that dimension; values are capability-gated upstream.
|
||||
* No-ops without a session — the gateway's `config.set` reasoning/fast fall
|
||||
* back to persistent (global/profile) config when none matches, so selecting
|
||||
* a model must not reach it (else it rewrites `agent.*`, defaults included). */
|
||||
export async function applyModelPreset(
|
||||
{ effort, fast }: ModelPreset,
|
||||
ctx: { failMessage: string; request: RequestGateway; sessionId: null | string }
|
||||
): Promise<void> {
|
||||
if (!ctx.sessionId) {
|
||||
return
|
||||
}
|
||||
|
||||
if (effort !== undefined) {
|
||||
setCurrentReasoningEffort(effort)
|
||||
}
|
||||
|
||||
if (fast !== undefined) {
|
||||
setCurrentFastMode(fast)
|
||||
}
|
||||
|
||||
try {
|
||||
if (effort !== undefined) {
|
||||
await ctx.request('config.set', { key: 'reasoning', session_id: ctx.sessionId, value: effort })
|
||||
}
|
||||
|
||||
if (fast !== undefined) {
|
||||
await ctx.request('config.set', { key: 'fast', session_id: ctx.sessionId, value: fast ? 'fast' : 'normal' })
|
||||
}
|
||||
} catch (err) {
|
||||
notifyError(err, ctx.failMessage)
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,7 @@ import { describe, expect, it } from 'vitest'
|
||||
import type { ModelOptionProvider } from '@/types/hermes'
|
||||
|
||||
import {
|
||||
collapseModelFamilies,
|
||||
effectiveVisibleKeys,
|
||||
emptyProviderSentinelKey,
|
||||
isProviderSentinel,
|
||||
@@ -78,6 +79,18 @@ describe('model visibility', () => {
|
||||
expect(visible.has(modelVisibilityKey('nous', 'hermes-3-llama-3.1-8b'))).toBe(false)
|
||||
})
|
||||
|
||||
it('folds a date-pinned snapshot into its rolling alias when present', () => {
|
||||
const families = collapseModelFamilies(['claude-opus-4-5', 'claude-opus-4-5-20251101'])
|
||||
|
||||
expect(families.map(f => f.id)).toEqual(['claude-opus-4-5'])
|
||||
})
|
||||
|
||||
it('keeps a date-pinned snapshot standing alone when it has no alias', () => {
|
||||
const families = collapseModelFamilies(['claude-opus-4-5-20251101', 'claude-haiku-4-5-20251001'])
|
||||
|
||||
expect(families.map(f => f.id)).toEqual(['claude-opus-4-5-20251101', 'claude-haiku-4-5-20251001'])
|
||||
})
|
||||
|
||||
it('sentinel key helper produces correct format', () => {
|
||||
expect(emptyProviderSentinelKey('openai')).toBe('openai::')
|
||||
expect(isProviderSentinel('openai::')).toBe(true)
|
||||
|
||||
@@ -51,6 +51,11 @@ export function collapseModelFamilies(models: readonly string[]): ModelFamily[]
|
||||
continue
|
||||
}
|
||||
|
||||
if (/-\d{8}$/.test(model) && present.has(model.replace(/-\d{8}$/, ''))) {
|
||||
// A date-pinned snapshot superseded by its rolling alias — drop the dupe.
|
||||
continue
|
||||
}
|
||||
|
||||
const fastId = `${model}-fast`
|
||||
const hasFast = present.has(fastId)
|
||||
families.push({ fastId: hasFast ? fastId : null, id: model })
|
||||
|
||||
@@ -4,13 +4,23 @@ import { lastVisibleMessageIsUser } from '@/app/chat/thread-loading'
|
||||
import type { ContextSuggestion } from '@/app/types'
|
||||
import type { HermesConnection } from '@/global'
|
||||
import type { ChatMessage } from '@/lib/chat-messages'
|
||||
import { persistString, storedString } from '@/lib/storage'
|
||||
import { persistBoolean, persistString, storedBoolean, storedString } from '@/lib/storage'
|
||||
import type { SessionInfo, UsageStats } from '@/types/hermes'
|
||||
|
||||
type Updater<T> = T | ((current: T) => T)
|
||||
|
||||
const WORKSPACE_CWD_KEY = 'hermes.desktop.workspace-cwd'
|
||||
|
||||
// The composer's model/effort/fast is sticky UI state, NOT the profile default
|
||||
// (that lives in Settings → Model). Persisting it in localStorage makes a pick
|
||||
// follow across Cmd+N and app restarts instead of snapping back to the default.
|
||||
// It's deliberately global (not per-profile): a profile switch force-reseeds to
|
||||
// that profile's default, while within a profile new chats keep your last pick.
|
||||
const COMPOSER_MODEL_KEY = 'hermes.desktop.composer.model'
|
||||
const COMPOSER_PROVIDER_KEY = 'hermes.desktop.composer.provider'
|
||||
const COMPOSER_EFFORT_KEY = 'hermes.desktop.composer.reasoning-effort'
|
||||
const COMPOSER_FAST_KEY = 'hermes.desktop.composer.fast'
|
||||
|
||||
let configuredDefaultProjectDir = ''
|
||||
|
||||
function workspaceCwdKey(connection: HermesConnection | null = $connection.get()): string {
|
||||
@@ -208,11 +218,28 @@ export const $lastVisibleMessageIsUser = computed($messages, lastVisibleMessageI
|
||||
export const $freshDraftReady = atom(false)
|
||||
export const $busy = atom(false)
|
||||
export const $awaitingResponse = atom(false)
|
||||
export const $currentModel = atom('')
|
||||
export const $currentProvider = atom('')
|
||||
export const $currentReasoningEffort = atom('')
|
||||
// Stored-session id whose most recent resume FAILED terminally (the gateway RPC
|
||||
// rejected AND the REST transcript fallback also failed), leaving the window
|
||||
// with no runtime and an empty transcript. Drives use-route-resume's self-heal:
|
||||
// while this matches the routed session the loader would otherwise latch
|
||||
// forever (messagesEmpty && !activeSessionId), so the hook re-attempts the
|
||||
// resume on the next render/focus/reconnect instead of stranding the window.
|
||||
// Null whenever the active route has a healthy (or in-flight) resume.
|
||||
export const $resumeFailedSessionId = atom<string | null>(null)
|
||||
// Stored-session id whose resume has EXHAUSTED its bounded auto-retries (the
|
||||
// terminal-failure latch above kept failing through all MAX_RESUME_RETRIES
|
||||
// attempts). Distinct from $resumeFailedSessionId, which is armed *during* the
|
||||
// backoff window too: this fires only once auto-recovery has given up, so the
|
||||
// chat view can swap the perpetual loader for an explicit error + manual Retry
|
||||
// affordance. A fresh resumeSession() (manual Retry, reconnect, reselect)
|
||||
// clears it and resets the retry counter. Null whenever the active route has a
|
||||
// healthy, in-flight, or still-auto-retrying resume.
|
||||
export const $resumeExhaustedSessionId = atom<string | null>(null)
|
||||
export const $currentModel = atom(storedString(COMPOSER_MODEL_KEY) ?? '')
|
||||
export const $currentProvider = atom(storedString(COMPOSER_PROVIDER_KEY) ?? '')
|
||||
export const $currentReasoningEffort = atom(storedString(COMPOSER_EFFORT_KEY) ?? '')
|
||||
export const $currentServiceTier = atom('')
|
||||
export const $currentFastMode = atom(false)
|
||||
export const $currentFastMode = atom(storedBoolean(COMPOSER_FAST_KEY, false))
|
||||
// Effective approval-bypass state mirrored from the gateway (session.info).
|
||||
// Persistence lives in the backend config (approvals.mode), so this is a plain
|
||||
// reflection of the truth the gateway reports rather than its own store.
|
||||
@@ -252,13 +279,33 @@ export const setActiveSessionId = (next: Updater<string | null>) => updateAtom($
|
||||
export const setSelectedStoredSessionId = (next: Updater<string | null>) => updateAtom($selectedStoredSessionId, next)
|
||||
export const setMessages = (next: Updater<ChatMessage[]>) => updateAtom($messages, next)
|
||||
export const setFreshDraftReady = (next: Updater<boolean>) => updateAtom($freshDraftReady, next)
|
||||
export const setResumeFailedSessionId = (next: Updater<string | null>) => updateAtom($resumeFailedSessionId, next)
|
||||
export const setResumeExhaustedSessionId = (next: Updater<string | null>) => updateAtom($resumeExhaustedSessionId, next)
|
||||
export const setBusy = (next: Updater<boolean>) => updateAtom($busy, next)
|
||||
export const setAwaitingResponse = (next: Updater<boolean>) => updateAtom($awaitingResponse, next)
|
||||
export const setCurrentModel = (next: Updater<string>) => updateAtom($currentModel, next)
|
||||
export const setCurrentProvider = (next: Updater<string>) => updateAtom($currentProvider, next)
|
||||
export const setCurrentReasoningEffort = (next: Updater<string>) => updateAtom($currentReasoningEffort, next)
|
||||
|
||||
export const setCurrentModel = (next: Updater<string>) => {
|
||||
updateAtom($currentModel, next)
|
||||
persistString(COMPOSER_MODEL_KEY, $currentModel.get() || null)
|
||||
}
|
||||
|
||||
export const setCurrentProvider = (next: Updater<string>) => {
|
||||
updateAtom($currentProvider, next)
|
||||
persistString(COMPOSER_PROVIDER_KEY, $currentProvider.get() || null)
|
||||
}
|
||||
|
||||
export const setCurrentReasoningEffort = (next: Updater<string>) => {
|
||||
updateAtom($currentReasoningEffort, next)
|
||||
persistString(COMPOSER_EFFORT_KEY, $currentReasoningEffort.get() || null)
|
||||
}
|
||||
|
||||
export const setCurrentServiceTier = (next: Updater<string>) => updateAtom($currentServiceTier, next)
|
||||
export const setCurrentFastMode = (next: Updater<boolean>) => updateAtom($currentFastMode, next)
|
||||
|
||||
export const setCurrentFastMode = (next: Updater<boolean>) => {
|
||||
updateAtom($currentFastMode, next)
|
||||
persistBoolean(COMPOSER_FAST_KEY, $currentFastMode.get())
|
||||
}
|
||||
|
||||
export const setYoloActive = (next: Updater<boolean>) => updateAtom($yoloActive, next)
|
||||
|
||||
export const setCurrentCwd = (next: Updater<string>) => {
|
||||
|
||||
@@ -5,6 +5,9 @@ import type { DesktopUpdateStatus } from '@/global'
|
||||
const storage = new Map<string, string>()
|
||||
|
||||
vi.mock('@/lib/storage', () => ({
|
||||
persistBoolean: (key: string, value: boolean) => {
|
||||
storage.set(key, String(value))
|
||||
},
|
||||
persistString: (key: string, value: null | string) => {
|
||||
if (value === null) {
|
||||
storage.delete(key)
|
||||
@@ -12,6 +15,11 @@ vi.mock('@/lib/storage', () => ({
|
||||
storage.set(key, value)
|
||||
}
|
||||
},
|
||||
storedBoolean: (key: string, fallback: boolean) => {
|
||||
const value = storage.get(key)
|
||||
|
||||
return value === undefined ? fallback : value === 'true'
|
||||
},
|
||||
storedString: (key: string) => storage.get(key) ?? null
|
||||
}))
|
||||
|
||||
@@ -33,7 +41,7 @@ vi.mock('@/hermes', () => ({
|
||||
getActionStatus: (...args: unknown[]) => getActionStatusSpy(...args)
|
||||
}))
|
||||
|
||||
const { maybeNotifyUpdateAvailable, checkBackendUpdates, $backendUpdateStatus, applyBackendUpdate, $backendUpdateApply } = await import('./updates')
|
||||
const { maybeNotifyUpdateAvailable, checkBackendUpdates, $backendUpdateStatus, applyBackendUpdate, $backendUpdateApply, reportBackendContract } = await import('./updates')
|
||||
const { setConnection } = await import('./session')
|
||||
|
||||
const status = (over: Partial<DesktopUpdateStatus> = {}): DesktopUpdateStatus => ({
|
||||
@@ -87,6 +95,61 @@ describe('maybeNotifyUpdateAvailable', () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe('reportBackendContract', () => {
|
||||
beforeEach(() => {
|
||||
storage.clear()
|
||||
notifySpy.mockClear()
|
||||
dismissSpy.mockClear()
|
||||
vi.useRealTimers()
|
||||
})
|
||||
|
||||
it('dismisses the toast when the backend meets the contract', () => {
|
||||
reportBackendContract(2)
|
||||
expect(dismissSpy).toHaveBeenCalledWith('backend-contract-skew')
|
||||
expect(notifySpy).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('warns when the backend is behind (or reports no contract)', () => {
|
||||
reportBackendContract(undefined)
|
||||
expect(notifySpy).toHaveBeenCalledTimes(1)
|
||||
reportBackendContract(1)
|
||||
expect(notifySpy).toHaveBeenCalledTimes(2)
|
||||
})
|
||||
|
||||
it('stays quiet on later session opens once the user closed it', () => {
|
||||
reportBackendContract(1)
|
||||
lastToast().onDismiss() // user closes it → cooldown starts
|
||||
notifySpy.mockClear()
|
||||
|
||||
// Opening another pre-existing session re-runs the check within cooldown.
|
||||
reportBackendContract(1)
|
||||
expect(notifySpy).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('reminds again after the cooldown elapses', () => {
|
||||
vi.useFakeTimers()
|
||||
vi.setSystemTime(0)
|
||||
|
||||
reportBackendContract(1)
|
||||
lastToast().onDismiss()
|
||||
notifySpy.mockClear()
|
||||
|
||||
vi.setSystemTime(25 * 60 * 60 * 1000) // > 24h cooldown
|
||||
reportBackendContract(1)
|
||||
expect(notifySpy).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
|
||||
it('clears the snooze once the backend catches up, so a regression warns again', () => {
|
||||
reportBackendContract(1)
|
||||
lastToast().onDismiss()
|
||||
notifySpy.mockClear()
|
||||
|
||||
reportBackendContract(2) // backend updated → satisfied, snooze cleared
|
||||
reportBackendContract(1) // a later regression must warn immediately
|
||||
expect(notifySpy).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('checkBackendUpdates', () => {
|
||||
beforeEach(() => {
|
||||
storage.clear()
|
||||
|
||||
@@ -91,26 +91,60 @@ function isUpdateToastSnoozed(): boolean {
|
||||
// v2: requires the file.attach RPC (remote-gateway non-image file upload).
|
||||
const REQUIRED_BACKEND_CONTRACT = 2
|
||||
const SKEW_TOAST_ID = 'backend-contract-skew'
|
||||
// The contract check runs on every session.resume (applyRuntimeInfo), so
|
||||
// without a snooze the warning re-popped on every thread the user opened, even
|
||||
// right after they closed it. Mirror the update toast: persist a cooldown when
|
||||
// the user dismisses it. It still reminds again after the window if the backend
|
||||
// is still behind, and clears immediately once the backend catches up.
|
||||
const SKEW_TOAST_SNOOZE_KEY = 'hermes:backend-skew-toast-snooze-until'
|
||||
const SKEW_TOAST_COOLDOWN_MS = 24 * 60 * 60 * 1000
|
||||
|
||||
function snoozeSkewToast(): void {
|
||||
persistString(SKEW_TOAST_SNOOZE_KEY, String(Date.now() + SKEW_TOAST_COOLDOWN_MS))
|
||||
}
|
||||
|
||||
function isSkewToastSnoozed(): boolean {
|
||||
const until = Number(storedString(SKEW_TOAST_SNOOZE_KEY) || 0)
|
||||
|
||||
return Number.isFinite(until) && Date.now() < until
|
||||
}
|
||||
|
||||
/**
|
||||
* Guard against a desktop GUI talking to a backend that predates its contract
|
||||
* (e.g. a bb/gui-built app pointed at a `main` checkout). Rather than failing
|
||||
* cryptically downstream, surface a persistent warning with a one-click align
|
||||
* that runs the normal update flow (which self-heals to the right branch).
|
||||
* cryptically downstream, surface a warning with a one-click align that runs
|
||||
* the normal update flow (which self-heals to the right branch).
|
||||
*
|
||||
* Runs on every session open; closing the toast snoozes it for a cooldown so it
|
||||
* doesn't nag on every thread switch.
|
||||
*/
|
||||
export function reportBackendContract(contract: number | undefined): void {
|
||||
if ((contract ?? 0) >= REQUIRED_BACKEND_CONTRACT) {
|
||||
dismissNotification(SKEW_TOAST_ID)
|
||||
// Backend caught up — forget any prior snooze so a future regression warns
|
||||
// immediately rather than staying silent for the rest of the window.
|
||||
persistString(SKEW_TOAST_SNOOZE_KEY, null)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if (isSkewToastSnoozed()) {
|
||||
return
|
||||
}
|
||||
|
||||
notify({
|
||||
action: { label: translateNow('notifications.updateHermes'), onClick: () => void applyBackendUpdate() },
|
||||
action: {
|
||||
label: translateNow('notifications.updateHermes'),
|
||||
onClick: () => {
|
||||
snoozeSkewToast()
|
||||
void applyBackendUpdate()
|
||||
}
|
||||
},
|
||||
durationMs: 0,
|
||||
id: SKEW_TOAST_ID,
|
||||
kind: 'warning',
|
||||
message: translateNow('notifications.backendOutOfDateMessage'),
|
||||
onDismiss: () => snoozeSkewToast(),
|
||||
title: translateNow('notifications.backendOutOfDateTitle')
|
||||
})
|
||||
}
|
||||
|
||||
@@ -47,6 +47,9 @@ export interface OAuthProviderStatus {
|
||||
|
||||
export interface OAuthProvider {
|
||||
cli_command: string
|
||||
/** Shell command that clears an external provider's credentials, run in the
|
||||
* embedded terminal. Null when Hermes doesn't know how to remove it. */
|
||||
disconnect_command?: null | string
|
||||
disconnect_hint?: null | string
|
||||
disconnectable?: boolean
|
||||
docs_url: string
|
||||
@@ -110,6 +113,31 @@ export interface EnvVarInfo {
|
||||
url: null | string
|
||||
}
|
||||
|
||||
export type MemoryProviderFieldKind = 'secret' | 'select' | 'text'
|
||||
|
||||
export interface MemoryProviderFieldOption {
|
||||
description: string
|
||||
label: string
|
||||
value: string
|
||||
}
|
||||
|
||||
export interface MemoryProviderField {
|
||||
description: string
|
||||
is_set: boolean
|
||||
key: string
|
||||
kind: MemoryProviderFieldKind
|
||||
label: string
|
||||
options: MemoryProviderFieldOption[]
|
||||
placeholder: string
|
||||
value: string
|
||||
}
|
||||
|
||||
export interface MemoryProviderConfig {
|
||||
fields: MemoryProviderField[]
|
||||
label: string
|
||||
name: string
|
||||
}
|
||||
|
||||
export interface MessagingEnvVarInfo {
|
||||
advanced: boolean
|
||||
description: string
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user