fix(tui): close busy-flag race that stuck queue-mode back-to-back sends

Under display.busy_input_mode: queue, sending two messages back-to-back hung the session on 'Analyzing…' until a manual Ctrl+C. The submit path only marked the session busy inside the .then of an async input.detect_drop RPC. dispatchSubmission routes queue-vs-send on getUiState().busy, so a second Enter inside that RPC window read busy===false and raced a second prompt.submit down the send path instead of enqueuing locally. The gateway accepts the mid-turn submit as a success ({status:'queued'}, not an error), and the client's only re-queue recovery is gated on catching a 'session busy' error — which never fires — so the message became invisible to the client-side drain effect and the UI stayed busy forever. Extract the ready-prompt submit into a pure submissionCore module and mark the session busy synchronously at the choke point, before the detect_drop round-trip, closing the gap for every caller (mainline submit, queue-edit picks, drain, interpolation). Verified the real gateway already queues+drains both turns correctly, so the fix is purely client-side. Adds submissionCore.test.ts whose regression assertions fail without the synchronous busy and pass with it.
Merge pull request #56029 from NousResearch/fix/desktop-drop-folder-attach
2026-07-03 00:36:53 +08:00 · 2026-07-01 13:58:20 +10:00 · 2026-06-30 22:08:21 -05:00 · 2026-06-30 22:36:37 -04:00 · 2026-07-01 12:30:59 +10:00 · 2026-06-30 18:59:45 -07:00
388 changed files with 34625 additions and 10302 deletions
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@@ -74,6 +74,10 @@ from acp_adapter.permissions import make_approval_callback
 from acp_adapter.provenance import session_provenance_meta
 from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
 from acp_adapter.tools import build_tool_complete, build_tool_start
+from tools.approval import (
+    reset_hermes_interactive_context,
+    set_hermes_interactive_context,
+)

 logger = logging.getLogger(__name__)

@@ -1446,20 +1450,23 @@ class HermesACPAgent(acp.Agent):
        # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
        # Set it INSIDE _run_agent so the TLS write happens in the executor
        # thread — setting it here would write to the event-loop thread's TLS,
-        # not the executor's. Also set HERMES_INTERACTIVE so approval.py
-        # takes the CLI-interactive path (which calls the registered
-        # callback via prompt_dangerous_approval) instead of the
-        # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
+        # not the executor's. Interactive routing uses a contextvar in
+        # tools.approval (set_hermes_interactive_context) rather than
+        # os.environ["HERMES_INTERACTIVE"], so concurrent executor workers can't
+        # race on a process-global flag — one session's restore can't drop
+        # another onto the non-interactive auto-approve path mid-run
+        # (GHSA-96vc-wcxf-jjff). The contextvar write is isolated by the
+        # contextvars.copy_context() wrapper around the executor call below.
        # ACP's conn.request_permission maps cleanly to the interactive
        # callback shape — not the gateway-queue HERMES_EXEC_ASK path,
        # which requires a notify_cb registered in _gateway_notify_cbs.
        previous_approval_cb = None
-        previous_interactive = None
+        interactive_token = None
        edit_approval_token = None
        previous_session_id = None

        def _run_agent() -> dict:
-            nonlocal previous_approval_cb, previous_interactive, edit_approval_token, previous_session_id
+            nonlocal previous_approval_cb, interactive_token, edit_approval_token, previous_session_id
            # Bind HERMES_SESSION_KEY for this session so per-session caches
            # (e.g. the interactive sudo password cache in tools.terminal_tool)
            # scope to the ACP session rather than leaking across sessions
@@ -1491,9 +1498,10 @@ class HermesACPAgent(acp.Agent):
                except Exception:
                    logger.debug("Could not set ACP edit approval requester", exc_info=True)
            # Signal to tools.approval that we have an interactive callback
-            # and the non-interactive auto-approve path must not fire.
-            previous_interactive = os.environ.get("HERMES_INTERACTIVE")
-            os.environ["HERMES_INTERACTIVE"] = "1"
+            # and the non-interactive auto-approve path must not fire. Uses a
+            # contextvar (not os.environ) so concurrent executor workers don't
+            # race on the flag (GHSA-96vc-wcxf-jjff).
+            interactive_token = set_hermes_interactive_context(True)
            # Propagate the originating ACP session id to tools that want to
            # tag side-effects with it (e.g. ``kanban_create`` stamps it on
            # the new task so clients can render a per-session board). Save
@@ -1513,11 +1521,9 @@ class HermesACPAgent(acp.Agent):
                logger.exception("Agent error in session %s", session_id)
                return {"final_response": f"Error: {e}", "messages": state.history}
            finally:
-                # Restore HERMES_INTERACTIVE.
-                if previous_interactive is None:
-                    os.environ.pop("HERMES_INTERACTIVE", None)
-                else:
-                    os.environ["HERMES_INTERACTIVE"] = previous_interactive
+                # Restore the interactive contextvar for this context.
+                if interactive_token is not None:
+                    reset_hermes_interactive_context(interactive_token)
                # Restore HERMES_SESSION_ID symmetrically.
                if previous_session_id is None:
                    os.environ.pop("HERMES_SESSION_ID", None)
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -828,7 +828,7 @@ def init_agent(
                client_kwargs["default_headers"] = build_nvidia_nim_headers(effective_base)
            elif base_url_host_matches(effective_base, "api.routermint.com"):
                client_kwargs["default_headers"] = _ra()._routermint_headers()
-            elif base_url_host_matches(effective_base, "api.githubcopilot.com"):
+            elif base_url_host_matches(effective_base, "githubcopilot.com"):
                from hermes_cli.models import copilot_default_headers

                client_kwargs["default_headers"] = copilot_default_headers()
@@ -1665,6 +1665,12 @@ def init_agent(
            abort_on_summary_failure=compression_abort_on_summary_failure,
            max_tokens=agent.max_tokens,
        )
+    _bind_session_state = getattr(agent.context_compressor, "bind_session_state", None)
+    if callable(_bind_session_state):
+        try:
+            _bind_session_state(session_db=session_db, session_id=agent.session_id)
+        except Exception:
+            pass
    agent.compression_enabled = compression_enabled
    agent.compression_in_place = compression_in_place

--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -368,6 +368,18 @@ def repair_message_sequence(agent, messages: List[Dict]) -> int:
    host code) can feed in already-broken histories.

    Repairs applied:
+      0. Consecutive ``assistant`` messages with no intervening
+         ``tool``/``user`` turn — merged into a single assistant turn
+         (union of ``tool_calls``, concatenated ``content``). Strict
+         OpenAI-compatible providers (DeepSeek v4, Moonshot/Kimi) reject
+         a history where an ``assistant`` message carrying ``tool_calls``
+         is immediately followed by another ``assistant`` message instead
+         of its ``tool`` results — HTTP 400 "An assistant message with
+         'tool_calls' must be followed by tool messages…". The split
+         shape is produced by recovery/continuation paths that append an
+         interim assistant turn (thinking-prefill, codex
+         incomplete-continuation) or by host-fed / legacy-persisted /
+         resumed histories. Refs #29148, #49147.
      1. Stray ``tool`` messages whose ``tool_call_id`` doesn't match
         any preceding assistant tool_call — dropped.
      2. Consecutive ``user`` messages — merged with newline separator
@@ -387,12 +399,74 @@ def repair_message_sequence(agent, messages: List[Dict]) -> int:

    repairs = 0

+    # Pass 0: merge consecutive assistant messages. Runs BEFORE Pass 1 so
+    # the merged turn's union of tool_call ids is known when Pass 1
+    # validates which tool-result messages are orphans. Two assistant
+    # messages are only adjacent here when nothing (no tool result, no
+    # user turn) separates them — an intervening ``tool`` message means
+    # two distinct, valid tool-call rounds that must NOT be merged.
+    #
+    # Codex Responses interim turns are exempt: the codex_responses
+    # api_mode legitimately keeps multiple consecutive incomplete
+    # assistant turns in history, each carrying its own encrypted
+    # continuation state (codex_reasoning_items / codex_message_items)
+    # that must be replayed verbatim. Collapsing them corrupts the
+    # Responses replay chain (the duplicate-detection logic in
+    # conversation_loop.py already de-dups identical codex interims).
+    def _is_codex_interim(m: Dict) -> bool:
+        return bool(
+            m.get("codex_reasoning_items")
+            or m.get("codex_message_items")
+            or m.get("finish_reason") == "incomplete"
+        )
+
+    collapsed: List[Dict] = []
+    for msg in messages:
+        if (
+            collapsed
+            and isinstance(msg, dict)
+            and msg.get("role") == "assistant"
+            and isinstance(collapsed[-1], dict)
+            and collapsed[-1].get("role") == "assistant"
+            and not _is_codex_interim(msg)
+            and not _is_codex_interim(collapsed[-1])
+        ):
+            prev = collapsed[-1]
+            # Union tool_calls (preserve order, both may carry them).
+            prev_calls = list(prev.get("tool_calls") or [])
+            new_calls = list(msg.get("tool_calls") or [])
+            if new_calls:
+                prev["tool_calls"] = prev_calls + new_calls
+            elif prev_calls:
+                prev["tool_calls"] = prev_calls
+            # Concatenate plain-text content; leave multimodal (list)
+            # content on either side alone to avoid mangling attachment
+            # blocks — fall back to keeping the existing content.
+            prev_content = prev.get("content")
+            new_content = msg.get("content")
+            if isinstance(prev_content, str) and isinstance(new_content, str):
+                joined = "\n".join(
+                    p for p in (prev_content.strip(), new_content.strip()) if p
+                )
+                prev["content"] = joined
+            elif not prev_content and new_content is not None:
+                prev["content"] = new_content
+            # Carry reasoning_content from the later turn only if the
+            # earlier turn lacks it (strict thinking providers require a
+            # reasoning_content on the merged tool-call turn; the first
+            # non-empty one suffices).
+            if not prev.get("reasoning_content") and msg.get("reasoning_content"):
+                prev["reasoning_content"] = msg["reasoning_content"]
+            repairs += 1
+            continue
+        collapsed.append(msg)
+
    # Pass 1: drop stray tool messages that don't follow a known
    # assistant tool_call_id. Uses a rolling set of known ids refreshed
    # on each assistant message.
    known_tool_ids: set = set()
    filtered: List[Dict] = []
-    for msg in messages:
+    for msg in collapsed:
        if not isinstance(msg, dict):
            filtered.append(msg)
            continue
@@ -663,6 +737,25 @@ def recover_with_credential_pool(
        elif status_code in {401, 403}:
            effective_reason = FailoverReason.auth

+    if effective_reason == FailoverReason.upstream_rate_limit:
+        # An upstream provider (e.g. DeepSeek behind OpenRouter) is
+        # rate-limiting the aggregator's traffic — the user's credential is
+        # healthy. Do NOT rotate or mark exhausted; let the caller's fallback
+        # path switch to a different model entirely.
+        upstream = (error_context or {}).get("upstream_provider") if error_context else None
+        if upstream:
+            _ra().logger.info(
+                "Upstream provider %s rate-limited via aggregator — skipping "
+                "credential rotation, deferring to fallback chain",
+                upstream,
+            )
+        else:
+            _ra().logger.info(
+                "Upstream aggregator 429 (provider unknown) — skipping "
+                "credential rotation, deferring to fallback chain"
+            )
+        return False, has_retried_429
+
    if effective_reason == FailoverReason.billing:
        rotate_status = status_code if status_code is not None else 402
        next_entry = pool.mark_exhausted_and_rotate(status_code=rotate_status, error_context=error_context)
@@ -1625,6 +1718,18 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
        if (new_provider or "").strip().lower() == "moa":
            from agent.moa_loop import MoAClient

+            # The MoA virtual provider speaks only chat.completions via the
+            # MoAClient facade — the aggregator's real transport
+            # (codex_responses / anthropic_messages) is resolved and applied
+            # *inside* the reference/aggregator fan-out, never on the outer
+            # primary call. determine_api_mode("moa", ...) above may have left
+            # api_mode set to the aggregator's transport; if the conversation
+            # loop sees that, it dispatches client.responses.create (which the
+            # facade has no .responses for) and the call falls through to the
+            # moa://local placeholder → HTTP 404 → fallback to a reference
+            # model. Pin chat_completions here so the primary call always goes
+            # through MoAClient.chat.completions, matching agent_init.py.
+            agent.api_mode = "chat_completions"
            agent.api_key = api_key or "moa-virtual-provider"
            agent.base_url = "moa://local"
            agent._client_kwargs = {}
@@ -2152,6 +2257,54 @@ def sanitize_api_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]
        filtered.append(msg)
    messages = filtered

+    # --- Repair tool_calls whose function.name is empty/missing ---
+    # Some providers (and partially-streamed responses) emit a tool_call with
+    # id="call_xxx" but function.name="". Downstream Responses-API adapters
+    # silently DROP such function_call items while still emitting the matching
+    # function_call_output, producing the gateway's HTTP 400
+    # "No tool call found for function call output with call_id ...".
+    #
+    # We do NOT drop the call: hermes' own dispatch loop intentionally keeps an
+    # empty-name call paired with a synthesized anti-priming tool result
+    # ("tool name was empty", see #47967) so weak models self-correct instead of
+    # being fed the full tool catalog. Dropping the call here would (a) orphan
+    # that result and strip the anti-priming signal, and (b) still leave any
+    # provider-side orphan. Instead, rename the blank name to a non-empty
+    # sentinel so the call and its result stay PAIRED — the adapter no longer
+    # drops the function_call, so there is no orphaned output and no 400, while
+    # the result content the model needs is preserved.
+    _EMPTY_NAME_SENTINEL = "invalid_tool_call"
+    for msg in messages:
+        if msg.get("role") != "assistant":
+            continue
+        tcs = msg.get("tool_calls") or []
+        if not tcs:
+            continue
+        for tc in tcs:
+            if isinstance(tc, dict):
+                fn = tc.get("function")
+                name = fn.get("name") if isinstance(fn, dict) else getattr(fn, "name", None)
+            else:
+                fn = getattr(tc, "function", None)
+                name = getattr(fn, "name", None) if fn else None
+            if isinstance(name, str) and name.strip():
+                continue
+            _ra().logger.warning(
+                "Pre-call sanitizer: repairing tool_call with empty "
+                "function.name -> %r (id=%s)",
+                _EMPTY_NAME_SENTINEL,
+                _ra().AIAgent._get_tool_call_id_static(tc),
+            )
+            if isinstance(fn, dict):
+                fn["name"] = _EMPTY_NAME_SENTINEL
+            elif fn is not None and hasattr(fn, "name"):
+                try:
+                    fn.name = _EMPTY_NAME_SENTINEL
+                except Exception:
+                    pass
+            elif isinstance(tc, dict):
+                tc["function"] = {"name": _EMPTY_NAME_SENTINEL, "arguments": "{}"}
+
    surviving_call_ids: set = set()
    for msg in messages:
        if msg.get("role") == "assistant":
@@ -2163,7 +2316,7 @@ def sanitize_api_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]
    result_call_ids: set = set()
    for msg in messages:
        if msg.get("role") == "tool":
-            cid = msg.get("tool_call_id")
+            cid = (msg.get("tool_call_id") or "").strip()
            if cid:
                result_call_ids.add(cid)

@@ -2172,7 +2325,7 @@ def sanitize_api_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]
    if orphaned_results:
        messages = [
            m for m in messages
-            if not (m.get("role") == "tool" and m.get("tool_call_id") in orphaned_results)
+            if not (m.get("role") == "tool" and (m.get("tool_call_id") or "").strip() in orphaned_results)
        ]
        _ra().logger.debug(
            "Pre-call sanitizer: removed %d orphaned tool result(s)",
@@ -2206,7 +2359,7 @@ def sanitize_api_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]

 def looks_like_codex_intermediate_ack(
    agent,
-    user_message: str,
+    user_message: Any,
    assistant_content: str,
    messages: List[Dict[str, Any]],
    require_workspace: bool = True,
@@ -2286,7 +2439,14 @@ def looks_like_codex_intermediate_ack(
    if not require_workspace:
        return True

-    user_text = (user_message or "").strip().lower()
+    # ``user_message`` is normally a ``str`` but can arrive as an OpenAI-style
+    # multi-part content list (``[{type:"text",...}, {type:"image_url",...}]``)
+    # for vision requests routed through the OpenAI-compat API server. A
+    # truthy list survives ``(user_message or "")`` and then ``.strip()``
+    # raises ``AttributeError`` — flatten to text first.
+    from agent.codex_responses_adapter import _summarize_user_message_for_log
+
+    user_text = _summarize_user_message_for_log(user_message).strip().lower()
    user_targets_workspace = (
        any(marker in user_text for marker in workspace_markers)
        or "~/" in user_text
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -124,6 +124,15 @@ def _openai_http_client_kwargs(

 def _create_openai_client(*, api_key: str, base_url: str, **kwargs: Any) -> Any:
    kwargs = {**_openai_http_client_kwargs(base_url), **kwargs}
+    # Hermes owns auxiliary retry + provider/model fallback policy (the
+    # same-provider transient retry in call_llm plus the except-chain
+    # fallback). The OpenAI SDK's own default (max_retries=2 → up to 3
+    # attempts) silently multiplies the effective wall time of every aux call
+    # by 3× on a slow/hung endpoint, so a 120s timeout can stall ~360s before
+    # Hermes sees a single failure (issue #54465). Disable SDK-internal retries
+    # by default and let Hermes control the budget; explicit callers can still
+    # override via kwargs.
+    kwargs.setdefault("max_retries", 0)
    return OpenAI(api_key=api_key, base_url=base_url, **kwargs)


@@ -1615,7 +1624,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            extra = {}
            if base_url_host_matches(base_url, "api.kimi.com"):
                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-            elif base_url_host_matches(base_url, "api.githubcopilot.com"):
+            elif base_url_host_matches(base_url, "githubcopilot.com"):
                from hermes_cli.models import copilot_default_headers

                extra["default_headers"] = copilot_default_headers()
@@ -1655,7 +1664,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        extra = {}
        if base_url_host_matches(base_url, "api.kimi.com"):
            extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
+        elif base_url_host_matches(base_url, "githubcopilot.com"):
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
@@ -2590,6 +2599,27 @@ def _is_rate_limit_error(exc: Exception) -> bool:
    return False


+def _is_timeout_error(exc: Exception) -> bool:
+    """Detect a request timeout — the full-budget stall, distinct from a fast
+    connection drop.
+
+    A timeout burns the entire configured ``timeout`` before surfacing, so a
+    same-provider retry on the critical compression path doubles the
+    user-visible wall time (issue #54465). A streaming-close / dropped
+    connection, by contrast, fails fast and is cheap to retry — those stay on
+    the retry path even for compression.
+    """
+    try:
+        from openai import APITimeoutError
+        if isinstance(exc, APITimeoutError):
+            return True
+    except ImportError:
+        pass
+    if "Timeout" in type(exc).__name__:
+        return True
+    return "timed out" in str(exc).lower()
+
+
 def _is_connection_error(exc: Exception) -> bool:
    """Detect connection/network errors that warrant provider fallback.

@@ -2924,7 +2954,7 @@ def _recoverable_pool_provider(
        return "nous"
    if base_url_host_matches(base, "api.anthropic.com"):
        return "anthropic"
-    if base_url_host_matches(base, "api.githubcopilot.com"):
+    if base_url_host_matches(base, "githubcopilot.com"):
        return "copilot"
    if base_url_host_matches(base, "api.kimi.com"):
        return "kimi-coding"
@@ -3793,7 +3823,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
    sync_base_url = str(sync_client.base_url)
    if base_url_host_matches(sync_base_url, "openrouter.ai"):
        async_kwargs["default_headers"] = build_or_headers()
-    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
+    elif base_url_host_matches(sync_base_url, "githubcopilot.com"):
        from hermes_cli.copilot_auth import copilot_request_headers

        async_kwargs["default_headers"] = copilot_request_headers(
@@ -3824,6 +3854,9 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
        **_openai_http_client_kwargs(sync_base_url, async_mode=True),
        **async_kwargs,
    }
+    # See _create_openai_client: disable SDK-internal retries so Hermes owns
+    # the auxiliary retry/timeout budget (issue #54465).
+    async_kwargs.setdefault("max_retries", 0)
    return AsyncOpenAI(**async_kwargs), model


@@ -4095,7 +4128,7 @@ def resolve_provider_client(
                extra["default_query"] = _dq
            if base_url_host_matches(custom_base, "api.kimi.com"):
                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-            elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
+            elif base_url_host_matches(custom_base, "githubcopilot.com"):
                from hermes_cli.copilot_auth import copilot_request_headers
                extra["default_headers"] = copilot_request_headers(
                    is_agent_turn=True, is_vision=is_vision
@@ -4348,7 +4381,7 @@ def resolve_provider_client(
        headers = {}
        if base_url_host_matches(base_url, "api.kimi.com"):
            headers["User-Agent"] = "claude-code/0.1.0"
-        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
+        elif base_url_host_matches(base_url, "githubcopilot.com"):
            from hermes_cli.copilot_auth import copilot_request_headers

            headers.update(copilot_request_headers(
@@ -4821,9 +4854,14 @@ def auxiliary_max_tokens_param(value: int, *, model: Optional[str] = None) -> di
    or_key = os.getenv("OPENROUTER_API_KEY")
    # Use max_completion_tokens for direct OpenAI-compatible providers that reject
    # max_tokens on newer GPT-4o/o-series/GPT-5-style models.
+    _custom_host = base_url_hostname(custom_base) or ""
    if (not or_key
            and _read_nous_auth() is None
-            and base_url_hostname(custom_base) in {"api.openai.com", "api.githubcopilot.com"}):
+            and (
+                _custom_host == "api.openai.com"
+                or _custom_host == "api.githubcopilot.com"
+                or _custom_host.endswith(".githubcopilot.com")
+            )):
        return {"max_completion_tokens": value}
    # ...and for any caller serving a newer OpenAI-family model by name.
    if model_forces_max_completion_tokens(model):
@@ -5200,9 +5238,10 @@ def _resolve_task_provider_model(
      3. "auto" (full auto-detection chain)

    Returns (provider, model, base_url, api_key, api_mode) where model may
-    be None (use provider default). When base_url is set, provider is forced
-    to "custom" and the task uses that direct endpoint. api_mode is one of
-    "chat_completions", "codex_responses", or None (auto-detect).
+    be None (use provider default). A bare base_url is treated as custom, but
+    a first-class provider plus base_url keeps the provider identity so its
+    auth, transport, and request-shaping behavior still apply. api_mode is one
+    of "chat_completions", "codex_responses", or None (auto-detect).
    """
    cfg_provider = None
    cfg_model = None
@@ -5235,11 +5274,35 @@ def _resolve_task_provider_model(
            return prov, existing_base
        return "custom", existing_base or target_base

+    def _preserve_provider_with_base_url(prov: Optional[str]) -> bool:
+        normalized = str(prov or "").strip().lower()
+        if normalized in {"", "auto", "custom"} or normalized.startswith("custom:"):
+            return False
+        try:
+            from hermes_cli.providers import get_provider
+
+            return get_provider(normalized) is not None
+        except Exception:
+            # Keep the high-risk provider-backed routes safe even if provider
+            # catalog loading is unavailable during early import/test paths.
+            return normalized in {
+                "anthropic",
+                "copilot",
+                "copilot-acp",
+                "minimax-oauth",
+                "nous",
+                "openai-codex",
+                "qwen-oauth",
+                "xai-oauth",
+            }
+
    if provider:
        provider, base_url = _expand_direct_api_alias(provider, base_url)
    if cfg_provider:
        cfg_provider, cfg_base_url = _expand_direct_api_alias(cfg_provider, cfg_base_url)

+    if base_url and _preserve_provider_with_base_url(provider):
+        return provider, resolved_model, base_url, api_key, resolved_api_mode
    if base_url:
        return "custom", resolved_model, base_url, api_key, resolved_api_mode
    if provider:
@@ -5647,6 +5710,9 @@ def call_llm(
    tools: list = None,
    timeout: float = None,
    extra_body: dict = None,
+    api_mode: str = None,
+    stream: bool = False,
+    stream_options: dict = None,
 ) -> Any:
    """Centralized synchronous LLM call.

@@ -5659,21 +5725,32 @@ def call_llm(
              Reads provider:model from config/env. Ignored if provider is set.
        provider: Explicit provider override.
        model: Explicit model override.
+        api_mode: Explicit API mode override (e.g. "codex_responses",
+              "anthropic_messages"). Takes precedence over task config.
        messages: Chat messages list.
        temperature: Sampling temperature (None = provider default).
        max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
        tools: Tool definitions (for function calling).
        timeout: Request timeout in seconds (None = read from auxiliary.{task}.timeout config).
        extra_body: Additional request body fields.
+        stream: When True, return the raw SDK streaming iterator instead of a
+            validated complete response. The caller is responsible for consuming
+            chunks (and for any fallback). Used by the MoA aggregator so its
+            output can stream to the user.
+        stream_options: Passed through to the request when stream is True
+            (e.g. {"include_usage": True}).

    Returns:
-        Response object with .choices[0].message.content
+        Response object with .choices[0].message.content, OR — when stream=True —
+        the raw streaming iterator from client.chat.completions.create().

    Raises:
        RuntimeError: If no provider is configured.
    """
    resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
        task, provider, model, base_url, api_key)
+    if api_mode:
+        resolved_api_mode = api_mode
    effective_extra_body = _get_task_extra_body(task)
    effective_extra_body.update(extra_body or {})

@@ -5767,6 +5844,20 @@ def call_llm(
    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])

+    # Streaming path: return the raw SDK Stream iterator directly. This is used by
+    # the MoA aggregator so its tokens stream to the user. It deliberately skips
+    # _validate_llm_response and the temperature/max_tokens/payment fallback chain
+    # below — those all assume a complete response object, whereas a stream is
+    # consumed chunk-by-chunk by the caller. The caller (the agent's streaming
+    # consumer) owns chunk reassembly, stale-stream detection, and falling back to
+    # a non-streaming call on error. stream_options is best-effort: providers that
+    # reject it surface an error the caller's fallback already handles.
+    if stream:
+        kwargs["stream"] = True
+        if stream_options:
+            kwargs["stream_options"] = stream_options
+        return client.chat.completions.create(**kwargs)
+
    # Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
    # then payment fallback.
    try:
@@ -5785,6 +5876,21 @@ def call_llm(
        except Exception as transient_err:
            if not _is_transient_transport_error(transient_err):
                raise
+            # Compression is on the critical preflight path: a user cannot
+            # continue or resume an oversized session until it compacts. A
+            # same-provider retry on a timeout means another full ``timeout``-
+            # long wall-clock block before the except-chain below can fall
+            # back — doubling the user-visible stall (issue #54465). Skip the
+            # same-provider retry for compression on a full-budget timeout and
+            # fall straight through to provider/model fallback; fast blips (a
+            # streaming-close or a 5xx) still retry, since those are cheap.
+            if task == "compression" and _is_timeout_error(transient_err):
+                logger.info(
+                    "Auxiliary compression: timeout on the critical path; "
+                    "skipping same-provider retry and falling back: %s",
+                    transient_err,
+                )
+                raise
            logger.info(
                "Auxiliary %s: transient transport error; retrying once on "
                "the same provider before fallback: %s",
@@ -6310,6 +6416,16 @@ async def async_call_llm(
        except Exception as transient_err:
            if not _is_transient_transport_error(transient_err):
                raise
+            # See call_llm(): compression is on the critical preflight path,
+            # so skip the same-provider retry on a full-budget timeout and
+            # fall straight through to fallback (issue #54465).
+            if task == "compression" and _is_timeout_error(transient_err):
+                logger.info(
+                    "Auxiliary compression (async): timeout on the critical "
+                    "path; skipping same-provider retry and falling back: %s",
+                    transient_err,
+                )
+                raise
            logger.info(
                "Auxiliary %s (async): transient transport error; retrying "
                "once on the same provider before fallback: %s",
--- a/agent/background_review.py
+++ b/agent/background_review.py
@@ -18,12 +18,13 @@ for invariants and PR review criteria.

 from __future__ import annotations

-import contextlib
 import json
 import logging
 import os
 from typing import Any, Dict, List, Optional

+from agent.thread_scoped_output import thread_scoped_silence
+
 logger = logging.getLogger(__name__)


@@ -602,9 +603,15 @@ def _run_review_in_thread(
    review_agent = None
    review_messages: List[Dict] = []
    try:
-        with open(os.devnull, "w", encoding="utf-8") as _devnull, \
-             contextlib.redirect_stdout(_devnull), \
-             contextlib.redirect_stderr(_devnull):
+        # Silence stdout/stderr for THIS worker thread only.  A process-global
+        # ``contextlib.redirect_stdout(devnull)`` here would also blank
+        # ``sys.stdout``/``sys.stderr`` for every other thread — including a
+        # gateway event-loop thread driving a Telegram long-poll — for the full
+        # duration of the review (tens of seconds), swallowing their console
+        # output (#55769 / #55925).  ``thread_scoped_silence`` routes only this
+        # thread's writes to devnull and leaves all other threads on the real
+        # streams.
+        with thread_scoped_silence():
            # Inherit the parent agent's live runtime (provider, model,
            # base_url, api_key, api_mode) so the fork uses the exact
            # same credentials the main turn is using.  Without this,
@@ -725,10 +732,17 @@ def _run_review_in_thread(
                clear_thread_tool_whitelist,
            )

+            # Gate the built-in memory tool on the profile's memory_enabled flag.
+            # Hardcoding ["memory", "skills"] granted the review LLM the MEMORY.md
+            # read/write tool even when a profile set memory_enabled: false,
+            # contaminating a memory-disabled profile (#54937 layer 2).
+            review_toolsets = ["skills"]
+            if review_agent._memory_enabled or review_agent._user_profile_enabled:
+                review_toolsets.insert(0, "memory")
            review_whitelist = {
                t["function"]["name"]
                for t in get_tool_definitions(
-                    enabled_toolsets=["memory", "skills"],
+                    enabled_toolsets=review_toolsets,
                    quiet_mode=True,
                )
            }
@@ -739,6 +753,13 @@ def _run_review_in_thread(
                    "{tool_name}. Only memory/skill tools are allowed."
                ),
            )
+            try:
+                from tools.skill_manager_tool import _reset_background_review_read_marks
+
+                _reset_background_review_read_marks()
+            except Exception:
+                pass
+
            try:
                # Routed to a different model -> replay a digest (cache is cold
                # on that model anyway, so minimise cold-written tokens). Same
@@ -808,16 +829,14 @@ def _run_review_in_thread(
        logger.warning("Background memory/skill review failed: %s", e)
        agent._emit_auxiliary_failure("background review", e)
    finally:
-        # Safety-net cleanup for the exception path.  Normal
-        # completion already shut down inside redirect_stdout above.
-        # Re-open devnull here so any teardown output (Honcho flush,
-        # Hindsight sync, background thread joins) stays silent even
-        # on the exception path where redirect_stdout already exited.
+        # Safety-net cleanup for the exception path.  Normal completion already
+        # shut down inside the thread-scoped silence above.  Re-enter the
+        # thread-scoped silence here so teardown output (Honcho flush, Hindsight
+        # sync, background thread joins) stays quiet even on the exception path,
+        # without blanking other threads' streams.
        if review_agent is not None:
            try:
-                with open(os.devnull, "w", encoding="utf-8") as _fn, \
-                     contextlib.redirect_stdout(_fn), \
-                     contextlib.redirect_stderr(_fn):
+                with thread_scoped_silence():
                    try:
                        review_agent.shutdown_memory_provider()
                    except Exception:
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@@ -632,7 +632,7 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
        _ct = agent._get_transport()
        is_github_responses = (
            base_url_host_matches(agent.base_url, "models.github.ai")
-            or base_url_host_matches(agent.base_url, "api.githubcopilot.com")
+            or base_url_host_matches(agent.base_url, "githubcopilot.com")
        )
        is_codex_backend = (
            agent.provider == "openai-codex"
@@ -702,7 +702,7 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
    _is_or = agent._is_openrouter_url()
    _is_gh = (
        base_url_host_matches(agent._base_url_lower, "models.github.ai")
-        or base_url_host_matches(agent._base_url_lower, "api.githubcopilot.com")
+        or base_url_host_matches(agent._base_url_lower, "githubcopilot.com")
    )
    _is_nous = "nousresearch" in agent._base_url_lower
    _is_nvidia = "integrate.api.nvidia.com" in agent._base_url_lower
@@ -741,14 +741,26 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
    if agent.provider_data_collection:
        _prefs["data_collection"] = agent.provider_data_collection

-    # Claude max-output override on aggregators
+    # Anthropic-compatible max-output fallback (last resort only — applied in
+    # build_kwargs *after* ephemeral/user/profile max_tokens, never overriding
+    # an explicit value).  Model-gated, not URL-gated: any chat-completions
+    # proxy serving a Claude/MiniMax/Qwen3 model needs max_tokens, because the
+    # Anthropic Messages API treats it as mandatory and proxies that omit it
+    # (AWS Bedrock, NVIDIA, LiteLLM, vLLM, corporate gateways) default as low
+    # as 4096 output tokens — easily exhausted by thinking + large tool calls
+    # like write_file/patch.  OpenRouter/Nous were the only routes covered
+    # before; gating on _ANTHROPIC_OUTPUT_LIMITS membership covers them all.
    _ant_max = None
-    if (_is_or or _is_nous) and "claude" in (agent.model or "").lower():
-        try:
-            from agent.anthropic_adapter import _get_anthropic_max_output
+    try:
+        from agent.anthropic_adapter import (
+            _get_anthropic_max_output,
+            _ANTHROPIC_OUTPUT_LIMITS,
+        )
+        _model_norm = (agent.model or "").lower().replace(".", "-")
+        if any(key in _model_norm for key in _ANTHROPIC_OUTPUT_LIMITS):
            _ant_max = _get_anthropic_max_output(agent.model)
-        except Exception:
-            pass
+    except Exception:
+        pass

    # Qwen session metadata
    _qwen_meta = None
@@ -1124,7 +1136,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
    auth resolution and client construction — no duplicated provider→key
    mappings.
    """
-    if reason in {FailoverReason.rate_limit, FailoverReason.billing}:
+    if reason in {FailoverReason.rate_limit, FailoverReason.billing, FailoverReason.upstream_rate_limit}:
        # Only start cooldown when leaving the primary provider.  If we're
        # already on a fallback and chain-switching, the primary wasn't the
        # source of the 429 so the cooldown should not be reset/extended.
@@ -1142,7 +1154,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
        # provider again.  Guards the cross-turn replay storm in #24996.
        if (
            len(agent._fallback_chain) > 0
-            and reason not in {FailoverReason.rate_limit, FailoverReason.billing}
+            and reason not in {FailoverReason.rate_limit, FailoverReason.billing, FailoverReason.upstream_rate_limit}
        ):
            _existing_cooldown = getattr(agent, "_rate_limited_until", 0) or 0
            agent._rate_limited_until = max(
@@ -1944,6 +1956,35 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
        request_client_holder["diag"] = _diag
        stream = request_client.chat.completions.create(**stream_kwargs)

+        # Some OpenAI-compatible adapters (for example copilot-acp) accept
+        # stream=True but still return a completed response object rather than
+        # an iterator of chunks.  Treat that as "streaming unsupported" for the
+        # rest of this session instead of crashing on ``for chunk in stream``
+        # with ``'types.SimpleNamespace' object is not iterable`` (#11732).
+        response_choices = getattr(stream, "choices", None)
+        if isinstance(response_choices, list) and response_choices:
+            logger.info(
+                "Streaming request returned a final response object instead of "
+                "an iterator; switching %s/%s to non-streaming for this session.",
+                agent.provider or "unknown",
+                agent.model or "unknown",
+            )
+            agent._disable_streaming = True
+            message = getattr(response_choices[0], "message", None)
+            if message is not None:
+                reasoning_text = (
+                    getattr(message, "reasoning_content", None)
+                    or getattr(message, "reasoning", None)
+                )
+                if isinstance(reasoning_text, str) and reasoning_text:
+                    _fire_first_delta()
+                    agent._fire_reasoning_delta(reasoning_text)
+                content = getattr(message, "content", None)
+                if isinstance(content, str) and content:
+                    _fire_first_delta()
+                    agent._fire_stream_delta(content)
+            return stream
+
        # Capture rate limit headers from the initial HTTP response.
        # The OpenAI SDK Stream object exposes the underlying httpx
        # response via .response before any chunks are consumed.
@@ -2086,7 +2127,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                            entry["function"]["arguments"] += tc_delta.function.arguments
                    extra = getattr(tc_delta, "extra_content", None)
                    if extra is None and hasattr(tc_delta, "model_extra"):
-                        extra = (tc_delta.model_extra or {}).get("extra_content")
+                        extra = (tc_delta.model_extra if isinstance(tc_delta.model_extra, dict) else {}).get("extra_content")
                    if extra is not None:
                        if hasattr(extra, "model_dump"):
                            extra = extra.model_dump()
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -19,6 +19,7 @@ Improvements over v2:
 import hashlib
 import json
 import logging
+import sqlite3
 import re
 import time
 from typing import Any, Dict, List, Optional
@@ -638,6 +639,7 @@ class ContextCompressor(ContextEngine):
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0
        self._summary_failure_cooldown_until = 0.0  # transient errors must not block a fresh session
+        self._last_summary_error = None
        self.last_real_prompt_tokens = 0
        self.last_compression_rough_tokens = 0
        self.last_rough_tokens_when_real_prompt_fit = 0
@@ -659,6 +661,104 @@ class ContextCompressor(ContextEngine):
        """
        self._previous_summary = None

+    def bind_session_state(self, session_db: Any = None, session_id: str = "") -> None:
+        """Attach the session row handle and reload its persisted failure cooldown."""
+        self._session_db, self._session_id = session_db, session_id or ""
+        # Forget the previous binding's in-memory cooldown, then let the getter
+        # repopulate it from the newly bound session (if one is recorded).
+        self._summary_failure_cooldown_until, self._last_summary_error = 0.0, None
+        self.get_active_compression_failure_cooldown()
+
+    def on_session_start(self, session_id: str, **kwargs) -> None:
+        """Run the base session-start hook, then rebind cooldown state to ``session_id``."""
+        super().on_session_start(session_id, **kwargs)
+        self.bind_session_state(kwargs.get("session_db", getattr(self, "_session_db", None)), session_id)
+
+    def get_active_compression_failure_cooldown(self) -> Optional[Dict[str, Any]]:
+        """Return the live compression-failure cooldown for the bound session, else None."""
+        now_mono = time.monotonic()
+        remaining = self._summary_failure_cooldown_until - now_mono
+        if remaining > 0:
+            return {
+                "cooldown_until": time.time() + remaining,
+                "remaining_seconds": remaining,
+                "error": self._last_summary_error,
+            }
+
+        session_db = getattr(self, "_session_db", None)
+        session_id = getattr(self, "_session_id", "")
+        if not session_db or not session_id:
+            return None
+
+        getter = getattr(session_db, "get_compression_failure_cooldown", None)
+        if getter is None:
+            return None
+        try:
+            state = getter(session_id) or {}
+            remaining = float(state.get("remaining_seconds") or 0.0)
+            cooldown_until = float(state.get("cooldown_until") or 0.0)
+        except sqlite3.Error as exc:
+            logger.debug("compression failure cooldown lookup failed: %s", exc)
+            return None
+        except Exception as exc:
+            # Best-effort: a broken backend or malformed row must not break session start.
+            logger.debug("compression failure cooldown lookup failed (non-sqlite): %s", exc)
+            return None
+        if remaining <= 0:
+            return None
+
+        # Mirror the persisted cooldown into memory so later checks skip the lookup.
+        self._summary_failure_cooldown_until = now_mono + remaining
+        self._last_summary_error = state.get("error")
+        return {
+            "cooldown_until": cooldown_until,
+            "remaining_seconds": remaining,
+            "error": self._last_summary_error,
+        }
+
+    def _record_compression_failure_cooldown(
+        self,
+        cooldown_seconds: float,
+        error: Optional[str],
+    ) -> None:
+        """Arm the failure cooldown in memory and, if a session is bound, persist it."""
+        wall_deadline = time.time() + cooldown_seconds
+        self._summary_failure_cooldown_until = time.monotonic() + cooldown_seconds
+        self._last_summary_error = error
+        # Persistence is best-effort: failures are logged at debug and swallowed.
+        db = getattr(self, "_session_db", None)
+        sid = getattr(self, "_session_id", "")
+        if not (db and sid):
+            return
+        persist = getattr(db, "record_compression_failure_cooldown", None)
+        if persist is None:
+            return
+        try:
+            persist(sid, wall_deadline, error)
+        except sqlite3.Error as exc:
+            logger.debug("compression failure cooldown persist failed: %s", exc)
+        except Exception as exc:
+            logger.debug("compression failure cooldown persist failed (non-sqlite): %s", exc)
+
+    def _clear_compression_failure_cooldown(self) -> None:
+        """Reset the failure cooldown in memory and, if bound, via the session_db."""
+        self._summary_failure_cooldown_until, self._last_summary_error = 0.0, None
+        # Clearing the stored row is best-effort: failures are logged and swallowed.
+        db = getattr(self, "_session_db", None)
+        sid = getattr(self, "_session_id", "")
+        if not (db and sid):
+            return
+
+        clear = getattr(db, "clear_compression_failure_cooldown", None)
+        if clear is None:
+            return
+        try:
+            clear(sid)
+        except sqlite3.Error as exc:
+            logger.debug("compression failure cooldown clear failed: %s", exc)
+        except Exception as exc:
+            logger.debug("compression failure cooldown clear failed (non-sqlite): %s", exc)
+
    def update_model(
        self,
        model: str,
@@ -863,6 +963,8 @@ class ContextCompressor(ContextEngine):
        self.awaiting_real_usage_after_compression = False

        self.summary_model = summary_model_override or ""
+        self._session_db: Any = None
+        self._session_id: str = ""

        # Stores the previous compaction summary for iterative updates
        self._previous_summary: Optional[str] = None
@@ -971,6 +1073,23 @@ class ContextCompressor(ContextEngine):
        tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
        if tokens < self.threshold_tokens:
            return False
+        # Do not trigger compression while the summary LLM is in cooldown.
+        # On a 429/transient failure _generate_summary() sets a cooldown and
+        # returns None; compress() then inserts a static fallback marker and
+        # returns. Tokens stay above threshold, so without this guard every
+        # subsequent turn re-fires _compress_context() — re-inserting the
+        # marker and re-entering the loop, making the CLI appear frozen until
+        # the cooldown expires (issue #11529). Manual /compress passes
+        # force=True, which clears this cooldown in compress() before running,
+        # so it still retries immediately.
+        _cooldown_remaining = self._summary_failure_cooldown_until - time.monotonic()
+        if _cooldown_remaining > 0:
+            if not self.quiet_mode:
+                logger.debug(
+                    "Compression deferred — summary LLM in cooldown for %.0fs more",
+                    _cooldown_remaining,
+                )
+            return False
        # Anti-thrashing: back off if recent compressions were ineffective
        if self._ineffective_compression_count >= 2:
            if not self.quiet_mode:
@@ -1448,7 +1567,7 @@ Summary generation was unavailable, so this is a best-effort deterministic fallb
        self._last_aux_model_failure_error = _err_text
        self._last_aux_model_failure_model = self.summary_model
        self.summary_model = ""  # empty = use main model
-        self._summary_failure_cooldown_until = 0.0  # no cooldown — retry immediately
+        self._clear_compression_failure_cooldown()  # no cooldown — retry immediately

    def _generate_summary(
        self,
@@ -1666,7 +1785,15 @@ This compaction should PRIORITISE preserving all information related to the focu
            # retry (_generate_summary recursion) re-enters harmlessly.
            with aux_interrupt_protection():
                response = call_llm(**call_kwargs)
-            content = response.choices[0].message.content
+            # ``_validate_llm_response`` only guarantees ``choices[0].message``
+            # exists, not that it's an object with ``.content``. Some
+            # OpenAI-compatible proxies / local backends return a dict- or
+            # str-shaped message; coerce defensively instead of crashing.
+            message = response.choices[0].message
+            if isinstance(message, dict):
+                content = message.get("content")
+            else:
+                content = getattr(message, "content", message)
            # Handle cases where content is not a string (e.g., dict from llama.cpp)
            if not isinstance(content, str):
                content = str(content) if content else ""
@@ -1691,7 +1818,7 @@ This compaction should PRIORITISE preserving all information related to the focu
            summary = redact_sensitive_text(content.strip())
            # Store for iterative updates on next compaction
            self._previous_summary = summary
-            self._summary_failure_cooldown_until = 0.0
+            self._clear_compression_failure_cooldown()
            self._summary_model_fallen_back = False
            self._last_summary_error = None
            self._last_summary_auth_failure = False
@@ -1711,7 +1838,10 @@ This compaction should PRIORITISE preserving all information related to the focu
            # a main-model retry before any cooldown. (#11978, #11914)
            if isinstance(e, RuntimeError) and "no llm provider configured" in str(e).lower():
                # No provider configured — long cooldown, unlikely to self-resolve
-                self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+                self._record_compression_failure_cooldown(
+                    _SUMMARY_FAILURE_COOLDOWN_SECONDS,
+                    "no auxiliary LLM provider configured",
+                )
                self._last_summary_error = "no auxiliary LLM provider configured"
                logger.warning("Context compression: no provider available for "
                                "summary. Middle turns will be dropped without summary "
@@ -1823,10 +1953,10 @@ This compaction should PRIORITISE preserving all information related to the focu
            # streaming premature-close) — shorter cooldown for JSON decode and
            # streaming-closed since those conditions can self-resolve quickly.
            _transient_cooldown = 30 if (_is_json_decode or _is_streaming_closed) else 60
-            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
            err_text = str(e).strip() or e.__class__.__name__
            if len(err_text) > 220:
                err_text = err_text[:217].rstrip() + "..."
+            self._record_compression_failure_cooldown(_transient_cooldown, err_text)
            self._last_summary_error = err_text
            # A terminal connection/network failure (we reach this branch only
            # after any main-model fallback has already been tried or is
@@ -2405,8 +2535,8 @@ This compaction should PRIORITISE preserving all information related to the focu
        # Manual /compress (force=True) bypasses the failure cooldown so the
        # user can retry immediately after an auto-compress abort.  Without
        # this, /compress would silently no-op for 30-60s after a failure.
-        if force and self._summary_failure_cooldown_until > 0.0:
-            self._summary_failure_cooldown_until = 0.0
+        if force:
+            self._clear_compression_failure_cooldown()
        n_messages = len(messages)
        # Only need head + 3 tail messages minimum (token budget decides the real tail size)
        _min_for_compress = self._protect_head_size(messages) + 3 + 1
--- a/agent/context_references.py
+++ b/agent/context_references.py
@@ -152,13 +152,24 @@ async def preprocess_context_references_async(
    blocks: list[str] = []
    injected_tokens = 0

-    for ref in refs:
-        warning, block = await _expand_reference(
-            ref,
-            cwd_path,
-            url_fetcher=url_fetcher,
-            allowed_root=allowed_root_path,
+    # Expand all references concurrently. Each _expand_reference is independent
+    # (no shared state during expansion) — a message with several @url: refs
+    # would otherwise pay one full web_extract round-trip per ref in series.
+    # gather preserves positional order, so we reassemble warnings/blocks in the
+    # original ref order exactly as the prior serial loop did; the token-budget
+    # check below is unchanged (it runs once, after all refs are expanded).
+    expanded = await asyncio.gather(
+        *(
+            _expand_reference(
+                ref,
+                cwd_path,
+                url_fetcher=url_fetcher,
+                allowed_root=allowed_root_path,
+            )
+            for ref in refs
        )
+    )
+    for warning, block in expanded:
        if warning:
            warnings.append(warning)
        if block:
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -32,6 +32,7 @@ import logging
 import os
 import tempfile
 import uuid
+import threading
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Optional, Tuple
@@ -71,6 +72,85 @@ def _compression_lock_holder(agent: Any) -> str:
    )


+class _CompressionLockLeaseRefresher:
+    """Owns a daemon thread that periodically refreshes a compression lock's lease."""
+
+    def __init__(
+        self,
+        db: Any,
+        session_id: str,
+        holder: str,
+        ttl_seconds: float,
+        refresh_interval_seconds: float | None = None,
+    ) -> None:
+        self._db = db
+        self._session_id = session_id
+        self._holder = holder
+        self._ttl_seconds = ttl_seconds
+        # Default cadence: half the TTL clamped to [1s, 60s]; floor of 0.1s overall.
+        interval = refresh_interval_seconds
+        if interval is None:
+            interval = max(1.0, min(60.0, ttl_seconds / 2.0))
+        self._refresh_interval_seconds = max(0.1, float(interval))
+        # Failure budget = ticks per TTL: a lone blip recovers on the next tick,
+        # a persistent failure gives up before the lease could outlive its TTL.
+        # Floored at 1 so an interval >= ttl still tolerates a single blip.
+        ticks_per_lease = int(self._ttl_seconds / self._refresh_interval_seconds)
+        self._max_consecutive_failures = max(1, ticks_per_lease)
+        self._stop = threading.Event()
+        self._thread = threading.Thread(
+            target=self._run, name="compression-lock-refresh", daemon=True
+        )
+
+    def start(self) -> "_CompressionLockLeaseRefresher":
+        """Launch the refresher thread; returns ``self`` so calls can chain."""
+        self._thread.start()
+        return self
+
+    def stop(self) -> None:
+        """Signal the loop to exit and wait briefly (1s) for the thread."""
+        self._stop.set()
+        # The join may time out while a refresh UPDATE is in flight; that is
+        # safe — the thread is a daemon, and a late refresh of an already
+        # released lock matches rowcount 0 (a no-op).  Returning from here
+        # only means the thread was signalled, not that it has quiesced.
+        if not self._thread.is_alive():
+            return
+        if threading.current_thread() is not self._thread:
+            self._thread.join(timeout=1.0)
+
+    def _run(self) -> None:
+        """Refresh the lease every interval until stopped or out of budget."""
+        # A falsy refresh is ambiguous: a transient DB blip (write contention
+        # escaping _execute_write's retry budget, a momentary "database is
+        # locked") looks the same as genuinely lost ownership.  So one
+        # failure never ends the loop; only _max_consecutive_failures in a
+        # row (about one TTL's worth of ticks) does.
+        failures = 0
+        while not self._stop.wait(self._refresh_interval_seconds):
+            try:
+                ok = self._db.refresh_compression_lock(
+                    self._session_id,
+                    self._holder,
+                    ttl_seconds=self._ttl_seconds,
+                )
+            except Exception as exc:
+                logger.debug("compression lock refresh raised: %s", exc)
+                ok = False
+            if ok:
+                failures = 0
+                continue
+            failures += 1
+            if failures < self._max_consecutive_failures:
+                continue
+            logger.debug(
+                "compression lock refresh failed %d times in a row; "
+                "stopping lease refresher for session %s",
+                failures, self._session_id,
+            )
+            break
+
+
 def check_compression_model_feasibility(agent: Any) -> None:
    """Warn at session start if the auxiliary compression model's context
    window is smaller than the main model's compression threshold.
@@ -420,11 +500,17 @@ def compress_context(
    # and proceed with compression.  Skipping the lock risks a rare
    # concurrent-compression session fork; an infinite no-progress loop
    # that never compresses at all is strictly worse.
+    try:
+        _lock_ttl = float(getattr(agent, "_compression_lock_ttl_seconds", 300.0) or 300.0)
+    except (TypeError, ValueError):
+        _lock_ttl = 300.0
+    _lock_refresh_interval = getattr(agent, "_compression_lock_refresh_interval", None)
+    _lock_refresher: Optional[_CompressionLockLeaseRefresher] = None
    if _lock_db is not None and _lock_sid:
        _lock_holder = _compression_lock_holder(agent)
        try:
            _lock_acquired = _lock_db.try_acquire_compression_lock(
-                _lock_sid, _lock_holder
+                _lock_sid, _lock_holder, ttl_seconds=_lock_ttl
            )
        except Exception as _lock_err:
            # Broken/absent lock subsystem (version skew, etc.).  Log once
@@ -467,9 +553,19 @@ def compress_context(
            if not _existing_sp:
                _existing_sp = agent._build_system_prompt(system_message)
            return messages, _existing_sp
+        if _lock_holder is not None:
+            _lock_refresher = _CompressionLockLeaseRefresher(
+                _lock_db,
+                _lock_sid,
+                _lock_holder,
+                _lock_ttl,
+                _lock_refresh_interval,
+            ).start()

    def _release_lock() -> None:
        """Release the lock keyed on the OLD session_id (before rotation)."""
+        if _lock_refresher is not None:
+            _lock_refresher.stop()
        if _lock_db is not None and _lock_sid and _lock_holder:
            try:
                _lock_db.release_compression_lock(_lock_sid, _lock_holder)
@@ -488,7 +584,11 @@ def compress_context(
    except TypeError:
        # Plugin context engine with strict signature that doesn't accept
        # focus_topic / force — fall back to calling without them.
-        compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens)
+        try:
+            compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens)
+        except BaseException:
+            _release_lock()
+            raise
    except BaseException:
        # ANY exception during compress() must release the lock so the
        # session isn't permanently blocked from future compression.
@@ -501,328 +601,332 @@ def compress_context(
    # session has logically ended), and let auto-compress callers detect
    # the no-op via len(returned) == len(input).
    if getattr(agent.context_compressor, "_last_compress_aborted", False):
-        _err = getattr(agent.context_compressor, "_last_summary_error", None) or "unknown error"
-        if getattr(agent, "_last_compression_summary_warning", None) != _err:
-            agent._last_compression_summary_warning = _err
-            agent._emit_warning(
-                f"⚠ Compression aborted: {_err}. "
-                "No messages were dropped — conversation continues unchanged. "
-                "Run /compress to retry, or /new to start a fresh session."
-            )
-        _existing_sp = getattr(agent, "_cached_system_prompt", None)
-        if not _existing_sp:
-            _existing_sp = agent._build_system_prompt(system_message)
-        _release_lock()  # compression aborted — no rotation will happen
-        return messages, _existing_sp
-
-    summary_error = getattr(agent.context_compressor, "_last_summary_error", None)
-    if summary_error:
-        if getattr(agent, "_last_compression_summary_warning", None) != summary_error:
-            agent._last_compression_summary_warning = summary_error
-            agent._emit_warning(
-                f"⚠ Compression summary failed: {summary_error}. "
-                "Inserted a fallback context marker."
-            )
-    else:
-        # No hard failure — but did the configured aux model error out
-        # and get recovered by retrying on main?  Surface that so users
-        # know their auxiliary.compression.model setting is broken even
-        # though compression succeeded.
-        _aux_fail_model = getattr(agent.context_compressor, "_last_aux_model_failure_model", None)
-        _aux_fail_err = getattr(agent.context_compressor, "_last_aux_model_failure_error", None)
-        if _aux_fail_model:
-            # Dedup on (model, error) so we don't spam on every compaction
-            _aux_key = (_aux_fail_model, _aux_fail_err)
-            if getattr(agent, "_last_aux_fallback_warning_key", None) != _aux_key:
-                agent._last_aux_fallback_warning_key = _aux_key
-                agent._emit_warning(
-                    f"ℹ Configured compression model '{_aux_fail_model}' failed "
-                    f"({_aux_fail_err or 'unknown error'}). Recovered using main model — "
-                    "check auxiliary.compression.model in config.yaml."
-                )
-
-    todo_snapshot = agent._todo_store.format_for_injection()
-    if todo_snapshot:
-        compressed.append({"role": "user", "content": todo_snapshot})
-
-    agent._invalidate_system_prompt()
-    new_system_prompt = agent._build_system_prompt(system_message)
-    agent._cached_system_prompt = new_system_prompt
-
-    if agent._session_db:
        try:
-            # Trigger memory extraction on the current session before the
-            # transcript is rewritten (runs in BOTH modes — the logical
-            # conversation's pre-compaction turns are about to be summarized
-            # away regardless of whether the id rotates).
-            agent.commit_memory_session(messages)
+            _err = getattr(agent.context_compressor, "_last_summary_error", None) or "unknown error"
+            if getattr(agent, "_last_compression_summary_warning", None) != _err:
+                agent._last_compression_summary_warning = _err
+                agent._emit_warning(
+                    f"⚠ Compression aborted: {_err}. "
+                    "No messages were dropped — conversation continues unchanged. "
+                    "Run /compress to retry, or /new to start a fresh session."
+                )
+            _existing_sp = getattr(agent, "_cached_system_prompt", None)
+            if not _existing_sp:
+                _existing_sp = agent._build_system_prompt(system_message)
+            return messages, _existing_sp
+        finally:
+            _release_lock()

-            if in_place:
-                # ── In-place compaction: keep the same session_id ──────────
-                # No end_session, no new row, no parent_session_id, no title
-                # renumber, no contextvar/env/logging re-sync. The session's
-                # id, title, cwd, /goal, and gateway routing all stay put.
-                #
-                # Durable, NON-DESTRUCTIVE replace: soft-archive the
-                # pre-compaction turns (active=0, kept on disk + FTS-searchable +
-                # recoverable) and insert `compressed` as the new live (active=1)
-                # set, atomically. `compressed` already carries the surviving
-                # tail (current-turn messages the compressor kept via
-                # protect_last_n), so we DON'T pre-flush here — a flush would
-                # INSERT current-turn rows that archive_and_compact would then
-                # archive alongside the rest (harmless but wasted writes). The
-                # live-context load filters active=1, so a resume reloads ONLY
-                # the compacted set; the original turns remain under the SAME id
-                # for search/recovery (Teknium review — keep one durable id
-                # WITHOUT destroying history, unlike a hard replace_messages).
-                # See #38763.
-                agent._session_db.archive_and_compact(agent.session_id, compressed)
-                # Reset the flush identity set so the next turn's appends are
-                # diffed against the COMPACTED transcript: the compacted dicts
-                # are passed as conversation_history next turn and skipped by
-                # identity, so only genuinely new turn messages get appended
-                # (no dup of the summary, no resurrection of dropped turns).
-                agent._flushed_db_message_ids = set()
-                # Rotation-independent signal: the conversation was compacted in
-                # place (id unchanged). The gateway reads this (NOT an id-change
-                # diff) to re-baseline transcript handling.
-                compacted_in_place = True
-            else:
-                # ── Rotation (legacy): end this session, fork a continuation ─
-                # Flush any un-persisted current-turn messages to the OLD
-                # session before ending it, so they survive in the preserved
-                # parent transcript (#47202). (In-place skips this — see above.)
-                try:
-                    agent._flush_messages_to_session_db(messages)
-                except Exception:
-                    pass  # best-effort — don't block compression on a flush error
-                # Propagate title to the new session with auto-numbering
-                old_title = agent._session_db.get_session_title(agent.session_id)
-                agent._session_db.end_session(agent.session_id, "compression")
-                old_session_id = agent.session_id
-                agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
-                # Ordering contract: the agent thread updates the contextvar here;
-                # the gateway propagates to SessionEntry after run_in_executor returns.
-                try:
-                    from gateway.session_context import set_current_session_id
-
-                    set_current_session_id(agent.session_id)
-                except Exception:
-                    os.environ["HERMES_SESSION_ID"] = agent.session_id
-                # The gateway/tools session context (ContextVar + env) and the
-                # logging session context are SEPARATE mechanisms. The call above
-                # moves the former; the ``[session_id]`` tag on log lines comes
-                # from ``hermes_logging._session_context`` (set once per turn in
-                # conversation_loop.py). Without this, post-rotation log lines in
-                # the same turn keep the STALE old id while the message/DB/gateway
-                # state carry the new one — breaking log correlation exactly at the
-                # compaction boundary (see #34089). Guarded separately so a logging
-                # failure can never regress the routing update above.
-                try:
-                    from hermes_logging import set_session_context
-
-                    set_session_context(agent.session_id)
-                except Exception:
-                    pass
-                agent._session_db_created = False
-                try:
-                    agent._session_db.create_session(
-                        session_id=agent.session_id,
-                        source=agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
-                        model=agent.model,
-                        model_config=agent._session_init_model_config,
-                        parent_session_id=old_session_id,
+    try:
+        summary_error = getattr(agent.context_compressor, "_last_summary_error", None)
+        if summary_error:
+            if getattr(agent, "_last_compression_summary_warning", None) != summary_error:
+                agent._last_compression_summary_warning = summary_error
+                agent._emit_warning(
+                    f"⚠ Compression summary failed: {summary_error}. "
+                    "Inserted a fallback context marker."
+                )
+        else:
+            # No hard failure — but did the configured aux model error out
+            # and get recovered by retrying on main?  Surface that so users
+            # know their auxiliary.compression.model setting is broken even
+            # though compression succeeded.
+            _aux_fail_model = getattr(agent.context_compressor, "_last_aux_model_failure_model", None)
+            _aux_fail_err = getattr(agent.context_compressor, "_last_aux_model_failure_error", None)
+            if _aux_fail_model:
+                # Dedup on (model, error) so we don't spam on every compaction
+                _aux_key = (_aux_fail_model, _aux_fail_err)
+                if getattr(agent, "_last_aux_fallback_warning_key", None) != _aux_key:
+                    agent._last_aux_fallback_warning_key = _aux_key
+                    agent._emit_warning(
+                        f"ℹ Configured compression model '{_aux_fail_model}' failed "
+                        f"({_aux_fail_err or 'unknown error'}). Recovered using main model — "
+                        "check auxiliary.compression.model in config.yaml."
                    )
-                except Exception as _cs_err:
-                    # The child row could not be created (e.g. FK constraint,
-                    # contended write). Previously the outer handler simply
-                    # warned and let the agent continue on the NEW id — which
-                    # has no row in state.db, producing an orphan: the parent
-                    # is ended, the child is never indexed, and every
-                    # subsequent message is attributed to a session that
-                    # doesn't exist (#33906/#33907). Roll the live id back to
-                    # the parent so the conversation stays attached to a real,
-                    # indexed session instead of a phantom.
-                    logger.warning(
-                        "Compression child session create failed (%s) — "
-                        "rolling back to parent session %s to avoid an orphan.",
-                        _cs_err, old_session_id,
-                    )
-                    agent.session_id = old_session_id
+
+        todo_snapshot = agent._todo_store.format_for_injection()
+        if todo_snapshot:
+            compressed.append({"role": "user", "content": todo_snapshot})
+
+        agent._invalidate_system_prompt()
+        new_system_prompt = agent._build_system_prompt(system_message)
+        agent._cached_system_prompt = new_system_prompt
+
+        if agent._session_db:
+            try:
+                # Trigger memory extraction on the current session before the
+                # transcript is rewritten (runs in BOTH modes — the logical
+                # conversation's pre-compaction turns are about to be summarized
+                # away regardless of whether the id rotates).
+                agent.commit_memory_session(messages)
+
+                if in_place:
+                    # ── In-place compaction: keep the same session_id ──────────
+                    # No end_session, no new row, no parent_session_id, no title
+                    # renumber, no contextvar/env/logging re-sync. The session's
+                    # id, title, cwd, /goal, and gateway routing all stay put.
+                    #
+                    # Durable, NON-DESTRUCTIVE replace: soft-archive the
+                    # pre-compaction turns (active=0, kept on disk + FTS-searchable +
+                    # recoverable) and insert `compressed` as the new live (active=1)
+                    # set, atomically. `compressed` already carries the surviving
+                    # tail (current-turn messages the compressor kept via
+                    # protect_last_n), so we DON'T pre-flush here — a flush would
+                    # INSERT current-turn rows that archive_and_compact would then
+                    # archive alongside the rest (harmless but wasted writes). The
+                    # live-context load filters active=1, so a resume reloads ONLY
+                    # the compacted set; the original turns remain under the SAME id
+                    # for search/recovery (Teknium review — keep one durable id
+                    # WITHOUT destroying history, unlike a hard replace_messages).
+                    # See #38763.
+                    agent._session_db.archive_and_compact(agent.session_id, compressed)
+                    # Reset the flush identity set so the next turn's appends are
+                    # diffed against the COMPACTED transcript: the compacted dicts
+                    # are passed as conversation_history next turn and skipped by
+                    # identity, so only genuinely new turn messages get appended
+                    # (no dup of the summary, no resurrection of dropped turns).
+                    agent._flushed_db_message_ids = set()
+                    # Rotation-independent signal: the conversation was compacted in
+                    # place (id unchanged). The gateway reads this (NOT an id-change
+                    # diff) to re-baseline transcript handling.
+                    compacted_in_place = True
+                else:
+                    # ── Rotation (legacy): end this session, fork a continuation ─
+                    # Flush any un-persisted current-turn messages to the OLD
+                    # session before ending it, so they survive in the preserved
+                    # parent transcript (#47202). (In-place skips this — see above.)
+                    try:
+                        agent._flush_messages_to_session_db(messages)
+                    except Exception:
+                        pass  # best-effort — don't block compression on a flush error
+                    # Propagate title to the new session with auto-numbering
+                    old_title = agent._session_db.get_session_title(agent.session_id)
+                    agent._session_db.end_session(agent.session_id, "compression")
+                    old_session_id = agent.session_id
+                    agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
+                    # Ordering contract: the agent thread updates the contextvar here;
+                    # the gateway propagates to SessionEntry after run_in_executor returns.
                    try:
                        from gateway.session_context import set_current_session_id
+
                        set_current_session_id(agent.session_id)
                    except Exception:
                        os.environ["HERMES_SESSION_ID"] = agent.session_id
+                    # The gateway/tools session context (ContextVar + env) and the
+                    # logging session context are SEPARATE mechanisms. The call above
+                    # moves the former; the ``[session_id]`` tag on log lines comes
+                    # from ``hermes_logging._session_context`` (set once per turn in
+                    # conversation_loop.py). Without this, post-rotation log lines in
+                    # the same turn keep the STALE old id while the message/DB/gateway
+                    # state carry the new one — breaking log correlation exactly at the
+                    # compaction boundary (see #34089). Guarded separately so a logging
+                    # failure can never regress the routing update above.
                    try:
                        from hermes_logging import set_session_context
+
                        set_session_context(agent.session_id)
                    except Exception:
                        pass
-                    # Re-open the parent: it was ended above, but we're
-                    # continuing on it, so it must not stay closed.
+                    agent._session_db_created = False
                    try:
-                        agent._session_db.reopen_session(old_session_id)
-                    except Exception:
-                        pass
-                    old_session_id = None  # no rotation happened
-                    # The parent row already exists in state.db, so mark the
-                    # session as created — _ensure_db_session would otherwise
-                    # retry a (harmless INSERT OR IGNORE) create next turn.
+                        agent._session_db.create_session(
+                            session_id=agent.session_id,
+                            source=agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
+                            model=agent.model,
+                            model_config=agent._session_init_model_config,
+                            parent_session_id=old_session_id,
+                        )
+                    except Exception as _cs_err:
+                        # The child row could not be created (e.g. FK constraint,
+                        # contended write). Previously the outer handler simply
+                        # warned and let the agent continue on the NEW id — which
+                        # has no row in state.db, producing an orphan: the parent
+                        # is ended, the child is never indexed, and every
+                        # subsequent message is attributed to a session that
+                        # doesn't exist (#33906/#33907). Roll the live id back to
+                        # the parent so the conversation stays attached to a real,
+                        # indexed session instead of a phantom.
+                        logger.warning(
+                            "Compression child session create failed (%s) — "
+                            "rolling back to parent session %s to avoid an orphan.",
+                            _cs_err, old_session_id,
+                        )
+                        agent.session_id = old_session_id
+                        try:
+                            from gateway.session_context import set_current_session_id
+                            set_current_session_id(agent.session_id)
+                        except Exception:
+                            os.environ["HERMES_SESSION_ID"] = agent.session_id
+                        try:
+                            from hermes_logging import set_session_context
+                            set_session_context(agent.session_id)
+                        except Exception:
+                            pass
+                        # Re-open the parent: it was ended above, but we're
+                        # continuing on it, so it must not stay closed.
+                        try:
+                            agent._session_db.reopen_session(old_session_id)
+                        except Exception:
+                            pass
+                        old_session_id = None  # no rotation happened
+                        # The parent row already exists in state.db, so mark the
+                        # session as created — _ensure_db_session would otherwise
+                        # retry a (harmless INSERT OR IGNORE) create next turn.
+                        agent._session_db_created = True
+                        raise
                    agent._session_db_created = True
-                    raise
-                agent._session_db_created = True
-                # Carry a persistent /goal onto the continuation session.
-                # Compression mints a fresh child id; load_goal does a flat
-                # per-session lookup with no parent walk, so without this an
-                # active goal silently dies at the boundary (#33618).
-                try:
-                    from hermes_cli.goals import migrate_goal_to_session
-                    migrate_goal_to_session(old_session_id, agent.session_id, reason="compression")
-                except Exception as _goal_err:
-                    logger.debug("Could not migrate goal on compression: %s", _goal_err)
-                # Auto-number the title for the continuation session
-                if old_title:
+                    # Carry a persistent /goal onto the continuation session.
+                    # Compression mints a fresh child id; load_goal does a flat
+                    # per-session lookup with no parent walk, so without this an
+                    # active goal silently dies at the boundary (#33618).
                    try:
-                        new_title = agent._session_db.get_next_title_in_lineage(old_title)
-                        agent._session_db.set_session_title(agent.session_id, new_title)
-                    except (ValueError, Exception) as e:
-                        logger.debug("Could not propagate title on compression: %s", e)
+                        from hermes_cli.goals import migrate_goal_to_session
+                        migrate_goal_to_session(old_session_id, agent.session_id, reason="compression")
+                    except Exception as _goal_err:
+                        logger.debug("Could not migrate goal on compression: %s", _goal_err)
+                    # Auto-number the title for the continuation session
+                    if old_title:
+                        try:
+                            new_title = agent._session_db.get_next_title_in_lineage(old_title)
+                            agent._session_db.set_session_title(agent.session_id, new_title)
+                        except (ValueError, Exception) as e:
+                            logger.debug("Could not propagate title on compression: %s", e)

-            # Shared post-write steps (both modes target agent.session_id, which
-            # in-place keeps and rotation has already reassigned to the new id):
-            # refresh the stored system prompt and reset the flush cursor so the
-            # next turn re-bases its append diff.
-            agent._session_db.update_system_prompt(agent.session_id, new_system_prompt)
-            agent._last_flushed_db_idx = 0
-        except Exception as e:
-            # If the rotation rolled back to the parent (orphan-avoidance
-            # above), agent.session_id is the still-indexed parent and
-            # old_session_id was cleared — so this is recovery, not an
-            # un-indexed orphan. Otherwise an earlier step failed before the
-            # child was created and the warning's original meaning holds.
-            if locals().get("old_session_id") is None and not in_place:
-                logger.warning(
-                    "Compression rotation aborted and rolled back to the "
-                    "parent session (%s): %s", agent.session_id or "?", e,
-                )
-            else:
-                logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
+                # Shared post-write steps (both modes target agent.session_id, which
+                # in-place keeps and rotation has already reassigned to the new id):
+                # refresh the stored system prompt and reset the flush cursor so the
+                # next turn re-bases its append diff.
+                agent._session_db.update_system_prompt(agent.session_id, new_system_prompt)
+                agent._last_flushed_db_idx = 0
+            except Exception as e:
+                # If the rotation rolled back to the parent (orphan-avoidance
+                # above), agent.session_id is the still-indexed parent and
+                # old_session_id was cleared — so this is recovery, not an
+                # un-indexed orphan. Otherwise an earlier step failed before the
+                # child was created and the warning's original meaning holds.
+                if locals().get("old_session_id") is None and not in_place:
+                    logger.warning(
+                        "Compression rotation aborted and rolled back to the "
+                        "parent session (%s): %s", agent.session_id or "?", e,
+                    )
+                else:
+                    logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)

-    # Compaction-boundary bookkeeping, computed once. `old_session_id` is only
-    # bound in the rotation branch; in-place leaves it unset. `_boundary_parent`
-    # is the id the boundary notifications attribute the prior state to: the old
-    # id on rotation, the (unchanged) current id in-place.
-    _old_sid = locals().get("old_session_id")
-    _is_boundary = bool(_old_sid) or in_place
-    _boundary_parent = _old_sid or agent.session_id or ""
+        # Compaction-boundary bookkeeping, computed once. `old_session_id` is only
+        # bound in the rotation branch; in-place leaves it unset. `_boundary_parent`
+        # is the id the boundary notifications attribute the prior state to: the old
+        # id on rotation, the (unchanged) current id in-place.
+        _old_sid = locals().get("old_session_id")
+        _is_boundary = bool(_old_sid) or in_place
+        _boundary_parent = _old_sid or agent.session_id or ""

-    # Notify the context engine that a compaction boundary occurred. Plugin
-    # engines (e.g. hermes-lcm) use boundary_reason="compression" to preserve
-    # DAG lineage / checkpoint per-session state across the boundary instead of
-    # re-initializing fresh. See hermes-lcm#68. Built-in ContextCompressor
-    # ignores kwargs. Fires in BOTH modes: rotation passes old→new ids; in-place
-    # passes the SAME id (the boundary is real even though the id didn't move).
-    try:
-        if _is_boundary and hasattr(agent.context_compressor, "on_session_start"):
-            agent.context_compressor.on_session_start(
-                agent.session_id or "",
-                boundary_reason="compression",
-                old_session_id=_boundary_parent,
-                platform=getattr(agent, "platform", None) or "cli",
-                conversation_id=getattr(agent, "_gateway_session_key", None),
-            )
-    except Exception as _ce_err:
-        logger.debug("context engine on_session_start (compression): %s", _ce_err)
-
-    # Notify memory providers of the compaction boundary so provider-cached
-    # per-session state (Hindsight's _document_id, accumulated turn buffers,
-    # counters) refreshes. reset=False because the logical conversation
-    # continues. See #6672. Fires in BOTH modes: in-place uses the same id as
-    # parent (the conversation didn't fork, but the buffer must still be told
-    # the transcript was compacted so it doesn't double-count dropped turns).
-    try:
-        if _is_boundary and agent._memory_manager:
-            agent._memory_manager.on_session_switch(
-                agent.session_id or "",
-                parent_session_id=_boundary_parent,
-                reset=False,
-                reason="compression",
-            )
-    except Exception as _me_err:
-        logger.debug("memory manager on_session_switch (compression): %s", _me_err)
-
-    # Warn on repeated compressions (quality degrades with each pass).
-    # Route through _emit_status (like the other compression warnings above)
-    # so the warning reaches the TUI / Telegram / Discord via status_callback,
-    # not just CLI stdout. _emit_status still _vprints for the CLI, and
-    # storing it on _compression_warning lets replay_compression_warning
-    # re-deliver it once a late-bound gateway status_callback is wired (#36908).
-    _cc = agent.context_compressor.compression_count
-    if _cc >= 2:
-        _cc_msg = (
-            f"{agent.log_prefix}⚠️  Session compressed {_cc} times — "
-            f"accuracy may degrade. Consider /new to start fresh."
-        )
-        agent._compression_warning = _cc_msg
-        agent._emit_status(_cc_msg)
-
-    # Emit session:compress event so hooks (e.g. MemPalace sync) can ingest
-    # the completed old session before its details are lost. In in-place mode
-    # there is no old id (same session); ``in_place=True`` tells hooks the
-    # transcript was compacted on the same id rather than rotated.
-    if getattr(agent, "event_callback", None):
+        # Notify the context engine that a compaction boundary occurred. Plugin
+        # engines (e.g. hermes-lcm) use boundary_reason="compression" to preserve
+        # DAG lineage / checkpoint per-session state across the boundary instead of
+        # re-initializing fresh. See hermes-lcm#68. Built-in ContextCompressor
+        # ignores kwargs. Fires in BOTH modes: rotation passes old→new ids; in-place
+        # passes the SAME id (the boundary is real even though the id didn't move).
        try:
-            agent.event_callback("session:compress", {
-                "platform": agent.platform or "",
-                "session_id": agent.session_id,
-                "old_session_id": _old_sid or "",
-                "in_place": in_place,
-                "compression_count": agent.context_compressor.compression_count,
-            })
-        except Exception as e:
-            logger.debug("event_callback error on session:compress: %s", e)
+            if _is_boundary and hasattr(agent.context_compressor, "on_session_start"):
+                agent.context_compressor.on_session_start(
+                    agent.session_id or "",
+                    boundary_reason="compression",
+                    old_session_id=_boundary_parent,
+                    platform=getattr(agent, "platform", None) or "cli",
+                    conversation_id=getattr(agent, "_gateway_session_key", None),
+                )
+        except Exception as _ce_err:
+            logger.debug("context engine on_session_start (compression): %s", _ce_err)

-    # Surface the compaction mode to the caller (run_conversation / gateway)
-    # via a rotation-independent flag. The gateway uses this — NOT an
-    # id-change diff — to re-baseline transcript handling (history_offset=0 +
-    # rewrite on the same id) when compaction happened in place. See #38763.
-    agent._last_compaction_in_place = compacted_in_place
+        # Notify memory providers of the compaction boundary so provider-cached
+        # per-session state (Hindsight's _document_id, accumulated turn buffers,
+        # counters) refreshes. reset=False because the logical conversation
+        # continues. See #6672. Fires in BOTH modes: in-place uses the same id as
+        # parent (the conversation didn't fork, but the buffer must still be told
+        # the transcript was compacted so it doesn't double-count dropped turns).
+        try:
+            if _is_boundary and agent._memory_manager:
+                agent._memory_manager.on_session_switch(
+                    agent.session_id or "",
+                    parent_session_id=_boundary_parent,
+                    reset=False,
+                    reason="compression",
+                )
+        except Exception as _me_err:
+            logger.debug("memory manager on_session_switch (compression): %s", _me_err)

-    # Keep the post-compression rough estimate for diagnostics, but do not
-    # treat it as provider-reported prompt usage. Schema-heavy rough estimates
-    # can remain above threshold even after the next real API request fits.
-    _compressed_est = estimate_request_tokens_rough(
-        compressed,
-        system_prompt=new_system_prompt or "",
-        tools=agent.tools or None,
-    )
-    agent.context_compressor.last_compression_rough_tokens = _compressed_est
-    agent.context_compressor.last_prompt_tokens = -1
-    agent.context_compressor.last_completion_tokens = 0
-    agent.context_compressor.awaiting_real_usage_after_compression = True
+        # Warn on repeated compressions (quality degrades with each pass).
+        # Route through _emit_status (like the other compression warnings above)
+        # so the warning reaches the TUI / Telegram / Discord via status_callback,
+        # not just CLI stdout. _emit_status still _vprints for the CLI, and
+        # storing it on _compression_warning lets replay_compression_warning
+        # re-deliver it once a late-bound gateway status_callback is wired (#36908).
+        _cc = agent.context_compressor.compression_count
+        if _cc >= 2:
+            _cc_msg = (
+                f"{agent.log_prefix}⚠️  Session compressed {_cc} times — "
+                f"accuracy may degrade. Consider /new to start fresh."
+            )
+            agent._compression_warning = _cc_msg
+            agent._emit_status(_cc_msg)

-    # Clear the file-read dedup cache.  After compression the original
-    # read content is summarised away — if the model re-reads the same
-    # file it needs the full content, not a "file unchanged" stub.
-    try:
-        from tools.file_tools import reset_file_dedup
-        reset_file_dedup(task_id)
-    except Exception:
-        pass
+        # Emit session:compress event so hooks (e.g. MemPalace sync) can ingest
+        # the completed old session before its details are lost. In in-place mode
+        # there is no old id (same session); ``in_place=True`` tells hooks the
+        # transcript was compacted on the same id rather than rotated.
+        if getattr(agent, "event_callback", None):
+            try:
+                agent.event_callback("session:compress", {
+                    "platform": agent.platform or "",
+                    "session_id": agent.session_id,
+                    "old_session_id": _old_sid or "",
+                    "in_place": in_place,
+                    "compression_count": agent.context_compressor.compression_count,
+                })
+            except Exception as e:
+                logger.debug("event_callback error on session:compress: %s", e)

-    logger.info(
-        "context compression done: session=%s messages=%d->%d rough_tokens=~%s awaiting_real_usage=true",
-        agent.session_id or "none", _pre_msg_count, len(compressed),
-        f"{_compressed_est:,}",
-    )
-    # Release the lock on the OLD session_id only AFTER rotation completed
-    # and all post-rotation bookkeeping (memory manager, context engine,
-    # file dedup) ran. A concurrent path that wakes up the moment we
-    # release will see the NEW session_id in state.db / SessionEntry and
-    # acquire on that — no race against our just-finished work.
-    _release_lock()
-    return compressed, new_system_prompt
+        # Surface the compaction mode to the caller (run_conversation / gateway)
+        # via a rotation-independent flag. The gateway uses this — NOT an
+        # id-change diff — to re-baseline transcript handling (history_offset=0 +
+        # rewrite on the same id) when compaction happened in place. See #38763.
+        agent._last_compaction_in_place = compacted_in_place
+
+        # Keep the post-compression rough estimate for diagnostics, but do not
+        # treat it as provider-reported prompt usage. Schema-heavy rough estimates
+        # can remain above threshold even after the next real API request fits.
+        _compressed_est = estimate_request_tokens_rough(
+            compressed,
+            system_prompt=new_system_prompt or "",
+            tools=agent.tools or None,
+        )
+        agent.context_compressor.last_compression_rough_tokens = _compressed_est
+        agent.context_compressor.last_prompt_tokens = -1
+        agent.context_compressor.last_completion_tokens = 0
+        agent.context_compressor.awaiting_real_usage_after_compression = True
+
+        # Clear the file-read dedup cache.  After compression the original
+        # read content is summarised away — if the model re-reads the same
+        # file it needs the full content, not a "file unchanged" stub.
+        try:
+            from tools.file_tools import reset_file_dedup
+            reset_file_dedup(task_id)
+        except Exception:
+            pass
+
+        logger.info(
+            "context compression done: session=%s messages=%d->%d rough_tokens=~%s awaiting_real_usage=true",
+            agent.session_id or "none", _pre_msg_count, len(compressed),
+            f"{_compressed_est:,}",
+        )
+        return compressed, new_system_prompt
+    finally:
+        # Release the lock on the OLD session_id only AFTER rotation completed
+        # and all post-rotation bookkeeping (memory manager, context engine,
+        # file dedup) ran. A concurrent path that wakes up the moment we
+        # release will see the NEW session_id in state.db / SessionEntry and
+        # acquire on that — no race against our just-finished work.
+        _release_lock()


 def try_shrink_image_parts_in_messages(
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -52,6 +52,7 @@ from agent.model_metadata import (
    estimate_messages_tokens_rough,
    estimate_request_tokens_rough,
    get_context_length_from_provider_error,
+    is_output_cap_error,
    parse_available_output_tokens_from_error,
    save_context_length,
 )
@@ -1167,11 +1168,22 @@ def run_conversation(
                # stream.  Mirror the ACP exclusion used for Responses
                # API upgrade (lines ~1083-1085).
                elif (
-                    agent.provider in {"copilot-acp", "moa"}
+                    agent.provider in {"copilot-acp"}
                    or str(agent.base_url or "").lower().startswith("acp://copilot")
                    or str(agent.base_url or "").lower().startswith("acp+tcp://")
                ):
                    _use_streaming = False
+                # MoA streams only when a display/TTS consumer is present to
+                # receive the deltas. MoAChatCompletions.create() honors
+                # stream=True (runs the references, then returns the aggregator's
+                # raw token stream) and is reached here because, for provider
+                # "moa", _create_request_openai_client returns the MoA facade
+                # itself. Without consumers (quiet mode, subagents, health-check
+                # probes) we keep the complete-response path: the facade returns a
+                # whole response when stream is not requested, preserving the
+                # prior behavior for those callers.
+                elif agent.provider == "moa" and not agent._has_stream_consumers():
+                    _use_streaming = False
                elif not agent._has_stream_consumers():
                    # No display/TTS consumer. Still prefer streaming for
                    # health checking, but skip for Mock clients in tests
@@ -2919,6 +2931,7 @@ def run_conversation(
                is_rate_limited = classified.reason in {
                    FailoverReason.rate_limit,
                    FailoverReason.billing,
+                    FailoverReason.upstream_rate_limit,
                }
                _is_transport_failure = classified.reason in {
                    FailoverReason.timeout,
@@ -2933,13 +2946,30 @@ def run_conversation(
                    # still recover.  See _pool_may_recover_from_rate_limit
                    # for the single-credential-pool and CloudCode-quota
                    # exceptions.  Fixes #11314 and #13636.
-                    pool_may_recover = _ra()._pool_may_recover_from_rate_limit(
-                        agent._credential_pool,
-                        provider=agent.provider,
-                        base_url=getattr(agent, "base_url", None),
+                    #
+                    # Exception: an upstream-aggregator 429 — the credential
+                    # pool can't help when the *upstream* model (DeepSeek,
+                    # etc.) is throttling OpenRouter, so always fall back to a
+                    # different model regardless of pool state.
+                    _is_upstream = classified.reason == FailoverReason.upstream_rate_limit
+                    pool_may_recover = (
+                        False if _is_upstream
+                        else _ra()._pool_may_recover_from_rate_limit(
+                            agent._credential_pool,
+                            provider=agent.provider,
+                            base_url=getattr(agent, "base_url", None),
+                        )
                    )
                    if not pool_may_recover:
-                        if classified.reason == FailoverReason.billing:
+                        if _is_upstream:
+                            _upstream_name = (classified.error_context or {}).get(
+                                "upstream_provider", "aggregator"
+                            )
+                            agent._buffer_status(
+                                f"⚠️ Upstream {_upstream_name} rate-limited — "
+                                "switching to fallback model..."
+                            )
+                        elif classified.reason == FailoverReason.billing:
                            agent._buffer_status(
                                "⚠️ Billing or credits exhausted — switching to fallback provider..."
                            )
@@ -3213,6 +3243,45 @@ def run_conversation(
                        _retry.restart_with_compressed_messages = True
                        break

+                    # The error is output-cap-shaped (about max_tokens being
+                    # too large) but the provider's wording didn't let us parse
+                    # the available output budget.  Compression CANNOT help here
+                    # — the input already fits; the call fails deterministically
+                    # on the oversized max_tokens.  Routing it into compression
+                    # re-sends the same max_tokens, gets the identical 400, and
+                    # death-loops until "cannot compress further" (#55546).
+                    # Fail fast with an actionable message instead of looping.
+                    if is_output_cap_error(error_msg):
+                        agent._flush_status_buffer()
+                        agent._vprint(
+                            f"{agent.log_prefix}❌ The provider rejected the request because "
+                            f"max_tokens exceeds its output cap for this model.",
+                            force=True,
+                        )
+                        agent._vprint(
+                            f"{agent.log_prefix}   💡 Lower model.max_tokens in your config.yaml to "
+                            f"at or below the model's max-output limit. "
+                            f"(This is an output-cap error, not a context overflow — "
+                            f"compression cannot fix it.)",
+                            force=True,
+                        )
+                        logger.error(
+                            f"{agent.log_prefix}Output-cap error not routed into compression "
+                            f"(max_tokens over provider cap): {error_msg[:200]}"
+                        )
+                        agent._persist_session(messages, conversation_history)
+                        return {
+                            "messages": messages,
+                            "completed": False,
+                            "api_calls": api_call_count,
+                            "error": (
+                                "max_tokens exceeds the provider's output cap for this model. "
+                                "Lower model.max_tokens in config.yaml."
+                            ),
+                            "partial": True,
+                            "failed": True,
+                        }
+
                    # Error is about the INPUT being too large.  Only reduce
                    # context_length when the provider explicitly reports the
                    # real lower limit.  If the provider only says "input
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -616,17 +616,32 @@ class CredentialPool:
            file_refresh = creds.get("refreshToken", "")
            file_access = creds.get("accessToken", "")
            file_expires = creds.get("expiresAt", 0)
-            # If the credentials file has a different token pair, sync it
-            if file_refresh and file_refresh != entry.refresh_token:
-                logger.debug("Pool entry %s: syncing tokens from credentials file (refresh token changed)", entry.id)
+            # Sync when either token changed.  Access tokens can be re-issued
+            # without a new refresh token (silent re-issue path), so checking
+            # only refresh_token misses that case and leaves a stale
+            # access_token in the pool → 401 on every request until the pool
+            # entry's exhausted TTL expires.
+            entry_access = entry.access_token or ""
+            entry_refresh = entry.refresh_token or ""
+            if (file_access or file_refresh) and (
+                (file_access and file_access != entry_access)
+                or (file_refresh and file_refresh != entry_refresh)
+            ):
+                logger.debug(
+                    "Pool entry %s: syncing tokens from credentials file (tokens changed)",
+                    entry.id,
+                )
                updated = replace(
                    entry,
-                    access_token=file_access,
-                    refresh_token=file_refresh,
-                    expires_at_ms=file_expires,
+                    access_token=file_access or entry.access_token,
+                    refresh_token=file_refresh or entry.refresh_token,
+                    expires_at_ms=file_expires or entry.expires_at_ms,
                    last_status=None,
                    last_status_at=None,
                    last_error_code=None,
+                    last_error_reason=None,
+                    last_error_message=None,
+                    last_error_reset_at=None,
                )
                self._replace_entry(entry, updated)
                self._persist()
@@ -1884,11 +1899,16 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token
            token, source = resolve_copilot_token()
            if token:
-                api_token = get_copilot_api_token(token)
+                api_token, enterprise_base_url = get_copilot_api_token(token)
                source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
                if not _is_suppressed(provider, source_name):
                    active_sources.add(source_name)
                    pconfig = PROVIDER_REGISTRY.get(provider)
+                    # Use enterprise base URL from token exchange if available,
+                    # otherwise fall back to the provider's default.
+                    effective_base_url = enterprise_base_url or (
+                        pconfig.inference_base_url if pconfig else ""
+                    )
                    changed |= _upsert_entry(
                        entries,
                        provider,
@@ -1897,7 +1917,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                            "source": source_name,
                            "auth_type": AUTH_TYPE_API_KEY,
                            "access_token": api_token,
-                            "base_url": pconfig.inference_base_url if pconfig else "",
+                            "base_url": effective_base_url,
                            "label": source,
                        },
                    )
@@ -2142,7 +2162,12 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        if _is_source_suppressed(provider, source):
            continue
        active_sources.add(source)
-        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
+        # Claude Code OAuth tokens are the only Anthropic credentials that should flow into the OAuth refresh path.
+        auth_type = (
+            AUTH_TYPE_OAUTH
+            if provider == "anthropic" and token.startswith("sk-ant-oat")
+            else AUTH_TYPE_API_KEY
+        )
        base_url = env_url or pconfig.inference_base_url
        if provider == "kimi-coding":
            base_url = _resolve_kimi_base_url(token, pconfig.inference_base_url, env_url)
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -31,6 +31,9 @@ class FailoverReason(enum.Enum):
    # Billing / quota
    billing = "billing"                  # 402 or confirmed credit exhaustion — rotate immediately
    rate_limit = "rate_limit"            # 429 or quota-based throttling — backoff then rotate
+    # Upstream model rate-limited (aggregator 429) — fallback to a different
+    # model, NOT credential rotation. The user's key is healthy.
+    upstream_rate_limit = "upstream_rate_limit"

    # Server-side
    overloaded = "overloaded"            # 503/529 — provider overloaded, backoff
@@ -909,6 +912,22 @@ def _classify_by_status(
                FailoverReason.overloaded,
                retryable=True,
            )
+        # Distinguish an OpenRouter-aggregator upstream 429 (an upstream model
+        # like DeepSeek rate-limited OpenRouter's aggregate traffic) from an
+        # account-level 429 (the user's key is actually throttled). OpenRouter
+        # wraps upstream errors with the outer message "Provider returned
+        # error" — the user's key is healthy, so marking it exhausted / rotating
+        # is wrong and burns the key for ~24min. Fall back to a different model.
+        if _is_openrouter_upstream_error(body, provider):
+            upstream_provider = _extract_upstream_provider_name(body)
+            ctx = {"upstream_provider": upstream_provider} if upstream_provider else {}
+            return result_fn(
+                FailoverReason.upstream_rate_limit,
+                retryable=True,
+                should_rotate_credential=False,
+                should_fallback=True,
+                error_context=ctx,
+            )
        return result_fn(
            FailoverReason.rate_limit,
            retryable=True,
@@ -1445,3 +1464,49 @@ def _extract_message(error: Exception, body: dict) -> str:
            return msg.strip()[:500]
    # Fallback to str(error)
    return str(error)[:500]
+
+
+def _is_openrouter_upstream_error(body: Any, provider: str) -> bool:
+    """Report whether ``body`` is an OpenRouter-wrapped upstream provider error.
+
+    OpenRouter relays failures of upstream model providers (DeepSeek,
+    Anthropic, etc.) under the outer message "Provider returned error",
+    with the real error nested in ``metadata.raw``. A match means the
+    user's OpenRouter key is healthy and the upstream provider is the one
+    that failed, so credential rotation is the wrong recovery.
+
+    Returns ``False`` for any body that is not a dict carrying a dict
+    ``error`` whose message is exactly that wrapper text (case-insensitive).
+    """
+    if not isinstance(body, dict):
+        return False
+    normalized_provider = (provider or "").strip().lower()
+    error_obj = body.get("error")
+    if not isinstance(error_obj, dict):
+        return False
+    wrapper_text = str(error_obj.get("message") or "").strip().lower()
+    if wrapper_text != "provider returned error":
+        return False
+    # The wrapper text is trusted outright when the caller is explicitly on
+    # OpenRouter; any other provider must also show the metadata shape that
+    # only OpenRouter produces (metadata.raw / metadata.provider_name).
+    if normalized_provider == "openrouter":
+        return True
+    meta = error_obj.get("metadata")
+    return isinstance(meta, dict) and ("raw" in meta or "provider_name" in meta)
+
+
+def _extract_upstream_provider_name(body: Any) -> Optional[str]:
+    """Return OpenRouter's ``error.metadata.provider_name``, stripped, or None."""
+    # Walk body -> "error" -> "metadata"; any non-dict hop means no name.
+    node: Any = body
+    for key in ("error", "metadata"):
+        if not isinstance(node, dict):
+            return None
+        node = node.get(key)
+    if not isinstance(node, dict):
+        return None
+    candidate = node.get("provider_name")
+    if not isinstance(candidate, str):
+        return None
+    return candidate.strip() or None
--- a/agent/gemini_native_adapter.py
+++ b/agent/gemini_native_adapter.py
@@ -337,6 +337,22 @@ def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[st
        if parts:
            contents.append({"role": gemini_role, "parts": parts})

+    # Gemini's generateContent requires strict user/model alternation;
+    # consecutive same-role contents are rejected with HTTP 400 "Please ensure
+    # that multiturn requests alternate between user and model". The loop above
+    # emits one content per source message, so parallel tool calls (N tool
+    # results become N user functionResponse contents), back-to-back user turns,
+    # or merged assistant turns would each violate that. Merge adjacent
+    # same-role contents by concatenating their parts. For parallel calls this
+    # also produces the grouped multi-functionResponse turn Gemini expects.
+    merged_contents: List[Dict[str, Any]] = []
+    for content in contents:
+        if merged_contents and merged_contents[-1]["role"] == content["role"]:
+            merged_contents[-1]["parts"].extend(content["parts"])
+        else:
+            merged_contents.append(content)
+    contents = merged_contents
+
    system_instruction = None
    joined_system = "\n".join(part for part in system_text_parts if part).strip()
    if joined_system:
--- a/agent/learn_prompt.py
+++ b/agent/learn_prompt.py
@@ -117,15 +117,29 @@ def build_learn_prompt(user_request: str) -> str:

    return (
        "[/learn] The user wants you to learn a reusable skill from the "
-        "source(s) they described below, and save it.\n\n"
-        f"WHAT TO LEARN FROM:\n{req}\n\n"
+        "request below, and save it.\n\n"
+        f"THE REQUEST:\n{req}\n\n"
+        "The request is open-ended and may mix two kinds of content, in any "
+        "order: SOURCES to gather (directories, file paths, URLs, \"what we "
+        "just did\", pasted notes) AND REQUIREMENTS that shape the skill "
+        "(what to focus on, what to leave out, scope, naming, the angle to "
+        "take). Treat EVERY part of the request as load-bearing. In "
+        "particular, prose that comes after a path or link is NOT incidental "
+        "— it is the user telling you what they want from that source. A "
+        "request like `<url> focus on the auth flow, skip the deprecated "
+        "endpoints` means: gather the URL AND honor \"focus on auth, skip "
+        "deprecated\" as authoring requirements. Never fetch the first source "
+        "and ignore the rest.\n\n"
        "Do this:\n"
-        "1. Gather the material. Resolve whatever the user named using the "
-        "tools you already have — `read_file`/`search_files` for local files "
-        "or directories, `web_extract` for URLs, the current conversation "
-        "history if they referred to something you just did, and the text "
-        "they pasted as-is. If the request is ambiguous about scope, make a "
-        "reasonable choice and note it; do not stall.\n"
+        "1. Gather every source the user named, using the tools you already "
+        "have — `read_file`/`search_files` for local files or directories, "
+        "`web_extract` for URLs, the current conversation history if they "
+        "referred to something you just did, and the text they pasted as-is. "
+        "If the request is ambiguous about scope, make a reasonable choice "
+        "and note it; do not stall.\n"
+        "1b. Apply every requirement, focus, and constraint in the request to "
+        "the skill you author — these govern what the SKILL.md covers and "
+        "emphasizes, not just which sources you read.\n"
        "2. Author ONE SKILL.md and save it with the `skill_manage` tool "
        "(action=\"create\"). Pick a sensible category. If the procedure needs "
        "a non-trivial script, add it under the skill's `scripts/` with "
--- a/agent/learning_graph.py
+++ b/agent/learning_graph.py
@@ -0,0 +1,320 @@
+"""Assemble the "learning made visible" graph for desktop.
+
+This graph is intentionally scoped to what a user actually learns over time:
+- non-base, learned/profile skills (agent-created or used),
+- memory chunks from ``MEMORY.md`` / ``USER.md`` as first-class nodes.
+
+Skill links come from declared ``related_skills``. Memory-to-skill links are
+derived from lexical overlap so the graph can answer "which learned skills are
+connected to the things I remember?".
+
+Run as a module to print edge-density stats against real data:
+
+    python -m agent.learning_graph
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Optional
+
+from hermes_constants import get_hermes_home
+
+
+@dataclass
+class SkillNode:  # one skill entry of the learning graph, keyed by name
+    name: str  # frontmatter "name", else the skill directory name (see build_skill_nodes)
+    category: str  # declared category, else the <category> path segment (see _category)
+    source: str = "profile"  # label of the skill root the file was found under
+    timestamp: Optional[int] = None  # usage-record activity time, else SKILL.md mtime
+    use_count: int = 0  # copied from the usage record
+    state: str = "active"  # copied from the usage record
+    created_by: Optional[str] = None  # copied from the usage record; density_stats counts "agent"
+    pinned: bool = False  # copied from the usage record
+    related: list[str] = field(default_factory=list)  # declared related_skills names
+
+
+def _frontmatter(text: str) -> dict[str, Any]:
+    """Parse skill frontmatter from ``text``; any failure yields ``{}``."""
+    try:
+        from agent.skill_utils import parse_frontmatter
+        parsed, _body = parse_frontmatter(text)
+        return parsed or {}
+    except Exception:
+        return {}
+
+
+def _related(fm: dict[str, Any]) -> list[str]:
+    """Return declared ``related_skills`` names (top-level key, else ``metadata.hermes``)."""
+    meta = fm.get("metadata")  # may be None or a scalar in hand-written frontmatter
+    hermes = meta.get("hermes") if isinstance(meta, dict) else None
+    raw = fm.get("related_skills") or (hermes if isinstance(hermes, dict) else {}).get("related_skills")
+    items = raw.strip("[]").split(",") if isinstance(raw, str) else (raw if isinstance(raw, list) else [])
+    return [name for name in (str(item).strip() for item in items) if name]
+
+
+def _category(fm: dict[str, Any], skill_md: Path) -> str:
+    """Return the declared category, else the ``<category>`` path segment, else "general"."""
+    meta = fm.get("metadata")  # may be None or a scalar in hand-written frontmatter
+    hermes = meta.get("hermes") if isinstance(meta, dict) else None
+    cat = fm.get("category") or (hermes if isinstance(hermes, dict) else {}).get("category")
+    # Layout is …/skills/<category>/<skill>/SKILL.md: third path component from the end.
+    return str(cat) if cat else (skill_md.parts[-3] if len(skill_md.parts) >= 3 else "general")
+
+
+def _iter_skill_files(roots: list[tuple[str, Path]]):
+    """Yield ``(source, path)`` for every SKILL.md found under the roots that exist."""
+    for label, base_dir in roots:
+        if base_dir.exists():
+            yield from ((label, found) for found in base_dir.rglob("SKILL.md"))
+
+
+def _load_usage() -> dict[str, dict[str, Any]]:
+    """Load skill usage records, falling back to the raw ``.usage.json`` file, else ``{}``."""
+    try:
+        from tools.skill_usage import load_usage
+        return load_usage()
+    except Exception:
+        usage_file = get_hermes_home() / "skills" / ".usage.json"
+    try:
+        return json.loads(usage_file.read_text(encoding="utf-8"))
+    except Exception:
+        return {}
+
+
+def _to_int_ts(value: Any) -> Optional[int]:
+    """Coerce a number, numeric string or ISO-8601 string to an int timestamp, else None."""
+    try:
+        if isinstance(value, (int, float)):
+            return int(value)
+        text = "" if value is None else str(value).strip()
+        if not text:
+            return None
+        try:
+            return int(float(text))
+        except ValueError:
+            moment = datetime.fromisoformat(text.replace("Z", "+00:00"))
+        # A datetime without an offset is interpreted as UTC.
+        if moment.tzinfo is None:
+            moment = moment.replace(tzinfo=timezone.utc)
+        return int(moment.timestamp())
+    except Exception:
+        return None
+
+
+def _usage_timestamp(rec: dict[str, Any]) -> Optional[int]:
+    """Return the first parseable timestamp in ``rec``, trying the keys below in order."""
+    keys = ("last_activity_at", "last_used_at", "last_viewed_at", "last_patched_at", "created_at")
+    stamps = (_to_int_ts(rec.get(key)) for key in keys)
+    # Lazy generator: evaluation stops at the first key that yields a timestamp.
+    return next((stamp for stamp in stamps if stamp is not None), None)
+
+
+def build_skill_nodes(skill_roots: list[tuple[str, Path]]) -> dict[str, SkillNode]:
+    """Build one SkillNode per skill name; first SKILL.md found wins, unreadable files are skipped."""
+    usage = _load_usage()
+    skipped_dirs = {".archive", ".hub", "node_modules", ".git"}  # hoisted out of the loop
+    nodes: dict[str, SkillNode] = {}
+    for source, skill_md in _iter_skill_files(skill_roots):
+        if not skipped_dirs.isdisjoint(skill_md.parts):
+            continue
+        try:
+            fm = _frontmatter(skill_md.read_text(encoding="utf-8")[:4000])
+            file_ts = _to_int_ts(skill_md.stat().st_mtime)
+        except (OSError, UnicodeDecodeError):  # vanished, unreadable or non-UTF-8 file
+            continue
+        name = str(fm.get("name") or skill_md.parent.name).strip()
+        if not name or name in nodes:
+            continue
+        rec = usage.get(name, {})
+        nodes[name] = SkillNode(
+            name=name,
+            category=_category(fm, skill_md),
+            source=source,
+            timestamp=_usage_timestamp(rec) or file_ts,
+            use_count=int(rec.get("use_count", 0) or 0),
+            state=str(rec.get("state", "active") or "active"),
+            created_by=rec.get("created_by"),
+            pinned=bool(rec.get("pinned", False)),
+            related=_related(fm),
+        )
+    return nodes
+
+
+def build_edges(nodes: dict[str, SkillNode]) -> list[tuple[str, str]]:
+    """Collect unique undirected ``related_skills`` links between known skills.
+
+    Each edge is a name-sorted pair; links to unknown skills and self-links
+    are dropped. Order follows first appearance while walking ``nodes``.
+    """
+    unique: dict[tuple[str, str], None] = {}  # dict used as an insertion-ordered set
+    for skill in nodes.values():
+        for other in skill.related:
+            if other == skill.name or other not in nodes:
+                continue
+            unique.setdefault(tuple(sorted((skill.name, other))), None)
+    return list(unique)
+
+
+def density_stats(nodes: dict[str, SkillNode], edges: list[tuple[str, str]]) -> dict[str, Any]:
+    """Summarise graph density: counts, edges per node, isolation share, top categories."""
+    linked = {name for edge in edges for name in edge}
+    cats: dict[str, int] = {}
+    for node in nodes.values():
+        cats[node.category] = cats.get(node.category, 0) + 1
+    total = len(nodes)
+    denom = total or 1  # divisor only: avoids ZeroDivisionError on an empty graph
+    return {
+        "nodes": total,
+        "related_edges": len(edges),
+        "edges_per_node": round(len(edges) / denom, 3),
+        "linked_nodes": len(linked),
+        # Numerator uses the real count so an empty graph reports 0.0, not 100.0.
+        "isolated_pct": round(100 * (total - len(linked)) / denom, 1),
+        "categories": len(cats),
+        "agent_created": sum(1 for x in nodes.values() if x.created_by == "agent"),
+        "used": sum(1 for x in nodes.values() if x.use_count > 0),
+        "top_categories": sorted(cats.items(), key=lambda kv: -kv[1])[:8],
+    }
+
+
+def _memory_cards() -> list[dict[str, Any]]:
+    """Return freeform memory as readable cards, one per ``§``-separated chunk.
+
+    ``MEMORY.md`` / ``USER.md`` are prose split on bare ``§`` lines. Every
+    non-empty chunk is surfaced; a missing or undecodable file is skipped.
+    """
+    base = get_hermes_home() / "memories"
+    cards: list[dict[str, Any]] = []
+    for fname, source in (("MEMORY.md", "memory"), ("USER.md", "profile")):
+        path = base / fname
+        try:
+            text = path.read_text(encoding="utf-8").strip()
+            file_ts = _to_int_ts(path.stat().st_mtime)
+        except (OSError, UnicodeDecodeError):  # best-effort: skip unreadable / non-UTF-8 files
+            continue
+        for chunk_idx, chunk in enumerate(c.strip() for c in text.split("\n§\n")):
+            if not chunk:
+                continue
+            first = chunk.splitlines()[0].strip().lstrip("# ").strip()
+            cards.append(
+                {
+                    "source": source,
+                    "timestamp": file_ts + chunk_idx if file_ts is not None else None,
+                    "title": (first[:80] + "…") if len(first) > 80 else first,
+                    "body": chunk[:1200],
+                }
+            )
+    return cards
+
+
+def _tokenize(text: str) -> set[str]:
+    return set(filter(lambda tok: len(tok) >= 3, re.split(r"[^a-z0-9]+", text.lower())))  # lowercase alnum tokens, 3+ chars
+
+
+def _memory_skill_edges(memory_cards: list[dict[str, Any]], skills: list[SkillNode]) -> list[tuple[str, str]]:
+    """Link each memory card to its (up to four) best-matching skills.
+
+    Score = tokens shared by skill name and card text, +6 when the lowercased
+    name occurs verbatim in the text; ties break alphabetically by name.
+    """
+    links: list[tuple[str, str]] = []
+    indexed = [(s.name, s.name.lower(), _tokenize(s.name)) for s in skills]
+    for pos, card in enumerate(memory_cards):
+        card_id = f"memory:{card['source']}:{pos}"
+        haystack = f"{card.get('title', '')}\n{card.get('body', '')}".lower()
+        words = _tokenize(haystack)
+        ranked = sorted(
+            (-((6 if lowered in haystack else 0) + len(toks & words)), name)
+            for name, lowered, toks in indexed
+        )
+        # Zero-score skills sort last, so they can only trail the top four.
+        links.extend((card_id, name) for neg, name in ranked[:4] if neg < 0)
+    return links
+
+
+def _skill_roots() -> list[tuple[str, Path]]:
+    # (source tag, directory) pairs: the repo's own skills, then the Hermes home's.
+    bundled = Path(__file__).resolve().parent.parent / "skills"
+    return [("base", bundled), ("profile", get_hermes_home() / "skills")]
+
+
+def build_learning_graph() -> dict[str, Any]:
+    """Full payload for the desktop learning panel.
+
+    Focus on what is profile-learned and actionable:
+    - skills that are NOT base-installed and show real learning signal
+      (agent-created or used),
+    - memory chunks as first-class graph nodes connected to those learned skills.
+    """
+    all_skills = build_skill_nodes(_skill_roots())
+    learned_skills = {
+        name: node
+        for name, node in all_skills.items()
+        if node.source != "base" and (node.created_by == "agent" or node.use_count > 0)
+    }
+    skill_edges = build_edges(learned_skills)  # edges among learned skills only
+    memory_cards = _memory_cards()
+    memory_edges = _memory_skill_edges(memory_cards, list(learned_skills.values()))
+
+    edges = skill_edges + memory_edges  # skill↔skill pairs first, then memory→skill pairs
+    clusters: dict[str, int] = {}
+    for node in learned_skills.values():
+        clusters[node.category] = clusters.get(node.category, 0) + 1
+    if memory_cards:
+        clusters["memory"] = len(memory_cards)  # all cards share one "memory" cluster
+
+    graph_nodes = [
+        {
+            "id": n.name,
+            "label": n.name,
+            "kind": "skill",
+            "timestamp": n.timestamp,
+            "category": n.category,
+            "useCount": n.use_count,
+            "state": n.state,
+            "createdBy": n.created_by,
+            "pinned": n.pinned,
+        }
+        for n in learned_skills.values()
+    ]
+    for i, card in enumerate(memory_cards):
+        graph_nodes.append(
+            {
+                "id": f"memory:{card['source']}:{i}",  # same id scheme as _memory_skill_edges
+                "label": card["title"],
+                "kind": "memory",
+                "memorySource": card["source"],
+                "timestamp": card.get("timestamp"),
+                "category": "memory",
+                "useCount": 0,
+                "state": "active",
+                "createdBy": "memory",
+                "pinned": False,
+            }
+        )
+
+    return {
+        "nodes": graph_nodes,
+        "edges": [{"source": a, "target": b} for a, b in edges],
+        "clusters": [
+            {"category": c, "count": n}
+            for c, n in sorted(clusters.items(), key=lambda kv: -kv[1])  # largest cluster first
+        ],
+        "memory": memory_cards,
+        "stats": {
+            **density_stats(learned_skills, skill_edges),  # density covers skill↔skill edges only
+            "memory_nodes": len(memory_cards),
+            "memory_skill_edges": len(memory_edges),
+            "learned_skills": len(learned_skills),
+        },
+    }
+
+
+if __name__ == "__main__":  # ad-hoc CLI: print density stats for every discovered skill as JSON
+    nodes = build_skill_nodes(_skill_roots())
+    print(json.dumps(density_stats(nodes, build_edges(nodes)), indent=2))
--- a/agent/learning_graph_render.py
+++ b/agent/learning_graph_render.py
@@ -0,0 +1,658 @@
+"""Terminal renderer for the learning timeline (learned skills + memories).
+
+The desktop app (``apps/desktop/src/app/starmap``) paints a GPU radial
+constellation; a terminal can't, so this is a *rendition* of the same data as a
+timeline bar chart — date rows, proportional skill/memory bars colored by the
+day's dominant category, and a cumulative trajectory sparkline — plus per-slice
+bucket metadata the TUI walks as a tree. The age gradient and complementary
+memory ink are ported from the desktop source, not guessed.
+
+Grids are emitted as style runs — ``[text, style, alpha, hex?]`` — so each
+consumer maps the semantic style + brightness onto its own palette; the
+optional 4th element overrides the base color (category heatmap). Pure,
+stdlib-only.
+"""
+
+from __future__ import annotations
+
+import math
+from datetime import datetime, timezone
+from typing import Any, Iterable, Optional
+
+# time-axis.ts LEAD_IN: the oldest node sits just off recency 0.
+LEAD_IN = 0.06
+
+# constants.ts AGE_GRADIENT — old quiet, recent bright.
+AGE_OLD_INK = 0.42
+AGE_MID_INK = 0.74
+AGE_NEW_INK = 0.95
+AGE_MID = 0.52
+
+# Style keys consumers map to base colors (brightness = the run alpha).
+STYLE_BG = "bg"
+STYLE_SKILL = "skill"
+STYLE_MEMORY = "memory"
+STYLE_LABEL = "label"
+STYLE_DIM = "dim"
+
+# Legend glyphs mirror NODE_SHAPE (skill = circle, memory = diamond).
+SKILL_GLYPH = "●"
+MEMORY_GLYPH = "◆"
+_LABEL_KEYS = tuple("123456789abc")  # one-char row markers, handed out in this order
+
+Run = list  # [text, style, alpha, hex?]
+Row = list  # list[Run]
+Grid = list  # list[Row]
+
+
+def _to_ts(value: Any) -> Optional[float]:
+    try:  # float timestamp, or None when missing / not numeric
+        return float(value) if value is not None else None
+    except (TypeError, ValueError):
+        return None
+
+
+def _clamp(v: float, lo: float, hi: float) -> float:
+    return lo if v < lo else hi if v > hi else v  # NaN passes through: both comparisons are False
+
+
+def _smoothstep(p: float) -> float:
+    t = _clamp(p, 0.0, 1.0)  # Hermite ease 3t² − 2t³ on the clamped input
+    return t * t * (3 - 2 * t)
+
+
+def recency_ink(rec: float) -> float:
+    """Port of geometry.ts ``recencyInk`` — recency (clamped 0..1) → ink alpha."""
+    age = _clamp(rec, 0.0, 1.0)
+    if age > AGE_MID:  # upper ramp: mid ink → new ink
+        return AGE_MID_INK + (AGE_NEW_INK - AGE_MID_INK) * _smoothstep((age - AGE_MID) / (1 - AGE_MID))
+    return AGE_OLD_INK + (AGE_MID_INK - AGE_OLD_INK) * _smoothstep(age / AGE_MID)
+
+
+def format_date(ts: Optional[float]) -> str:
+    """UTC ``D Mon YYYY`` for a Unix timestamp; ``"unknown"`` if falsy or invalid."""
+    try:
+        dt = datetime.fromtimestamp(float(ts), tz=timezone.utc) if ts else None
+    except (ValueError, OSError, OverflowError):
+        dt = None
+    return f"{dt.day} {dt:%b %Y}" if dt else "unknown"  # no "%-d": it fails on Windows
+
+
+def compute_recency(nodes: list[dict[str, Any]]) -> dict[str, Any]:
+    """Port of time-axis.ts ``computeRecency`` (id → recency ratio, timed flag).
+
+    With at least two distinct timestamps ("timed"), a node's ratio is its
+    linear position between the oldest and newest; otherwise — and for nodes
+    lacking a timestamp — its rank in (timestamp, id) order is used instead.
+    """
+    stamps = [_to_ts(n.get("timestamp")) for n in nodes]
+    known = [t for t in stamps if t is not None]
+    min_ts = min(known) if known else None
+    max_ts = max(known) if known else None
+    timed = bool(known) and max_ts > min_ts
+    # Rank fallback: nodes without a timestamp sort last, ties broken by id.
+    by_age = sorted(
+        zip(stamps, nodes),
+        key=lambda pair: (math.inf if pair[0] is None else pair[0], str(pair[1].get("id", ""))),
+    )
+    last = max(len(by_age) - 1, 1)
+    rank = {str(n.get("id", "")): (i / last if len(by_age) > 1 else 0.0) for i, (_t, n) in enumerate(by_age)}
+    rec: dict[str, float] = {}
+    for ts, n in zip(stamps, nodes):
+        nid = str(n.get("id", ""))
+        if timed and ts is not None:
+            ratio = (ts - min_ts) / (max_ts - min_ts)
+        else:
+            ratio = rank.get(nid, 0.0)
+        rec[nid] = LEAD_IN + (1 - LEAD_IN) * _clamp(ratio, 0.0, 1.0)
+    return {"rec": rec, "timed": timed, "minTs": min_ts, "maxTs": max_ts}
+
+
+def _date_at(rec: dict[str, Any], reveal: float) -> Optional[float]:
+    """Rounded timestamp at ``reveal`` (0..1) between minTs and maxTs; None if untimed."""
+    lo, hi = rec.get("minTs"), rec.get("maxTs")
+    if not rec.get("timed") or lo is None or hi is None:
+        return None
+    span = hi - lo
+    return round(lo + _clamp(reveal, 0, 1) * span)
+
+
+# ── Color: ported from color.ts so memory ink + age fade match the desktop ──
+
+
+def hex_to_rgb(s: str) -> tuple[int, int, int]:
+    """Parse ``#RGB`` / ``#RRGGBB`` into (r, g, b); falls back to (255, 215, 0)."""
+    digits = s.strip().lstrip("#")
+    digits = "".join(ch * 2 for ch in digits) if len(digits) == 3 else digits
+    try:
+        return tuple(int(digits[i : i + 2], 16) for i in (0, 2, 4))  # type: ignore[return-value]
+    except (ValueError, IndexError):
+        return 255, 215, 0
+
+
+def rgb_to_hex(c: tuple) -> str:
+    return "#{:02X}{:02X}{:02X}".format(*(int(_clamp(v, 0, 255)) for v in c))  # clamp to 0..255, truncate to int
+
+
+def mix_rgb(a: tuple, b: tuple, t: float) -> tuple[int, int, int]:
+    w = _clamp(t, 0.0, 1.0)  # blend weight: 0 → a, 1 → b
+    return tuple(map(lambda i: round(a[i] + (b[i] - a[i]) * w), range(3)))  # type: ignore[return-value]
+
+
+def _rgb_to_hsl(c: tuple) -> tuple[float, float, float]:
+    """RGB (0..255 channels) → (hue in degrees, saturation, lightness)."""
+    r, g, b = (channel / 255 for channel in c)
+    hi, lo = max(r, g, b), min(r, g, b)
+    light, delta = (hi + lo) / 2, hi - lo
+    if not delta:  # grey: hue and saturation are both zero
+        return 0.0, 0.0, light
+    sat = delta / (2 - hi - lo) if light > 0.5 else delta / (hi + lo)
+    if hi == r:
+        sector = (g - b) / delta + (6 if g < b else 0)
+    elif hi == g:
+        sector = (b - r) / delta + 2
+    else:
+        sector = (r - g) / delta + 4
+    return sector * 60, sat, light
+
+
+def _hsl_to_rgb(h: float, s: float, light: float) -> tuple[int, int, int]:
+    """HSL (hue in degrees, any range; s/light in 0..1) → rounded 0..255 RGB."""
+    hue = ((h % 360) + 360) % 360
+    c = (1 - abs(2 * light - 1)) * s
+    x = c * (1 - abs(((hue / 60) % 2) - 1))
+    m = light - c / 2
+    # c: chroma, x: intermediate component, m: offset added to every channel.
+    # One (r, g, b) triple per 60° sextant of the hue wheel.
+    sextants = (
+        (c, x, 0.0),
+        (x, c, 0.0),
+        (0.0, c, x),
+        (0.0, x, c),
+        (x, 0.0, c),
+        (c, 0.0, x),
+    )
+    r, g, b = sextants[min(int(hue // 60), 5)]
+    return round((r + m) * 255), round((g + m) * 255), round((b + m) * 255)
+
+
+def _complementary_ink(c: tuple) -> tuple[int, int, int]:
+    hue, sat, lum = _rgb_to_hsl(c)  # hue +165°, saturation ≥ 0.5, lightness within 0.5..0.7
+    return _hsl_to_rgb(hue + 165, max(sat, 0.5), _clamp(lum, 0.5, 0.7))
+
+
+def derive_palette(primary_hex: str, *, dark: bool = True) -> dict[str, str]:
+    """Port of color.ts ``computePalette`` (the bits a terminal needs)."""
+    primary = hex_to_rgb(primary_hex)
+    # ink = foreground extreme, paper = background, per light/dark theme.
+    ink, paper, memory_mix = ((255, 255, 255), (8, 8, 12), 0.12) if dark else ((0, 0, 0), (250, 250, 250), 0.18)
+    return {
+        "primary": primary_hex,
+        # Memories are drillable → primary "clickable" ink; skills are dead-ends
+        # → muted complement.
+        "memory": rgb_to_hex(mix_rgb(primary, ink, memory_mix)),
+        "skill": rgb_to_hex(mix_rgb(_complementary_ink(primary), paper, 0.45)),
+        "label": rgb_to_hex(mix_rgb(ink, paper, 0.35)),
+        "dim": rgb_to_hex(mix_rgb(ink, paper, 0.7)),
+        "bg": rgb_to_hex(paper),
+    }
+
+
+def _node_score(node: dict[str, Any], rec: float) -> float:
+    """Rank a node for markers/labels: flat bonus for memories; use + pin for skills."""
+    if node.get("kind") != "memory":
+        uses = max(0.0, float(node.get("useCount", 0) or 0))
+        return rec * 2 + math.sqrt(uses) + (2.0 if node.get("pinned") else 0.0)
+    return 3.5 + rec
+
+
+def _node_label(node: dict[str, Any]) -> str:
+    name = str(node.get("label") or node.get("id") or "unknown").strip()  # label, else id, else "unknown"
+    return name[:23].rstrip() + "…" if len(name) > 26 else name
+
+
+def _node_meta(node: dict[str, Any]) -> str:
+    """One-line ``·``-joined detail string for a memory or skill node."""
+    when = format_date(_to_ts(node.get("timestamp")))
+    if node.get("kind") == "memory":
+        origin = "profile memory" if node.get("memorySource") == "profile" else "memory"
+        return f"{origin} · {when}"
+    uses = int(node.get("useCount", 0) or 0)
+    parts = [str(node.get("category") or "skill"), when]
+    parts += [f"x{uses}"] if uses else []
+    parts += ["pinned"] if node.get("pinned") else []
+    return " · ".join(parts)
+
+
+# ── Timeline chart frame ─────────────────────────────────────────────────────
+
+
+class _ChartBucket:
+    """One timeline row: label, ``ts``, member nodes and skill/memory counts."""
+
+    __slots__ = ("label", "ts", "skills", "memories", "nodes", "rec")
+
+    def __init__(self, label: str, ts: float):
+        self.label, self.ts = label, ts
+        self.skills = self.memories = 0
+        self.nodes: list[dict[str, Any]] = []
+        self.rec = 1.0  # default recency; _build_chart_buckets may overwrite it
+
+    @property
+    def total(self) -> int:
+        return self.memories + self.skills
+
+
+def _period_key(ts: float, granularity: str) -> tuple[int, ...]:
+    """UTC calendar key for ``ts``: (y, m, d), (y, m) or (y,) by granularity."""
+    dt = datetime.fromtimestamp(ts, tz=timezone.utc)
+    # Number of leading date fields kept; anything but day/month means year.
+    depth = {"day": 3, "month": 2}.get(granularity, 1)
+    # Tuples of equal depth sort chronologically.
+    return (dt.year, dt.month, dt.day)[:depth]
+
+
+def _period_label(ts: float, granularity: str) -> str:
+    """UTC bucket label: ``D Mon`` (day), ``Mon YYYY`` (month), else ``YYYY``."""
+    dt = datetime.fromtimestamp(ts, tz=timezone.utc)
+    if granularity == "day":
+        # Not strftime("%-d"): that flag is a platform extension and raises on Windows.
+        return f"{dt.day} {dt:%b}"
+    return dt.strftime("%b %Y" if granularity == "month" else "%Y")
+
+
+def _build_chart_buckets(nodes: list[dict[str, Any]], rec: dict[str, Any], max_rows: int) -> list[_ChartBucket]:
+    """Timeline rows: finest date granularity that fits, oldest → newest."""
+    if not nodes:
+        return []
+    if not rec["timed"]:  # no usable time span: bin by recency ratio instead of calendar date
+        ordered = sorted(nodes, key=lambda n: rec["rec"].get(str(n.get("id", "")), 0.0))
+        n_bins = min(max_rows, max(1, len(ordered)))  # never more bins than nodes
+        buckets = [_ChartBucket(f"#{i + 1}", float(i)) for i in range(n_bins)]  # ts holds the bin index here
+        for node in ordered:
+            idx = int(_clamp(math.floor(rec["rec"].get(str(node.get("id", "")), 0.0) * n_bins), 0, n_bins - 1))
+            b = buckets[idx]
+            b.nodes.append(node)
+            if node.get("kind") == "memory":
+                b.memories += 1
+            else:
+                b.skills += 1
+        return buckets
+
+    chosen: Optional[list[_ChartBucket]] = None
+    for granularity in ("day", "month", "year"):
+        groups: dict[tuple[int, ...], _ChartBucket] = {}
+        for node in nodes:
+            ts = _to_ts(node.get("timestamp"))
+            if ts is None:
+                continue  # NOTE(review): nodes without a timestamp are left out of calendar buckets — confirm intended
+            key = _period_key(ts, granularity)
+            bucket = groups.get(key)
+            if bucket is None:
+                bucket = _ChartBucket(_period_label(ts, granularity), ts)  # first node seen anchors bucket.ts
+                groups[key] = bucket
+            bucket.nodes.append(node)
+            if node.get("kind") == "memory":
+                bucket.memories += 1
+            else:
+                bucket.skills += 1
+        # For short spans, keep the useful day-by-day graph even when the caller
+        # asked for fewer rows; terminal scrollback is better than collapsing a
+        # month of activity into one unreadable bar.
+        if len(groups) <= max_rows or (granularity == "day" and len(groups) <= 32):
+            chosen = [groups[key] for key in sorted(groups)]
+            break
+
+    if chosen is None:
+        # If even yearly buckets overflow, fall back to even time bins.
+        min_ts, max_ts = rec.get("minTs"), rec.get("maxTs")
+        n_bins = max(1, max_rows)
+        chosen = []
+        for i in range(n_bins):
+            ts = min_ts + (i / max(1, n_bins - 1)) * (max_ts - min_ts) if min_ts and max_ts else float(i)  # NOTE(review): a 0.0 bound counts as missing here
+            chosen.append(_ChartBucket(format_date(ts), ts))
+        for node in nodes:
+            r = rec["rec"].get(str(node.get("id", "")), 0.0)
+            idx = int(_clamp(math.floor(r * n_bins), 0, n_bins - 1))
+            b = chosen[idx]
+            b.nodes.append(node)
+            if node.get("kind") == "memory":
+                b.memories += 1
+            else:
+                b.skills += 1
+
+    min_ts, max_ts = rec.get("minTs"), rec.get("maxTs")
+    span = (max_ts - min_ts) if min_ts is not None and max_ts is not None and max_ts > min_ts else 0
+    for bucket in chosen:
+        bucket.rec = LEAD_IN + (1 - LEAD_IN) * ((bucket.ts - min_ts) / span) if span else 1.0  # whole expression is conditional: 1.0 without a span
+    return chosen
+
+
+def _bucket_label_node(bucket: _ChartBucket) -> Optional[dict[str, Any]]:
+    """Node to label for this bucket (None if empty): best ``_node_score``, newest wins ties."""
+    # _node_score weights assume rec in 0..1, so pass bucket.rec — a raw epoch timestamp would swamp them.
+    return max(bucket.nodes, key=lambda n: (_node_score(n, bucket.rec), _to_ts(n.get("timestamp")) or bucket.ts), default=None)
+
+
+def _bucket_nodes(bucket: _ChartBucket, memory_lookup: Optional[dict[str, dict[str, Any]]] = None) -> list[dict[str, Any]]:
+    """Describe a bucket's nodes, oldest → newest, as rows for the TUI tree."""
+    lookup = memory_lookup or {}
+    described: list[dict[str, Any]] = []
+    for node in sorted(bucket.nodes, key=lambda n: _to_ts(n.get("timestamp")) or bucket.ts):
+        node_id = str(node.get("id", ""))
+        is_memory = node.get("kind") == "memory"
+        card = lookup.get(node_id)
+        described.append(
+            {
+                "id": node_id,
+                "glyph": MEMORY_GLYPH if is_memory else SKILL_GLYPH,
+                "label": _node_label(node),
+                "fullLabel": str(node.get("label") or node.get("id") or "unknown").strip(),
+                "meta": _node_meta(node),
+                "body": str(card.get("body", "")) if card else "",
+                "style": STYLE_MEMORY if is_memory else STYLE_SKILL,
+            }
+        )
+    return described
+
+
+def _bucket_rows(buckets: list[_ChartBucket], payload: dict[str, Any]) -> list[dict[str, Any]]:
+    """Per-bucket metadata rows for the TUI (counts, dominant category, nodes)."""
+    cmap = category_color_map(payload)
+    timed = compute_recency(list(payload.get("nodes", [])))["timed"]
+    cards = payload.get("memory", []) or []
+    memory_lookup = {f"memory:{c.get('source')}:{i}": c for i, c in enumerate(cards) if isinstance(c, dict)}
+    rows: list[dict[str, Any]] = []
+    for idx, bucket in enumerate(buckets):
+        cat = _bucket_category(bucket)
+        rows.append(
+            {
+                "index": idx,
+                "label": bucket.label,
+                # Untimed buckets carry a bin index in ``ts``, not an epoch time.
+                "date": format_date(bucket.ts) if timed else "unknown",
+                "skills": bucket.skills,
+                "memories": bucket.memories,
+                "total": bucket.total,
+                "category": cat,
+                "color": cmap.get(cat) if cat else None,
+                "nodes": _bucket_nodes(bucket, memory_lookup),
+            }
+        )
+    return rows
+
+
+def _category_counts(payload: dict[str, Any]) -> list[tuple[str, int]]:
+    """(category, count) pairs for skills: payload clusters, else counted from nodes."""
+    from_clusters = [
+        (str(c.get("category")), int(c.get("count", 0)))
+        for c in payload.get("clusters", []) or []
+        if c.get("category") and c.get("category") != "memory"
+    ]
+    if from_clusters:
+        return from_clusters
+    tally: dict[str, int] = {}
+    for node in payload.get("nodes", []):
+        if node.get("kind") != "memory":
+            cat = str(node.get("category") or "skill")
+            tally[cat] = tally.get(cat, 0) + 1
+    return sorted(tally.items(), key=lambda kv: (-kv[1], kv[0]))
+
+
+def category_color_map(payload: dict[str, Any]) -> dict[str, str]:
+    """Deterministic hue per skill category (theme-independent).
+
+    Hues advance by the golden angle (137.508°) in ``_category_counts`` order.
+    """
+    return {cat: rgb_to_hex(_hsl_to_rgb((i * 137.508) % 360, 0.55, 0.62)) for i, (cat, _c) in enumerate(_category_counts(payload))}
+
+
+def category_legend(payload: dict[str, Any], limit: int = 4) -> list[dict[str, Any]]:
+    """Legend rows for the first ``limit`` categories, plus a ``+N`` overflow row."""
+    colors = category_color_map(payload)
+    ranked = _category_counts(payload)
+    legend = [{"glyph": "●", "color": colors.get(cat, ""), "label": f"{cat} ({count})"} for cat, count in ranked[:limit]]
+    overflow = max(0, len(ranked) - len(ranked[:limit]))
+    if overflow:
+        legend.append({"glyph": "·", "color": "", "label": f"+{overflow}"})
+    return legend
+
+
+def _bucket_category(bucket: _ChartBucket) -> Optional[str]:
+    """Most common skill category in the bucket (first seen wins ties), or None."""
+    tally: dict[str, int] = {}
+    for node in bucket.nodes:
+        if node.get("kind") != "memory":
+            cat = str(node.get("category") or "skill")
+            tally[cat] = tally.get(cat, 0) + 1
+    return max(tally, key=tally.get) if tally else None
+
+
+def _trajectory_row(buckets: list[_ChartBucket], width: int, reveal: float) -> Row:
+    """Cumulative learning curve as a compact star-path sparkline."""
+    if not buckets:
+        return []
+    grand_total = sum(b.total for b in buckets) or 1
+    shown = int(_clamp(math.ceil(reveal * len(buckets)), 0, len(buckets)))
+    track = [" "] * width
+    running, prev = 0, 0
+    for bucket in buckets[:shown]:
+        running += bucket.total
+        # Column of this bucket's star: cumulative share scaled to the width.
+        col = round((running / grand_total) * (width - 1))
+        # Dot the path from the previous star without overwriting stars.
+        lo, hi = sorted((prev, col))
+        for x in range(max(lo, 0), min(hi, width - 1) + 1):
+            if track[x] == " ":
+                track[x] = "·"
+        if 0 <= col < width:
+            track[col] = "✦"
+        prev = col
+    return [["trajectory ", STYLE_LABEL, 0.55], ["".join(track), STYLE_SKILL, 0.48]]
+
+
+def render_graph(payload: dict[str, Any], *, cols: int = 80, rows: int = 16, reveal: float = 1.0) -> dict[str, Any]:
+    """Render one timeline frame at ``reveal`` (0→1).
+
+    Date rows with proportional skill/memory bars colored by the day's dominant
+    category, numbered markers tied to label rows, and a cumulative trajectory
+    sparkline underneath.
+    """
+    reveal = _clamp(reveal, 0.0, 1.0)
+    cols = max(44, cols)  # minimum supported canvas width
+    rows = max(14, rows)  # minimum supported canvas height
+    nodes = list(payload.get("nodes", []))
+    if not nodes:
+        placeholder = [["no learning yet — keep using Hermes and it maps out here", STYLE_DIM, 0.7]]
+        return {"grid": [placeholder], "date": "", "reveal": reveal, "visible": 0}
+
+    rec = compute_recency(nodes)
+    cmap = category_color_map(payload)
+    buckets = _build_chart_buckets(nodes, rec, max_rows=max(4, rows - 3))
+    n_buckets = len(buckets)
+    visible_bucket_count = int(_clamp(math.ceil(reveal * n_buckets), 0, n_buckets))
+    max_total = max((b.total for b in buckets), default=1) or 1
+    label_w = min(9, max(len(b.label) for b in buckets))
+    bar_w = max(14, cols - label_w - 16)
+
+    grid: Grid = []
+    labels: list[dict[str, Any]] = []
+    visible = 0
+    for i, bucket in enumerate(buckets):
+        if i >= visible_bucket_count:
+            grid.append([])  # not revealed yet: keep the row slot, draw nothing
+            continue
+        visible += bucket.total
+        ink = recency_ink(bucket.rec)
+        bar_len = max(1, round((bucket.total / max_total) * bar_w)) if bucket.total else 0
+        skill_len = round((bucket.skills / bucket.total) * bar_len) if bucket.total else 0
+        if bucket.skills and skill_len == 0:
+            skill_len = 1
+        memory_len = bar_len - skill_len
+        if bucket.memories and memory_len == 0 and bar_len > 1:
+            memory_len = 1
+            skill_len = bar_len - 1
+
+        node = _bucket_label_node(bucket)
+        marker = ""
+        if node and len(labels) < 6:  # only the first six non-empty rows get a keyed label
+            marker = _LABEL_KEYS[len(labels)]
+            style = STYLE_MEMORY if node.get("kind") == "memory" else STYLE_SKILL
+            labels.append(
+                {
+                    "key": marker,
+                    "glyph": MEMORY_GLYPH if node.get("kind") == "memory" else SKILL_GLYPH,
+                    "label": _node_label(node),
+                    "meta": _node_meta(node),
+                    "style": style,
+                    "alpha": round(ink, 3),
+                }
+            )
+
+        cat = _bucket_category(bucket)
+        cat_hex = cmap.get(cat) if cat else None
+
+        row: Row = [[f"{bucket.label:>{label_w}} ", STYLE_LABEL, ink], ["│ ", STYLE_DIM, 0.55]]
+        if marker:
+            row.append([marker, STYLE_LABEL, 0.95])
+        elif bucket.total:
+            head_hex = cat_hex if bucket.skills else None
+            row.append(["✦" if bucket.skills else "◆", STYLE_SKILL if bucket.skills else STYLE_MEMORY, ink, head_hex])
+        else:
+            # Empty bucket: blank head cell so its count column stays aligned.
+            row.append([" ", STYLE_BG, 1.0])
+        if skill_len:
+            # Bar colored by the day's dominant category — a learning heatmap.
+            row.append(["━" * skill_len, STYLE_SKILL, ink, cat_hex])
+        if memory_len:
+            mem_trail = "◆" if memory_len == 1 else "◆" + ("━" * (memory_len - 2)) + "◆"
+            row.append([mem_trail, STYLE_MEMORY, max(0.65, ink)])
+        if bar_len < bar_w:
+            # Empty space keeps counts aligned; starmap texture lives in the
+            # trajectory row below, where it reads as signal rather than noise.
+            row.append([" " * (bar_w - bar_len), STYLE_BG, 1.0])
+        row.append(["  ", STYLE_BG, 1.0])
+        row.append([str(bucket.skills), STYLE_SKILL, max(0.72, ink)])
+        if bucket.memories:
+            row.append(["+", STYLE_DIM, 0.6])
+            row.append([str(bucket.memories), STYLE_MEMORY, max(0.72, ink)])
+        if i == visible_bucket_count - 1:
+            row.append(["  ◀ now", STYLE_LABEL, 0.9])
+        elif bucket.total == max_total and max_total > 1:
+            row.append(["  ☄ peak", STYLE_LABEL, 0.75])
+        grid.append(row)
+
+    # Cumulative learning trajectory underneath the rows.
+    grid.append([[(" " * (label_w + 2)), STYLE_BG, 1.0], *_trajectory_row(buckets, max(12, cols - label_w - 13), reveal)])
+
+    return {
+        "grid": grid,
+        "date": format_date(_date_at(rec, reveal)),
+        "reveal": reveal,
+        "visible": visible,
+        "labels": labels,
+    }
+
+
+# ── Trimmings ──────────────────────────────────────────────────────────────
+
+
+def build_legend(payload: dict[str, Any]) -> list[dict[str, Any]]:
+    """Skill and memory legend entries, each labelled with its node count."""
+    nodes = payload.get("nodes", [])
+    memories = sum(n.get("kind") == "memory" for n in nodes)
+    return [
+        {"glyph": SKILL_GLYPH, "style": STYLE_SKILL, "label": f"skills ({len(nodes) - memories})"},
+        {"glyph": MEMORY_GLYPH, "style": STYLE_MEMORY, "label": f"memories ({memories})"},
+    ]
+
+
+def axis_labels(payload: dict[str, Any]) -> dict[str, str]:  # axis end captions: dates if timed, else words
+    rec = compute_recency(list(payload.get("nodes", [])))
+    if rec["timed"]:
+        return {"start": format_date(rec.get("minTs")), "end": format_date(rec.get("maxTs"))}
+    return {"start": "oldest", "end": "now"}
+
+
+def _peak_day(payload: dict[str, Any]) -> Optional[str]:
+    """Summary fragment for the UTC day holding the most timestamped nodes."""
+    # day key → [node count, timestamp of the last node seen that day]
+    tally: dict[tuple[int, ...], list] = {}
+    for node in payload.get("nodes", []):
+        ts = _to_ts(node.get("timestamp"))
+        if ts is not None:
+            slot = tally.setdefault(_period_key(ts, "day"), [0, ts])
+            slot[0] += 1
+            slot[1] = ts
+    if not tally:
+        return None
+    count, rep_ts = max(tally.values(), key=lambda slot: slot[0])
+    return f"busiest day {_period_label(rep_ts, 'day')} · {count} learned"
+
+
+def build_summary(payload: dict[str, Any]) -> list[str]:
+    """Headline counts plus an optional second line of extras, from ``stats``."""
+    stats = payload.get("stats", {}) or {}
+    # Falls back to "nodes" when "learned_skills" is absent.
+    learned = stats.get("learned_skills", stats.get("nodes", 0))
+    memories = stats.get("memory_nodes", 0)
+    links = stats.get("related_edges", 0)
+    headline = f"{learned} learned skills · {memories} memories · {links} skill links"
+    details: list[str] = []
+    if stats.get("memory_skill_edges"):
+        details.append(f"{stats['memory_skill_edges']} memory↔skill links")
+    peak = _peak_day(payload)
+    if peak:
+        details.append(peak)
+    # The second line only appears when there is something to report.
+    return [headline, " · ".join(details)] if details else [headline]
+
+
+def _merge_runs(cells: Iterable[Run]) -> Row:
+    """Coalesce adjacent runs sharing style, alpha (±1e-6) and color override."""
+    merged: Row = []
+    for run in cells:
+        text, style = run[0], run[1]
+        alpha = run[2] if len(run) > 2 else 1.0
+        color = run[3] if len(run) > 3 else None
+        tail = merged[-1] if merged else None
+        tail_color = tail[3] if tail is not None and len(tail) > 3 else None
+        if tail is not None and tail[1] == style and abs(tail[2] - alpha) < 1e-6 and tail_color == color:
+            tail[0] += text
+            continue
+        merged.append([text, style, alpha, color] if color else [text, style, alpha])
+    return merged
+
+
+def render_frames(payload: dict[str, Any], *, cols: int = 80, rows: int = 16, frames: int = 48) -> dict[str, Any]:
+    """Pre-render ``frames`` snapshots (reveal 0→1, clamped to 2..240) plus static extras."""
+    frames = max(2, min(frames, 240))
+    nodes = list(payload.get("nodes", []))
+    rec = compute_recency(nodes)
+    # Same bucketing arguments as render_graph, so the interactive row list
+    # matches the rendered rows.
+    buckets = _build_chart_buckets(nodes, rec, max_rows=max(4, rows - 3)) if nodes else []
+
+    def snapshot(step: int) -> dict[str, Any]:
+        # step 0 → reveal 0.0, last step → reveal 1.0
+        shot = render_graph(payload, cols=cols, rows=rows, reveal=step / (frames - 1))
+        return {
+            "reveal": shot["reveal"],
+            "date": shot["date"],
+            "visible": shot["visible"],
+            "grid": shot["grid"],
+            # The empty-graph placeholder frame carries no "labels" key.
+            "labels": shot.get("labels", []),
+        }
+
+    return {
+        "frames": [snapshot(step) for step in range(frames)],
+        "legend": build_legend(payload),
+        "categories": category_legend(payload),
+        "buckets": _bucket_rows(buckets, payload),
+        "summary": build_summary(payload),
+        "axis": axis_labels(payload),
+        "count": len(payload.get("nodes", [])),
+        "cols": cols,
+        "rows": rows,
+    }
--- a/agent/learning_mutations.py
+++ b/agent/learning_mutations.py
@@ -0,0 +1,206 @@
+"""User-initiated edit/delete for journey nodes (learned skills + memories).
+
+The journey graph (``agent.learning_graph``) gives every node a stable id:
+
+- **skills** → the skill name (e.g. ``"debugging-hermes-desktop"``)
+- **memories** → ``memory:<source>:<index>`` where ``source`` is ``memory``
+  (``MEMORY.md``) or ``profile`` (``USER.md``) and ``index`` is the node's
+  position in the combined card list (``MEMORY.md`` cards first, then
+  ``USER.md``).
+
+This module maps a node id back to its on-disk home and performs the mutation,
+shared by the CLI (``hermes journey delete|edit``), the TUI ``/journey`` overlay
+(gateway RPCs), and the desktop GUI (REST). Deleting a skill *archives* it
+(recoverable via ``hermes curator restore``); deleting a memory rewrites its
+file. Pure stdlib + existing skill/memory helpers.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+_MEMORY_FILES = {"memory": "MEMORY.md", "profile": "USER.md"}
+
+
+def parse_node_kind(node_id: str) -> str:
+    """Classify a journey node id as ``"memory"`` or ``"skill"``.
+
+    Ids that begin with ``memory:`` are memories; every other id is treated
+    as a skill name.
+    """
+    if node_id.startswith("memory:"):
+        return "memory"
+    return "skill"
+
+
+def _memories_dir() -> Path:
+    """Return the ``memories`` directory under the Hermes home directory."""
+    # Imported lazily, inside the function, exactly as the module does
+    # for its other project-local helpers.
+    from hermes_constants import get_hermes_home
+
+    hermes_home = get_hermes_home()
+    return hermes_home / "memories"
+
+
+def _parse_memory_id(node_id: str) -> tuple[str, int]:
+    """Split ``memory:<source>:<index>`` into ``(source, global_index)``.
+
+    Raises ``ValueError`` when the id does not have exactly three
+    colon-separated fields, does not start with ``memory``, names a source
+    that is not a key of ``_MEMORY_FILES``, or carries a non-integer index.
+    """
+    fields = node_id.split(":", 2)
+    well_formed = len(fields) == 3 and fields[0] == "memory" and fields[1] in _MEMORY_FILES
+    if not well_formed:
+        raise ValueError(f"bad memory node id: {node_id!r}")
+    _, source, raw_index = fields
+    try:
+        index = int(raw_index)
+    except ValueError as exc:
+        raise ValueError(f"bad memory node id: {node_id!r}") from exc
+    return source, index
+
+
+def _memory_local_index(source: str, global_index: int) -> int:
+    """Translate a global card index into an index within the source's file.
+
+    The combined list from ``_memory_cards`` is expected to hold every
+    ``memory`` card ahead of the ``profile`` cards, so a ``memory`` card keeps
+    its global index and any other card has the number of ``memory`` cards
+    subtracted.
+
+    Raises ``IndexError`` when ``global_index`` falls outside the combined
+    list, and ``ValueError`` when the card at that position belongs to a
+    different source than the id claims (a stale id).
+    """
+    from agent.learning_graph import _memory_cards
+
+    cards = _memory_cards()
+    if global_index < 0 or global_index >= len(cards):
+        raise IndexError(f"memory index {global_index} out of range")
+    if cards[global_index].get("source") != source:
+        raise ValueError("memory node id is stale — refresh the graph")
+    if source == "memory":
+        return global_index
+    memory_card_count = len([card for card in cards if card.get("source") == "memory"])
+    return global_index - memory_card_count
+
+
+def _locate_memory(source: str, gidx: int) -> tuple[Path, list[str], int]:
+    """Find a memory card on disk.
+
+    Returns ``(path, entries, position)``: the backing file, every entry that
+    ``MemoryStore._read_file`` parsed from it, and the card's position within
+    those entries.  Reading goes through the memory tool's own parser so the
+    journey indices stay aligned with what the graph renders.
+
+    Raises ``ValueError`` when the file is missing or the computed position
+    no longer exists in the file (a stale id).
+    """
+    from tools.memory_tool import MemoryStore
+
+    path = _memories_dir() / _MEMORY_FILES[source]
+    if not path.exists():
+        raise ValueError(f"{path.name} not found")
+    entries = MemoryStore._read_file(path)
+    position = _memory_local_index(source, gidx)
+    if position < 0 or position >= len(entries):
+        raise ValueError("memory node id is stale — refresh the graph")
+    return path, entries, position
+
+
+# ── Inspect (edit prefill) ──────────────────────────────────────────────────
+
+
+def node_detail(node_id: str) -> dict[str, Any]:
+    """Fetch a node's current content for pre-filling an editor.
+
+    On success ``content`` is the whole SKILL.md for a skill, or the raw
+    memory chunk for a memory.  ``ValueError`` and ``IndexError`` raised
+    during lookup are converted into an ``{"ok": False, "message": ...}``
+    result instead of propagating.
+    """
+    try:
+        detail = _node_detail(node_id)
+    except (ValueError, IndexError) as exc:
+        detail = {"ok": False, "message": str(exc)}
+    return detail
+
+
+def _node_detail(node_id: str) -> dict[str, Any]:
+    """Build the detail payload for one journey node.
+
+    Memories: ``content`` is the stripped chunk and ``label`` is the first 80
+    characters of its first line.  Skills: ``content`` is the text of the
+    skill's ``SKILL.md`` and ``label`` is the skill name.
+
+    Returns ``{"ok": False, "message": ...}`` when the skill or its
+    ``SKILL.md`` cannot be found.  Malformed or stale memory ids raise
+    ``ValueError`` / ``IndexError`` from the lookup helpers; ``node_detail``
+    converts those into a failure result.
+    """
+    if parse_node_kind(node_id) == "memory":
+        source, gidx = _parse_memory_id(node_id)
+        _, chunks, local = _locate_memory(source, gidx)
+        body = chunks[local].strip()
+        # A whitespace-only chunk strips to "", and "".splitlines() is [], so
+        # indexing [0] would raise IndexError and surface to the user as the
+        # opaque "list index out of range".  Fall back to an empty label so
+        # the entry can still be opened and edited.
+        first_line = next(iter(body.splitlines()), "")
+
+        return {"ok": True, "kind": "memory", "id": node_id, "label": first_line[:80], "content": body}
+
+    from tools.skill_manager_tool import _find_skill
+
+    found = _find_skill(node_id)
+    if not found:
+        return {"ok": False, "message": f"skill '{node_id}' not found"}
+    skill_md = Path(found["path"]) / "SKILL.md"
+    if not skill_md.exists():
+        return {"ok": False, "message": f"SKILL.md missing for '{node_id}'"}
+
+    return {
+        "ok": True,
+        "kind": "skill",
+        "id": node_id,
+        "label": node_id,
+        "content": skill_md.read_text(encoding="utf-8"),
+    }
+
+
+# ── Delete ──────────────────────────────────────────────────────────────────
+
+
+def delete_node(node_id: str) -> dict[str, Any]:
+    """Delete a journey node, dispatching on whether it is a memory or a skill.
+
+    ``ValueError`` and ``IndexError`` raised by the handlers are reported as
+    ``{"ok": False, "message": ...}`` instead of propagating.
+    """
+    try:
+        if parse_node_kind(node_id) == "memory":
+            return _delete_memory(node_id)
+        return _delete_skill(node_id)
+    except (ValueError, IndexError) as exc:
+        return {"ok": False, "message": str(exc)}
+
+
+def _delete_skill(name: str) -> dict[str, Any]:
+    """Archive the skill called *name* unless its usage record is pinned.
+
+    A pinned skill is refused with a hint on how to unpin it.  When
+    ``skill_usage.archive_skill`` reports success the skill prompt cache is
+    cleared and the message explains how to restore the skill; otherwise the
+    archive helper's own message is returned.
+    """
+    from tools import skill_usage
+
+    usage_record = skill_usage.get_record(name)
+    if usage_record.get("pinned"):
+        return {"ok": False, "message": f"'{name}' is pinned — unpin it first (hermes curator unpin {name})"}
+
+    ok, message = skill_usage.archive_skill(name)
+    if not ok:
+        return {"ok": ok, "message": message}
+
+    _clear_skill_cache()
+    return {"ok": ok, "message": f"archived '{name}' — restore with: hermes curator restore {name}"}
+
+
+def _delete_memory(node_id: str) -> dict[str, Any]:
+    """Remove one memory entry and rewrite its backing file.
+
+    Parsing and lookup errors (``ValueError`` / ``IndexError``) propagate to
+    the caller.
+    """
+    source, gidx = _parse_memory_id(node_id)
+    path, entries, position = _locate_memory(source, gidx)
+
+    # _locate_memory has already range-checked ``position``.
+    entries.pop(position)
+    _write_memory(path, entries)
+
+    return {"ok": True, "message": f"deleted memory from {path.name}"}
+
+
+# ── Edit ────────────────────────────────────────────────────────────────────
+
+
+def edit_node(node_id: str, content: str) -> dict[str, Any]:
+    """Replace a journey node's content, dispatching on memory versus skill.
+
+    ``ValueError`` and ``IndexError`` raised by the handlers are reported as
+    ``{"ok": False, "message": ...}`` instead of propagating.
+    """
+    try:
+        if parse_node_kind(node_id) == "memory":
+            return _edit_memory(node_id, content)
+        return _edit_skill(node_id, content)
+    except (ValueError, IndexError) as exc:
+        return {"ok": False, "message": str(exc)}
+
+
+def _edit_skill(name: str, content: str) -> dict[str, Any]:
+    """Overwrite a skill through the skill manager tool.
+
+    The tool's ``success`` / ``error`` result is translated into this
+    module's ``ok`` / ``message`` shape, and the skill prompt cache is
+    cleared after a successful edit.
+    """
+    from tools.skill_manager_tool import _edit_skill as _do_edit
+
+    outcome = _do_edit(name, content)
+    if not outcome.get("success"):
+        return {"ok": False, "message": outcome.get("error", "edit failed")}
+
+    _clear_skill_cache()
+    return {"ok": True, "message": f"updated '{name}'"}
+
+
+def _edit_memory(node_id: str, content: str) -> dict[str, Any]:
+    """Replace one memory entry with the stripped *content*.
+
+    The id is parsed first, so a malformed id raises ``ValueError`` even when
+    *content* is blank.  Blank content is refused rather than treated as a
+    delete.  Lookup errors propagate to the caller.
+    """
+    source, gidx = _parse_memory_id(node_id)
+    replacement = content.strip()
+    if replacement:
+        path, entries, position = _locate_memory(source, gidx)
+
+        entries[position] = replacement
+        _write_memory(path, entries)
+
+        return {"ok": True, "message": f"updated memory in {path.name}"}
+
+    return {"ok": False, "message": "empty memory — use delete to remove it"}
+
+
+# ── Helpers ─────────────────────────────────────────────────────────────────
+
+
+def _write_memory(path: Path, chunks: list[str]) -> None:
+    """Persist *chunks* to *path* through ``MemoryStore._write_file``.
+
+    Each chunk is stripped and blank ones are dropped before writing.  The
+    write is delegated to the memory tool so the entry-joining format lives
+    in one place; the original author describes that helper as an atomic
+    temp-file + rename, so a concurrent reader should never see a
+    half-written file.
+    """
+    from tools.memory_tool import MemoryStore
+
+    stripped = (chunk.strip() for chunk in chunks)
+    MemoryStore._write_file(path, [entry for entry in stripped if entry])
+
+
+def _clear_skill_cache() -> None:
+    """Drop the cached skills system prompt, best effort.
+
+    Any failure — including the prompt builder being unimportable — is
+    swallowed on purpose: the skill mutation has already been applied by the
+    time this runs.
+    """
+    try:
+        from agent.prompt_builder import clear_skills_system_prompt_cache as _drop_cache
+
+        _drop_cache(clear_snapshot=True)
+    except Exception:
+        return
--- a/agent/lsp/install.py
+++ b/agent/lsp/install.py
@@ -102,6 +102,11 @@ INSTALL_RECIPES: Dict[str, Dict[str, Any]] = {
    # Lua — manual (LuaLS is platform-specific binaries from GitHub
    # releases; complex enough that we punt to the user)
    "lua-language-server": {"strategy": "manual", "pkg": "", "bin": "lua-language-server"},
+    # PowerShell — PowerShellEditorServices ships as a GitHub release
+    # zip driven by a pwsh bootstrap script, not a single binary.  We
+    # require a manual bundle install and probe for the pwsh host so
+    # `hermes lsp status` reports the host's presence.
+    "powershell": {"strategy": "manual", "pkg": "", "bin": "pwsh"},
 }


--- a/agent/lsp/reporter.py
+++ b/agent/lsp/reporter.py
@@ -8,6 +8,7 @@ OpenCode's ``lsp/diagnostic.ts`` and Claude Code's
 """
 from __future__ import annotations

+import html
 from typing import Any, Dict, List

 # Severity-1 only by default — warnings/info/hints would flood the
@@ -18,18 +19,65 @@ DEFAULT_SEVERITIES = frozenset({1})  # ERROR only
 MAX_PER_FILE = 20
 MAX_TOTAL_CHARS = 4000

+# Per-field caps for diagnostic content sourced from the language server.
+# These bound the length of any single attacker-controlled identifier that
+# can ride into the model's tool output via an LSP diagnostic message.
+MAX_MESSAGE_CHARS = 300
+MAX_CODE_CHARS = 80
+MAX_SOURCE_CHARS = 80
+
+
+def _sanitize_field(value: Any, *, limit: int) -> str:
+    """Render an untrusted language-server value as one safe line of text.
+
+    Diagnostic ``message``, ``code`` and ``source`` come from a language
+    server that has just parsed user-controlled source, so a hostile repo can
+    plant instruction-shaped text in identifiers, type aliases or import
+    paths and have it echoed into the ``<diagnostics>`` block the model
+    reads.  To blunt that, the value is processed in this order:
+
+    1. ``None`` becomes ``""`` so callers can simply test truthiness to omit
+       the part; anything else is converted with ``str()``.
+    2. CR and LF each become a space, so a raw newline cannot start a new
+       line in the formatted block.
+    3. Every character for which ``str.isprintable`` is false is removed.
+       That covers control characters such as tab and NUL; ordinary spaces
+       are printable and are kept.
+    4. Surrounding whitespace is stripped and the text is cut to *limit*
+       characters.  The cut happens before escaping, so an entity is never
+       split in half (the escaped result may therefore exceed *limit*).
+    5. ``<``, ``>`` and ``&`` are HTML-escaped so the text cannot close
+       ``<diagnostics>`` early or open a new tag.  Quotes are left alone.
+    """
+    if value is None:
+        return ""
+    flattened = str(value).translate({ord("\r"): " ", ord("\n"): " "})
+    visible = "".join(filter(str.isprintable, flattened))
+    clipped = visible.strip()[:limit]
+    return html.escape(clipped, quote=False)
+

 def format_diagnostic(d: Dict[str, Any]) -> str:
-    """One-line representation of a single diagnostic."""
+    """One-line representation of a single diagnostic.
+
+    ``message``, ``code``, and ``source`` are sanitized before
+    interpolation — see ``_sanitize_field``.
+    """
    sev = SEVERITY_NAMES.get(d.get("severity") or 1, "ERROR")
    rng = d.get("range") or {}
    start = rng.get("start") or {}
    line = int(start.get("line", 0)) + 1
    col = int(start.get("character", 0)) + 1
-    msg = str(d.get("message") or "").rstrip()
-    code = d.get("code")
-    code_part = f" [{code}]" if code not in {None, ""} else ""
-    source = d.get("source")
+    msg = _sanitize_field(d.get("message"), limit=MAX_MESSAGE_CHARS)
+    code = _sanitize_field(d.get("code"), limit=MAX_CODE_CHARS)
+    code_part = f" [{code}]" if code else ""
+    source = _sanitize_field(d.get("source"), limit=MAX_SOURCE_CHARS)
    source_part = f" ({source})" if source else ""
    return f"{sev} [{line}:{col}] {msg}{code_part}{source_part}"

@@ -57,7 +105,11 @@ def report_for_file(
    body = "\n".join(lines)
    if extra > 0:
        body += f"\n... and {extra} more"
-    return f"<diagnostics file=\"{file_path}\">\n{body}\n</diagnostics>"
+    # quote=True escapes both ``"`` and ``&`` so a crafted file name like
+    # ``foo"><script`` can't break out of the ``file="..."`` attribute and
+    # synthesize new tags inside the tool output.
+    safe_path = html.escape(file_path, quote=True)
+    return f"<diagnostics file=\"{safe_path}\">\n{body}\n</diagnostics>"


 def truncate(s: str, *, limit: int = MAX_TOTAL_CHARS) -> str:
--- a/agent/lsp/servers.py
+++ b/agent/lsp/servers.py
@@ -102,6 +102,9 @@ LANGUAGE_BY_EXT: Dict[str, str] = {
    ".zig": "zig",
    ".zon": "zig",
    ".dockerfile": "dockerfile",
+    ".ps1": "powershell",
+    ".psm1": "powershell",
+    ".psd1": "powershell",
 }


@@ -676,6 +679,131 @@ def _spawn_astro(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
    )


+_PSES_BUNDLE_WARNED = False
+
+
+def _find_pses_bundle(ctx: ServerContext) -> Optional[str]:
+    """Locate the PowerShellEditorServices module bundle directory.
+
+    PSES ships as a GitHub release zip (not an npm/go/pip package), so
+    there's no auto-install recipe — the user downloads it and points us
+    at the extracted bundle.  Resolution order:
+
+    1. ``command`` override in config (``lsp.servers.powershell.command``) —
+       the FIRST element is treated as the bundle path when it's a
+       directory.  This is the documented config knob.
+    2. ``init_overrides["powershell"]["bundlePath"]``.
+    3. ``PSES_BUNDLE_PATH`` env var.
+    4. ``<HERMES_HOME>/lsp/PowerShellEditorServices`` staging dir (where a
+       user-run unzip would naturally land).
+
+    Each candidate may be either the bundle root or the inner
+    ``PowerShellEditorServices`` module directory.  Candidates are
+    ``~``-expanded and made absolute before probing, which also removes any
+    trailing separator.
+
+    Returns the absolute bundle directory containing
+    ``PowerShellEditorServices/``, or ``None`` when it can't be found.
+    """
+    candidates: List[str] = []
+    override = ctx.binary_overrides.get("powershell")
+    if override and override[0]:
+        candidates.append(override[0])
+    init = ctx.init_overrides.get("powershell", {})
+    if isinstance(init, dict) and init.get("bundlePath"):
+        candidates.append(str(init["bundlePath"]))
+    env_path = os.environ.get("PSES_BUNDLE_PATH")
+    if env_path:
+        candidates.append(env_path)
+    home = os.environ.get("HERMES_HOME") or os.path.join(
+        os.path.expanduser("~"), ".hermes"
+    )
+    candidates.append(os.path.join(home, "lsp", "PowerShellEditorServices"))
+
+    for cand in candidates:
+        if not cand:
+            continue
+        # Normalise first.  os.path.dirname("/x/PSES/") is "/x/PSES" (the same
+        # directory, not its parent) and dirname("PSES") is "", so a trailing
+        # separator or a bare relative name would otherwise yield a bundle
+        # path whose start script does not exist.  An absolute result also
+        # stays valid when the server is later spawned with a different cwd.
+        cand = os.path.abspath(os.path.expanduser(cand))
+        # Accept either the bundle root or the inner module dir.
+        start_script = os.path.join(
+            cand, "PowerShellEditorServices", "Start-EditorServices.ps1"
+        )
+        if os.path.isfile(start_script):
+            return cand
+        inner = os.path.join(cand, "Start-EditorServices.ps1")
+        if os.path.isfile(inner):
+            return os.path.dirname(cand)
+    return None
+
+
+def _ps_single_quoted(value: str) -> str:
+    """Return *value* as a PowerShell single-quoted string literal.
+
+    Inside single quotes PowerShell takes every character literally except
+    the quote itself, which is written as two consecutive single quotes.
+    """
+    return "'" + value.replace("'", "''") + "'"
+
+
+def _spawn_powershell_es(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    """Spawn PowerShellEditorServices over stdio.
+
+    Unlike the single-binary servers, PSES is a PowerShell module driven
+    by a bootstrap script.  We need both a PowerShell host (``pwsh`` for
+    PowerShell 7+, or Windows ``powershell``) and the PSES module bundle.
+    The bundle is manual-install (release zip) — see ``_find_pses_bundle``.
+
+    Returns ``None`` when no PowerShell host is found, or when the bundle is
+    missing (a warning with install instructions is logged once per process
+    in that case).
+
+    Every path placed in the ``-Command`` string is quoted with
+    ``_ps_single_quoted`` so a path containing an apostrophe (for example a
+    home directory such as ``O'Brien``) cannot terminate the literal early.
+    """
+    global _PSES_BUNDLE_WARNED
+
+    pwsh = _which("pwsh", "powershell")
+    if pwsh is None:
+        return None
+    bundle = _find_pses_bundle(ctx)
+    if bundle is None:
+        if not _PSES_BUNDLE_WARNED:
+            _PSES_BUNDLE_WARNED = True
+            logger.warning(
+                "powershell: pwsh found but the PowerShellEditorServices "
+                "bundle is missing. Download the release zip from "
+                "https://github.com/PowerShell/PowerShellEditorServices/releases, "
+                "extract it, and either set lsp.servers.powershell.command "
+                "to the bundle path or unzip it to "
+                "<HERMES_HOME>/lsp/PowerShellEditorServices."
+            )
+        return None
+    start_script = os.path.join(
+        bundle, "PowerShellEditorServices", "Start-EditorServices.ps1"
+    )
+    scratch_dir = hermes_lsp_session_dir()
+    # Session details file: PSES writes connection info here on startup.
+    session_path = os.path.join(scratch_dir, f"pses-session-{os.getpid()}.json")
+    log_path = os.path.join(scratch_dir, "pses.log")
+    inner = (
+        f"& {_ps_single_quoted(start_script)} "
+        f"-BundledModulesPath {_ps_single_quoted(bundle)} "
+        f"-LogPath {_ps_single_quoted(log_path)} "
+        f"-SessionDetailsPath {_ps_single_quoted(session_path)} "
+        f"-FeatureFlags @() -AdditionalModules @() "
+        f"-HostName Hermes -HostProfileId hermes -HostVersion 1.0.0 "
+        f"-Stdio -LogLevel Normal"
+    )
+    # Same guard as _find_pses_bundle: a non-dict override (e.g. an empty
+    # config key that parsed to None) must not crash on ``.items()``.
+    init = ctx.init_overrides.get("powershell", {})
+    if not isinstance(init, dict):
+        init = {}
+    return SpawnSpec(
+        command=[
+            pwsh,
+            "-NoLogo",
+            "-NoProfile",
+            "-NonInteractive",
+            "-ExecutionPolicy",
+            "Bypass",
+            "-Command",
+            inner,
+        ],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("powershell", {}),
+        # bundlePath is a Hermes-side locator, not a server option.
+        initialization_options={
+            k: v for k, v in init.items() if k != "bundlePath"
+        },
+    )
+
+
+def hermes_lsp_session_dir() -> str:
+    """Ensure the PSES scratch directory exists and return its path.
+
+    The directory is ``<home>/lsp/pses``, where ``<home>`` is the
+    ``HERMES_HOME`` environment variable when it is set to a non-empty value
+    and ``~/.hermes`` otherwise.  It holds the PSES session and log files.
+    """
+    configured_home = os.environ.get("HERMES_HOME")
+    if configured_home:
+        base = configured_home
+    else:
+        base = os.path.join(os.path.expanduser("~"), ".hermes")
+    scratch = os.path.join(base, "lsp", "pses")
+    os.makedirs(scratch, exist_ok=True)
+    return scratch
+
+
 def _resolve_override(ctx: ServerContext, server_id: str) -> Optional[str]:
    """User can pin a binary path in config."""
    override = ctx.binary_overrides.get(server_id)
@@ -823,6 +951,18 @@ def _root_java(file_path: str, workspace: str) -> Optional[str]:
    )


+def _root_powershell(file_path: str, workspace: str) -> Optional[str]:
+    """Resolve the project root for a PowerShell file.
+
+    PowerShell projects rarely have a universal root marker, so the only
+    marker offered is the PSScriptAnalyzer settings file; when it is absent
+    the lookup falls back to the git workspace root.
+    """
+    # Exact file names only — nearest_root does not expand globs.
+    markers = ["PSScriptAnalyzerSettings.psd1"]
+    return _root_or_workspace(file_path, workspace, markers)
+
+
 # ---------------------------------------------------------------------------
 # the registry
 # ---------------------------------------------------------------------------
@@ -1012,6 +1152,13 @@ SERVERS: List[ServerDef] = [
        build_spawn=_spawn_jdtls,
        description="Java — Eclipse JDT Language Server",
    ),
+    ServerDef(
+        server_id="powershell",
+        extensions=(".ps1", ".psm1", ".psd1"),
+        resolve_root=_root_powershell,
+        build_spawn=_spawn_powershell_es,
+        description="PowerShell — PowerShellEditorServices (manual bundle)",
+    ),
 ]


--- a/agent/moa_loop.py
+++ b/agent/moa_loop.py
@@ -93,22 +93,27 @@ def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
        from hermes_cli.runtime_provider import resolve_runtime_provider

        rt = resolve_runtime_provider(requested=provider, target_model=model)
-        resolved_provider = str(rt.get("provider") or provider).strip().lower()
-        # call_llm treats an explicit base_url as a custom endpoint. That is
-        # correct for ordinary OpenAI-compatible targets, but wrong for OAuth /
-        # provider-backed targets whose provider branch adds auth refresh,
-        # request metadata, or request-shape adapters. Keep those providers
-        # identified by name.
-        if resolved_provider in {"nous", "openai-codex", "xai-oauth"}:
-            return out
-        # Pass the resolved endpoint through so call_llm builds the request for
-        # the provider's actual API surface instead of auto-detecting. base_url
-        # routes call_llm to the right adapter (incl. anthropic_messages mode);
-        # api_key is the resolved credential for that provider.
+        # Forward the resolved endpoint through to call_llm unconditionally.
+        # call_llm's _resolve_task_provider_model() is the single chokepoint that
+        # decides whether an explicit base_url collapses a call to the generic
+        # ``custom`` route or keeps the provider's real identity: it preserves
+        # identity for any first-class provider (via
+        # _preserve_provider_with_base_url, a provider-catalog capability check),
+        # so provider branches that add auth refresh / request metadata /
+        # request-shape adapters — anthropic OAuth (Bearer + anthropic-beta),
+        # openai-codex Responses wrapping + Cloudflare headers, xai-oauth,
+        # bedrock SigV4 signing, nous Portal tags — still fire. Those branches
+        # re-resolve their own credentials by name and ignore a forwarded
+        # base_url/api_key, so forwarding is safe even for a placeholder key
+        # (bedrock's "aws-sdk"). We used to maintain a name-preservation set here
+        # too; that duplicated the chokepoint and drifted out of sync, so the
+        # single source of truth now lives in call_llm.
        if rt.get("base_url"):
            out["base_url"] = rt["base_url"]
        if rt.get("api_key"):
            out["api_key"] = rt["api_key"]
+        if rt.get("api_mode"):
+            out["api_mode"] = rt["api_mode"]
    except Exception as exc:  # pragma: no cover - defensive
        logger.debug("MoA slot runtime resolution failed for %s: %s", _slot_label(slot), exc)
    return out
@@ -352,8 +357,14 @@ def _extract_text(response: Any) -> str:
    except Exception:
        pass
    try:
-        content = response.choices[0].message.content
-        return (content or "").strip()
+        message = response.choices[0].message
+        if isinstance(message, dict):
+            content = message.get("content")
+        else:
+            content = getattr(message, "content", message)
+        if not isinstance(content, str):
+            content = str(content) if content else ""
+        return content.strip()
    except Exception:
        return ""

@@ -569,6 +580,24 @@ class MoAChatCompletions:
        # max_tokens is passed through from the caller (normally None → omitted
        # → the model's real maximum). The preset's old hardcoded 4096 default
        # is gone — it truncated long syntheses.
+        # When the agent's streaming consumer calls us with stream=True, run the
+        # references first (above) and then return the aggregator's RAW token
+        # stream so the acting model's output reaches the user live. The consumer
+        # reassembles chunks + tool_calls, runs stale-stream detection, and falls
+        # back to a non-streaming retry on error. The non-streaming path
+        # (stream=False) is unchanged — no stream/stream_options/timeout are
+        # forwarded, so its behavior is byte-for-byte identical to before.
+        stream = bool(api_kwargs.get("stream"))
+        stream_kwargs: dict[str, Any] = {}
+        if stream:
+            stream_kwargs["stream"] = True
+            stream_kwargs["stream_options"] = (
+                api_kwargs.get("stream_options") or {"include_usage": True}
+            )
+            # Forward the consumer's per-request (stream read) timeout so it
+            # actually governs the aggregator stream, not just call_llm's default.
+            if api_kwargs.get("timeout") is not None:
+                stream_kwargs["timeout"] = api_kwargs["timeout"]
        return call_llm(
            task="moa_aggregator",
            messages=agg_messages,
@@ -576,6 +605,7 @@ class MoAChatCompletions:
            max_tokens=agg_kwargs.get("max_tokens"),
            tools=agg_kwargs.get("tools"),
            extra_body=agg_kwargs.get("extra_body"),
+            **stream_kwargs,
            **_slot_runtime(aggregator),
        )

--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -429,6 +429,10 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "inference-api.nousresearch.com": "nous",
    "api.deepseek.com": "deepseek",
    "api.githubcopilot.com": "copilot",
+    # Enterprise Copilot endpoints look like api.enterprise.githubcopilot.com,
+    # api.business.githubcopilot.com, etc.  Match the suffix so context-window
+    # resolution works for enterprise accounts too.
+    ".githubcopilot.com": "copilot",
    "models.github.ai": "copilot",
    # GitHub Models free tier (Azure-hosted prototyping endpoint) — same
    # canonical provider as the Copilot API.  Hard per-request token cap
@@ -1075,10 +1079,29 @@ def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
        "maximum context length" in error_lower
        and "requested" in error_lower
        and "output tokens" in error_lower
+    ) or (
+        # DashScope / Alibaba Cloud (Qwen) phrasing.  The provider rejects an
+        # over-cap output request with a bounded range whose upper bound IS the
+        # real max-output cap, e.g.
+        #   "Range of max_tokens should be [1, 65536]"
+        # The input itself fits — this is purely an output-cap error, so reduce
+        # max_tokens and retry; do NOT compress.
+        "range of max_tokens should be" in error_lower
    )
    if not is_output_cap_error:
        return None

+    # DashScope / Alibaba range form: "Range of max_tokens should be [1, 65536]".
+    # The upper bound is the available output cap.
+    _m_range = re.search(
+        r'range of max_tokens should be\s*\[\s*\d+\s*,\s*(\d+)\s*\]',
+        error_lower,
+    )
+    if _m_range:
+        _cap = int(_m_range.group(1))
+        if _cap >= 1:
+            return _cap
+
    # Extract the available_tokens figure.
    # Anthropic format: "… = available_tokens: 10000"
    patterns = [
@@ -1125,6 +1148,70 @@ def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
    return None


+def is_output_cap_error(error_msg: str) -> bool:
+    """Tell whether an error message complains about an oversized output cap.
+
+    This is the yes/no companion to
+    :func:`parse_available_output_tokens_from_error`, which only yields a
+    number for provider phrasings it can parse.  This check is broader: it
+    recognises an output-cap rejection even when no figure can be extracted.
+
+    Why it exists: an output-cap 400 is deterministic, so retrying with the
+    same ``max_tokens`` fails identically.  If it is mistaken for a context
+    overflow it is sent to the compression loop, which re-issues the call
+    with the same oversized ``max_tokens`` and death-loops until "cannot
+    compress further" (issue #55546, DashScope/Qwen: "Range of max_tokens
+    should be [1, 65536]").  Compression cannot help — the input already
+    fits.
+
+    The message (matched case-insensitively) must pass three gates:
+
+    1. it names ``max_tokens``, ``max_output_tokens`` or
+       ``max_completion_tokens``;
+    2. it contains wording that frames that parameter as a cap, range or
+       limit;
+    3. it does NOT also describe an oversized input or prompt — when both
+       appear, the context-overflow path is left to handle it, since that
+       path can compress the input.
+    """
+    lowered = error_msg.lower()
+
+    # Gate 1: the output-limit parameter (or an alias) is mentioned at all.
+    output_params = ("max_tokens", "max_output_tokens", "max_completion_tokens")
+    if not any(param in lowered for param in output_params):
+        return False
+
+    # Gate 2: phrasing that marks the OUTPUT cap as the problem.
+    single_phrases = (
+        "range of max_tokens should be",  # DashScope / Alibaba
+        "available_tokens",  # Anthropic
+        "available tokens",
+        "should be",  # generic "max_tokens should be <= N"
+        "less than or equal",
+        "must be",
+    )
+    phrase_pairs = (
+        ("in the output", "maximum context length"),  # OpenRouter / Nous
+        ("requested", "output tokens"),  # LM Studio / llama.cpp
+    )
+    names_a_cap = any(phrase in lowered for phrase in single_phrases) or any(
+        first in lowered and second in lowered for first, second in phrase_pairs
+    )
+    if not names_a_cap:
+        return False
+
+    # Gate 3: defer to the context-overflow path when the INPUT is also
+    # described as too large.
+    input_phrases = (
+        "prompt is too long",
+        "prompt too long",
+        "input is too long",
+        "input token",
+        "prompt length",
+        "prompt contains",
+        "reduce the length",
+    )
+    return not any(phrase in lowered for phrase in input_phrases)
+
+
 def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
    """Return True if *candidate_id* (from server) matches *lookup_model* (configured).

--- a/agent/pet/render.py
+++ b/agent/pet/render.py
@@ -230,6 +230,68 @@ def _png_bytes(frame) -> bytes:
    return buf.getvalue()


+def _union_alpha_bbox(frames) -> tuple[int, int, int, int] | None:
+    """Return the smallest box covering every frame's alpha-channel bbox.
+
+    Each frame contributes ``frame.getchannel("A").getbbox()``.  Frames whose
+    lookup raises, or whose bbox is empty, are skipped.  Returns ``None``
+    when no frame contributes a box.  Using one shared box gives a stable
+    trim across an animation.
+    """
+    lefts, tops, rights, bottoms = [], [], [], []
+    for frame in frames:
+        try:
+            box = frame.getchannel("A").getbbox()
+        except Exception:  # noqa: BLE001 - cosmetic; fail open
+            continue
+        if not box:
+            continue
+        x0, y0, x1, y1 = box
+        lefts.append(x0)
+        tops.append(y0)
+        rights.append(x1)
+        bottoms.append(y1)
+    if not lefts:
+        return None
+    return (min(lefts), min(tops), max(rights), max(bottoms))
+
+
+def _crop_frames_to_alpha_union(frames):
+    """Trim every frame to the shared opaque bounding box.
+
+    kitty paints the whole transmitted rectangle, transparent margins
+    included, which leaves the visible pet looking small and adrift inside a
+    larger cell box.  Cropping all frames to the same union box keeps the
+    sprite tight in its corner.  When no union box can be computed the
+    frames are returned untouched.
+    """
+    visible_box = _union_alpha_bbox(frames)
+    if visible_box:
+        return [frame.crop(visible_box) for frame in frames]
+    return frames
+
+
+# Nominal terminal cell size in pixels. kitty fits an image to its cell
+# rectangle preserving aspect, so a frame whose pixel size isn't a whole
+# multiple of the cell rounds up — which makes the terminal clip the bottom row
+# (the "clipped feet") and letterbox a blank row. Snapping each frame to an
+# exact cell multiple avoids that. (See ratatui-image #57: "render in multiples
+# of the font-size, to avoid stale character artifacts.")
+_CELL_W = 8
+_CELL_H = 16
+
+
+def _snap_frames_to_cell_grid(frames):
+    """Resize frames so both dimensions are whole multiples of the cell box.
+
+    The target is derived from the first frame only: its width and height
+    are rounded to the nearest multiple of ``_CELL_W`` / ``_CELL_H``, with a
+    minimum of one cell each way.  This removes the sub-cell remainder kitty
+    would otherwise round up and clip.  The frames are returned unchanged
+    when the list is empty or the first frame already sits on the grid;
+    otherwise every frame is resized to the target with Lanczos resampling.
+    """
+    if not frames:
+        return frames
+    from PIL import Image
+
+    width, height = frames[0].size
+    snapped_w = _CELL_W * max(1, round(width / _CELL_W))
+    snapped_h = _CELL_H * max(1, round(height / _CELL_H))
+    if (width, height) != (snapped_w, snapped_h):
+        return [frame.resize((snapped_w, snapped_h), Image.LANCZOS) for frame in frames]
+    return frames
+
+
 def _kitty_apc(ctrl: str, data: str) -> str:
    """Emit a kitty APC escape for *data*, chunked into ≤4096-byte ``m`` pieces."""
    chunk = 4096
@@ -563,6 +625,8 @@ class PetRenderer:
        frames = self._frames(state)
        if not frames:
            return None
+        frames = _crop_frames_to_alpha_union(frames)
+        frames = _snap_frames_to_cell_grid(frames)
        cols, rows = self._cell_box(frames[0])
        return {
            "cols": cols,
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -106,6 +106,7 @@ _PREFIX_PATTERNS = [
    r"brv_[A-Za-z0-9]{10,}",            # ByteRover API key
    r"xai-[A-Za-z0-9]{30,}",            # xAI (Grok) API key
    r"ntn_[A-Za-z0-9]{10,}",            # Notion internal integration token
+    r"fw_[A-Za-z0-9]{30,}",             # Fireworks AI API key
 ]

 # ENV assignment patterns: KEY=value where KEY contains a secret-like name.
--- a/agent/thread_scoped_output.py
+++ b/agent/thread_scoped_output.py
@@ -0,0 +1,147 @@
+"""Thread-scoped stdout/stderr silencing for background worker threads.
+
+``contextlib.redirect_stdout``/``redirect_stderr`` reassign the *process-global*
+``sys.stdout``/``sys.stderr``.  When a daemon worker thread (e.g. the background
+memory/skill review) wraps its whole body in those context managers, every other
+thread in the process — including a gateway's asyncio event-loop thread driving a
+Telegram long-poll — sees ``sys.stdout``/``sys.stderr`` pointing at ``devnull``
+for the full duration.  Any bare ``print`` / ``sys.stderr.write`` from those other
+threads is silently lost during that window (see issue #55769 / #55925).
+
+This module installs a thin proxy as ``sys.stdout``/``sys.stderr`` that routes
+writes per-thread: threads registered as "silenced" go to a sink; every other
+thread passes through to the *original* stream.  The proxy is installed once,
+idempotently, and is never uninstalled (uninstalling would race other threads
+mid-write), so the only observable effect for unregistered threads is one extra
+attribute lookup per write.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import os
+import sys
+import threading
+from typing import Iterator, TextIO
+
+__all__ = ["thread_scoped_silence"]
+
+_install_lock = threading.Lock()
+# Maps the proxy we installed for a given attribute ("stdout"/"stderr") so we
+# never double-wrap and so we can recover the original stream.
+_installed: dict[str, "_ThreadRoutingStream"] = {}
+
+
+class _ThreadRoutingStream:
+    """A ``sys.stdout``/``sys.stderr`` stand-in that routes writes per-thread.
+
+    Threads whose ident is in ``_silenced`` write to ``_sink``; all other
+    threads write to ``_passthrough`` (the stream captured at install time).
+    Anything we don't override (``.encoding``, ``.buffer``, ...) is delegated
+    to the *current* target, so it matches the calling thread's stream.
+    """
+
+    def __init__(self, passthrough: TextIO, sink: TextIO) -> None:
+        self._passthrough = passthrough
+        self._sink = sink
+        # ident -> nesting depth.  A thread is silenced while depth > 0, so
+        # nested ``thread_scoped_silence()`` on one thread composes correctly.
+        self._silenced: dict[int, int] = {}
+        self._lock = threading.Lock()
+
+    def _target(self) -> TextIO:
+        if self._silenced.get(threading.get_ident(), 0) > 0:
+            return self._sink
+        return self._passthrough
+
+    # --- registration -----------------------------------------------------
+    def silence(self, ident: int) -> None:
+        with self._lock:
+            self._silenced[ident] = self._silenced.get(ident, 0) + 1
+
+    def unsilence(self, ident: int) -> None:
+        with self._lock:
+            depth = self._silenced.get(ident, 0) - 1
+            if depth > 0:
+                self._silenced[ident] = depth
+            else:
+                self._silenced.pop(ident, None)
+
+    # --- file-like surface ------------------------------------------------
+    def write(self, data):  # type: ignore[no-untyped-def]
+        try:
+            return self._target().write(data)
+        except Exception:  # best-effort: report the data as written
+            return len(data) if isinstance(data, str) else 0
+
+    def flush(self):  # type: ignore[no-untyped-def]
+        try:
+            return self._target().flush()
+        except Exception:
+            return None
+
+    def writelines(self, lines):  # type: ignore[no-untyped-def]
+        try:
+            return self._target().writelines(lines)
+        except Exception:
+            return None
+
+    def isatty(self) -> bool:
+        try:
+            return bool(self._target().isatty())
+        except Exception:
+            return False
+
+    def fileno(self):  # type: ignore[no-untyped-def]
+        return self._target().fileno()
+
+    def __getattr__(self, name):  # type: ignore[no-untyped-def]
+        # Own fields must fail fast: on a half-built instance (``__new__``
+        # without ``__init__``) ``_target()`` would re-enter here forever.
+        if name in ("_passthrough", "_sink", "_silenced", "_lock"):
+            raise AttributeError(name)
+        # Delegate the rest (encoding, buffer, mode, ...) to the current target.
+        return getattr(self._target(), name)
+
+
+def _ensure_installed(attr: str, sink: TextIO) -> "_ThreadRoutingStream":
+    """Return ``sys.<attr>``'s routing proxy, installing it if needed."""
+    with _install_lock:
+        bound = getattr(sys, attr, None)
+        known = _installed.get(attr)
+        if known is not None and bound is known:
+            return known
+        # Either nothing is installed yet or ``sys.<attr>`` was rebound since.
+        # Wrap whatever is bound right now -- even an active global redirect --
+        # so non-silenced threads keep that stream; fall back to *sink* only
+        # when ``sys.<attr>`` is missing or None.
+        through = sink if bound is None else bound
+        fresh = _ThreadRoutingStream(through, sink)
+        setattr(sys, attr, fresh)
+        _installed[attr] = fresh
+        return fresh
+
+
+@contextlib.contextmanager
+def thread_scoped_silence() -> Iterator[None]:
+    """Silence ``stdout``/``stderr`` for the *current thread only*.
+
+    Other threads keep writing to the real streams.  Prefer this over the
+    process-global ``contextlib.redirect_stdout(devnull)`` in worker threads.
+    """
+    sink = open(os.devnull, "w", encoding="utf-8")
+    ident = threading.get_ident()
+    out_proxy = _ensure_installed("stdout", sink)
+    err_proxy = _ensure_installed("stderr", sink)
+    out_proxy.silence(ident)
+    err_proxy.silence(ident)
+    try:
+        yield
+    finally:
+        out_proxy.unsilence(ident)
+        err_proxy.unsilence(ident)
+        # Proxies are never uninstalled and keep the sink they were created
+        # with, so closing an adopted one leaves silenced threads on a closed file.
+        if sink is not out_proxy._sink and sink is not err_proxy._sink:
+            with contextlib.suppress(Exception):
+                sink.close()
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -619,7 +619,7 @@ class ChatCompletionsTransport(ProviderTransport):
                tc_provider_data: dict[str, Any] = {}
                extra = getattr(tc, "extra_content", None)
                if extra is None and hasattr(tc, "model_extra"):
-                    extra = (tc.model_extra or {}).get("extra_content")
+                    extra = (tc.model_extra if isinstance(tc.model_extra, dict) else {}).get("extra_content")
                if extra is not None:
                    if hasattr(extra, "model_dump"):
                        try:
--- a/agent/turn_context.py
+++ b/agent/turn_context.py
@@ -223,6 +223,9 @@ def build_turn_context(
    agent._unicode_sanitization_passes = 0
    agent._tool_guardrails.reset_for_turn()
    agent._tool_guardrail_halt_decision = None
+    _reset_consol = getattr(agent._memory_store, "reset_consolidation_failures", None)
+    if callable(_reset_consol):
+        _reset_consol()
    agent._vision_supported = True

    # Pre-turn connection health check: clean up dead TCP connections.
@@ -360,6 +363,12 @@ def build_turn_context(
            if _last >= 0 and _preflight_tokens > _last:
                _compressor.last_prompt_tokens = _preflight_tokens

+        _compression_cooldown = getattr(
+            _compressor,
+            "get_active_compression_failure_cooldown",
+            lambda: None,
+        )()
+
        if _preflight_deferred:
            logger.info(
                "Skipping preflight compression: rough estimate ~%s >= %s, "
@@ -368,6 +377,13 @@ def build_turn_context(
                f"{_compressor.threshold_tokens:,}",
                f"{_compressor.last_real_prompt_tokens:,}",
            )
+        elif _compression_cooldown:
+            logger.info(
+                "Skipping preflight compression: same-session cooldown active "
+                "(~%s seconds remaining, session %s)",
+                int(_compression_cooldown.get("remaining_seconds", 0.0)),
+                agent.session_id or "none",
+            )
        elif _compressor.should_compress(_preflight_tokens):
            logger.info(
                "Preflight compression: ~%s tokens >= %s threshold (model %s, ctx %s)",
--- a/agent/verification_stop.py
+++ b/agent/verification_stop.py
@@ -137,12 +137,12 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:

    Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an
    explicit ``agent.verify_on_stop`` config value. The config default is
-    ``False`` (see ``DEFAULT_CONFIG``) — verify-on-stop is OFF unless the user
-    opts in. The legacy ``"auto"`` sentinel is still honored for anyone who
-    sets it explicitly: it resolves to ON for interactive coding surfaces
-    (CLI, TUI, desktop) and programmatic callers, and OFF for conversational
-    messaging surfaces (Telegram, Discord, etc.). A missing/unknown value
-    falls back to OFF.
+    ``"auto"`` (see ``DEFAULT_CONFIG``) — surface-aware: ON for interactive
+    coding surfaces (CLI, TUI, desktop) and programmatic callers, OFF for
+    conversational messaging surfaces (Telegram, Discord, etc.) where the
+    verification narrative would reach a human as chat noise. An explicit
+    bool forces the behavior in either direction. A missing or unrecognized
+    value falls back to the surface-aware ``"auto"`` default.
    """
    env = os.environ.get("HERMES_VERIFY_ON_STOP")
    if env is not None:
@@ -165,10 +165,9 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
        if token in {"0", "false", "no", "off"}:
            return False
        if token == "auto":
-            # Explicit opt-in to the legacy surface-aware behavior.
            return not _session_is_messaging_surface()
-    # Missing or unknown value -> OFF (the new default).
-    return False
+    # Missing or unrecognized value -> surface-aware "auto" default.
+    return not _session_is_messaging_surface()


 def _candidate_cwds(paths: Iterable[str]) -> list[Path]:
--- a/apps/bootstrap-installer/public/nous-girl.jpg
+++ b/apps/bootstrap-installer/public/nous-girl.jpg
--- a/apps/bootstrap-installer/src-tauri/capabilities/default.json
+++ b/apps/bootstrap-installer/src-tauri/capabilities/default.json
@@ -7,6 +7,7 @@
    "core:default",
    "core:window:allow-close",
    "core:window:allow-minimize",
+    "core:window:allow-theme",
    "core:event:default",
    "opener:default",
    "dialog:default",
--- a/apps/bootstrap-installer/src-tauri/src/update.rs
+++ b/apps/bootstrap-installer/src-tauri/src/update.rs
@@ -12,8 +12,10 @@
 //!   4. launch the freshly-built desktop (reuses bootstrap::launch logic).
 //!
 //! We reuse the `BootstrapEvent` channel + the existing progress UI by
-//! emitting a synthetic two-stage manifest ("update", "rebuild"). To the
-//! frontend an update looks like a short bootstrap.
+//! emitting a synthetic multi-stage manifest (handoff → update → rebuild, plus
+//! an install stage on macOS). To the frontend an update looks like a short
+//! bootstrap, broken into the real operations run_update performs so the user
+//! sees discrete steps (with the live log underneath) instead of one bar.
 //!
 //! Cross-platform note: `hermes update` already handles macOS/Linux (git/pip).
 //! The only OS-specific bits here are the venv shim path (resolve_hermes) and
@@ -70,17 +72,10 @@ pub async fn start_update(app: AppHandle) -> Result<(), String> {
        } else {
            None
        };
-        let mut stages = vec![
-            stage_info("update", "Updating Hermes"),
-            stage_info("rebuild", "Rebuilding the desktop app"),
-        ];
-        if cfg!(target_os = "macos") && target_app.is_some() {
-            stages.push(stage_info("install", "Installing the updated app"));
-        }
        emit(
            &app,
            BootstrapEvent::Manifest {
-                stages,
+                stages: update_stages(target_app.is_some()),
                protocol_version: None,
            },
        );
@@ -183,32 +178,35 @@ async fn run_update(app: AppHandle) -> Result<()> {
        anyhow!(msg)
    })?;

-    // Synthetic manifest so the existing progress UI renders our two stages.
-    let mut stages = vec![
-        stage_info("update", "Updating Hermes"),
-        stage_info("rebuild", "Rebuilding the desktop app"),
-    ];
-    if cfg!(target_os = "macos") && target_app.is_some() {
-        stages.push(stage_info("install", "Installing the updated app"));
-    }
-
+    // Synthetic manifest so the existing progress UI renders our stages.
    emit(
        &app,
        BootstrapEvent::Manifest {
-            stages,
+            stages: update_stages(target_app.is_some()),
            protocol_version: None,
        },
    );

-    // ---- pre-step: wait for the old desktop to die -----------------------
+    // ---- stage 1: wait for the old desktop to die ------------------------
    // The desktop exec'd us then called app.exit(), but process teardown is
    // async on Windows. If it still holds the venv shim, `hermes update`
    // aborts with exit 2. If it still holds the packaged app.asar,
    // install.ps1's repair/re-clone path cannot move/remove the install tree.
-    // Give both handles a bounded window to clear.
-    wait_for_install_locks_free(&install_root, &app, "update").await;
+    // Give both handles a bounded window to clear. Surfaced as its own stage
+    // (rather than a silent pre-step) so a slow close / force-kill reads as
+    // real progress instead of a frozen first bar.
+    let started = Instant::now();
+    emit_stage(&app, "handoff", StageState::Running, None, None);
+    wait_for_install_locks_free(&install_root, &app, "handoff").await;
+    emit_stage(
+        &app,
+        "handoff",
+        StageState::Succeeded,
+        Some(started.elapsed().as_millis() as u64),
+        None,
+    );

-    // ---- stage 1: hermes update -----------------------------------------
+    // ---- stage 2: hermes update -----------------------------------------
    // Pass --branch so `hermes update` targets the branch this installer was
    // built/pinned against (BUILD_PIN_BRANCH), NOT its built-in default of
    // `main`. The install was a detached-HEAD checkout of a specific commit;
@@ -332,7 +330,7 @@ async fn run_update(app: AppHandle) -> Result<()> {
        }
    }

-    // ---- stage 2: hermes desktop --build-only ----------------------------
+    // ---- stage 3: hermes desktop --build-only ----------------------------
    // `hermes update` deliberately does NOT build apps/desktop (it installs
    // repo-root deps with --workspaces=false). This is the rebuild it skips.
    emit_stage(&app, "rebuild", StageState::Running, None, None);
@@ -953,6 +951,23 @@ fn stage_info(name: &str, title: &str) -> StageInfo {
    }
 }

+/// Builds the synthetic stage manifest shown while an update runs: the lock
+/// wait ("handoff"), then "update" and "rebuild", each as its own step in the
+/// progress UI. Pass `include_install = true` to append the macOS app-swap
+/// ("install") stage. Every manifest emitter should call this helper so the
+/// stage lists cannot drift apart.
+fn update_stages(include_install: bool) -> Vec<StageInfo> {
+    let install = include_install.then(|| stage_info("install", "Installing the update"));
+    vec![
+        stage_info("handoff", "Preparing to update"),
+        stage_info("update", "Downloading the latest version"),
+        stage_info("rebuild", "Rebuilding the desktop app"),
+    ]
+    .into_iter()
+    .chain(install)
+    .collect()
+}
+
 // option_env! only accepts string literals, so the build-time pins are read
 // by their literal names here. Mirrors bootstrap.rs's helper of the same name
 // (kept local rather than shared because option_env! can't be parameterized).
@@ -1101,6 +1116,36 @@ mod tests {
        assert_eq!(update_branch_from_args(["--update"]), None);
    }

+    #[test]
+    fn update_manifest_leads_with_handoff_and_gates_install() {
+        let base = update_stages(false);
+        assert_eq!(
+            base.first().map(|s| s.name.as_str()),
+            Some("handoff"),
+            "the lock-wait must surface as the first visible step"
+        );
+        assert!(
+            base.iter().any(|s| s.name == "update") && base.iter().any(|s| s.name == "rebuild"),
+            "update + rebuild remain distinct stages"
+        );
+        assert!(
+            base.iter().all(|s| s.name != "install"),
+            "no app-swap stage unless an install target was passed"
+        );
+
+        let with_install = update_stages(true);
+        assert_eq!(
+            with_install.last().map(|s| s.name.as_str()),
+            Some("install"),
+            "the macOS app-swap is the final stage when present"
+        );
+        assert_eq!(
+            with_install.len(),
+            base.len() + 1,
+            "include_install adds exactly one stage"
+        );
+    }
+
    #[test]
    fn rebuild_retries_only_on_failure() {
        assert!(!rebuild_needs_retry(Some(0)), "a clean rebuild must not retry");
--- a/apps/bootstrap-installer/src/components/brand-mark.tsx
+++ b/apps/bootstrap-installer/src/components/brand-mark.tsx
@@ -0,0 +1,13 @@
+import { cn } from '../lib/utils'
+
+const assetPath = (path: string) => `${import.meta.env.BASE_URL}${path.replace(/^\/+/, '')}`
+
+// Brand badge: nous-girl mark on a white tile, identical in light/dark.
+// Ported from apps/desktop's BrandMark; asset lives in this app's public/.
+export function BrandMark({ className, ...props }: React.ComponentProps<'span'>) {
+  return (
+    <span className={cn('inline-flex size-14 shrink-0 items-center justify-center bg-white', className)} {...props}>
+      <img alt="" className="size-full object-contain" src={assetPath('nous-girl.jpg')} />
+    </span>
+  )
+}
--- a/apps/bootstrap-installer/src/components/button.tsx
+++ b/apps/bootstrap-installer/src/components/button.tsx
@@ -17,7 +17,7 @@ import { cn } from '../lib/utils'
 */

 const buttonVariants = cva(
-  "inline-flex shrink-0 items-center justify-center gap-2 rounded-md text-sm font-medium whitespace-nowrap transition-all outline-none focus-visible:border-ring focus-visible:ring-[0.1875rem] focus-visible:ring-ring/50 disabled:pointer-events-none disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
+  "inline-flex shrink-0 cursor-pointer items-center justify-center gap-1.5 rounded-[2.5px] text-xs leading-4 font-medium whitespace-nowrap shadow-none transition-all duration-100 outline-none focus-visible:border-ring focus-visible:ring-[0.1875rem] focus-visible:ring-ring/50 disabled:pointer-events-none disabled:cursor-default disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-3.5",
  {
    variants: {
      variant: {
@@ -25,23 +25,24 @@ const buttonVariants = cva(
        destructive:
          'bg-destructive text-white hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:bg-destructive/60 dark:focus-visible:ring-destructive/40',
        outline:
-          'border bg-background shadow-xs hover:bg-accent hover:text-accent-foreground dark:border-input dark:bg-input/30 dark:hover:bg-input/50',
+          'bg-transparent text-(--ui-text-primary) shadow-[inset_0_0_0_1px_color-mix(in_srgb,var(--ui-stroke-secondary)_50%,transparent)] hover:bg-(--chrome-action-hover) hover:text-(--ui-text-primary)',
        secondary:
-          'bg-secondary text-secondary-foreground hover:bg-secondary/80',
-        ghost:
-          'hover:bg-accent hover:text-accent-foreground dark:hover:bg-accent/50',
-        link: 'text-primary underline-offset-4 decoration-current/20 hover:underline'
+          'bg-(--ui-bg-quaternary) text-(--ui-text-primary) hover:bg-(--chrome-action-hover) hover:text-(--ui-text-primary)',
+        ghost: 'text-(--ui-text-secondary) hover:bg-(--chrome-action-hover) hover:text-(--ui-text-primary)',
+        link: 'text-primary underline-offset-4 decoration-current/20 hover:underline',
+        text: 'text-muted-foreground underline-offset-4 hover:text-foreground hover:underline',
+        textStrong: 'font-semibold text-muted-foreground underline underline-offset-4 hover:text-foreground'
      },
      size: {
-        default: 'h-9 px-4 py-2 has-[>svg]:px-3',
-        xs: "h-6 gap-1 rounded-md px-2 text-xs has-[>svg]:px-1.5 [&_svg:not([class*='size-'])]:size-3",
-        sm: 'h-8 gap-1.5 rounded-md px-3 has-[>svg]:px-2.5',
-        lg: 'h-10 rounded-md px-6 has-[>svg]:px-4',
-        icon: 'size-9',
-        'icon-xs':
-          "size-6 rounded-md [&_svg:not([class*='size-'])]:size-3",
-        'icon-sm': 'size-8',
-        'icon-lg': 'size-10'
+        default: 'px-3 py-1.5 has-[>svg]:px-2.5',
+        xs: "gap-1 px-2 py-0.5 text-[0.6875rem] leading-4 has-[>svg]:px-1.5 [&_svg:not([class*='size-'])]:size-3",
+        sm: 'px-2.5 py-1 has-[>svg]:px-2',
+        lg: 'px-5 py-2 text-sm leading-5 has-[>svg]:px-4',
+        inline: 'h-auto gap-1 p-0 has-[>svg]:px-0',
+        icon: 'size-9 rounded-[4px]',
+        'icon-xs': "size-6 rounded-[4px] [&_svg:not([class*='size-'])]:size-3",
+        'icon-sm': 'size-8 rounded-[4px]',
+        'icon-lg': 'size-10 rounded-[4px]'
      }
    },
    defaultVariants: {
--- a/apps/bootstrap-installer/src/components/hackery-button.tsx
+++ b/apps/bootstrap-installer/src/components/hackery-button.tsx
@@ -0,0 +1,36 @@
+import { Loader2 } from 'lucide-react'
+
+import { cn } from '../lib/utils'
+
+/*
+ * HackeryButton — the onboarding "Begin" CTA, ported standalone.
+ *
+ * Bracketed [ LABEL ], mono/uppercase, primary accent on a --stroke-nous hairline.
+ * Lifted from apps/desktop's desktop-onboarding-overlay.tsx (sans the exit-scramble
+ * choreography, which is overlay-specific). Self-contained: cn + lucide only.
+ */
+export function HackeryButton({
+  className,
+  label,
+  loading,
+  ...props
+}: Omit<React.ComponentProps<'button'>, 'children'> & { label: React.ReactNode; loading?: boolean }) {
+  return (
+    <button
+      {...props}
+      className={cn(
+        'group inline-flex cursor-pointer items-center gap-2 rounded-md border border-(--stroke-nous) px-6 py-2.5',
+        'font-mono text-xs font-semibold uppercase text-primary',
+        'transition-all duration-150 hover:border-primary/60 hover:bg-primary/[0.06]',
+        'disabled:pointer-events-none disabled:opacity-50',
+        className
+      )}
+      type="button"
+    >
+      <span className="text-primary/40 transition-colors group-hover:text-primary">[</span>
+      {loading ? <Loader2 className="size-3 animate-spin" /> : null}
+      <span className="-mr-[0.25em] pl-[0.25em] tracking-[0.25em]">{label}</span>
+      <span className="text-primary/40 transition-colors group-hover:text-primary">]</span>
+    </button>
+  )
+}
--- a/apps/bootstrap-installer/src/components/loader.tsx
+++ b/apps/bootstrap-installer/src/components/loader.tsx
@@ -0,0 +1,136 @@
+import { type ComponentProps, useEffect, useRef } from 'react'
+
+import { cn } from '../lib/utils'
+
+/*
+ * Loader — the desktop's "Fourier Flow" curve, ported standalone.
+ *
+ * The shim can't import apps/desktop's 559-line multi-curve <Loader> (cross-app
+ * coupling + bundle bloat that defeats the point of a lightweight installer), so
+ * this is just the one curve the installer uses. Math + tuning lifted verbatim
+ * from apps/desktop/src/components/ui/loader.tsx ('fourier-flow'); rotation is
+ * dropped because that curve never rotates. Keep the constants in sync if the
+ * desktop's curve is retuned.
+ */
+
+const TWO_PI = Math.PI * 2
+
+const CURVE = {
+  durationMs: 2200,
+  particleCount: 92,
+  pulseDurationMs: 2000,
+  strokeWidth: 4.2,
+  trailSpan: 0.31,
+  point(progress: number, detailScale: number) {
+    const t = progress * TWO_PI
+    const mix = 1 + detailScale * 0.16
+    const x = 17 * Math.cos(t) + 7.5 * Math.cos(3 * t + 0.6 * mix) + 3.2 * Math.sin(5 * t - 0.4)
+    const y = 15 * Math.sin(t) + 8.2 * Math.sin(2 * t + 0.25) - 4.2 * Math.cos(4 * t - 0.5 * mix)
+
+    return { x: 50 + x, y: 50 + y }
+  }
+}
+
+const norm = (progress: number) => ((progress % 1) + 1) % 1
+
+function detailScaleFor(time: number, phaseOffset: number) {
+  const p = ((time + phaseOffset * CURVE.pulseDurationMs) % CURVE.pulseDurationMs) / CURVE.pulseDurationMs
+
+  return 0.52 + ((Math.sin(p * TWO_PI + 0.55) + 1) / 2) * 0.48
+}
+
+function buildPath(detailScale: number, steps: number) {
+  return Array.from({ length: steps + 1 }, (_, i) => {
+    const { x, y } = CURVE.point(i / steps, detailScale)
+
+    return `${i === 0 ? 'M' : 'L'} ${x.toFixed(2)} ${y.toFixed(2)}`
+  }).join(' ')
+}
+
+function particleFor(index: number, progress: number, detailScale: number, strokeScale: number) {
+  const tail = index / (CURVE.particleCount - 1)
+  const { x, y } = CURVE.point(norm(progress - tail * CURVE.trailSpan), detailScale)
+  const fade = (1 - tail) ** 0.56
+
+  return { x, y, opacity: 0.04 + fade * 0.96, radius: (0.9 + fade * 2.7) * strokeScale }
+}
+
+interface LoaderProps extends Omit<ComponentProps<'div'>, 'children'> {
+  label?: string
+  pathSteps?: number
+  strokeScale?: number
+}
+
+export function Loader({
+  className,
+  label = 'Loading',
+  pathSteps = 240,
+  role = 'status',
+  strokeScale = 1,
+  ...props
+}: LoaderProps) {
+  const particleRefs = useRef<Array<SVGCircleElement | null>>([])
+  const pathRef = useRef<SVGPathElement | null>(null)
+
+  useEffect(() => {
+    let frame = 0
+    const startedAt = performance.now()
+    const phaseOffset = Math.random()
+    particleRefs.current.length = CURVE.particleCount
+
+    const render = (now: number) => {
+      const time = now - startedAt
+      const progress = ((time + phaseOffset * CURVE.durationMs) % CURVE.durationMs) / CURVE.durationMs
+      const detailScale = detailScaleFor(time, phaseOffset)
+
+      pathRef.current?.setAttribute('d', buildPath(detailScale, pathSteps))
+
+      particleRefs.current.forEach((node, index) => {
+        if (!node) {
+          return
+        }
+
+        const p = particleFor(index, progress, detailScale, strokeScale)
+        node.setAttribute('cx', p.x.toFixed(2))
+        node.setAttribute('cy', p.y.toFixed(2))
+        node.setAttribute('r', p.radius.toFixed(2))
+        node.setAttribute('opacity', p.opacity.toFixed(3))
+      })
+
+      frame = window.requestAnimationFrame(render)
+    }
+
+    render(performance.now())
+
+    return () => window.cancelAnimationFrame(frame)
+  }, [pathSteps, strokeScale])
+
+  return (
+    <div
+      {...props}
+      aria-label={props['aria-label'] ?? label}
+      className={cn('inline-grid size-10 place-items-center text-primary', className)}
+      role={role}
+    >
+      <svg aria-hidden="true" className="size-full overflow-visible" fill="none" viewBox="0 0 100 100">
+        <path
+          opacity="0.1"
+          ref={pathRef}
+          stroke="currentColor"
+          strokeLinecap="round"
+          strokeLinejoin="round"
+          strokeWidth={CURVE.strokeWidth * strokeScale}
+        />
+        {Array.from({ length: CURVE.particleCount }, (_, index) => (
+          <circle
+            fill="currentColor"
+            key={index}
+            ref={node => {
+              particleRefs.current[index] = node
+            }}
+          />
+        ))}
+      </svg>
+    </div>
+  )
+}
--- a/apps/bootstrap-installer/src/main.tsx
+++ b/apps/bootstrap-installer/src/main.tsx
@@ -2,11 +2,13 @@ import { StrictMode } from 'react'
 import { createRoot } from 'react-dom/client'
 import App from './app.tsx'
 import './styles.css'
+import { watchTheme } from './theme'
+
+// Follow the OS light/dark appearance. theme.ts paints the first frame on
+// import (synchronously, from the media query); this subscribes to live OS
+// theme changes via the authoritative Tauri window theme.
+void watchTheme()

-// Default to LIGHT mode — matches the Hermes desktop's default. The
-// desktop's runtime theme system can switch to .dark later, but our
-// installer ships in light mode only since we don't carry the theme
-// provider machinery.
 createRoot(document.getElementById('root')!).render(
  <StrictMode>
    <App />
--- a/apps/bootstrap-installer/src/routes/failure.tsx
+++ b/apps/bootstrap-installer/src/routes/failure.tsx
@@ -19,8 +19,8 @@ interface FailureProps {
 * Failure screen. Same hero treatment as Welcome/Success — the wordmark
 * carries the brand, so we keep it across every terminal state.
 *
- * The actual error message lives below in muted text. Two clear
- * affordances: Retry (primary) and Open log folder (secondary).
+ * The actual error message lives below in muted text. Two affordances on
+ * shared Button tokens: Retry (primary) and Open logs (quiet text link).
 */
 export default function Failure({ bootstrap }: FailureProps) {
  const logPath = useStore($logPath)
@@ -55,22 +55,13 @@ export default function Failure({ bootstrap }: FailureProps) {
      </div>

      <div className="flex items-center gap-3">
-        <Button
-          onClick={() => void (isUpdate ? startUpdate() : startInstall())}
-          size="lg"
-          className="inline-flex items-center gap-2 px-6"
-        >
-          <RefreshCw size={16} />
+        <Button onClick={() => void (isUpdate ? startUpdate() : startInstall())} className="gap-1.5">
+          <RefreshCw />
          {isUpdate ? 'Retry update' : 'Retry install'}
        </Button>
-        <Button
-          variant="outline"
-          size="lg"
-          onClick={() => void openLogDir()}
-          className="inline-flex items-center gap-2"
-        >
-          <FileText size={16} />
-          Open log folder
+        <Button variant="text" onClick={() => void openLogDir()} className="gap-1.5">
+          <FileText />
+          Open logs
        </Button>
      </div>

--- a/apps/bootstrap-installer/src/routes/progress.tsx
+++ b/apps/bootstrap-installer/src/routes/progress.tsx
@@ -3,12 +3,15 @@ import { useStore } from '@nanostores/react'
 import { Button } from '../components/button'
 import {
  cancelInstall,
+  $mode,
  $progress,
  type BootstrapStateModel,
  type StageState
 } from '../store'
-import { Check, X, ChevronRight, FileText, Loader2 } from 'lucide-react'
+import { Check, X, ChevronRight, FileText } from 'lucide-react'
 import clsx from 'clsx'
+import { BrandMark } from '../components/brand-mark'
+import { Loader } from '../components/loader'

 interface ProgressProps {
  bootstrap: BootstrapStateModel
@@ -21,7 +24,9 @@ interface ProgressProps {
 */
 export default function ProgressScreen({ bootstrap }: ProgressProps) {
  const progress = useStore($progress)
+  const mode = useStore($mode)
  const [showLogs, setShowLogs] = useState(false)
+  const [now, setNow] = useState(() => Date.now())
  const logEndRef = useRef<HTMLDivElement>(null)

  useEffect(() => {
@@ -30,69 +35,82 @@ export default function ProgressScreen({ bootstrap }: ProgressProps) {
    }
  }, [bootstrap.logs.length, showLogs])

-  const currentStage =
-    bootstrap.currentStage != null
-      ? bootstrap.stages[bootstrap.currentStage]
-      : null
+  // Tick once a second while the run is in flight so the active step shows a
+  // live elapsed timer — a long single step (e.g. the dependency download)
+  // reads as working, not frozen. Stops when nothing is running.
+  useEffect(() => {
+    if (bootstrap.status !== 'running') {
+      return
+    }
+    const id = window.setInterval(() => setNow(Date.now()), 1000)
+    return () => window.clearInterval(id)
+  }, [bootstrap.status])
+
+  const isUpdate = mode === 'update'
+  const title = bootstrap.status === 'completed' ? 'Done' : isUpdate ? 'Updating Hermes' : 'Setting up Hermes Agent'
+  const description = isUpdate
+    ? 'Hermes is updating to the latest version — this only takes a moment.'
+    : 'This is a one-time setup. The Hermes installer is downloading dependencies and configuring your machine. Subsequent launches will skip this step.'
+  const pct = Math.round(progress.fraction * 100)

  return (
    <div className="hermes-fade-in flex h-full flex-col">
-      <div className="border-b border-border px-6 py-4">
-        <div className="mb-3 flex items-center justify-between text-xs">
-          <div className="flex items-center gap-2 text-foreground">
-            {bootstrap.status === 'running' && (
-              <Loader2 size={12} className="animate-spin text-primary" />
-            )}
-            <span>
-              {bootstrap.status === 'running'
-                ? currentStage
-                  ? currentStage.info.title
-                  : 'Preparing\u2026'
-                : bootstrap.status === 'completed'
-                  ? 'Done'
-                  : 'Installing'}
-            </span>
-          </div>
-          <div className="text-muted-foreground">
-            {progress.done} of {progress.total} steps
-          </div>
-        </div>
-        {/* Top progress bar — plain HTML, derived from --primary so it
-            tracks the theme accent. */}
-        <div className="h-1 w-full overflow-hidden rounded-full bg-muted">
-          <div
-            className="h-full bg-primary transition-all duration-300 ease-out"
-            style={{ width: `${Math.max(2, progress.fraction * 100)}%` }}
-          />
+      {/* Header: brand + title + description, matching the desktop install overlay. */}
+      <div className="flex shrink-0 items-start gap-4 px-6 pt-6 pb-4">
+        <BrandMark className="size-11" />
+        <div className="min-w-0">
+          <h2 className="text-xl font-semibold tracking-tight">{title}</h2>
+          <p className="mt-1.5 text-sm text-muted-foreground">{description}</p>
        </div>
      </div>

      <div className="flex flex-1 overflow-hidden">
-        <div className="flex-1 overflow-y-auto px-6 py-4">
-          <ol className="space-y-1">
+        <div className="flex-1 overflow-y-auto px-6 pt-2 pb-4">
+          {/* Progress line + bar; the count shimmers while the install runs.
+              pt-2 matches the log header's py-2 so the "steps complete" line and
+              the "Live output" header share a baseline. */}
+          <div className="mb-4">
+            <div className="mb-1 flex items-center justify-between text-xs text-muted-foreground">
+              <span className={clsx(bootstrap.status === 'running' && 'shimmer')}>
+                {progress.done} of {progress.total} steps complete
+              </span>
+              <span className="tabular-nums">{pct}%</span>
+            </div>
+            <div className="h-1.5 w-full overflow-hidden rounded-full bg-(--ui-bg-tertiary)">
+              <div
+                className="h-full bg-primary transition-all duration-300 ease-out"
+                style={{ width: `${Math.max(2, progress.fraction * 100)}%` }}
+              />
+            </div>
+          </div>
+
+          {/* Flat stage list: only the running step is opaque; the rest read as
+              muted. Running loader overhangs left so labels stay aligned; the
+              terminal check/cross sits right of the label. */}
+          <ol className="space-y-0.5">
            {bootstrap.stageOrder.map((name) => {
              const rec = bootstrap.stages[name]
              if (!rec) return null
+              const meta =
+                rec.state === 'running' && rec.startedAt != null
+                  ? formatElapsed(now - rec.startedAt)
+                  : rec.durationMs != null && rec.state !== 'failed'
+                    ? formatDuration(rec.durationMs)
+                    : null
              return (
                <li
                  key={name}
                  className={clsx(
-                    'flex items-center gap-3 rounded-md px-3 py-2 text-sm transition-colors',
-                    rec.state === 'running' && 'bg-card text-foreground',
-                    rec.state === 'succeeded' && 'text-foreground/80',
-                    rec.state === 'skipped' && 'text-muted-foreground',
-                    rec.state === 'failed' &&
-                      'bg-destructive/10 text-destructive',
-                    !rec.state && 'text-muted-foreground/60'
+                    'flex items-center gap-2.5 px-3 py-1.5 text-sm',
+                    rec.state === 'running'
+                      ? 'font-medium text-foreground'
+                      : 'text-muted-foreground'
                  )}
                >
-                  <StateIcon state={rec.state ?? null} />
+                  {rec.state === 'running' && <Loader className="-ml-2 size-6 shrink-0" />}
                  <span className="flex-1 truncate">{rec.info.title}</span>
-                  {rec.durationMs != null && (
-                    <span className="text-xs text-muted-foreground">
-                      {formatDuration(rec.durationMs)}
-                    </span>
-                  )}
+                  {meta && <span className="text-xs tabular-nums text-muted-foreground/70">{meta}</span>}
+                  <StateIcon state={rec.state ?? null} />
                </li>
              )
            })}
@@ -100,16 +118,12 @@ export default function ProgressScreen({ bootstrap }: ProgressProps) {
        </div>

        {showLogs && (
-          <div className="flex w-1/2 flex-col border-l border-border bg-card/40">
-            <div className="flex shrink-0 items-center justify-between border-b border-border px-3 py-2">
-              <div className="text-xs font-medium text-foreground/80">
-                Live output
-              </div>
-              <div className="text-xs text-muted-foreground">
-                {bootstrap.logs.length} lines
-              </div>
+          <div className="flex w-1/2 flex-col border-l border-(--stroke-nous)">
+            <div className="flex shrink-0 items-center justify-between border-b border-(--stroke-nous) px-3 py-2 text-xs">
+              <span className="font-medium text-foreground/80">Live output</span>
+              <span className="tabular-nums text-muted-foreground">{bootstrap.logs.length} lines</span>
            </div>
-            <div className="flex-1 overflow-y-auto px-3 py-2 font-mono text-[11px] leading-relaxed">
+            <div className="flex-1 overflow-y-auto px-3 py-2 font-mono text-[10.5px] leading-relaxed">
              {bootstrap.logs.map((entry, idx) => (
                <div
                  key={idx}
@@ -127,29 +141,19 @@ export default function ProgressScreen({ bootstrap }: ProgressProps) {
        )}
      </div>

-      <div className="flex shrink-0 items-center justify-between border-t border-border px-6 py-3">
+      <div className="flex shrink-0 items-center justify-between border-t border-(--stroke-nous) px-6 py-3">
        <button
          type="button"
          onClick={() => setShowLogs((v) => !v)}
-          className="inline-flex items-center gap-1.5 text-xs text-muted-foreground transition-colors hover:text-foreground"
+          className="inline-flex cursor-pointer items-center gap-1.5 text-xs text-muted-foreground transition-colors hover:text-foreground"
        >
          <FileText size={14} />
          {showLogs ? 'Hide details' : 'Show details'}
-          <ChevronRight
-            size={12}
-            className={clsx(
-              'transition-transform',
-              showLogs && 'rotate-90'
-            )}
-          />
+          <ChevronRight size={12} className={clsx('transition-transform', showLogs && 'rotate-90')} />
        </button>

        {bootstrap.status === 'running' && (
-          <Button
-            variant="outline"
-            size="sm"
-            onClick={() => void cancelInstall()}
-          >
+          <Button variant="outline" size="sm" onClick={() => void cancelInstall()}>
            Cancel
          </Button>
        )}
@@ -158,25 +162,20 @@ export default function ProgressScreen({ bootstrap }: ProgressProps) {
  )
 }

+// Terminal-state markers, neutral by design: a muted check for done/skipped
+// (no celebratory green), a destructive cross for failure. Running renders its
+// spinner on the left; pending stays icon-less.
 function StateIcon({ state }: { state: StageState | null }) {
-  if (state === 'running') {
-    return <Loader2 size={14} className="animate-spin text-primary" />
-  }
  if (state === 'succeeded') {
-    return <Check size={14} className="text-emerald-400" />
+    return <Check size={13} className="shrink-0 text-muted-foreground" />
  }
  if (state === 'skipped') {
-    return <ChevronRight size={14} className="text-muted-foreground/70" />
+    return <Check size={13} className="shrink-0 text-muted-foreground/50" />
  }
  if (state === 'failed') {
-    return <X size={14} className="text-destructive" />
+    return <X size={13} className="shrink-0 text-destructive" />
  }
-  return (
-    <div
-      className="h-[6px] w-[6px] rounded-full bg-muted-foreground/40"
-      aria-hidden
-    />
-  )
+  return null
 }

 function formatDuration(ms: number): string {
@@ -186,3 +185,11 @@ function formatDuration(ms: number): string {
  const s = Math.round((ms % 60000) / 1000)
  return `${m}m ${s}s`
 }
+
+// Elapsed label for a running stage: "42s" under a minute, then m:ss. Negative input clamps to 0s.
+function formatElapsed(ms: number): string {
+  const totalSeconds = Math.max(0, Math.floor(ms / 1000))
+  const minutes = Math.floor(totalSeconds / 60)
+  const seconds = String(totalSeconds % 60).padStart(2, '0')
+  return minutes === 0 ? `${totalSeconds}s` : `${minutes}:${seconds}`
+}
--- a/apps/bootstrap-installer/src/routes/success.tsx
+++ b/apps/bootstrap-installer/src/routes/success.tsx
@@ -1,8 +1,8 @@
 import { useState } from 'react'
 import { type CSSProperties } from 'react'
-import { Button } from '../components/button'
+import { HackeryButton } from '../components/hackery-button'
 import { launchHermesDesktop } from '../store'
-import { Rocket, AlertCircle } from 'lucide-react'
+import { AlertCircle } from 'lucide-react'

 /*
 * Success screen. HERMES AGENT wordmark stays as the visual anchor
@@ -53,32 +53,23 @@ export default function Success() {

        <p className="m-0 text-center text-base leading-normal tracking-tight text-muted-foreground">
          You can launch from here, or any time from your terminal with{' '}
-          <code className="rounded bg-muted/60 px-1 py-0.5 font-mono text-sm">
-            hermes desktop
-          </code>
-          .
+          <code className="font-mono text-sm text-foreground/80">hermes desktop</code>.
        </p>
      </div>

-      <Button
-        onClick={() => void handleLaunch()}
-        size="lg"
+      <HackeryButton
        disabled={launching}
-        className="inline-flex items-center gap-2 px-6"
-      >
-        <Rocket size={18} />
-        {launching ? 'Launching…' : 'Launch Hermes'}
-      </Button>
+        label={launching ? 'Launching' : 'Launch'}
+        loading={launching}
+        onClick={() => void handleLaunch()}
+      />

      {error && (
-        <div
-          role="alert"
-          className="flex max-w-2xl items-start gap-2 rounded-md border border-destructive/30 bg-destructive/10 px-4 py-3 text-sm text-destructive"
-        >
-          <AlertCircle size={16} className="mt-0.5 shrink-0" />
+        <div role="alert" className="flex max-w-2xl items-start gap-2 text-sm">
+          <AlertCircle size={16} className="mt-0.5 shrink-0 text-destructive" />
          <div className="min-w-0">
-            <div className="font-medium">Couldn&rsquo;t launch the desktop app</div>
-            <div className="mt-1 text-destructive/80">{error}</div>
+            <div className="font-medium text-destructive">Couldn&rsquo;t launch the desktop app</div>
+            <div className="mt-0.5 text-muted-foreground">{error}</div>
          </div>
        </div>
      )}
--- a/apps/bootstrap-installer/src/routes/welcome.tsx
+++ b/apps/bootstrap-installer/src/routes/welcome.tsx
@@ -1,7 +1,6 @@
 import { type CSSProperties } from 'react'
-import { Button } from '../components/button'
+import { HackeryButton } from '../components/hackery-button'
 import { startInstall } from '../store'
-import { ArrowRight } from 'lucide-react'

 /*
 * Welcome screen.
@@ -42,17 +41,7 @@ export default function Welcome() {
        </p>
      </div>

-      <Button
-        onClick={() => void startInstall()}
-        size="lg"
-        className="group inline-flex items-center gap-2 px-6"
-      >
-        Install Hermes
-        <ArrowRight
-          size={18}
-          className="transition-transform group-hover:translate-x-0.5"
-        />
-      </Button>
+      <HackeryButton label="Install" onClick={() => void startInstall()} />
    </div>
  )
 }
--- a/apps/bootstrap-installer/src/store.ts
+++ b/apps/bootstrap-installer/src/store.ts
@@ -31,6 +31,10 @@ export interface StageRecord {
  info: StageInfo
  state: StageState | null
  durationMs?: number
+  /** Wall-clock time the stage entered `running`, stamped client-side so the UI
+   * can tick a live elapsed timer for long steps. Preserved across repeated
+   * running events. */
+  startedAt?: number
  error?: string
 }

@@ -84,6 +88,34 @@ export const $progress = computed($bootstrap, (b) => {
  return { done, total, fraction: done / total }
 })

+/** Return a copy of `cur` with stage `name` moved to `state`; `cur` itself comes
+ * back untouched when the stage is unknown. `durationMs`/`error` always replace
+ * the stored values, so omitting them clears what the record held before. */
+function withStageState(
+  cur: BootstrapStateModel,
+  name: string,
+  state: StageState,
+  durationMs?: number,
+  error?: string
+): BootstrapStateModel {
+  const prev = cur.stages[name]
+  if (!prev) return cur
+  const entering = state === 'running'
+  const next: StageRecord = {
+    ...prev,
+    state,
+    // Stamped on the first running event only; later events keep that stamp.
+    startedAt: entering ? (prev.startedAt ?? Date.now()) : prev.startedAt,
+    durationMs,
+    error
+  }
+  return {
+    ...cur,
+    stages: { ...cur.stages, [name]: next },
+    currentStage: entering ? name : cur.currentStage
+  }
+}
+
 // ---------------------------------------------------------------------------
 // Tauri event subscription
 // ---------------------------------------------------------------------------
@@ -133,6 +165,19 @@ let unlisten: UnlistenFn | null = null
 export async function initialize(): Promise<void> {
  if (unlisten) return

+  // Dev-only isolated preview (see runFakeBoot): drive the screens in a plain
+  // browser, no Tauri backend, no real install.
+  const fake = fakeMode()
+  if (fake) {
+    unlisten = () => {}
+    $logPath.set('~/.hermes/logs/bootstrap-installer.log')
+    $hermesHome.set('~/.hermes')
+    $mode.set(fake === 'update' ? 'update' : 'install')
+    // Update auto-runs (it's a hand-off); install/failure wait for the welcome click.
+    if (fake === 'update') void runFakeBoot('update')
+    return
+  }
+
  // Pull static info on mount for the diagnostics footer.
  try {
    const [logPath, hermesHome, mode] = await Promise.all([
@@ -173,23 +218,13 @@ export async function initialize(): Promise<void> {
        break
      }
      case 'stage': {
-        const existing = cur.stages[payload.name]
-        if (!existing) {
+        if (!cur.stages[payload.name]) {
          console.warn('stage event for unknown stage', payload.name)
          break
        }
-        const next: StageRecord = {
-          ...existing,
-          state: payload.state,
-          durationMs: payload.durationMs,
-          error: payload.error
-        }
-        $bootstrap.set({
-          ...cur,
-          stages: { ...cur.stages, [payload.name]: next },
-          currentStage:
-            payload.state === 'running' ? payload.name : cur.currentStage
-        })
+        $bootstrap.set(
+          withStageState(cur, payload.name, payload.state, payload.durationMs, payload.error)
+        )
        break
      }
      case 'log': {
@@ -240,6 +275,11 @@ export async function initialize(): Promise<void> {
 // ---------------------------------------------------------------------------

 export async function startInstall(opts?: { branch?: string }): Promise<void> {
+  const fake = fakeMode()
+  if (fake) {
+    void runFakeBoot(fake === 'failure' ? 'failure' : 'install')
+    return
+  }
  // Reset before kicking off so a retry from the failure screen clears
  // the previous run's state.
  $bootstrap.set(INITIAL)
@@ -255,6 +295,10 @@ export async function startInstall(opts?: { branch?: string }): Promise<void> {
 }

 export async function startUpdate(): Promise<void> {
+  if (fakeMode()) {
+    void runFakeBoot('update')
+    return
+  }
  // Update is driven by the desktop handing off (Hermes-Setup.exe --update);
  // there's no welcome click. Reset + jump straight to progress, then let the
  // Rust side stream the synthetic update manifest.
@@ -264,15 +308,135 @@ export async function startUpdate(): Promise<void> {
 }

 export async function cancelInstall(): Promise<void> {
+  if (fakeMode()) {
+    fakeCancelled = true
+    return
+  }
  await invoke('cancel_bootstrap')
 }

 export async function launchHermesDesktop(): Promise<void> {
+  if (fakeMode()) throw new Error('Preview mode — launching is disabled.')
  const installRoot = $bootstrap.get().installRoot
  if (!installRoot) throw new Error('no install root')
  await invoke('launch_hermes_desktop', { installRoot })
 }

 export async function openLogDir(): Promise<void> {
+  if (fakeMode()) return
  await invoke('open_log_dir')
 }
+
+// ---------------------------------------------------------------------------
+// Dev-only isolated preview ("fake boot")
+//
+// Synthesises the manifest + stage/log events Rust normally streams, so the
+// whole reskin can be reviewed in a plain browser (`npm run dev`):
+//   ?fake=install   welcome → [ INSTALL ] → success
+//   ?fake=update    auto-runs the granular update flow
+//   ?fake=failure   install that fails partway
+// Gated on import.meta.env.DEV → stripped from the shipped Tauri bundle.
+// ---------------------------------------------------------------------------
+
+type FakeMode = 'install' | 'update' | 'failure'
+
+function fakeMode(): FakeMode | null {
+  if (!import.meta.env.DEV || typeof window === 'undefined') return null // off outside dev builds or without a window
+  const v = new URLSearchParams(window.location.search).get('fake') // null when ?fake= is absent
+  return v === 'install' || v === 'update' || v === 'failure' ? v : null // any other value disables the preview
+}
+
+interface FakeStage {
+  name: string
+  title: string
+}
+
+const FAKE_INSTALL_STAGES: FakeStage[] = [
+  { name: 'system-packages', title: 'System packages' },
+  { name: 'uv', title: 'uv' },
+  { name: 'python', title: 'Python environment' },
+  { name: 'repo', title: 'Hermes repository' },
+  { name: 'dependencies', title: 'Python dependencies' },
+  { name: 'node', title: 'Node runtime' },
+  { name: 'desktop', title: 'Desktop app' }
+]
+
+const FAKE_UPDATE_STAGES: FakeStage[] = [
+  { name: 'handoff', title: 'Preparing to update' },
+  { name: 'update', title: 'Downloading the latest version' },
+  { name: 'rebuild', title: 'Rebuilding the desktop app' },
+  { name: 'install', title: 'Installing the update' }
+]
+
+const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms))
+
+let fakeRunning = false
+let fakeCancelled = false
+
+const fakeStage = (name: string, state: StageState, durationMs?: number, error?: string) =>
+  $bootstrap.set(withStageState($bootstrap.get(), name, state, durationMs, error))
+
+const fakeLog = (stage: string, line: string) =>
+  $bootstrap.set({ ...$bootstrap.get(), logs: [...$bootstrap.get().logs, { stage, line, stream: 'stdout' }] })
+
+const fakeFail = (error: string) =>
+  $bootstrap.set({ ...$bootstrap.get(), status: 'failed', error, currentStage: null })
+
+async function runFakeBoot(kind: FakeMode): Promise<void> {
+  if (fakeRunning) return // one simulated run at a time; a second call is a no-op
+  fakeRunning = true
+  fakeCancelled = false // drop any cancel request left over from before this run
+  try {
+    const stages = kind === 'update' ? FAKE_UPDATE_STAGES : FAKE_INSTALL_STAGES
+    const cancelled = () => { // poll point: on a pending cancel, mark failed, route to failure, report true
+      if (!fakeCancelled) return false
+      fakeFail(kind === 'update' ? 'Update cancelled.' : 'Install cancelled.')
+      $route.set('failure')
+      return true
+    }
+
+    $bootstrap.set({
+      ...INITIAL,
+      status: 'running',
+      stageOrder: stages.map((s) => s.name),
+      stages: Object.fromEntries(
+        stages.map((s): [string, StageRecord] => [
+          s.name,
+          { info: { ...s, category: kind, needs_user_input: false }, state: null } // NOTE(review): kind 'failure' becomes the category too — confirm that is acceptable
+        ])
+      )
+    })
+    $route.set('progress')
+
+    // Failure preview only: pick the middle stage as the one that fails (null otherwise).
+    const failAt = kind === 'failure' ? stages[Math.floor(stages.length / 2)]?.name : null
+
+    for (const s of stages) {
+      if (cancelled()) return
+      fakeStage(s.name, 'running')
+
+      const durationMs = 700 + Math.floor(Math.random() * 2200) // 700–2899 ms per stage
+      const lines = Math.max(2, Math.round(durationMs / 450)) // about one log line per 450 ms, at least two
+      for (let l = 0; l < lines; l++) {
+        await sleep(durationMs / lines)
+        if (cancelled()) return
+        fakeLog(s.name, `[${s.name}] ${s.title.toLowerCase()} — step ${l + 1}/${lines}…`)
+      }
+
+      if (s.name === failAt) {
+        fakeStage(s.name, 'failed', durationMs, 'Simulated failure for preview.')
+        fakeFail('Simulated failure for preview (fake boot).')
+        $route.set('failure')
+        return
+      }
+      fakeStage(s.name, 'succeeded', durationMs)
+    }
+
+    $bootstrap.set({ ...$bootstrap.get(), status: 'completed', currentStage: null })
+    // Install lands on success; update stays on progress (the real updater
+    // relaunches the desktop and exits from there).
+    if (kind !== 'update') $route.set('success')
+  } finally {
+    fakeRunning = false // always release the guard, including the early returns above
+  }
+}
--- a/apps/bootstrap-installer/src/styles.css
+++ b/apps/bootstrap-installer/src/styles.css
@@ -18,10 +18,12 @@
 *     to the file that contains them, so they continue to point at the
 *     correct node_modules path even from here.
 *
- * Forced light mode: the desktop ships with a runtime theme switcher
- * (ThemeProvider + applyTheme) that can flip to dark via document.documentElement.
- * The installer has no UI for theme switching, so we stay on the desktop's
- * default light surface (Nous-blue accent on near-white chrome).
+ * Follows the OS appearance: the installer has no in-app theme switcher, so
+ * src/theme.ts tracks the Tauri window theme and toggles `.dark` on
+ * <html>. The desktop's runtime applyTheme() normally PAINTS the dark seed
+ * colors inline (its imported :root.dark below only flips the per-mode mix
+ * knobs + neutral chrome), so we supply the Nous *dark* seeds ourselves in the
+ * :root.dark block at the end of this file.
 */
@import '../../desktop/src/styles.css';

@@ -49,3 +51,38 @@
    transparent 60%
  );
 }
+
+/*
+ * Dark appearance — Nous dark seeds.
+ *
+ * The imported desktop :root.dark only flips the per-mode mix knobs + neutral
+ * chrome; the seed COLORS are normally painted at runtime by the desktop's
+ * applyTheme(). The installer has no theme runtime, so we mirror them here from
+ * apps/desktop/src/themes/presets.ts (nousTheme.darkColors). The whole
+ * --ui-* / --dt-* chain in the imported stylesheet derives from these seeds, so
+ * flipping them is enough — we only additionally override the few tokens
+ * applyTheme() sets inline that DON'T derive from a seed (primary-foreground on
+ * the cream accent, destructive). Unlayered on purpose so it wins over the
+ * imported @layer base :root light seeds. Keep in sync with nousTheme.darkColors
+ * if that palette is retuned.
+ */
+:root.dark {
+  color-scheme: dark;
+
+  --theme-foreground: #ffe6cb;
+  --theme-primary: #ffe6cb;
+  --theme-secondary: #1b45a4;
+  --theme-accent-soft: #1540b1;
+  --theme-midground: #0053fd;
+  --theme-warm: #ffe6cb;
+  --theme-background-seed: #0d2f86;
+  --theme-sidebar-seed: #09286f;
+  --theme-card-seed: #12378f;
+  --theme-elevated-seed: #123a96;
+  --theme-bubble-seed: #143b91;
+
+  /* Non-derived shadcn tokens applyTheme() paints inline (Nous dark values). */
+  --dt-primary-foreground: #0d2f86;
+  --dt-destructive: #c0473a;
+  --dt-destructive-foreground: #fef2f2;
+}
--- a/apps/bootstrap-installer/src/theme.ts
+++ b/apps/bootstrap-installer/src/theme.ts
@@ -0,0 +1,51 @@
+import { getCurrentWindow, type Theme } from '@tauri-apps/api/window'
+
+/*
+ * OS appearance follower.
+ *
+ * The installer ships no in-app theme switcher, so it tracks the system the
+ * way the desktop overlays do. Two Tauri realities shape this:
+ *
+ *   1. The strict `script-src 'self'` CSP (tauri.conf.json) forbids an inline
+ *      pre-paint <script> in index.html, so the earliest hook we get is this
+ *      bundled module.
+ *   2. The webview's `prefers-color-scheme` is not reliable across WebView2 /
+ *      WebKitGTK. The authoritative signal in a Tauri window is the window's
+ *      OWN theme — `getCurrentWindow().theme()` + `onThemeChanged` — so we read
+ *      that and fall back to the media query only outside Tauri (e.g. plain
+ *      `vite preview`).
+ *
+ * We only flip the `.dark` class + `color-scheme`; the dark seed values live in
+ * styles.css (:root.dark), mirroring apps/desktop's applyTheme() palette.
+ */
+
+const prefersDark = (): boolean => window.matchMedia('(prefers-color-scheme: dark)').matches
+
+function paint(theme: Theme): void {
+  // Any theme value other than 'dark' paints light.
+  const isDark = theme === 'dark'
+  document.documentElement.classList.toggle('dark', isDark)
+  document.documentElement.style.colorScheme = isDark ? 'dark' : 'light'
+}
+
+// Best-effort synchronous first paint from the media query so the very first
+// frame is already in the right mode. Refined below by the authoritative Tauri
+// window theme once its IPC resolves.
+paint(prefersDark() ? 'dark' : 'light')
+
+/** Adopt the Tauri window theme, then follow live changes; any error on the Tauri path switches to the media-query listener. */
+export async function watchTheme(): Promise<void> {
+  try {
+    const win = getCurrentWindow()
+    const current = await win.theme()
+
+    if (current) { // falsy result: leave whatever is already painted
+      paint(current)
+    }
+
+    await win.onThemeChanged(({ payload }) => paint(payload)) // resolved value is discarded, so this listener is never removed here
+  } catch {
+    // Non-Tauri context (e.g. `vite preview`): keep the media query live.
+    window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', e => paint(e.matches ? 'dark' : 'light'))
+  }
+}
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
@@ -5108,13 +5108,24 @@ function resetBootProgressForReconnect() {
  )
 }

+// Best-effort stop for a backend child process; tolerates null and children that are already gone.
+function stopBackendChild(child) {
+  // Skip exited children as well: a dead child's pid can be recycled, and the
+  // Windows branch force-kills a process tree by raw pid.
+  if (!child || child.killed || Number.isInteger(child.exitCode) || typeof child.signalCode === 'string') return
+  try {
+    if (IS_WINDOWS && Number.isInteger(child.pid)) forceKillProcessTree(child.pid)
+    else child.kill('SIGTERM')
+  } catch {
+    // Already gone.
+  }
+}
+
 function resetHermesConnection() {
  connectionPromise = null
  backendStartFailure = null

-  if (hermesProcess && !hermesProcess.killed) {
-    hermesProcess.kill('SIGTERM')
-  }
+  stopBackendChild(hermesProcess)

  hermesProcess = null
  resetBootProgressForReconnect()
@@ -5362,13 +5373,7 @@ function stopPoolBackend(profile) {
  const entry = backendPool.get(profile)
  if (!entry) return
  backendPool.delete(profile)
-  if (entry.process && !entry.process.killed) {
-    try {
-      entry.process.kill('SIGTERM')
-    } catch {
-      // Already gone.
-    }
-  }
+  stopBackendChild(entry.process)
 }

 async function teardownPoolBackendAndWait(profile) {
@@ -5376,13 +5381,7 @@ async function teardownPoolBackendAndWait(profile) {
  if (!entry) return
  backendPool.delete(profile)

-  if (entry.process && !entry.process.killed) {
-    try {
-      entry.process.kill('SIGTERM')
-    } catch {
-      // Already gone.
-    }
-  }
+  stopBackendChild(entry.process)

  await waitForBackendExit(entry.process)
 }
@@ -7600,9 +7599,7 @@ app.on('before-quit', () => {
    disposeTerminalSession(id)
  }

-  if (hermesProcess && !hermesProcess.killed) {
-    hermesProcess.kill('SIGTERM')
-  }
+  stopBackendChild(hermesProcess)
  stopAllPoolBackends()
 })

--- a/apps/desktop/electron/windows-child-process.test.cjs
+++ b/apps/desktop/electron/windows-child-process.test.cjs
@@ -74,6 +74,29 @@ test('desktop backend launches console python so child consoles are inherited, n
  requireHiddenChildOptions(source, /hermesProcess = spawn\(\s*backend\.command,\s*backend\.args/)
 })

+test('desktop backend teardown tree-kills Windows backend descendants', () => {
+  const source = readElectronFile('main.cjs') // static check: asserts on the source text, nothing is spawned
+
+  const helperIndex = source.indexOf('function stopBackendChild(child)')
+  assert.notEqual(helperIndex, -1, 'missing backend teardown helper')
+  const helperSnippet = source.slice(helperIndex, helperIndex + 500) // fixed window: the helper body must fit in 500 chars from its signature
+  assert.match(helperSnippet, /IS_WINDOWS && Number\.isInteger\(child\.pid\)/)
+  assert.match(helperSnippet, /forceKillProcessTree\(child\.pid\)/)
+  assert.match(helperSnippet, /child\.kill\('SIGTERM'\)/)
+
+  const resetIndex = source.indexOf('function resetHermesConnection()')
+  assert.notEqual(resetIndex, -1, 'missing resetHermesConnection')
+  const resetSnippet = source.slice(resetIndex, resetIndex + 300) // only the first 300 chars of the function are inspected
+  assert.match(resetSnippet, /stopBackendChild\(hermesProcess\)/)
+  assert.doesNotMatch(resetSnippet, /hermesProcess\.kill\('SIGTERM'\)/) // the direct SIGTERM must not reappear in this window
+
+  const quitIndex = source.indexOf("app.on('before-quit'")
+  assert.notEqual(quitIndex, -1, 'missing before-quit handler')
+  const quitSnippet = source.slice(quitIndex, quitIndex + 900) // only the first 900 chars of the handler are inspected
+  assert.match(quitSnippet, /stopBackendChild\(hermesProcess\)/)
+  assert.doesNotMatch(quitSnippet, /hermesProcess\.kill\('SIGTERM'\)/) // the direct SIGTERM must not reappear in this window
+})
+
 test('intentional or interactive desktop child processes stay documented', () => {
  const source = readElectronFile('main.cjs')

--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -81,8 +81,10 @@
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
    "cmdk": "^1.1.1",
+    "d3-force": "^3.0.0",
    "dnd-core": "^14.0.1",
    "dompurify": "^3.4.11",
+    "fflate": "^0.8.3",
    "hast-util-from-html-isomorphic": "^2.0.0",
    "hast-util-to-text": "^4.0.2",
    "ignore": "^7.0.5",
@@ -118,6 +120,7 @@
    "@eslint/js": "^9.39.4",
    "@testing-library/dom": "^10.4.0",
    "@testing-library/react": "^16.3.2",
+    "@types/d3-force": "^3.0.10",
    "@types/hast": "^3.0.4",
    "@types/node": "^24.13.2",
    "@types/react": "^19.2.14",
--- a/apps/desktop/scripts/.gitignore
+++ b/apps/desktop/scripts/.gitignore
@@ -0,0 +1 @@
+share-codes.txt
--- a/apps/desktop/scripts/gen-share-codes.ts
+++ b/apps/desktop/scripts/gen-share-codes.ts
@@ -0,0 +1,171 @@
+// Throwaway generator: deterministic fake star-map graphs → real share codes
+// (runs the actual encoder, so every string round-trips). Run with `npx tsx`.
+import { writeFileSync } from 'node:fs'
+
+import type { StarmapEdge, StarmapGraph, StarmapMemoryCard, StarmapNode } from '../src/types/hermes'
+
+import { decodeShareCode, encodeShareCode } from '../src/app/starmap/share-code'
+
+const DAY = 86_400
+const END = Math.floor(Date.UTC(2026, 5, 29) / 1000)
+
+// mulberry32 — seeded PRNG; a given seed always replays the same sequence of
+// floats in [0, 1), which keeps the generated share codes stable across runs.
+const rng = (seed: number) => (): number => {
+  seed = ((seed | 0) + 0x6d2b79f5) | 0
+  const mixed = Math.imul(seed ^ (seed >>> 15), seed | 1)
+  const out = (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61)) ^ mixed
+
+  return ((out ^ (out >>> 14)) >>> 0) / 2 ** 32
+}
+// Element of `arr` at the slot selected by `r`, a sample expected in [0, 1).
+const pick = <T>(arr: readonly T[], r: number): T => arr[Math.floor(arr.length * r)]!
+
+const CATEGORIES = ['devops', 'research', 'creative', 'security', 'mlops', 'blockchain', 'email', 'health', 'web-development', 'comms'] as const
+const STATES = ['active', 'active', 'active', 'archived', 'draft', 'disabled'] as const // 'active' fills 3 of 6 slots → weighted pick
+const CREATED = [null, 'agent', 'agent', 'user'] as const // 'agent' fills 2 of 4 slots → weighted pick
+// Skill node with randomised fields drawn from `r`; `ts` may be null (undated), as in memNode.
+const skill = (id: string, label: string, ts: null | number, r: () => number): StarmapNode => ({
+  category: pick(CATEGORIES, r()),
+  createdBy: pick(CREATED, r()),
+  id,
+  kind: 'skill',
+  label,
+  pinned: r() > 0.85, // pinned only when the draw exceeds 0.85
+  state: pick(STATES, r()),
+  timestamp: ts,
+  useCount: Math.floor(r() ** 3 * 120) // cubing skews toward low counts; at most 119
+})
+// Memory/profile node: id is `memory:<source>:<i>`; never pinned, always active, zero uses.
+const memNode = (i: number, source: 'memory' | 'profile', label: string, ts: null | number): StarmapNode => ({
+  category: 'memory',
+  createdBy: 'memory',
+  id: `memory:${source}:${i}`,
+  kind: 'memory',
+  label,
+  memorySource: source,
+  pinned: false,
+  state: 'active',
+  timestamp: ts,
+  useCount: 0
+})
+// Assemble a StarmapMemoryCard from its four fields (`ts` is stored as `timestamp`).
+const card = (source: 'memory' | 'profile', title: string, body: string, ts: null | number): StarmapMemoryCard => ({ body, source, timestamp: ts, title })
+
+// ── 1. Tiny + quirky: 3 skills, 2 memory nodes, 2 edges, 2 cards ─────────────
+function tiny(): StarmapGraph {
+  const r = rng(7) // fixed seed → the randomised skill fields are the same on every run
+  const nodes: StarmapNode[] = [
+    skill('summon-coffee', 'Summon Coffee', END - 40 * DAY, r),
+    skill('rubber-duck', 'Rubber-Duck Debugging', END - 22 * DAY, r),
+    skill('git-blame-zen', 'Git Blame Without Rage', END - 9 * DAY, r),
+    memNode(0, 'profile', 'Prefers tabs, dies on this hill', END - 30 * DAY),
+    memNode(1, 'memory', 'The prod incident of last Tuesday', END - 3 * DAY)
+  ]
+  const edges: StarmapEdge[] = [
+    { source: 'memory:memory:1', target: 'git-blame-zen' },
+    { source: 'rubber-duck', target: 'git-blame-zen' }
+  ]
+  const memory = [ // titles + timestamps mirror the two memNode entries above
+    card('profile', 'Prefers tabs, dies on this hill', 'Tabs over spaces. Non-negotiable.', END - 30 * DAY),
+    card('memory', 'The prod incident of last Tuesday', 'Never deploy on a Friday again.', END - 3 * DAY)
+  ]
+
+  return { clusters: [], edges, memory, nodes, stats: {} }
+}
+
+// ── 2. Mid-size, mixed signal: 12 skills + 5 memory nodes/cards, 9 random edges ─
+function mid(): StarmapGraph {
+  const r = rng(42)
+  const names = ['Kubernetes Whispering', 'Prompt Surgery', 'Threat Modeling', 'Pixel Pushing', 'Vector Janitor', 'Smart-Contract Audit', 'Inbox Zero Ops', 'Sleep Debt Tracker', 'SSR Hydration', 'Standup Telepathy', 'Flaky-Test Exorcism', 'Cost Spelunking']
+  const nodes: StarmapNode[] = names.map((label, i) => skill(`s${i}`, label, END - Math.floor(r() * 200) * DAY, r))
+  const memTitles = ['Hates meetings before noon', 'Lives in us-east-1', 'Allergic to YAML', 'Caffeine half-life ~5h', 'Reviews in dark mode']
+  const memory: StarmapMemoryCard[] = []
+
+  memTitles.forEach((title, i) => {
+    const ts = END - Math.floor(r() * 120) * DAY // one draw, shared by the node AND its card so they agree
+    nodes.push(memNode(i, i % 2 ? 'memory' : 'profile', title, ts))
+    memory.push(card(i % 2 ? 'memory' : 'profile', title, `${title}. Logged automatically.`, ts))
+  })
+
+  const edges: StarmapEdge[] = [] // endpoints drawn independently → repeats and self-loops are possible
+
+  for (let i = 0; i < 9; i += 1) {
+    edges.push({ source: `s${Math.floor(r() * names.length)}`, target: `s${Math.floor(r() * names.length)}` })
+  }
+
+  return { clusters: [], edges, memory, nodes, stats: {} }
+}
+
+// ── 3. Dense web: 22 skills, 44 random edges; even-indexed skills are undated ─
+function web(): StarmapGraph {
+  const next = rng(1337)
+  // Odd indices draw a timestamp; even ones pass null → exercises the ordinal recency path.
+  const nodes: StarmapNode[] = Array.from({ length: 22 }, (_, n) =>
+    skill(`w${n}`, `Neuron ${String.fromCharCode(65 + (n % 26))}${n}`, n % 2 ? END - Math.floor(next() * 300) * DAY : (null as unknown as number), next)
+  )
+  // Endpoints are drawn independently (source first), so repeats and self-loops can occur.
+  const edges: StarmapEdge[] = Array.from({ length: 44 }, () => ({
+    source: `w${Math.floor(next() * 22)}`,
+    target: `w${Math.floor(next() * 22)}`
+  }))
+
+  return { clusters: [], edges, memory: [], nodes, stats: {} }
+}
+
+// ── 4. The beast: 240 skills + 150 memory nodes over ~2 years, bursty timeline ─
+function beast(): StarmapGraph {
+  const r = rng(2024)
+  const start = END - 730 * DAY
+  const span = END - start
+  const nodes: StarmapNode[] = []
+  const memory: StarmapMemoryCard[] = []
+
+  // Timestamp at fraction `q` of the span, jittered by up to ±3% of it. Bursts → a waveform, not a flat smear.
+  const burstAt = (q: number) => Math.floor(start + (q + (r() - 0.5) * 0.06) * span)
+
+  for (let i = 0; i < 240; i += 1) {
+    const burst = Math.floor(r() ** 1.5 * 12) / 12 // NOTE(review): `** 1.5` skews toward 0, i.e. toward `start` (oldest), not the recent end — confirm intent
+    nodes.push(skill(`b${i}`, `Skill ${i} · ${pick(CATEGORIES, r())}`, burstAt(burst), r))
+  }
+
+  for (let i = 0; i < 150; i += 1) {
+    const ts = burstAt(Math.floor(r() ** 1.5 * 12) / 12)
+    const source = r() > 0.5 ? 'memory' : 'profile'
+    nodes.push(memNode(i, source, `Memory ${i}: ${pick(['quirk', 'fact', 'preference', 'incident', 'lesson'], r())}`, ts))
+    memory.push(card(source, `Memory ${i}`, `Auto-captured note #${i}.`, ts))
+  }
+
+  const edges: StarmapEdge[] = [] // skill↔skill only; memory nodes are never linked here
+
+  for (let i = 0; i < 380; i += 1) {
+    const a = Math.floor(r() * 240)
+    const b = Math.floor(r() * 240)
+
+    if (a !== b) { // self-loops are dropped, so fewer than 380 edges may result
+      edges.push({ source: `b${a}`, target: `b${b}` })
+    }
+  }
+
+  return { clusters: [], edges, memory, nodes, stats: {} }
+}
+
+const graphs: [string, StarmapGraph][] = [
+  ['tiny + quirky', tiny()],
+  ['mid · mixed signal', mid()],
+  ['dense web · half undated', web()],
+  ['the beast · ~2 years', beast()]
+]
+
+const lines: string[] = []
+
+for (const [name, g] of graphs) {
+  const code = encodeShareCode(g)
+  const back = decodeShareCode(code) // round-trip assert — throws if invalid
+  // v2 is viz-only: nodes + edge topology survive; memory prose is dropped. Edge count may shrink, never grow.
+  const ok = back.nodes.length === g.nodes.length && back.edges.length <= g.edges.length // NOTE(review): a failed check only prints BAD; the code is still written below
+  console.log(`${ok ? 'ok ' : 'BAD'}  ${name} — ${g.nodes.length} nodes / ${g.edges.length} edges / ${g.memory.length} cards (${code.length} chars)`)
+  lines.push(`# ${name} — ${g.nodes.length} nodes, ${g.edges.length} edges, ${g.memory.length} cards`, code, '')
+}
+
+writeFileSync(new URL('share-codes.txt', import.meta.url), lines.join('\n')) // lands next to this script
--- a/apps/desktop/src/app/artifacts/index.tsx
+++ b/apps/desktop/src/app/artifacts/index.tsx
@@ -16,6 +16,7 @@ import {
  PaginationNext,
  PaginationPrevious
 } from '@/components/ui/pagination'
+import { RowButton } from '@/components/ui/row-button'
 import { TextTab, TextTabMeta } from '@/components/ui/text-tab'
 import { Tip } from '@/components/ui/tooltip'
 import { getSessionMessages, listAllProfileSessions } from '@/hermes'
@@ -761,13 +762,12 @@ function ArtifactCellAction({
  }

  return (
-    <button
+    <RowButton
      className="flex h-full w-full min-w-0 items-center gap-2 px-2.5 py-1.5 text-left text-[length:var(--conversation-caption-font-size)] leading-(--conversation-caption-line-height) font-normal text-(--ui-text-secondary) no-underline underline-offset-4 decoration-current/20 transition-colors hover:text-foreground hover:underline"
      onClick={onClick}
-      type="button"
    >
      {children}
-    </button>
+    </RowButton>
  )
 }

--- a/apps/desktop/src/app/chat/composer/composer-utils.test.ts
+++ b/apps/desktop/src/app/chat/composer/composer-utils.test.ts
@@ -0,0 +1,40 @@
+import type { Unstable_TriggerItem } from '@assistant-ui/core'
+import { describe, expect, it } from 'vitest'
+
+import { pickPlaceholder, slashArgStage, slashChipKindForItem, slashCommandToken } from './composer-utils'
+
+const item = (group: string): Unstable_TriggerItem =>
+  ({ id: 'x', type: 'slash', label: 'x', metadata: { group } }) as unknown as Unstable_TriggerItem
+
+describe('slashArgStage', () => {
+  it('is true only once the query is past the command name', () => {
+    expect(slashArgStage('personality')).toBe(false)
+    expect(slashArgStage('personality alice')).toBe(true)
+  })
+})
+
+describe('slashCommandToken', () => {
+  it('extracts the lowercased /command token', () => {
+    expect(slashCommandToken('Personality alice')).toBe('/personality')
+    expect(slashCommandToken('model')).toBe('/model')
+  })
+
+  it('handles an empty query', () => {
+    expect(slashCommandToken('')).toBe('/')
+  })
+})
+
+describe('slashChipKindForItem', () => {
+  it('maps completion groups to chip kinds', () => {
+    expect(slashChipKindForItem(item('Skills'))).toBe('skill')
+    expect(slashChipKindForItem(item('Themes'))).toBe('theme')
+    expect(slashChipKindForItem(item('Commands'))).toBe('command')
+  })
+})
+
+describe('pickPlaceholder', () => {
+  it('returns a member of the pool', () => {
+    const pool = ['a', 'b', 'c'] as const
+    expect(pool).toContain(pickPlaceholder(pool))
+  })
+})
--- a/apps/desktop/src/app/chat/composer/composer-utils.ts
+++ b/apps/desktop/src/app/chat/composer/composer-utils.ts
@@ -0,0 +1,60 @@
+import type { Unstable_TriggerItem } from '@assistant-ui/core'
+
+import type { SlashChipKind } from '@/components/assistant-ui/directive-text'
+import type { ComposerAttachment } from '@/store/composer'
+import { setSessionPickerOpen } from '@/store/session'
+
+export const COMPOSER_STACK_BREAKPOINT_PX = 320
+
+// A single editor line is ~28px (--composer-input-min-height 1.625rem + 0.5rem
+// vertical padding). Anything taller means the text wrapped to a second line,
+// which is when the composer should expand to the stacked layout.
+export const COMPOSER_SINGLE_LINE_MAX_PX = 36
+
+export const COMPOSER_FADE_BACKGROUND =
+  'linear-gradient(to bottom, transparent, color-mix(in srgb, var(--dt-background) 10%, transparent))'
+
+// Quiet period after the last keystroke before persisting the draft;
+// unmount/pagehide flushes bypass it.
+export const DRAFT_PERSIST_DEBOUNCE_MS = 400
+/** Random entry of `pool` (via Math.random); `undefined` when the pool is empty. */
+export const pickPlaceholder = (pool: readonly string[]) => pool[Math.floor(pool.length * Math.random())]
+
+/** Completion items can carry an `action` (set in use-slash-completions) that
+ *  runs a side effect on pick instead of inserting a chip — e.g. the session
+ *  picker's "Browse all…" entry opens the overlay. Table-driven so new action
+ *  items are a registry row, not a composer branch. */
+export const COMPLETION_ACTIONS: Record<string, () => void> = {
+  'session-picker': () => setSessionPickerOpen(true)
+}
+
+/** Pill accent for a picked `/` completion, keyed off the completion `group`
+ *  set in use-slash-completions and read from the item's metadata:
+ *  Skills → 'skill', Themes → 'theme', any other or missing group → 'command'. */
+export function slashChipKindForItem(item: Unstable_TriggerItem): SlashChipKind {
+  const meta = item.metadata as { group?: unknown } | undefined
+
+  switch (meta?.group) {
+    case 'Skills':
+      return 'skill'
+    case 'Themes':
+      return 'theme'
+    default:
+      return 'command'
+  }
+}
+
+/** True once a `/` query contains a space, i.e. it has moved past the command name. */
+export const slashArgStage = (query: string) => query.indexOf(' ') !== -1
+
+/** Lowercased `/command` token of a slash query (`personality x` → `/personality`; empty → `/`). */
+export const slashCommandToken = (query: string) => `/${(query.split(/\s+/, 1)[0] ?? '').toLowerCase()}`
+
+export interface QueueEditState {
+  attachments: ComposerAttachment[]
+  draft: string
+  entryId: string
+  sessionKey: string
+}
+/** Copy the list one level deep: new array, new item objects, nested values still shared. */
+export const cloneAttachments = (attachments: ComposerAttachment[]) => attachments.map(item => ({ ...item }))
--- a/apps/desktop/src/app/chat/composer/controls.tsx
+++ b/apps/desktop/src/app/chat/composer/controls.tsx
@@ -4,7 +4,7 @@ import { KbdCombo } from '@/components/ui/kbd'
 import { Tip } from '@/components/ui/tooltip'
 import { useI18n } from '@/i18n'
 import { triggerHaptic } from '@/lib/haptics'
-import { AudioLines, Layers3, Loader2, Square, SteeringWheel, Volume2, VolumeX } from '@/lib/icons'
+import { AudioLines, iconSize, Layers3, Loader2, Square, SteeringWheel, Volume2, VolumeX } from '@/lib/icons'
 import { formatCombo } from '@/lib/keybinds/combo'
 import { cn } from '@/lib/utils'

@@ -103,7 +103,7 @@ export function ComposerControls({
            type="button"
            variant="ghost"
          >
-            <SteeringWheel size={14} />
+            <SteeringWheel className={iconSize.sm} />
          </Button>
        </Tip>
      ) : (
@@ -123,7 +123,7 @@ export function ComposerControls({
            size="icon"
            type="button"
          >
-            <AudioLines size={15} />
+            <AudioLines className={iconSize.sm} />
          </Button>
        </Tip>
      ) : (
@@ -136,7 +136,7 @@ export function ComposerControls({
          >
            {busy ? (
              busyAction === 'queue' ? (
-                <Layers3 size={14} />
+                <Layers3 className={iconSize.sm} />
              ) : (
                <span className="block size-2.5 rounded-[0.1875rem] bg-current" />
              )
@@ -207,7 +207,7 @@ function ConversationPill({
          type="button"
          variant="ghost"
        >
-          <Square className="fill-current" size={11} />
+          <Square className={cn('fill-current', iconSize.xs)} />
          <span>{c.stopShort}</span>
        </Button>
      )}
@@ -242,7 +242,7 @@ function ConversationIndicator({
  speaking: boolean
 }) {
  if (speaking) {
-    return <Loader2 className="animate-spin" size={12} />
+    return <Loader2 className={cn('animate-spin', iconSize.xs)} />
  }

  const bars = [0.55, 0.85, 1, 0.85, 0.55]
@@ -262,15 +262,7 @@ function ConversationIndicator({
 // Pure-TTS toggle: type normally, but have every assistant reply read aloud —
 // no dictation, no full conversation loop. Filled/accent when on, mirroring the
 // muted-mic pressed state above. Driven by (and persisted to) `voice.auto_tts`.
-function AutoSpeakButton({
-  active,
-  disabled,
-  onToggle
-}: {
-  active: boolean
-  disabled: boolean
-  onToggle: () => void
-}) {
+function AutoSpeakButton({ active, disabled, onToggle }: { active: boolean; disabled: boolean; onToggle: () => void }) {
  const { t } = useI18n()
  const c = t.composer
  const label = active ? c.stopSpeakingReplies : c.speakReplies
@@ -294,7 +286,7 @@ function AutoSpeakButton({
        type="button"
        variant="ghost"
      >
-        {active ? <Volume2 size={14} /> : <VolumeX size={14} />}
+        {active ? <Volume2 className={iconSize.sm} /> : <VolumeX className={iconSize.sm} />}
      </Button>
    </Tip>
  )
@@ -341,9 +333,9 @@ function DictationButton({
        variant="ghost"
      >
        {status === 'recording' ? (
-          <Square className="fill-current" size={11} />
+          <Square className={cn('fill-current', iconSize.xs)} />
        ) : status === 'transcribing' ? (
-          <Loader2 className="animate-spin" size={14} />
+          <Loader2 className={cn('animate-spin', iconSize.sm)} />
        ) : (
          <Codicon name="mic" size="0.875rem" />
        )}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-branch.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-branch.ts
@@ -0,0 +1,95 @@
+import { type MutableRefObject, useCallback } from 'react'
+
+import { clearComposerAttachments } from '@/store/composer'
+import { listRepoBranches, requestStartWorkSession, startWorkInRepo, switchBranchInRepo } from '@/store/projects'
+
+interface UseComposerBranchOptions {
+  clearDraft: () => void
+  cwd: null | string | undefined
+  draftRef: MutableRefObject<string>
+}
+
+/**
+ * Branch / worktree engine — the `CodingStatusRow` hand-offs. Branch-off and
+ * convert open a fresh session anchored in a worktree (or the repo itself),
+ * carrying the current composer draft as its first turn; clearing here means
+ * the draft travels to the new session instead of getting stashed under this
+ * one. List / switch only call the projects store (no session, draft untouched).
+ */
+export function useComposerBranch({ clearDraft, cwd, draftRef }: UseComposerBranchOptions) {
+  // Shared hand-off: read the draft BEFORE clearing it, drop the composer's
+  // attachments, then request a work session anchored at `path` with that text.
+  // Only the text is passed along; the cleared attachments are not.
+  const openInWorktree = useCallback(
+    (path: string) => {
+      const text = draftRef.current
+      clearDraft()
+      clearComposerAttachments()
+      requestStartWorkSession(path, text)
+    },
+    [clearDraft, draftRef]
+  )
+
+  // Branch off into a NEW worktree (base = branch name, or current HEAD). A
+  // create failure throws back to the row (which toasts) before we touch the
+  // draft; a missing cwd / remote backend no-ops (the row hides the affordance).
+  const handleBranchOff = useCallback(
+    async (branch: string, base?: string) => {
+      const repoPath = cwd?.trim()
+      const result = repoPath && (await startWorkInRepo(repoPath, { base, branch, name: branch }))
+
+      if (result) {
+        openInWorktree(result.path)
+      }
+    },
+    [cwd, openInWorktree]
+  )
+
+  // Open an EXISTING branch in a session (no new branch). Three cases: a non-blank
+  // `path` → open it as-is; `isDefault` → switch the repo at cwd to the branch and
+  // open the repo itself; otherwise create a worktree for the branch and open that.
+  const handleConvertBranch = useCallback(
+    async (branch: string, path?: null | string, isDefault?: boolean) => {
+      if (path?.trim()) {
+        openInWorktree(path) // NOTE(review): forwards the untrimmed path although the guard tested the trimmed one
+
+        return
+      }
+
+      const repoPath = cwd?.trim()
+
+      if (repoPath && isDefault) {
+        await switchBranchInRepo(repoPath, branch)
+        openInWorktree(repoPath)
+
+        return
+      }
+
+      const result = repoPath && (await startWorkInRepo(repoPath, { existingBranch: branch }))
+
+      if (result) {
+        openInWorktree(result.path)
+      }
+    },
+    [cwd, openInWorktree]
+  )
+
+  const handleListBranches = useCallback(async () => {
+    const repoPath = cwd?.trim()
+
+    return repoPath ? listRepoBranches(repoPath) : [] // blank cwd → empty list, no store call
+  }, [cwd])
+
+  const handleSwitchBranch = useCallback(
+    async (branch: string) => {
+      const repoPath = cwd?.trim()
+
+      if (repoPath) { // blank cwd → silently does nothing
+        await switchBranchInRepo(repoPath, branch)
+      }
+    },
+    [cwd]
+  )
+
+  return { handleBranchOff, handleConvertBranch, handleListBranches, handleSwitchBranch, openInWorktree }
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-draft.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-draft.ts
@@ -0,0 +1,344 @@
+import { useAui, useAuiState, useComposerRuntime } from '@assistant-ui/react'
+import { type RefObject, useCallback, useEffect, useRef, useState } from 'react'
+
+import { SLASH_COMMAND_RE } from '@/lib/chat-runtime'
+import { $composerAttachments, type ComposerAttachment, stashSessionDraft, takeSessionDraft } from '@/store/composer'
+import { isBrowsingHistory } from '@/store/composer-input-history'
+
+import { cloneAttachments, DRAFT_PERSIST_DEBOUNCE_MS, type QueueEditState } from '../composer-utils'
+import {
+  type ComposerInsertMode,
+  focusComposerInput,
+  markActiveComposer,
+  onComposerFocusRequest,
+  onComposerInsertRefsRequest,
+  onComposerInsertRequest
+} from '../focus'
+import { type InlineRefInput, insertInlineRefsIntoEditor } from '../inline-refs'
+import { composerPlainText, placeCaretEnd, renderComposerContents } from '../rich-editor'
+import type { ChatBarProps } from '../types'
+
+interface UseComposerDraftArgs {
+  activeQueueSessionKey: string | null
+  focusKey: ChatBarProps['focusKey']
+  inputDisabled: boolean
+  queueEditRef: RefObject<QueueEditState | null>
+  sessionId: string | null | undefined
+}
+
+/**
+ * The composer's draft engine — the detached source-of-truth spine. The live
+ * text lives in the contentEditable DOM + `draftRef`; React only sees coarse
+ * edge selectors, so typing never re-renders the chrome. Owns the imperative
+ * composer-runtime subscription (draftRef mirror + external repaint + debounced
+ * per-session stash), the edit primitives (append/insert/inline-refs), focus,
+ * and per-session load/clear/stash/restore. The contentEditable *event*
+ * handlers stay in ChatBar (they bridge into the trigger engine) and drive the
+ * primitives exposed here.
+ */
+export function useComposerDraft({
+  activeQueueSessionKey,
+  focusKey,
+  inputDisabled,
+  queueEditRef,
+  sessionId
+}: UseComposerDraftArgs) {
+  const aui = useAui()
+  const composerRuntime = useComposerRuntime()
+
+  // Coarse edges only — these flip rarely (empty↔non-empty, the `?` help sigil,
+  // steerable-vs-slash), so typing within a line costs no render.
+  const hasText = useAuiState(s => s.composer.text.trim().length > 0)
+  const isHelpHint = useAuiState(s => s.composer.text === '?')
+
+  const isSteerableText = useAuiState(s => {
+    const trimmed = s.composer.text.trim()
+
+    return trimmed.length > 0 && !SLASH_COMMAND_RE.test(trimmed)
+  })
+
+  // assistant-ui's composer mutators throw when the core isn't bound yet (a
+  // startup/thread-swap window); the DOM + draftRef hold the text and the
+  // subscription reconciles once it binds, so swallow the premature write.
+  const setComposerText = useCallback(
+    (value: string) => {
+      try {
+        aui.composer().setText(value)
+      } catch {
+        // Composer core not bound yet — DOM/draftRef carry the text.
+      }
+    },
+    [aui]
+  )
+
+  const editorRef = useRef<HTMLDivElement | null>(null) // editor root element that the text is read from / painted into
+  const draftRef = useRef('') // mirror of the current draft text
+  const pendingDraftPersistRef = useRef<{ scope: string | null; text: string } | null>(null) // NOTE(review): only ever written in this hook, never read or returned — confirm it is still needed
+  const draftPersistTimerRef = useRef<number | undefined>(undefined) // window.setTimeout handle of the debounced stash
+  const activeQueueSessionKeyRef = useRef(activeQueueSessionKey)
+  activeQueueSessionKeyRef.current = activeQueueSessionKey // refreshed on every render so long-lived callbacks read the latest key
+  const sessionIdRef = useRef(sessionId)
+  sessionIdRef.current = sessionId // same render-time refresh as above
+  const queueEditStateRef = useRef<QueueEditState | null>(queueEditRef.current)
+  queueEditStateRef.current = queueEditRef.current // render-time snapshot of the caller's queueEditRef
+
+  const [focusRequestId, setFocusRequestId] = useState(0)
+
+  const focusInput = useCallback(() => {
+    focusComposerInput(editorRef.current)
+    markActiveComposer('main')
+  }, [])
+
+  const requestMainFocus = useCallback(() => {
+    setFocusRequestId(id => id + 1)
+  }, [])
+
+  // The single write path for programmatic draft mutations: mirror → AUI state →
+  // repaint the editor (caret to end). Repaints even while focused — inserts /
+  // restores run mid-focus, and the runtime sync only repaints an unfocused
+  // editor — so the visible text never lags the store.
+  const paintDraft = useCallback(
+    (next: string, focus = true) => {
+      draftRef.current = next
+      setComposerText(next)
+
+      const editor = editorRef.current
+
+      if (editor) {
+        renderComposerContents(editor, next)
+        placeCaretEnd(editor)
+      }
+
+      if (focus) {
+        requestMainFocus()
+      }
+    },
+    [requestMainFocus, setComposerText]
+  )
+
+  const appendExternalText = useCallback(
+    (text: string, mode: ComposerInsertMode) => {
+      const value = text.trim()
+
+      if (!value) {
+        return
+      }
+
+      const base = mode === 'inline' ? draftRef.current.trimEnd() : draftRef.current
+      const sep = mode === 'inline' ? (base ? ' ' : '') : base && !base.endsWith('\n') ? '\n\n' : ''
+
+      paintDraft(`${base}${sep}${value}`)
+    },
+    [paintDraft]
+  )
+
+  useEffect(() => {
+    if (!inputDisabled) {
+      focusInput()
+    }
+  }, [focusInput, focusKey, focusRequestId, inputDisabled])
+
+  useEffect(() => {
+    if (inputDisabled) {
+      return undefined
+    }
+
+    const offFocus = onComposerFocusRequest(target => {
+      if (target === 'main') {
+        setFocusRequestId(id => id + 1)
+      }
+    })
+
+    const offInsert = onComposerInsertRequest(({ mode, target, text }) => {
+      if (target === 'main') {
+        appendExternalText(text, mode)
+      }
+    })
+
+    return () => {
+      offFocus()
+      offInsert()
+    }
+  }, [appendExternalText, inputDisabled])
+
+  const stashAt = (scope: string | null, text = draftRef.current, attachments = $composerAttachments.get()) =>
+    stashSessionDraft(scope, text, attachments)
+
+  const loadIntoComposer = (text: string, attachments: ComposerAttachment[]) => {
+    $composerAttachments.set(cloneAttachments(attachments))
+    paintDraft(text, false)
+  }
+
+  const clearDraft = useCallback(() => {
+    setComposerText('')
+    draftRef.current = ''
+
+    if (editorRef.current) {
+      editorRef.current.replaceChildren()
+    }
+  }, [setComposerText])
+
+  // Read the editor's current plain text into draftRef + composer state. This
+  // closes the "queued rAF flush hasn't run yet" window so scope-swap/pagehide
+  // persistence captures the latest keystrokes.
+  const syncDraftFromEditor = useCallback(() => {
+    const editor = editorRef.current
+
+    if (!editor) {
+      return draftRef.current
+    }
+
+    const text = composerPlainText(editor)
+
+    if (text !== draftRef.current) {
+      draftRef.current = text
+      setComposerText(text)
+    }
+
+    return text
+  }, [setComposerText])
+
+  // Imperative draft sync — the spine of the "work only when work is to be
+  // performed" model. Subscribing to the composer runtime directly (not
+  // `useAuiState(text)` + a `[draft]` effect) keeps per-keystroke text out of
+  // React, so typing never re-renders the chrome. On each change we (1) mirror
+  // text into draftRef, (2) repaint the editor only when the change came from
+  // OUTSIDE it (programmatic clear/restore/insert; the focused editor is the
+  // source otherwise), and (3) schedule the debounced per-session stash.
+  // Browsing history / editing a queued prompt suppress the stash so recalled
+  // text never clobbers the draft.
+  useEffect(() => {
+    const sync = () => {
+      const text = composerRuntime.getState().text
+      draftRef.current = text
+
+      const editor = editorRef.current
+
+      if (editor && document.activeElement !== editor && composerPlainText(editor) !== text) {
+        renderComposerContents(editor, text)
+      }
+
+      if (isBrowsingHistory(sessionIdRef.current) || queueEditRef.current) {
+        return
+      }
+
+      const scope = activeQueueSessionKeyRef.current
+      pendingDraftPersistRef.current = { scope, text }
+      window.clearTimeout(draftPersistTimerRef.current)
+      draftPersistTimerRef.current = window.setTimeout(() => {
+        pendingDraftPersistRef.current = null
+        stashAt(scope, text)
+      }, DRAFT_PERSIST_DEBOUNCE_MS)
+    }
+
+    const unsubscribe = composerRuntime.subscribe(sync)
+
+    return () => {
+      unsubscribe()
+      window.clearTimeout(draftPersistTimerRef.current)
+    }
+  }, [composerRuntime, queueEditRef])
+
+  const insertText = (text: string) => {
+    const base = draftRef.current
+    const sep = base && !base.endsWith('\n') ? '\n' : ''
+
+    paintDraft(`${base}${sep}${text}`)
+  }
+
+  // insertInlineRefs mutates the editor in place (chips), so it can't go through
+  // paintDraft's re-render — it mirrors the resulting plain text and refocuses.
+  const insertInlineRefs = (refs: InlineRefInput[]) => {
+    const editor = editorRef.current
+
+    if (!editor) {
+      return false
+    }
+
+    const nextDraft = insertInlineRefsIntoEditor(editor, refs)
+
+    if (nextDraft === null) {
+      return false
+    }
+
+    draftRef.current = nextDraft
+    setComposerText(nextDraft)
+    requestMainFocus()
+
+    return true
+  }
+
+  // Latest-closure ref so the once-only subscription always calls the current
+  // insertInlineRefs without re-subscribing every render.
+  const insertInlineRefsRef = useRef(insertInlineRefs)
+  insertInlineRefsRef.current = insertInlineRefs
+
+  useEffect(() => {
+    return onComposerInsertRefsRequest(({ refs, target }) => {
+      if (target === 'main') {
+        insertInlineRefsRef.current(refs)
+      }
+    })
+  }, [])
+
+  // Per-thread draft swap — the composer's only session coupling. Lifecycle
+  // never clears composer state; this effect alone stashes on leave, restores
+  // on enter. Keyed writes are idempotent, so no skip-sentinel.
+  useEffect(() => {
+    const { attachments, text } = takeSessionDraft(activeQueueSessionKey)
+    loadIntoComposer(text, attachments)
+
+    return () => {
+      const latestText = syncDraftFromEditor()
+      const editing = queueEditStateRef.current
+
+      if (editing?.sessionKey === activeQueueSessionKey) {
+        stashAt(activeQueueSessionKey, editing.draft, editing.attachments)
+      } else if (!isBrowsingHistory(sessionId)) {
+        stashAt(activeQueueSessionKey, latestText)
+      }
+    }
+  }, [activeQueueSessionKey]) // eslint-disable-line react-hooks/exhaustive-deps
+
+  // pagehide is load-bearing: React skips effect cleanups on reload, so Cmd+R
+  // inside the debounce/rAF window would drop trailing keystrokes without this.
+  useEffect(() => {
+    const flushPendingDraftPersist = () => {
+      const scope = activeQueueSessionKeyRef.current
+      const editing = queueEditStateRef.current
+
+      if (editing?.sessionKey === scope || isBrowsingHistory(sessionIdRef.current)) {
+        return
+      }
+
+      const latestText = syncDraftFromEditor()
+      pendingDraftPersistRef.current = null
+      stashAt(scope, latestText)
+    }
+
+    window.addEventListener('pagehide', flushPendingDraftPersist)
+
+    return () => {
+      window.removeEventListener('pagehide', flushPendingDraftPersist)
+      flushPendingDraftPersist()
+    }
+  }, [syncDraftFromEditor])
+
+  return {
+    activeQueueSessionKeyRef,
+    clearDraft,
+    draftRef,
+    editorRef,
+    focusInput,
+    hasText,
+    insertInlineRefs,
+    insertText,
+    isHelpHint,
+    isSteerableText,
+    loadIntoComposer,
+    requestMainFocus,
+    sessionIdRef,
+    setComposerText,
+    stashAt
+  }
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-drop.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-drop.ts
@@ -0,0 +1,164 @@
+import { type DragEvent as ReactDragEvent, useRef, useState } from 'react'
+
+import { triggerHaptic } from '@/lib/haptics'
+
+import { extractDroppedFiles, HERMES_PATHS_MIME, partitionDroppedFiles } from '../../hooks/use-composer-actions'
+import { dragHasAttachments, droppedFileInlineRefs, type InlineRefInput } from '../inline-refs'
+import type { ChatBarProps } from '../types'
+
+interface UseComposerDropArgs {
+  cwd: ChatBarProps['cwd'] // forwarded to droppedFileInlineRefs when building inline refs
+  insertInlineRefs: (refs: InlineRefInput[]) => boolean // a true result fires the 'selection' haptic
+  onAttachDroppedItems: ChatBarProps['onAttachDroppedItems'] // may be absent; the form-level handlers no-op without it
+  requestMainFocus: () => void // called after the attach handler resolves truthy
+}
+
+/**
+ * Drag-and-drop attachment engine. Splits drops by origin: in-app drags
+ * (project tree / gutter) stay inline `@file:`/`@line:` refs the gateway
+ * resolves directly; OS/Finder drops (absolute local paths a remote gateway
+ * can't read, image bytes vision needs) route through the upload pipeline.
+ * Off the keystroke path; consumes `insertInlineRefs` + the attach handler.
+ */
+export function useComposerDrop({
+  cwd,
+  insertInlineRefs,
+  onAttachDroppedItems,
+  requestMainFocus
+}: UseComposerDropArgs) {
+  const [dragActive, setDragActive] = useState(false)
+  const dragDepthRef = useRef(0)
+
+  const resetDragState = () => {
+    dragDepthRef.current = 0
+    setDragActive(false)
+  }
+
+  const handleDragEnter = (event: ReactDragEvent<HTMLFormElement>) => {
+    if (!onAttachDroppedItems || !dragHasAttachments(event.dataTransfer, HERMES_PATHS_MIME)) {
+      return
+    }
+
+    event.preventDefault()
+    dragDepthRef.current += 1
+
+    if (!dragActive) {
+      setDragActive(true)
+    }
+  }
+
+  const handleDragOver = (event: ReactDragEvent<HTMLFormElement>) => {
+    if (!onAttachDroppedItems || !dragHasAttachments(event.dataTransfer, HERMES_PATHS_MIME)) {
+      return
+    }
+
+    event.preventDefault()
+    event.dataTransfer.dropEffect = 'copy'
+  }
+
+  const handleDragLeave = (event: ReactDragEvent<HTMLFormElement>) => {
+    if (!onAttachDroppedItems) {
+      return
+    }
+
+    event.preventDefault()
+    dragDepthRef.current = Math.max(0, dragDepthRef.current - 1)
+
+    if (dragDepthRef.current === 0) {
+      setDragActive(false)
+    }
+  }
+
+  const handleDrop = (event: ReactDragEvent<HTMLFormElement>) => {
+    if (!onAttachDroppedItems) {
+      return
+    }
+
+    event.preventDefault()
+    resetDragState()
+
+    const dropped = extractDroppedFiles(event.dataTransfer)
+
+    if (!dropped.length) {
+      return
+    }
+
+    // Route by origin: in-app drags (project tree / gutter) become inline
+    // @file:/@line: refs; OS drops go through the upload pipeline instead.
+    const { inAppRefs, osDrops } = partitionDroppedFiles(dropped)
+    const inlineRefs = droppedFileInlineRefs(inAppRefs, cwd)
+
+    if (inlineRefs.length > 0 && insertInlineRefs(inlineRefs)) {
+      triggerHaptic('selection')
+    }
+
+    if (osDrops.length === 0) {
+      return
+    }
+
+    void Promise.resolve(onAttachDroppedItems(osDrops)).then(attached => {
+      if (attached) {
+        triggerHaptic('selection')
+        requestMainFocus()
+      }
+    })
+  }
+
+  const handleInputDragOver = (event: ReactDragEvent<HTMLDivElement>) => {
+    if (!dragHasAttachments(event.dataTransfer, HERMES_PATHS_MIME)) {
+      return
+    }
+
+    event.preventDefault()
+    event.stopPropagation()
+    event.dataTransfer.dropEffect = 'copy'
+  }
+
+  const handleInputDrop = (event: ReactDragEvent<HTMLDivElement>) => {
+    const dropped = dragHasAttachments(event.dataTransfer, HERMES_PATHS_MIME)
+      ? extractDroppedFiles(event.dataTransfer)
+      : []
+
+    // Nothing we handle: return before preventDefault so the event is untouched.
+    if (dropped.length === 0) {
+      return
+    }
+
+    event.preventDefault()
+    event.stopPropagation()
+    resetDragState()
+
+    // Split by origin so OS/Finder drops are not inline-ref'd: their absolute
+    // local path is unreadable to a remote gateway and image bytes would never
+    // reach vision. In-app drags stay inline refs; OS drops go through the upload
+    // pipeline. (With no upload handler wired, everything falls back to inline.)
+    const uploadHandler = onAttachDroppedItems
+    const { inAppRefs, osDrops } = partitionDroppedFiles(dropped)
+    const inlineRefs = droppedFileInlineRefs(uploadHandler ? inAppRefs : dropped, cwd)
+
+    if (inlineRefs.length > 0 && insertInlineRefs(inlineRefs)) {
+      triggerHaptic('selection')
+    }
+
+    if (!uploadHandler || osDrops.length === 0) {
+      return
+    }
+
+    void Promise.resolve(uploadHandler(osDrops)).then(attached => {
+      if (attached) {
+        triggerHaptic('selection')
+        requestMainFocus()
+      }
+    })
+  }
+
+  return {
+    dragActive,
+    handleDragEnter,
+    handleDragLeave,
+    handleDragOver,
+    handleDrop,
+    handleInputDragOver,
+    handleInputDrop
+  }
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-esc-cancel.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-esc-cancel.ts
@@ -0,0 +1,54 @@
+import { useEffect, useRef } from 'react'
+
+import { triggerHaptic } from '@/lib/haptics'
+
+interface UseComposerEscCancelOptions {
+  awaitingInput: boolean // while true, Esc never cancels (checked in the key handler)
+  busy: boolean // Esc only cancels while this is true
+  onCancel: () => unknown // run on an accepted Esc; result is wrapped in Promise.resolve and not awaited
+}
+
+/**
+ * Global Esc-to-cancel: stop the in-flight turn when the CHAT (not the composer
+ * input, which has its own handler) has focus — clicking into the transcript and
+ * hitting Esc stops the run, matching the Stop button. A latest-handler ref keeps
+ * the window listener registered exactly once while still reading fresh
+ * busy/awaitingInput/onCancel each press.
+ */
+export function useComposerEscCancel({ awaitingInput, busy, onCancel }: UseComposerEscCancelOptions) {
+  // Latest-handler ref: reassigned on every render so the single window
+  // listener registered below always reads fresh busy/awaitingInput/onCancel.
+  const latestHandlerRef = useRef<(event: globalThis.KeyboardEvent) => void>(() => {})
+
+  latestHandlerRef.current = (event: globalThis.KeyboardEvent) => {
+    const unhandledEscape = event.key === 'Escape' && !event.defaultPrevented
+
+    // `awaitingInput`: the turn is parked on a clarify / approval / sudo /
+    // secret prompt, which owns Esc — never cancel the stream under it.
+    if (!unhandledEscape || !busy || awaitingInput) {
+      return
+    }
+
+    const focused = document.activeElement as HTMLElement | null
+    const typing =
+      !!focused && (focused.tagName === 'INPUT' || focused.tagName === 'TEXTAREA' || focused.isContentEditable)
+
+    // Typing is not a stop request; an open dialog / popover must get Esc itself.
+    const overlaySelector = '[role="dialog"],[role="alertdialog"],[data-radix-popper-content-wrapper]'
+
+    if (typing || document.querySelector(overlaySelector)) {
+      return
+    }
+
+    event.preventDefault()
+    triggerHaptic('cancel')
+    void Promise.resolve(onCancel())
+  }
+
+  useEffect(() => {
+    const forwardKeyDown = (event: globalThis.KeyboardEvent) => latestHandlerRef.current(event)
+    window.addEventListener('keydown', forwardKeyDown)
+
+    return () => window.removeEventListener('keydown', forwardKeyDown)
+  }, [])
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-metrics.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-metrics.ts
@@ -0,0 +1,160 @@
+import { useAuiState } from '@assistant-ui/react'
+import { type RefObject, useCallback, useEffect, useRef, useState } from 'react'
+
+import { useMediaQuery } from '@/hooks/use-media-query'
+import { useResizeObserver } from '@/hooks/use-resize-observer'
+import { $composerPoppedOut } from '@/store/composer-popout'
+import { isSecondaryWindow } from '@/store/windows'
+
+import { COMPOSER_SINGLE_LINE_MAX_PX, COMPOSER_STACK_BREAKPOINT_PX } from '../composer-utils'
+
+interface UseComposerMetricsArgs {
+  composerRef: RefObject<HTMLFormElement | null>
+  composerSurfaceRef: RefObject<HTMLDivElement | null>
+  editorRef: RefObject<HTMLDivElement | null>
+  poppedOut: boolean
+}
+
+/**
+ * Owns the composer's *sizing* engine: the stacked-vs-inline layout decision
+ * and the measured-height CSS vars the thread reads for bottom clearance. All
+ * work is edge-gated — the ResizeObserver only fires on real size changes, the
+ * height vars are 8px-bucketed so per-keystroke growth never invalidates the
+ * tree's computed style, and `tight` only flips when it crosses the breakpoint.
+ * Returns `stacked` (the only value the render needs).
+ */
+export function useComposerMetrics({ composerRef, composerSurfaceRef, editorRef, poppedOut }: UseComposerMetricsArgs): {
+  stacked: boolean
+} {
+  const [expanded, setExpanded] = useState(false)
+  const [tight, setTight] = useState(false)
+  const narrow = useMediaQuery('(max-width: 30rem)')
+
+  // Edge signals, not the live text: these only re-render when emptiness / the
+  // presence of a non-trailing newline actually flips, so typing within a line
+  // costs nothing here.
+  const isEmpty = useAuiState(s => s.composer.text.length === 0)
+  const hasHardNewline = useAuiState(s => s.composer.text.trimEnd().includes('\n'))
+
+  // Expansion (input on its own full-width row, controls below) is normally
+  // driven by the editor's rendered height via the ResizeObserver in
+  // syncComposerMetrics, so the layout flips at the real wrap point. This effect
+  // covers only what the observer can't: an explicit newline (expand before
+  // layout settles) and an emptied draft (collapse back).
+  useEffect(() => {
+    if (isEmpty) {
+      setExpanded(false)
+
+      return
+    }
+
+    if (expanded) {
+      return
+    }
+
+    // Only a non-trailing newline forces an immediate expand. A trailing newline
+    // (or phantom \n from contenteditable junk) is left to the ResizeObserver,
+    // which expands only when the editor's real height actually grows.
+    if (hasHardNewline) {
+      setExpanded(true)
+    }
+  }, [expanded, hasHardNewline, isEmpty])
+
+  // Bucket measured heights to 8px so the global CSS vars are only rewritten
+  // when the size crosses a meaningful threshold; char-by-char growth then
+  // causes no style invalidation until a wrap or row change happens. These refs
+  // cache the last published values and must be reset whenever the vars are.
+  const lastBucketedHeightRef = useRef(0)
+  const lastBucketedSurfaceHeightRef = useRef(0)
+  const lastTightRef = useRef<boolean | null>(null)
+
+  const syncComposerMetrics = useCallback(() => {
+    const composer = composerRef.current
+
+    if (!composer) {
+      return
+    }
+
+    // Floating composer is out of the thread's flow — it must not reserve any
+    // bottom clearance. Zero the measured vars so the thread reclaims the space.
+    // (Read globals here so the callback stays stable; mirror the popoutAllowed
+    // gate since secondary windows are forced docked.)
+    if ($composerPoppedOut.get() && !isSecondaryWindow()) {
+      const root = document.documentElement
+      lastBucketedHeightRef.current = 0
+      lastBucketedSurfaceHeightRef.current = 0
+      root.style.setProperty('--composer-measured-height', '0px')
+      root.style.setProperty('--composer-surface-measured-height', '0px')
+
+      return
+    }
+
+    const { height, width } = composer.getBoundingClientRect()
+    const surfaceHeight = composerSurfaceRef.current?.getBoundingClientRect().height
+    const root = document.documentElement
+
+    if (width > 0) {
+      const nextTight = width < COMPOSER_STACK_BREAKPOINT_PX
+
+      if (nextTight !== lastTightRef.current) {
+        lastTightRef.current = nextTight
+        setTight(nextTight)
+      }
+    }
+
+    // Expand once the input has actually wrapped past a single line. The
+    // observer only fires on real size changes, so this reads scrollHeight at
+    // most once per wrap (not per keystroke). One line ≈ 28px (1.625rem
+    // min-height + padding); a second line clears ~36px. We only ever expand
+    // here — collapse is handled by the emptied-draft effect to avoid
+    // oscillating across the wrap boundary as the input switches widths.
+    const editor = editorRef.current
+
+    if (editor && editor.scrollHeight > COMPOSER_SINGLE_LINE_MAX_PX) {
+      setExpanded(true)
+    }
+
+    if (height > 0) {
+      const bucket = Math.round(height / 8) * 8
+
+      if (bucket !== lastBucketedHeightRef.current) {
+        lastBucketedHeightRef.current = bucket
+        root.style.setProperty('--composer-measured-height', `${bucket}px`)
+      }
+    }
+
+    if (surfaceHeight && surfaceHeight > 0) {
+      const bucket = Math.round(surfaceHeight / 8) * 8
+
+      if (bucket !== lastBucketedSurfaceHeightRef.current) {
+        lastBucketedSurfaceHeightRef.current = bucket
+        root.style.setProperty('--composer-surface-measured-height', `${bucket}px`)
+      }
+    }
+  }, [composerRef, composerSurfaceRef, editorRef])
+
+  useResizeObserver(syncComposerMetrics, composerRef, composerSurfaceRef, editorRef)
+
+  // Toggling pop-out changes whether the composer reserves thread clearance.
+  // The ResizeObserver may not fire (the box can keep the same box size), so
+  // re-sync explicitly: docked republishes the measured height, floating zeroes
+  // it so the thread reclaims the bottom space.
+  useEffect(() => {
+    syncComposerMetrics()
+  }, [poppedOut, syncComposerMetrics])
+
+  useEffect(() => {
+    return () => {
+      const root = document.documentElement
+      root.style.removeProperty('--composer-measured-height')
+      root.style.removeProperty('--composer-surface-measured-height')
+      // Drop the bucket cache together with the vars: if these effects re-run on
+      // the same instance (e.g. React StrictMode's development remount), the next
+      // sync must republish instead of treating an unchanged bucket as written.
+      lastBucketedHeightRef.current = 0
+      lastBucketedSurfaceHeightRef.current = 0
+    }
+  }, [])
+
+  return { stacked: expanded || narrow || tight }
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-placeholder.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-placeholder.ts
@@ -0,0 +1,60 @@
+import { useEffect, useRef, useState } from 'react'
+
+import { useI18n } from '@/i18n'
+import { resetBrowseState } from '@/store/composer-input-history'
+
+import { pickPlaceholder } from '../composer-utils'
+
+interface UseComposerPlaceholderOptions {
+  disabled: boolean // true swaps the resting text for a reconnecting / starting message
+  reconnecting: boolean // only consulted while disabled: picks reconnecting over starting
+  sessionId: null | string | undefined // falsy selects the new-session placeholder pool
+}
+
+/**
+ * The composer's placeholder text. A resting starter (new session) / continuation
+ * (existing session) is picked once and only re-rolled when we genuinely move to
+ * a *different* conversation — the null→id persist of a freshly-started session
+ * keeps its starter so the text doesn't flip mid-stream. While the transport is
+ * down, it swaps to a reconnecting / starting message instead.
+ */
+export function useComposerPlaceholder({ disabled, reconnecting, sessionId }: UseComposerPlaceholderOptions): string {
+  const { t } = useI18n()
+  const { followUpPlaceholders, newSessionPlaceholders } = t.composer
+
+  // Lazy initializer: the first placeholder is rolled once, from the follow-up
+  // pool when a session already exists and from the starter pool otherwise.
+  const [restingPlaceholder, setRestingPlaceholder] = useState(() =>
+    pickPlaceholder(sessionId ? followUpPlaceholders : newSessionPlaceholders)
+  )
+
+  const lastSessionIdRef = useRef(sessionId)
+
+  useEffect(() => {
+    const previous = lastSessionIdRef.current
+    lastSessionIdRef.current = sessionId
+
+    // Keep the current text when nothing changed, and also on null → id: that
+    // is the new session we're already in getting persisted, so swapping to a
+    // follow-up would change the text under the user.
+    const sameConversation = previous === sessionId || (previous == null && Boolean(sessionId))
+
+    if (sameConversation) {
+      return
+    }
+
+    resetBrowseState(previous)
+    setRestingPlaceholder(pickPlaceholder(sessionId ? followUpPlaceholders : newSessionPlaceholders))
+  }, [followUpPlaceholders, newSessionPlaceholders, sessionId])
+
+  if (!disabled) {
+    return restingPlaceholder
+  }
+
+  // A disabled transport means the gateway isn't open. Distinguish a dropped
+  // connection we're trying to restore from a cold start ("Starting
+  // Hermes..."). During reconnect the textbox stays editable so a flaky
+  // network doesn't block drafting; only submit/backend actions stay disabled
+  // until the gateway is open again.
+  return reconnecting ? t.composer.placeholderReconnecting : t.composer.placeholderStarting
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-popout.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-popout.ts
@@ -0,0 +1,97 @@
+import { useStore } from '@nanostores/react'
+import { type RefObject, useCallback, useEffect } from 'react'
+
+import { triggerHaptic } from '@/lib/haptics'
+import {
+  $composerPopoutPosition,
+  $composerPoppedOut,
+  readPopoutBounds,
+  setComposerPopoutPosition,
+  setComposerPoppedOut
+} from '@/store/composer-popout'
+import { isSecondaryWindow } from '@/store/windows'
+
+import { useComposerPopoutGestures } from './use-popout-drag'
+
+interface UseComposerPopoutOptions {
+  composerRef: RefObject<HTMLFormElement | null>
+}
+
+/**
+ * Pop-out engine: the docked↔floating state (a shared, persisted atom), the
+ * dock/float/toggle actions, the drag gestures, and the on-screen re-clamp.
+ * Secondary windows (the tiny Ctrl+Shift+N window, subagent watch windows) can't
+ * pop out — a floating composer makes no sense there and would yank the main
+ * window's composer out via the shared atom.
+ */
+export function useComposerPopout({ composerRef }: UseComposerPopoutOptions) {
+  const popoutAllowed = !isSecondaryWindow()
+  const storedPoppedOut = useStore($composerPoppedOut)
+  const popoutPosition = useStore($composerPopoutPosition)
+  // The shared atom only counts in windows that are allowed to pop out.
+  const poppedOut = storedPoppedOut && popoutAllowed
+
+  const handleComposerPopOut = useCallback(() => {
+    triggerHaptic('open')
+    setComposerPoppedOut(true)
+  }, [])
+
+  const handleComposerDock = useCallback(() => {
+    triggerHaptic('success')
+    setComposerPoppedOut(false)
+  }, [])
+
+  // Double-click the grab area toggles dock/float; docking never clears the position.
+  const handleComposerToggle = useCallback(() => {
+    if (poppedOut) {
+      handleComposerDock()
+    } else {
+      handleComposerPopOut()
+    }
+  }, [handleComposerDock, handleComposerPopOut, poppedOut])
+
+  const gestures = useComposerPopoutGestures({
+    composerRef,
+    onDock: handleComposerDock,
+    onPopOut: handleComposerPopOut,
+    poppedOut,
+    position: popoutPosition
+  })
+
+  // Keep the floating box on-screen: re-clamp against the measured size and
+  // thread bounds when it pops out (persisting the result) and on every window
+  // resize (without persisting). The rAF pass repeats the clamp after layout
+  // settles, in case the first measurement was premature.
+  useEffect(() => {
+    if (!poppedOut) {
+      return undefined
+    }
+
+    const clampIntoView = (persist: boolean) => {
+      const node = composerRef.current
+      const size = node ? { height: node.offsetHeight, width: node.offsetWidth } : undefined
+      setComposerPopoutPosition($composerPopoutPosition.get(), { area: readPopoutBounds(node), persist, size })
+    }
+
+    const handleResize = () => clampIntoView(false)
+
+    clampIntoView(true)
+    const frame = requestAnimationFrame(() => clampIntoView(true))
+    window.addEventListener('resize', handleResize)
+
+    return () => {
+      cancelAnimationFrame(frame)
+      window.removeEventListener('resize', handleResize)
+    }
+  }, [composerRef, poppedOut])
+
+  return {
+    dockProximity: gestures.dockProximity,
+    dragging: gestures.dragging,
+    handleComposerToggle,
+    onComposerGesturePointerDown: gestures.onPointerDown,
+    popoutAllowed,
+    popoutPosition,
+    poppedOut
+  }
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-queue.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-queue.ts
@@ -0,0 +1,350 @@
+import { type RefObject, useCallback, useEffect, useRef, useState } from 'react'
+
+import { useI18n } from '@/i18n'
+import { triggerHaptic } from '@/lib/haptics'
+import { useSessionSlice } from '@/lib/use-session-slice'
+import { clearComposerAttachments, type ComposerAttachment } from '@/store/composer'
+import { resetBrowseState } from '@/store/composer-input-history'
+import {
+  $queuedPromptsBySession,
+  enqueueQueuedPrompt,
+  MAX_AUTO_DRAIN_ATTEMPTS,
+  migrateQueuedPrompts,
+  promoteQueuedPrompt,
+  type QueuedPromptEntry,
+  removeQueuedPrompt,
+  shouldAutoDrain,
+  updateQueuedPrompt
+} from '@/store/composer-queue'
+import { notify } from '@/store/notifications'
+
+import { cloneAttachments, type QueueEditState } from '../composer-utils'
+import type { ChatBarProps } from '../types'
+
+interface UseComposerQueueArgs {
+  activeQueueSessionKey: string | null
+  attachments: ComposerAttachment[]
+  busy: boolean
+  clearDraft: () => void
+  draftRef: RefObject<string>
+  focusInput: () => void
+  loadIntoComposer: (text: string, attachments: ComposerAttachment[]) => void
+  onCancel: ChatBarProps['onCancel']
+  onSubmit: ChatBarProps['onSubmit']
+  queueEditRef: RefObject<QueueEditState | null>
+  queueSessionKey: ChatBarProps['queueSessionKey']
+  sessionId: string | null | undefined
+}
+
+/**
+ * The composer's queue engine — everything about queued turns: the per-session
+ * queue store binding, in-place queued-prompt editing (begin/step/exit), the
+ * shared drain lock + send-then-remove sequence, manual send-now, and the
+ * edge-independent auto-drain with bounded retries. It consumes the draft API
+ * (draftRef/clearDraft/loadIntoComposer/focusInput) and writes the
+ * coordinator-owned `queueEditRef` so the draft engine can read the edit state
+ * without a back-reference. Behaviour-identical to the inline original.
+ */
+export function useComposerQueue({
+  activeQueueSessionKey,
+  attachments,
+  busy,
+  clearDraft,
+  draftRef,
+  focusInput,
+  loadIntoComposer,
+  onCancel,
+  onSubmit,
+  queueEditRef,
+  queueSessionKey,
+  sessionId
+}: UseComposerQueueArgs) {
+  const { t } = useI18n()
+
+  // Per-session slice (edge): re-renders only when THIS session's queue changes,
+  // not on cross-session queue churn (the plain atom's map ref changes on every
+  // write; the keyed array does not).
+  const queuedPrompts = useSessionSlice($queuedPromptsBySession, activeQueueSessionKey)
+
+  const [queueEdit, setQueueEdit] = useState<QueueEditState | null>(null)
+  queueEditRef.current = queueEdit
+
+  const setQueueEditSnapshot = useCallback(
+    (next: QueueEditState | null) => {
+      queueEditRef.current = next
+      setQueueEdit(next)
+    },
+    [queueEditRef]
+  )
+
+  const editingQueuedPrompt = queueEdit ? (queuedPrompts.find(entry => entry.id === queueEdit.entryId) ?? null) : null
+
+  const prevQueueKeyRef = useRef(activeQueueSessionKey)
+  const drainingQueueRef = useRef(false)
+  const drainFailuresRef = useRef(new Map<string, number>())
+
+  const beginQueuedEdit = (entry: QueuedPromptEntry) => {
+    if (!activeQueueSessionKey || queueEdit) {
+      return
+    }
+
+    setQueueEditSnapshot({
+      attachments: cloneAttachments(attachments),
+      draft: draftRef.current,
+      entryId: entry.id,
+      sessionKey: activeQueueSessionKey
+    })
+    loadIntoComposer(entry.text, entry.attachments)
+    triggerHaptic('selection')
+    focusInput()
+  }
+
+  // Walk queued entries while editing (ArrowUp = older, ArrowDown = newer),
+  // saving the in-progress edit on each step. Stepping newer past the last
+  // entry exits edit mode and restores the pre-edit draft.
+  const stepQueuedEdit = (direction: -1 | 1) => {
+    if (!queueEdit) {
+      return false
+    }
+
+    const currentIndex = queuedPrompts.findIndex(entry => entry.id === queueEdit.entryId)
+    const targetIndex = currentIndex + direction
+
+    if (currentIndex < 0 || targetIndex < 0) {
+      // Vanished entry → false (fall through); already at the oldest → true (swallow).
+      return currentIndex >= 0
+    }
+
+    // Save the in-progress edit before stepping off this entry.
+    const patch = { attachments: cloneAttachments(attachments), text: draftRef.current }
+    const saved = updateQueuedPrompt(queueEdit.sessionKey, queueEdit.entryId, patch)
+    const destination = queuedPrompts[targetIndex]
+
+    if (!destination) {
+      // Stepped past the newest entry: leave edit mode, restore the pre-edit draft.
+      setQueueEditSnapshot(null)
+      loadIntoComposer(queueEdit.draft, queueEdit.attachments)
+    } else {
+      setQueueEditSnapshot({ ...queueEdit, entryId: destination.id })
+      loadIntoComposer(destination.text, destination.attachments)
+    }
+
+    triggerHaptic(saved ? 'success' : 'selection')
+    focusInput()
+
+    return true
+  }
+
+  const exitQueuedEdit = (action: 'cancel' | 'save'): boolean => {
+    if (!queueEdit) {
+      return false
+    }
+
+    if (action === 'cancel') {
+      triggerHaptic('cancel')
+    } else {
+      const body = draftRef.current
+      const files = cloneAttachments(attachments)
+
+      if (body.trim() === '' && files.length === 0) {
+        return false
+      }
+
+      const saved = updateQueuedPrompt(queueEdit.sessionKey, queueEdit.entryId, { attachments: files, text: body })
+      triggerHaptic(saved ? 'success' : 'selection')
+    }
+
+    setQueueEditSnapshot(null)
+    loadIntoComposer(queueEdit.draft, queueEdit.attachments)
+    focusInput()
+
+    return true
+  }
+
+  const queueCurrentDraft = useCallback(() => {
+    const text = draftRef.current
+
+    if (!activeQueueSessionKey || (!text.trim() && attachments.length === 0)) {
+      return false
+    }
+
+    if (!enqueueQueuedPrompt(activeQueueSessionKey, { text, attachments })) {
+      return false
+    }
+
+    clearDraft()
+    clearComposerAttachments()
+    triggerHaptic('selection')
+
+    return true
+  }, [activeQueueSessionKey, attachments, clearDraft, draftRef])
+
+  // All queue drain paths share one lock + send-then-remove sequence.
+  // `pickEntry` lets each caller choose head, by-id, or skip-edited.
+  const runDrain = useCallback(
+    async (pickEntry: (entries: QueuedPromptEntry[]) => QueuedPromptEntry | undefined): Promise<boolean> => {
+      if (drainingQueueRef.current || !activeQueueSessionKey) {
+        return false
+      }
+
+      const entry = pickEntry(queuedPrompts)
+
+      if (!entry) {
+        return false
+      }
+
+      drainingQueueRef.current = true
+
+      try {
+        const accepted = await Promise.resolve(
+          onSubmit(entry.text, { attachments: entry.attachments, fromQueue: true })
+        )
+
+        if (accepted === false) {
+          return false
+        }
+
+        drainFailuresRef.current.delete(entry.id)
+        removeQueuedPrompt(activeQueueSessionKey, entry.id)
+        resetBrowseState(sessionId)
+
+        return true
+      } finally {
+        drainingQueueRef.current = false
+      }
+    },
+    [activeQueueSessionKey, onSubmit, queuedPrompts, sessionId]
+  )
+
+  const pickDrainHead = useCallback(
+    (entries: QueuedPromptEntry[]) => {
+      const skip = queueEditRef.current?.entryId
+
+      return skip ? entries.find(e => e.id !== skip) : entries[0]
+    },
+    [queueEditRef] // reads the edit id off a ref so the lock-holder always sees the latest
+  )
+
+  const drainNextQueued = useCallback(() => runDrain(pickDrainHead), [pickDrainHead, runDrain])
+
+  const sendQueuedNow = useCallback(
+    (id: string) => {
+      if (!activeQueueSessionKey || id === queueEdit?.entryId) {
+        return false
+      }
+
+      if (busy) {
+        // Promote to the head, then interrupt. The gateway always emits a
+        // settle (message.complete + session.info running:false) when the
+        // turn unwinds, and the busy→false auto-drain below sends this entry.
+        promoteQueuedPrompt(activeQueueSessionKey, id)
+        triggerHaptic('selection')
+        void Promise.resolve(onCancel())
+
+        return true
+      }
+
+      // A manual send clears the auto-drain backoff so a stuck entry the user
+      // taps gets a fresh attempt (and re-enables auto-retry on success).
+      drainFailuresRef.current.delete(id)
+
+      return runDrain(entries => entries.find(e => e.id === id))
+    },
+    [activeQueueSessionKey, busy, onCancel, queueEdit, runDrain]
+  )
+
+  // Edge-independent auto-drain: send the head whenever the session is idle and
+  // the queue is non-empty, bounding retries so a thrown/rejected onSubmit (e.g.
+  // a stale-session 404) can't strand the entry permanently nor spin-loop. The
+  // drain lock serializes sends; a remount/reconnect resets the failure counts.
+  const autoDrainNext = useCallback(() => {
+    if (busy || drainingQueueRef.current || !activeQueueSessionKey) {
+      return
+    }
+
+    const head = pickDrainHead(queuedPrompts)
+    const failures = drainFailuresRef.current
+
+    if (!head || (failures.get(head.id) ?? 0) >= MAX_AUTO_DRAIN_ATTEMPTS) {
+      return
+    }
+
+    // Count a failed attempt; surface a notification once the cap is reached.
+    const recordFailure = () => {
+      const attempts = (failures.get(head.id) ?? 0) + 1
+      failures.set(head.id, attempts)
+
+      if (attempts >= MAX_AUTO_DRAIN_ATTEMPTS) {
+        const { queueStuckBody: message, queueStuckTitle: title } = t.composer
+
+        notify({ id: 'composer-queue-stuck', kind: 'error', title, message })
+      }
+    }
+
+    // A drain that resolves false and one that rejects both count as a failure.
+    void runDrain(() => head)
+      .then(sent => {
+        if (!sent) {
+          recordFailure()
+        }
+      })
+      .catch(recordFailure)
+  }, [activeQueueSessionKey, busy, pickDrainHead, queuedPrompts, runDrain, t])
+
+  // Re-key on a runtime session-id change. A stable stored id (queueSessionKey)
+  // never churns, so a change there is a real session switch and must NOT
+  // migrate; only the runtime-derived key (queueSessionKey falsy → key is
+  // sessionId) churns on a backend bounce/resume of the same conversation.
+  useEffect(() => {
+    const prev = prevQueueKeyRef.current
+    prevQueueKeyRef.current = activeQueueSessionKey
+
+    if (queueSessionKey || !prev || !activeQueueSessionKey || prev === activeQueueSessionKey) {
+      return
+    }
+
+    migrateQueuedPrompts(prev, activeQueueSessionKey)
+  }, [activeQueueSessionKey, queueSessionKey])
+
+  // Queued turns flow whenever the session is idle — on the busy→false settle
+  // edge, on mount/reconnect, and after a re-key — so a swallowed edge can't
+  // strand them. To cancel queued turns, the user deletes them from the panel.
+  useEffect(() => {
+    if (shouldAutoDrain({ isBusy: busy, queueLength: queuedPrompts.length })) {
+      autoDrainNext()
+    }
+  }, [autoDrainNext, busy, queuedPrompts.length])
+
+  // Queue-edit cleanup: on session swap the scope effect already stashed the
+  // edit snapshot; only restore into the composer when still on the same scope.
+  useEffect(() => {
+    if (!queueEdit) {
+      return
+    }
+
+    if (queueEdit.sessionKey === activeQueueSessionKey) {
+      if (editingQueuedPrompt) {
+        return
+      }
+
+      setQueueEditSnapshot(null)
+      loadIntoComposer(queueEdit.draft, queueEdit.attachments)
+
+      return
+    }
+
+    setQueueEditSnapshot(null)
+  }, [activeQueueSessionKey, editingQueuedPrompt, queueEdit, setQueueEditSnapshot]) // eslint-disable-line react-hooks/exhaustive-deps
+
+  return {
+    beginQueuedEdit,
+    drainNextQueued,
+    editingQueuedPrompt,
+    exitQueuedEdit,
+    queueCurrentDraft,
+    queueEdit,
+    queuedPrompts,
+    sendQueuedNow,
+    stepQueuedEdit
+  }
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-submit.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-submit.ts
@@ -0,0 +1,190 @@
+import { type RefObject, useEffect, useRef } from 'react'
+
+import { SLASH_COMMAND_RE } from '@/lib/chat-runtime'
+import { triggerHaptic } from '@/lib/haptics'
+import { clearComposerAttachments, clearSessionDraft, type ComposerAttachment } from '@/store/composer'
+import { resetBrowseState } from '@/store/composer-input-history'
+import { enqueueQueuedPrompt, type QueuedPromptEntry } from '@/store/composer-queue'
+
+import { cloneAttachments, type QueueEditState } from '../composer-utils'
+import { onComposerSubmitRequest } from '../focus'
+import { composerPlainText } from '../rich-editor'
+import type { ChatBarProps } from '../types'
+
+interface UseComposerSubmitArgs {
+  activeQueueSessionKey: string | null // queue scope a non-accepted steer is re-queued under
+  activeQueueSessionKeyRef: RefObject<string | null> // live scope, read at send time and again on restore
+  attachments: ComposerAttachment[]
+  busy: boolean // while true, submit queues / runs slash commands / cancels instead of sending
+  canSteer: boolean // steerDraft is a no-op unless set
+  clearDraft: () => void
+  disabled: boolean // submitDraft is a no-op while set
+  draftRef: RefObject<string> // latest draft text; re-synced from the editor DOM on submit
+  drainNextQueued: () => Promise<boolean> // run on an idle submit with an empty composer and a non-empty queue
+  editorRef: RefObject<HTMLDivElement | null>
+  exitQueuedEdit: (action: 'cancel' | 'save') => boolean // submit calls it with 'save' during a queue edit
+  focusInput: () => void
+  inputDisabled: boolean // external submit requests are ignored while set
+  loadIntoComposer: (text: string, attachments: ComposerAttachment[]) => void // puts a failed send back
+  onCancel: ChatBarProps['onCancel'] // fired on submit while busy with nothing to send
+  onSteer: ChatBarProps['onSteer']
+  onSubmit: ChatBarProps['onSubmit'] // a resolved `false` means the send was not accepted
+  queueCurrentDraft: () => boolean // used when submitting a non-slash payload while busy
+  queueEdit: QueueEditState | null
+  queuedPrompts: QueuedPromptEntry[]
+  sessionId: string | null | undefined
+  setComposerText: (value: string) => void
+  stashAt: (scope: string | null, text?: string, attachments?: ComposerAttachment[]) => void
+}
+
+/**
+ * The composer's submit engine — the orchestration seam where the draft and
+ * queue meet. `submitDraft` is the one decision tree (queue-edit save · slash-
+ * now-while-busy · queue · drain · send · stop); `dispatchSubmit` is the shared
+ * send-with-restore primitive (re-loads + re-stashes the draft if the gateway
+ * rejects or the call fails); `steerDraft` nudges the live turn. Owns no state
+ * of its own beyond the stable external-submit listener ref.
+ */
+export function useComposerSubmit({
+  activeQueueSessionKey,
+  activeQueueSessionKeyRef,
+  attachments,
+  busy,
+  canSteer,
+  clearDraft,
+  disabled,
+  draftRef,
+  drainNextQueued,
+  editorRef,
+  exitQueuedEdit,
+  focusInput,
+  inputDisabled,
+  loadIntoComposer,
+  onCancel,
+  onSteer,
+  onSubmit,
+  queueCurrentDraft,
+  queueEdit,
+  queuedPrompts,
+  sessionId,
+  setComposerText,
+  stashAt
+}: UseComposerSubmitArgs) {
+  // Shared send primitive: fire onSubmit, and if the gateway rejects (accepted
+  // === false), the promise rejects, or onSubmit throws, re-load + re-stash.
+  const dispatchSubmit = (text: string, attachments?: ComposerAttachment[]) => {
+    const submittedScope = activeQueueSessionKeyRef.current
+    const submittedAttachments = attachments ?? []
+
+    const restore = () => {
+      loadIntoComposer(text, submittedAttachments)
+      stashAt(activeQueueSessionKeyRef.current, text, submittedAttachments)
+    }
+
+    // The executor runs synchronously (onSubmit still fires now); a sync throw rejects into restore.
+    void new Promise(resolve => {
+      resolve(attachments ? onSubmit(text, { attachments }) : onSubmit(text))
+    })
+      .then(accepted => void (accepted === false ? restore() : clearSessionDraft(submittedScope)))
+      .catch(restore)
+  }
+
+  // External "submit this prompt" requests (e.g. the review pane's agent-ship
+  // button) route through the same send path. A ref keeps the listener stable
+  // while always calling the latest dispatchSubmit closure.
+  const dispatchSubmitRef = useRef(dispatchSubmit)
+  dispatchSubmitRef.current = dispatchSubmit
+
+  useEffect(
+    () =>
+      onComposerSubmitRequest(({ target, text }) => {
+        if (target === 'main' && !inputDisabled) {
+          dispatchSubmitRef.current(text)
+        }
+      }),
+    [inputDisabled]
+  )
+
+  const submitDraft = () => {
+    if (disabled) {
+      return
+    }
+
+    // Source the text from the DOM editor, not React state. The AUI composer
+    // state (`draft`) and the derived `hasComposerPayload` lag the DOM by a
+    // render, so on fast typing or IME composition the final keystroke(s) may
+    // not have synced yet and reading state here would drop the message.
+    // draftRef is updated on every input event; refresh it from the editor once
+    // more to also cover a keystroke that hasn't fired its input event yet.
+    const editor = editorRef.current
+
+    if (editor) {
+      const domText = composerPlainText(editor)
+
+      if (domText !== draftRef.current) {
+        draftRef.current = domText
+        setComposerText(domText)
+      }
+    }
+
+    const text = draftRef.current
+    const payloadPresent = text.trim().length > 0 || attachments.length > 0
+
+    if (queueEdit) {
+      exitQueuedEdit('save')
+    } else if (busy) {
+      // Slash commands execute immediately even while the agent is busy: they
+      // are client-side operations (/yolo, /skin, /new, /help, etc.) or self-
+      // contained gateway RPCs (/status, /compress). onSubmit routes them to
+      // executeSlashCommand, which has its own per-command busy guard for the
+      // few that need an idle session; queuing would stall them behind the turn.
+      if (!attachments.length && SLASH_COMMAND_RE.test(text.trim())) {
+        triggerHaptic('submit')
+        clearDraft()
+        dispatchSubmit(text)
+      } else if (payloadPresent) {
+        queueCurrentDraft()
+      } else {
+        // Stop button (the only way to reach here while busy with an empty
+        // composer — empty Enter is short-circuited in the keydown handler).
+        triggerHaptic('cancel')
+        void Promise.resolve(onCancel())
+      }
+    } else if (!payloadPresent && queuedPrompts.length > 0) {
+      void drainNextQueued()
+    } else if (payloadPresent) {
+      const submittedAttachments = cloneAttachments(attachments)
+      triggerHaptic('submit')
+      resetBrowseState(sessionId)
+      clearDraft()
+      clearComposerAttachments()
+      dispatchSubmit(text, submittedAttachments)
+    }
+
+    focusInput()
+  }
+
+  // Steer the live turn (nudge without interrupting). Clears the draft up front
+  // for snappy feedback; if the gateway rejects (no live tool window) or the
+  // steer promise rejects, the words are re-queued so nothing is lost.
+  const steerDraft = () => {
+    if (!onSteer || !canSteer) {
+      return
+    }
+
+    const text = draftRef.current.trim()
+
+    const requeue = () => {
+      if (activeQueueSessionKey) {
+        enqueueQueuedPrompt(activeQueueSessionKey, { text, attachments: [] })
+      }
+    }
+
+    triggerHaptic('submit')
+    clearDraft()
+
+    void Promise.resolve(onSteer(text)).then(accepted => (accepted ? undefined : requeue()), requeue)
+  }
+
+  return { dispatchSubmit, steerDraft, submitDraft }
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-trigger.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-trigger.ts
@@ -0,0 +1,282 @@
+import type { Unstable_TriggerAdapter, Unstable_TriggerItem } from '@assistant-ui/core'
+import { type MutableRefObject, type RefObject, useCallback, useEffect, useRef, useState } from 'react'
+
+import { hermesDirectiveFormatter } from '@/components/assistant-ui/directive-text'
+import { desktopSlashCommandTakesArgs } from '@/lib/desktop-slash-commands'
+
+import { COMPLETION_ACTIONS, slashArgStage, slashChipKindForItem, slashCommandToken } from '../composer-utils'
+import {
+  composerPlainText,
+  placeCaretEnd,
+  refChipElement,
+  renderComposerContents,
+  slashChipElement
+} from '../rich-editor'
+import { detectTrigger, textBeforeCaret, type TriggerState } from '../text-utils'
+
+interface CompletionSource {
+  adapter: Unstable_TriggerAdapter | null // its `search(query)` supplies the popover items
+  loading: boolean // surfaced as `triggerLoading` while this source's trigger is open
+}
+
+interface UseComposerTriggerOptions {
+  at: CompletionSource // completions for an `@` trigger
+  draftRef: MutableRefObject<string> // re-synced with the editor's plain text after a pick
+  editorRef: RefObject<HTMLDivElement | null>
+  requestMainFocus: () => void // called once a pick has been applied
+  setComposerText: (text: string) => void
+  slash: CompletionSource // completions for a `/` trigger
+}
+
+/**
+ * Trigger / completion engine: `@`/`/` detection against the live editor, the
+ * adapter-driven item list, the open popover's selection state, and the chip
+ * insertion that commits a pick back into the contentEditable. Owns the trigger
+ * state; ChatBar threads its editor refs in and consumes the returned API from
+ * the input/keydown/keyup paths + the popover render. `triggerKeyConsumedRef` is
+ * exposed so keydown can mark a navigation/control key as handled and the
+ * subsequent keyup skips its refresh.
+ */
+export function useComposerTrigger({
+  at,
+  draftRef,
+  editorRef,
+  requestMainFocus,
+  setComposerText,
+  slash
+}: UseComposerTriggerOptions) {
+  const [trigger, setTrigger] = useState<TriggerState | null>(null) // open trigger, or null when closed
+  const [triggerActive, setTriggerActive] = useState(0) // index of the highlighted item
+  const [triggerItems, setTriggerItems] = useState<readonly Unstable_TriggerItem[]>([]) // items for the query
+  // Set synchronously in keydown when the open trigger popover consumes a
+  // navigation/control key (Arrow/Enter/Tab/Escape). The subsequent keyup must
+  // NOT run refreshTrigger for that keypress: it never edits text, and for
+  // Escape the keydown has already set trigger=null, so a keyup refresh would
+  // re-detect the still-present `/` and instantly reopen the menu. A ref is
+  // used instead of reading `trigger` in keyup because by keyup time React has
+  // re-rendered and the handler closure sees the post-keydown state.
+  const triggerKeyConsumedRef = useRef(false)
+
+  const refreshTrigger = useCallback(() => {
+    const editor = editorRef.current
+
+    if (!editor) {
+      return
+    }
+
+    // Fast-bail: if neither `@` nor `/` appears in the current draft, there's
+    // nothing for `detectTrigger` to match. Use `textContent` (cheap browser-
+    // native walk) for the precondition check rather than `composerPlainText`
+    // (recursive child walk with chip-aware logic). Only when a trigger char
+    // is present do we pay the cost of the full walk + DOM range work.
+    const rawText = editor.textContent ?? ''
+
+    if (!rawText.includes('@') && !rawText.includes('/')) {
+      if (trigger) {
+        setTrigger(null)
+        setTriggerActive(0)
+      }
+
+      return
+    }
+
+    const before = textBeforeCaret(editor)
+    const found = detectTrigger(before ?? composerPlainText(editor)) // whole draft when `before` is nullish
+
+    // The arg-stage popover is only useful for commands with an options screen.
+    // For a no-arg command it would dead-end on "No matches", so drop it — the
+    // directive is already complete.
+    const detected =
+      found?.kind === '/' && slashArgStage(found.query) && !desktopSlashCommandTakesArgs(slashCommandToken(found.query))
+        ? null
+        : found
+
+    setTrigger(detected)
+
+    // Only reset the highlight when the trigger actually changed (opened, or
+    // the query/kind differs). Re-detecting the *same* trigger — e.g. on a
+    // caret move (mouseup) or a stray refresh — must preserve the user's
+    // current selection instead of snapping back to the first item.
+    if (detected?.kind !== trigger?.kind || detected?.query !== trigger?.query) {
+      setTriggerActive(0)
+    }
+  }, [editorRef, trigger])
+
+  const triggerAdapter: Unstable_TriggerAdapter | null =
+    trigger?.kind === '@' ? at.adapter : trigger?.kind === '/' ? slash.adapter : null
+
+  useEffect(() => {
+    if (!trigger || !triggerAdapter?.search) {
+      setTriggerItems([])
+
+      return
+    }
+
+    setTriggerItems(triggerAdapter.search(trigger.query))
+  }, [trigger, triggerAdapter])
+
+  const triggerLoading = trigger?.kind === '@' ? at.loading : trigger?.kind === '/' ? slash.loading : false
+
+  // Suppress the "No matches" empty state once a slash command is past its name:
+  // a no-arg command has nothing to offer, and a fully-typed arg commits on
+  // Space/Tab — neither should dead-end on a popover.
+  const argStageEmpty = trigger?.kind === '/' && slashArgStage(trigger.query) && !triggerLoading && !triggerItems.length
+
+  const closeTrigger = () => {
+    setTrigger(null)
+    setTriggerItems([])
+    setTriggerActive(0)
+  }
+
+  useEffect(() => {
+    setTriggerActive(idx => Math.min(idx, Math.max(0, triggerItems.length - 1))) // clamp to the list bounds
+  }, [triggerItems.length])
+
+  // Commit the literally-typed `/command arg` as a directive chip — used when
+  // the completion list is empty because the arg is already fully typed (the
+  // backend completer drops exact matches). Reuses the chip path via a
+  // synthetic item whose serialized form is the verbatim text.
+  const commitTypedSlashDirective = () => {
+    if (trigger?.kind !== '/') {
+      return
+    }
+
+    const text = `/${trigger.query.trimEnd()}`
+
+    replaceTriggerWithChip({
+      id: text,
+      type: 'slash',
+      label: text.slice(1),
+      metadata: {
+        command: slashCommandToken(trigger.query),
+        display: text,
+        meta: '',
+        group: '',
+        action: '',
+        rawText: text
+      }
+    })
+  }
+
+  const replaceTriggerWithChip = (item: Unstable_TriggerItem) => {
+    const editor = editorRef.current
+
+    if (!editor || !trigger) {
+      return
+    }
+
+    // Action items (e.g. "Browse all sessions…") run a side effect instead of
+    // inserting a chip: strip the typed trigger token, then fire the action.
+    const completionAction = (item.metadata as { action?: unknown } | undefined)?.action
+    const runAction = typeof completionAction === 'string' ? COMPLETION_ACTIONS[completionAction] : undefined
+
+    if (runAction) {
+      const current = composerPlainText(editor)
+      const prefix = current.slice(0, Math.max(0, current.length - trigger.tokenLength))
+
+      renderComposerContents(editor, prefix)
+      placeCaretEnd(editor)
+      draftRef.current = composerPlainText(editor)
+      setComposerText(draftRef.current)
+      closeTrigger()
+      runAction()
+      requestMainFocus()
+
+      return
+    }
+
+    const serialized = hermesDirectiveFormatter.serialize(item)
+    const starter = serialized.endsWith(':') // presumably a bare `@kind:` prefix still awaiting a value — confirm
+
+    // Picking a bare arg-taking command (e.g. `/personality`) shouldn't commit
+    // it — expand to its options step so the popover shows the inline list, just
+    // as typing `/personality ` by hand would. A serialized value with a space is
+    // already an arg pick (`/personality alice`), so it commits normally.
+    const command = (item.metadata as { command?: string } | undefined)?.command ?? ''
+
+    const expandsToArgs = trigger.kind === '/' && !serialized.includes(' ') && desktopSlashCommandTakesArgs(command)
+
+    const text = starter || serialized.endsWith(' ') ? serialized : `${serialized} `
+    const directive = !starter && serialized.match(/^@([^:]+):(.+)$/)
+    // No pill while expanding — the bare command stays plain text until an arg
+    // is picked, at which point a single pill is emitted for the full command.
+    const slashKind = !expandsToArgs && trigger.kind === '/' ? slashChipKindForItem(item) : null
+    const keepTriggerOpen = starter || expandsToArgs
+
+    const finish = () => {
+      draftRef.current = composerPlainText(editor) // re-sync the draft from what the DOM now holds
+      setComposerText(draftRef.current)
+      requestMainFocus()
+      keepTriggerOpen ? window.setTimeout(refreshTrigger, 0) : closeTrigger()
+    }
+
+    const sel = window.getSelection()
+    const range = sel?.rangeCount ? sel.getRangeAt(0) : null
+    const node = range?.startContainer
+    const offset = range?.startOffset ?? 0
+
+    if (!sel || !range || node?.nodeType !== Node.TEXT_NODE || offset < trigger.tokenLength) {
+      const current = composerPlainText(editor)
+      const prefix = current.slice(0, Math.max(0, current.length - trigger.tokenLength))
+
+      if (slashKind) {
+        // Two-step arg picks (e.g. `/handoff` pill already inserted, now picking
+        // the platform) land here because the caret sits past a contenteditable
+        // chip. Rebuild the prefix and re-emit a single pill for the full command.
+        renderComposerContents(editor, prefix)
+        editor.append(slashChipElement(serialized, slashKind), document.createTextNode(' '))
+        placeCaretEnd(editor)
+
+        return finish()
+      }
+
+      renderComposerContents(editor, `${prefix}${text}`)
+      placeCaretEnd(editor)
+
+      return finish()
+    }
+
+    const replaceRange = document.createRange()
+    replaceRange.setStart(node, offset - trigger.tokenLength)
+    replaceRange.setEnd(node, offset)
+    replaceRange.deleteContents() // removes the typed trigger token that ends at the caret
+
+    const chip = slashKind
+      ? slashChipElement(serialized, slashKind)
+      : directive
+        ? refChipElement(directive[1], directive[2])
+        : null
+
+    if (chip) {
+      const space = document.createTextNode(' ')
+      const fragment = document.createDocumentFragment()
+      fragment.append(chip, space)
+      replaceRange.insertNode(fragment)
+
+      const caret = document.createRange()
+      caret.setStart(space, 1) // collapse the caret just past the trailing space
+      caret.collapse(true)
+      sel.removeAllRanges()
+      sel.addRange(caret)
+
+      return finish()
+    }
+
+    document.execCommand('insertText', false, text) // no chip for this pick: insert it as plain text
+    finish()
+  }
+
+  return {
+    argStageEmpty,
+    closeTrigger,
+    commitTypedSlashDirective,
+    refreshTrigger,
+    replaceTriggerWithChip,
+    setTriggerActive,
+    trigger,
+    triggerActive,
+    triggerItems,
+    triggerKeyConsumedRef,
+    triggerLoading
+  }
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-url-dialog.test.tsx
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-url-dialog.test.tsx
@@ -0,0 +1,48 @@
+import { act, renderHook } from '@testing-library/react'
+import { describe, expect, it, vi } from 'vitest'
+
+import { useComposerUrlDialog } from './use-composer-url-dialog'
+
+vi.mock('@/lib/haptics', () => ({ triggerHaptic: () => {} }))
+
+describe('useComposerUrlDialog', () => {
+  it('drops an @url: directive into the draft when there is no host onAddUrl', () => {
+    const insertText = vi.fn()
+    const { result } = renderHook(() => useComposerUrlDialog({ insertText })) // no onAddUrl: fallback path
+
+    act(() => result.current.setUrlValue('  https://example.dev  '))
+    act(() => result.current.submitUrl())
+
+    // Leading/trailing whitespace is trimmed before the directive is built.
+    expect(insertText).toHaveBeenCalledWith('@url:https://example.dev')
+  })
+
+  it('prefers the host onAddUrl handler, then clears + closes the dialog', () => {
+    const insertText = vi.fn()
+    const onAddUrl = vi.fn()
+    const { result } = renderHook(() => useComposerUrlDialog({ insertText, onAddUrl }))
+
+    act(() => {
+      result.current.openUrlDialog() // open first so the later urlOpen === false proves submit closed it
+      result.current.setUrlValue(' https://example.dev ')
+    })
+    act(() => result.current.submitUrl())
+
+    expect(onAddUrl).toHaveBeenCalledWith('https://example.dev')
+    expect(insertText).not.toHaveBeenCalled()
+    expect(result.current.urlValue).toBe('')
+    expect(result.current.urlOpen).toBe(false)
+  })
+
+  it('no-ops on an empty / whitespace-only URL', () => {
+    const insertText = vi.fn()
+    const onAddUrl = vi.fn()
+    const { result } = renderHook(() => useComposerUrlDialog({ insertText, onAddUrl }))
+
+    act(() => result.current.setUrlValue('   ')) // trims to the empty string
+    act(() => result.current.submitUrl())
+
+    expect(insertText).not.toHaveBeenCalled()
+    expect(onAddUrl).not.toHaveBeenCalled()
+  })
+})
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-url-dialog.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-url-dialog.ts
@@ -0,0 +1,50 @@
+import { useEffect, useRef, useState } from 'react'
+
+import { triggerHaptic } from '@/lib/haptics'
+
+interface UseComposerUrlDialogOptions {
+  insertText: (text: string) => void // fallback sink: receives `@url:<url>` when no onAddUrl is given
+  onAddUrl?: (url: string) => void // preferred handler; receives the trimmed URL
+}
+
+/**
+ * State + handlers for the "Add URL" dialog: open flag, typed value, focusing
+ * the input after it opens, and submit. Submit hands the trimmed URL to the
+ * host's `onAddUrl` when supplied, else inserts an `@url:` directive via insertText.
+ */
+export function useComposerUrlDialog({ insertText, onAddUrl }: UseComposerUrlDialogOptions) {
+  const urlInputRef = useRef<HTMLInputElement | null>(null)
+  const [urlOpen, setUrlOpen] = useState(false)
+  const [urlValue, setUrlValue] = useState('')
+
+  useEffect(() => {
+    if (!urlOpen) {
+      return
+    }
+
+    window.requestAnimationFrame(() => urlInputRef.current?.focus({ preventScroll: true }))
+  }, [urlOpen])
+
+  const openUrlDialog = () => {
+    triggerHaptic('open')
+    setUrlOpen(true)
+  }
+
+  const submitUrl = () => {
+    const trimmed = urlValue.trim()
+
+    if (trimmed.length === 0) {
+      return
+    }
+
+    // Host handler wins; without one the URL lands in the draft as a directive.
+    const commit = onAddUrl ?? ((url: string) => insertText(`@url:${url}`))
+
+    commit(trimmed)
+    triggerHaptic('success')
+    setUrlValue('')
+    setUrlOpen(false)
+  }
+
+  return { openUrlDialog, setUrlOpen, setUrlValue, submitUrl, urlInputRef, urlOpen, urlValue }
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-composer-voice.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-composer-voice.ts
@@ -0,0 +1,160 @@
+import { useCallback, useEffect, useRef, useState } from 'react'
+
+import { useI18n } from '@/i18n'
+import { chatMessageText } from '@/lib/chat-messages'
+import { triggerHaptic } from '@/lib/haptics'
+import { resetBrowseState } from '@/store/composer-input-history'
+import { notifyError } from '@/store/notifications'
+import { $messages } from '@/store/session'
+import { $autoSpeakReplies, setAutoSpeakReplies } from '@/store/voice-prefs'
+
+import { onComposerVoiceToggleRequest } from '../focus'
+import type { ChatBarProps } from '../types'
+
+import { useAutoSpeakReplies } from './use-auto-speak-replies'
+import { useVoiceConversation } from './use-voice-conversation'
+import { useVoiceRecorder } from './use-voice-recorder'
+
+interface UseComposerVoiceArgs {
+  busy: boolean // voice turns are not submitted while set
+  clearDraft: () => void
+  disabled: boolean // toggleVoiceConversation is a no-op while set
+  focusInput: () => void
+  insertText: (text: string) => void // receives dictation transcripts
+  maxRecordingSeconds: number
+  onSubmit: ChatBarProps['onSubmit']
+  onTranscribeAudio: ChatBarProps['onTranscribeAudio']
+  sessionId: string | null | undefined
+}
+
+/**
+ * Voice side of the composer in one hook: push-to-talk dictation (transcript is
+ * inserted into the draft), the voice-conversation loop, and auto-speaking of
+ * replies. It only consumes the draft/submit primitives handed in — nothing
+ * depends back on it, which is what lets it live outside ChatBar.
+ */
+export function useComposerVoice({
+  busy,
+  clearDraft,
+  disabled,
+  focusInput,
+  insertText,
+  maxRecordingSeconds,
+  onSubmit,
+  onTranscribeAudio,
+  sessionId
+}: UseComposerVoiceArgs) {
+  const { t } = useI18n()
+  const [voiceConversationActive, setVoiceConversationActive] = useState(false)
+  const lastSpokenIdRef = useRef<string | null>(null)
+
+  const { dictate, voiceActivityState, voiceStatus } = useVoiceRecorder({
+    focusInput,
+    maxRecordingSeconds,
+    onTranscript: insertText,
+    onTranscribeAudio
+  })
+
+  // Last non-hidden assistant message in `$messages` (undefined when none).
+  const latestVisibleAssistant = () => $messages.get().findLast(m => m.role === 'assistant' && !m.hidden)
+
+  // That message as { id, pending, text } when it has text and is not the one
+  // already marked spoken; null otherwise.
+  const pendingResponse = () => {
+    const reply = latestVisibleAssistant()
+
+    if (!reply || reply.id === lastSpokenIdRef.current) {
+      return null
+    }
+
+    const text = chatMessageText(reply).trim()
+
+    return text ? { id: reply.id, pending: Boolean(reply.pending), text } : null
+  }
+
+  // Record the last non-hidden assistant message as already spoken.
+  const consumePendingResponse = () => {
+    const reply = latestVisibleAssistant()
+
+    if (reply) {
+      lastSpokenIdRef.current = reply.id
+    }
+  }
+
+  // A voice turn is only submitted while idle; when busy it returns untouched.
+  const submitVoiceTurn = async (text: string) => {
+    if (busy) {
+      return
+    }
+
+    triggerHaptic('submit')
+    resetBrowseState(sessionId)
+    clearDraft()
+    await onSubmit(text)
+  }
+
+  const conversation = useVoiceConversation({
+    busy,
+    consumePendingResponse,
+    enabled: voiceConversationActive,
+    onFatalError: () => setVoiceConversationActive(false),
+    onSubmit: submitVoiceTurn,
+    onTranscribeAudio,
+    pendingResponse
+  })
+
+  // The `composer.voice` hotkey (Ctrl+B) toggles the conversation. Starting
+  // with STT unconfigured lets the conversation surface its own "configure
+  // speech-to-text" notice rather than silently no-opping.
+  const toggleVoiceConversation = useCallback(() => {
+    if (disabled) {
+      return
+    }
+
+    if (!voiceConversationActive) {
+      setVoiceConversationActive(true)
+
+      return
+    }
+
+    setVoiceConversationActive(false)
+    void conversation.end()
+  }, [conversation, disabled, voiceConversationActive])
+
+  useEffect(() => onComposerVoiceToggleRequest(toggleVoiceConversation), [toggleVoiceConversation])
+
+  // Explicit start/end for the on-screen conversation controls (the hotkey uses
+  // the gated toggle above).
+  const startConversation = useCallback(() => setVoiceConversationActive(true), [])
+
+  const endConversation = useCallback(() => {
+    setVoiceConversationActive(false)
+    void conversation.end()
+  }, [conversation])
+
+  // Flip the auto-speak preference; a rejected update is reported via notifyError.
+  const handleToggleAutoSpeak = useCallback(() => {
+    const next = !$autoSpeakReplies.get()
+
+    void setAutoSpeakReplies(next).catch(error => notifyError(error, t.settings.config.autosaveFailed))
+  }, [t])
+
+  useAutoSpeakReplies({
+    conversationActive: voiceConversationActive,
+    failureLabel: t.assistant.thread.readAloudFailed,
+    markSpoken: consumePendingResponse,
+    pendingReply: pendingResponse,
+    sessionId
+  })
+
+  return {
+    conversation,
+    dictate,
+    endConversation,
+    handleToggleAutoSpeak,
+    startConversation,
+    voiceActivityState,
+    voiceConversationActive,
+    voiceStatus
+  }
+}
--- a/apps/desktop/src/app/chat/composer/hooks/use-status-presence.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-status-presence.ts
@@ -0,0 +1,36 @@
+import { useSyncExternalStore } from 'react'
+
+import { $statusItemsBySession } from '@/store/composer-status'
+import { $previewStatusBySession } from '@/store/preview-status'
+
+// Fan one change callback out to both status stores; the returned disposer
+// detaches the two listeners in the order they were attached.
+const subscribe = (onChange: () => void) => {
+  const detachers = [$statusItemsBySession.listen(onChange), $previewStatusBySession.listen(onChange)]
+
+  return () => {
+    detachers.forEach(detach => detach())
+  }
+}
+
+/**
+ * Coarse presence flag for a session's status stack: true while the session has
+ * at least one status item or preview, false otherwise (and always false
+ * without a session id). The snapshot is a boolean, so it only changes on the
+ * appear/disappear edge. ChatBar uses it to toggle a styling data-attr;
+ * subscribing to the full `$statusItemsBySession` (a `computed` that rebuilds
+ * the entire map) / `$previewStatusBySession` maps re-rendered ChatBar on every
+ * per-item mutation and on churn in OTHER sessions.
+ */
+export function useSessionStatusPresence(sessionId: string | null): boolean {
+  return useSyncExternalStore(subscribe, () => {
+    // Without a session there is no stack to show.
+    if (!sessionId) {
+      return false
+    }
+
+    const itemCount = $statusItemsBySession.get()[sessionId]?.length ?? 0
+
+    return itemCount > 0 || ($previewStatusBySession.get()[sessionId]?.length ?? 0) > 0
+  })
+}
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
--- a/apps/desktop/src/app/chat/composer/queue-panel.tsx
+++ b/apps/desktop/src/app/chat/composer/queue-panel.tsx
@@ -1,9 +1,10 @@
 import { StatusRow } from '@/components/chat/status-row'
 import { StatusSection } from '@/components/chat/status-section'
 import { Button } from '@/components/ui/button'
+import { Codicon } from '@/components/ui/codicon'
 import { Tip } from '@/components/ui/tooltip'
 import { type Translations, useI18n } from '@/i18n'
-import { ArrowUp, Pencil, Trash2 } from '@/lib/icons'
+import { ArrowUp, iconSize, Pencil, Trash2 } from '@/lib/icons'
 import { cn } from '@/lib/utils'
 import type { QueuedPromptEntry } from '@/store/composer-queue'

@@ -28,7 +29,10 @@ export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendN
  }

  return (
-    <StatusSection label={c.queued(entries.length)}>
+    <StatusSection
+      icon={<Codicon className="text-muted-foreground/70" name="layers" size="0.8rem" />}
+      label={c.queued(entries.length)}
+    >
      {entries.map(entry => {
        const isEditing = editingId === entry.id
        const attachmentsCount = entry.attachments.length
@@ -52,7 +56,7 @@ export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendN
                    type="button"
                    variant="ghost"
                  >
-                    <Pencil size={11} />
+                    <Pencil className={iconSize.xs} />
                  </Button>
                </Tip>
                <Tip label={busy ? c.queueSendNext : c.queueSend}>
@@ -65,7 +69,7 @@ export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendN
                    type="button"
                    variant="ghost"
                  >
-                    <ArrowUp size={11} />
+                    <ArrowUp className={iconSize.xs} />
                  </Button>
                </Tip>
                <Tip label={c.queueDelete}>
@@ -77,7 +81,7 @@ export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendN
                    type="button"
                    variant="ghost"
                  >
-                    <Trash2 size={11} />
+                    <Trash2 className={iconSize.xs} />
                  </Button>
                </Tip>
              </>
--- a/apps/desktop/src/app/chat/composer/status-stack/index.tsx
+++ b/apps/desktop/src/app/chat/composer/status-stack/index.tsx
@@ -35,11 +35,11 @@ const BACKGROUND_POLL_MS = 5_000
 // letting dead URLs pile up. File previews (a real on-disk artifact) stand alone.
 const isLocalhostPreview = (target: string): boolean => /\b(?:localhost|127\.0\.0\.1|0\.0\.0\.0)\b/i.test(target)

-// Real codicons per group (no sparkles): a checklist for todos, a bot for
-// subagents, a background process glyph for background tasks.
+// Real codicons per group (no sparkles): a checklist for todos, the agent glyph
+// for subagents, a background process glyph for background tasks.
 const GROUP_ICON: Record<StatusGroup['type'], string> = {
  todo: 'checklist',
-  subagent: 'hubot',
+  subagent: 'agent',
  background: 'server-process'
 }

@@ -118,48 +118,59 @@ export function ComposerStatusStack({ queue, sessionId }: ComposerStatusStackPro

  const hasBackgroundGroup = groups.some(g => g.type === 'background')

-  const sections: { key: string; node: ReactNode }[] = groups.map(group => ({
-    key: group.type,
-    node: (
-      <StatusSection
-        accessory={
-          group.type === 'subagent' ? (
-            <Button
-              className="text-muted-foreground/75 hover:text-foreground/90"
-              onClick={openAgents}
-              size="micro"
-              type="button"
-              variant="text"
-            >
-              {t.statusStack.agents}
-            </Button>
-          ) : undefined
-        }
-        defaultCollapsed={group.type !== 'todo'}
-        icon={<Codicon className="text-muted-foreground/70" name={GROUP_ICON[group.type]} size="0.8rem" />}
-        label={groupLabel(group, t.statusStack)}
-      >
-        {group.items.map(item => (
-          <StatusItemRow
-            item={item}
-            key={item.id}
-            onDismiss={sessionId ? id => dismissBackgroundProcess(sessionId, id) : undefined}
-            onOpen={() => openSubagent(item)}
-            onStop={sessionId ? id => void stopBackgroundProcess(sessionId, id) : undefined}
-          />
-        ))}
-        {group.type === 'background' && previewRows}
-      </StatusSection>
-    )
-  }))
+  const previewBlock = <div className="px-1 py-0.5">{previewRows}</div>
+
+  const sections: { key: string; node: ReactNode }[] = []
+
+  for (const group of groups) {
+    sections.push({
+      key: group.type,
+      node: (
+        <StatusSection
+          accessory={
+            group.type === 'subagent' ? (
+              <Button
+                className="text-muted-foreground/75 hover:text-foreground/90"
+                onClick={openAgents}
+                size="micro"
+                type="button"
+                variant="text"
+              >
+                {t.statusStack.agents}
+              </Button>
+            ) : undefined
+          }
+          defaultCollapsed={group.type !== 'todo'}
+          icon={<Codicon className="text-muted-foreground/70" name={GROUP_ICON[group.type]} size="0.8rem" />}
+          label={groupLabel(group, t.statusStack)}
+        >
+          {group.items.map(item => (
+            <StatusItemRow
+              item={item}
+              key={item.id}
+              onDismiss={sessionId ? id => dismissBackgroundProcess(sessionId, id) : undefined}
+              onOpen={() => openSubagent(item)}
+              onStop={sessionId ? id => void stopBackgroundProcess(sessionId, id) : undefined}
+            />
+          ))}
+        </StatusSection>
+      )
+    })
+
+    // Preview links belong to the background group (a localhost dev server and
+    // its preview are the same thing), but they must stay VISIBLE even when that
+    // group is collapsed — the whole point is a one-tap open. Render them as an
+    // always-visible block right after the background section, not as collapsible
+    // children that get swallowed the moment a background task appears.
+    if (group.type === 'background' && previewRows.length > 0) {
+      sections.push({ key: 'preview', node: previewBlock })
+    }
+  }

  // No background group to host them (e.g. a standalone on-disk file preview):
-  // keep the previews as their own row block so they don't disappear.
+  // still render them as their own always-visible block.
  if (previewRows.length > 0 && !hasBackgroundGroup) {
-    sections.push({
-      key: 'preview',
-      node: <div className="px-1 py-0.5">{previewRows}</div>
-    })
+    sections.push({ key: 'preview', node: previewBlock })
  }

  if (queue) {
--- a/apps/desktop/src/app/chat/composer/voice-activity.tsx
+++ b/apps/desktop/src/app/chat/composer/voice-activity.tsx
@@ -3,7 +3,7 @@ import { useEffect, useRef } from 'react'

 import { Button } from '@/components/ui/button'
 import { useI18n } from '@/i18n'
-import { Loader2, Mic, Volume2, VolumeX } from '@/lib/icons'
+import { iconSize, Loader2, Mic, Volume2, VolumeX } from '@/lib/icons'
 import { cn } from '@/lib/utils'
 import { stopVoicePlayback } from '@/lib/voice-playback'
 import { $voicePlayback } from '@/store/voice-playback'
@@ -188,7 +188,7 @@ export function VoiceActivity({ state }: { state: VoiceActivityState }) {
          recording ? 'bg-primary/15 text-primary' : 'bg-primary/10 text-primary'
        )}
      >
-        {recording ? <Mic size={12} /> : <Loader2 className="animate-spin" size={12} />}
+        {recording ? <Mic className={iconSize.xs} /> : <Loader2 className={cn('animate-spin', iconSize.xs)} />}
      </div>

      <div className="flex min-w-0 flex-1 items-center gap-2">
@@ -229,7 +229,7 @@ export function VoicePlaybackActivity() {
      role="status"
    >
      <div className="flex size-5 shrink-0 items-center justify-center rounded-full bg-primary/15 text-primary">
-        {preparing ? <Loader2 className="animate-spin" size={12} /> : <Volume2 size={12} />}
+        {preparing ? <Loader2 className={cn('animate-spin', iconSize.xs)} /> : <Volume2 className={iconSize.xs} />}
      </div>

      <div className="flex min-w-0 flex-1 items-center gap-2">
@@ -244,7 +244,7 @@ export function VoicePlaybackActivity() {
        type="button"
        variant="ghost"
      >
-        <VolumeX size={12} />
+        <VolumeX className={iconSize.xs} />
        Stop
      </Button>
    </div>
--- a/apps/desktop/src/app/chat/hooks/use-composer-actions.test.ts
+++ b/apps/desktop/src/app/chat/hooks/use-composer-actions.test.ts
@@ -1,6 +1,6 @@
-import { describe, expect, it } from 'vitest'
+import { afterEach, describe, expect, it, vi } from 'vitest'

-import { type DroppedFile, partitionDroppedFiles } from './use-composer-actions'
+import { type DroppedFile, extractDroppedFiles, HERMES_PATHS_MIME, partitionDroppedFiles } from './use-composer-actions'

 // A Finder/Explorer drop carries a native File handle; an in-app drag (project
 // tree, gutter line ref) is path-only. The split decides whether a drop becomes
@@ -39,6 +39,18 @@ describe('partitionDroppedFiles', () => {
    expect(inAppRefs).toEqual([lineRef])
  })

+  it('routes an OS folder drop (path-only, isDirectory) to inAppRefs, not the upload pipeline', () => {
+    // extractDroppedFiles emits a dropped directory as a path-only entry so it
+    // stays a @folder: ref instead of hitting file.attach, which can't stage a
+    // directory ("file not found on gateway and no data_url provided").
+    const folder = inAppRef('/Users/jeff/projects/hermes', { isDirectory: true })
+
+    const { inAppRefs, osDrops } = partitionDroppedFiles([folder])
+
+    expect(osDrops).toEqual([])
+    expect(inAppRefs).toEqual([folder])
+  })
+
  it('splits a mixed drop and preserves order within each group', () => {
    const a = inAppRef('a.ts')
    const b = osDrop('/abs/b.pdf')
@@ -55,3 +67,112 @@ describe('partitionDroppedFiles', () => {
    expect(partitionDroppedFiles([])).toEqual({ inAppRefs: [], osDrops: [] })
  })
 })
+
+// Minimal DataTransfer stand-in. A real OS drop populates BOTH `items` (which
+// alone carries webkitGetAsEntry for folder detection) and `files`; the mock
+// mirrors that so the dedup path is exercised too.
+interface StubEntry {
+  path: string
+  isDirectory: boolean
+}
+
+// The stub also carries `_pathByFile` (File → absolute path), which the fake
+// `window.hermesDesktop.getPathForFile` bridge installed by stubBridge reads.
+type StubTransfer = DataTransfer & { _pathByFile: Map<File, string> }
+
+function stubTransfer(entries: StubEntry[], internalRaw = ''): StubTransfer {
+  const files = entries.map(entry => new File(['x'], entry.path.split('/').pop() || 'f'))
+  const pathByFile = new Map(files.map((file, i) => [file, entries[i].path]))
+
+  const items: Record<number | string, unknown> = { length: entries.length }
+  entries.forEach((entry, i) => {
+    items[i] = {
+      kind: 'file' as const,
+      getAsFile: () => files[i],
+      webkitGetAsEntry: () => ({ isDirectory: entry.isDirectory, isFile: !entry.isDirectory })
+    }
+  })
+
+  return {
+    getData: (mime: string) => (mime === HERMES_PATHS_MIME ? internalRaw : ''),
+    files: {
+      length: files.length,
+      item: (i: number) => files[i] ?? null
+    },
+    items,
+    _pathByFile: pathByFile
+  } as unknown as StubTransfer
+}
+
+describe('extractDroppedFiles', () => {
+  afterEach(() => {
+    vi.unstubAllGlobals()
+  })
+
+  const stubBridge = (transfer: StubTransfer) => {
+    vi.stubGlobal('window', {
+      hermesDesktop: {
+        getPathForFile: (file: File) => transfer._pathByFile.get(file) ?? ''
+      }
+    })
+  }
+
+  it('emits a dropped directory as a path-only entry with isDirectory (no File to upload)', () => {
+    const transfer = stubTransfer([{ path: '/Users/jeff/projects/hermes', isDirectory: true }])
+
+    stubBridge(transfer)
+
+    const result = extractDroppedFiles(transfer)
+
+    expect(result).toHaveLength(1)
+    expect(result[0]?.isDirectory).toBe(true)
+    expect(result[0]?.path).toBe('/Users/jeff/projects/hermes')
+    // A directory carries no bytes — it must NOT ride the File/upload pipeline.
+    expect(result[0]?.file).toBeUndefined()
+    // And it partitions as an in-app ref (→ @folder:), never an OS upload drop.
+    expect(partitionDroppedFiles(result).osDrops).toEqual([])
+  })
+
+  it('still emits a dropped file with its native File handle for the upload pipeline', () => {
+    const transfer = stubTransfer([{ path: '/Users/jeff/Downloads/report.pdf', isDirectory: false }])
+
+    stubBridge(transfer)
+
+    const result = extractDroppedFiles(transfer)
+
+    expect(result).toHaveLength(1)
+    expect(result[0]?.isDirectory).toBeFalsy()
+    expect(result[0]?.path).toBe('/Users/jeff/Downloads/report.pdf')
+    expect(result[0]?.file).toBeInstanceOf(File)
+    expect(partitionDroppedFiles(result).osDrops).toHaveLength(1)
+  })
+
+  it('classifies a mixed folder+file drop independently', () => {
+    const transfer = stubTransfer([
+      { path: '/abs/src', isDirectory: true },
+      { path: '/abs/notes.txt', isDirectory: false }
+    ])
+
+    stubBridge(transfer)
+
+    const result = extractDroppedFiles(transfer)
+    const { inAppRefs, osDrops } = partitionDroppedFiles(result)
+
+    expect(inAppRefs.map(entry => entry.path)).toEqual(['/abs/src'])
+    expect(inAppRefs[0]?.isDirectory).toBe(true)
+    expect(osDrops.map(entry => entry.path)).toEqual(['/abs/notes.txt'])
+  })
+
+  it('does not duplicate a folder that appears in both items and files', () => {
+    // Chromium lists a dropped folder in transfer.files too (as a size-0 File);
+    // the items pass claims its path first so the files fallback skips it.
+    const transfer = stubTransfer([{ path: '/abs/project', isDirectory: true }])
+
+    stubBridge(transfer)
+
+    const result = extractDroppedFiles(transfer)
+
+    expect(result).toHaveLength(1)
+    expect(result[0]?.isDirectory).toBe(true)
+  })
+})
--- a/apps/desktop/src/app/chat/hooks/use-composer-actions.ts
+++ b/apps/desktop/src/app/chat/hooks/use-composer-actions.ts
@@ -44,7 +44,8 @@ export interface DroppedFile {
  file?: File
  /** Absolute filesystem path. Empty when an OS drop didn't carry one. */
  path: string
-  /** True if the entry is a directory. Currently only set by in-app drags. */
+  /** True if the entry is a directory. Set by in-app drags, and by OS drops via
+   * DataTransferItem.webkitGetAsEntry(). */
  isDirectory?: boolean
  /** First line number for in-app line-ref drags (source view gutter). */
  line?: number
@@ -108,39 +109,50 @@ export function extractDroppedFiles(transfer: DataTransfer): DroppedFile[] {
    // Malformed payload — fall through to native files.
  }

-  const fileList = transfer.files
-
-  if (fileList) {
-    for (let i = 0; i < fileList.length; i += 1) {
-      const file = fileList.item(i)
-
-      if (!file || seenFiles.has(file)) {
-        continue
-      }
-
-      seenFiles.add(file)
-      let path = ''
-
-      if (getPath) {
-        try {
-          path = getPath(file) || ''
-        } catch {
-          path = ''
-        }
-      }
-
-      if (path && seenPaths.has(path)) {
-        continue
-      }
-
-      if (path) {
-        seenPaths.add(path)
-      }
-
-      result.push({ file, path })
+  // Add a native OS-drop entry, deduped by File identity and by resolved path.
+  // A dropped directory has no byte content to upload, so it's emitted as a
+  // path-only entry with `isDirectory: true` — routed to a `@folder:` ref like
+  // the folder picker, not the upload pipeline (the gateway can't read its bytes
+  // and there's no data_url). A directory with no resolvable path is skipped.
+  const pushNativeEntry = (file: File, isDirectory: boolean) => {
+    if (seenFiles.has(file)) {
+      return
    }
+
+    seenFiles.add(file)
+    let path = ''
+
+    if (getPath) {
+      try {
+        path = getPath(file) || ''
+      } catch {
+        path = ''
+      }
+    }
+
+    if (path && seenPaths.has(path)) {
+      return
+    }
+
+    if (path) {
+      seenPaths.add(path)
+    }
+
+    if (isDirectory) {
+      if (path) {
+        result.push({ isDirectory: true, path })
+      }
+
+      return
+    }
+
+    result.push({ file, path })
  }

+  // Process items first: DataTransferItem.webkitGetAsEntry() is the only
+  // synchronous way to tell a dropped folder from a file, and it lives only on
+  // items (not transfer.files). Must be read here, inside the drop handler,
+  // before the DataTransfer detaches.
  const items = transfer.items

  if (items) {
@@ -151,32 +163,39 @@ export function extractDroppedFiles(transfer: DataTransfer): DroppedFile[] {
        continue
      }

+      let isDirectory = false
+
+      try {
+        const entry = typeof item.webkitGetAsEntry === 'function' ? item.webkitGetAsEntry() : null
+        isDirectory = entry?.isDirectory === true
+      } catch {
+        isDirectory = false
+      }
+
      const file = item.getAsFile()

-      if (!file || seenFiles.has(file)) {
+      if (!file) {
        continue
      }

-      seenFiles.add(file)
-      let path = ''
+      pushNativeEntry(file, isDirectory)
+    }
+  }

-      if (getPath) {
-        try {
-          path = getPath(file) || ''
-        } catch {
-          path = ''
-        }
-      }
+  // Fallback for environments that populate transfer.files but not items.
+  // webkitGetAsEntry isn't available on this path, so directory detection
+  // relies on the items pass above; anything reaching here is treated as a file.
+  const fileList = transfer.files

-      if (path && seenPaths.has(path)) {
+  if (fileList) {
+    for (let i = 0; i < fileList.length; i += 1) {
+      const file = fileList.item(i)
+
+      if (!file) {
        continue
      }

-      if (path) {
-        seenPaths.add(path)
-      }
-
-      result.push({ file, path })
+      pushNativeEntry(file, false)
    }
  }

--- a/apps/desktop/src/app/chat/sidebar/chrome.tsx
+++ b/apps/desktop/src/app/chat/sidebar/chrome.tsx
@@ -1,6 +1,7 @@
 import type * as React from 'react'

 import { Codicon } from '@/components/ui/codicon'
+import { RowButton } from '@/components/ui/row-button'
 import { cn } from '@/lib/utils'

 // Shared, content-agnostic sidebar chrome — used by both the flat session
@@ -64,7 +65,7 @@ export function SidebarRowCluster({ className, ...props }: React.ComponentProps<

 /** Session row main tap target. */
 export function SidebarRowBody({ className, ...props }: React.ComponentProps<'button'>) {
-  return <button className={cn(rowInset, 'bg-transparent text-left', className)} type="button" {...props} />
+  return <RowButton className={cn(rowInset, 'bg-transparent text-left', className)} {...props} />
 }

 /** Tappable label — underline/truncate live on the inner span, not the button. */
@@ -75,9 +76,9 @@ export function SidebarRowLink({
  ...props
 }: React.ComponentProps<'button'> & { labelClassName?: string }) {
  return (
-    <button className={cn('min-w-0 shrink bg-transparent p-0 text-left', className)} type="button" {...props}>
+    <RowButton className={cn('min-w-0 shrink bg-transparent p-0 text-left', className)} {...props}>
      <span className={cn(rowLabel, labelClassName)}>{children}</span>
-    </button>
+    </RowButton>
  )
 }

--- a/apps/desktop/src/app/chat/sidebar/index.tsx
+++ b/apps/desktop/src/app/chat/sidebar/index.tsx
@@ -1,19 +1,5 @@
-import {
-  closestCenter,
-  DndContext,
-  type DragEndEvent,
-  KeyboardSensor,
-  PointerSensor,
-  useSensor,
-  useSensors
-} from '@dnd-kit/core'
-import {
-  arrayMove,
-  SortableContext,
-  sortableKeyboardCoordinates,
-  useSortable,
-  verticalListSortingStrategy
-} from '@dnd-kit/sortable'
+import { KeyboardSensor, PointerSensor, useSensor, useSensors } from '@dnd-kit/core'
+import { sortableKeyboardCoordinates } from '@dnd-kit/sortable'
 import { useStore } from '@nanostores/react'
 import type * as React from 'react'
 import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
@@ -21,7 +7,6 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import { PlatformAvatar } from '@/app/messaging/platform-icon'
 import { Button } from '@/components/ui/button'
 import { Codicon } from '@/components/ui/codicon'
-import { DisclosureCaret } from '@/components/ui/disclosure-caret'
 import { GlyphSpinner } from '@/components/ui/glyph-spinner'
 import { KbdGroup } from '@/components/ui/kbd'
 import { SearchField } from '@/components/ui/search-field'
@@ -34,13 +19,10 @@ import {
  SidebarMenuButton,
  SidebarMenuItem
 } from '@/components/ui/sidebar'
-import { Skeleton } from '@/components/ui/skeleton'
-import type { HermesGitWorktree } from '@/global'
 import { searchSessions, type SessionInfo, type SessionSearchResult } from '@/hermes'
 import { useI18n } from '@/i18n'
 import { comboTokens } from '@/lib/keybinds/combo'
 import { profileColor } from '@/lib/profile-color'
-import { flattenSessionsWithBranches } from '@/lib/session-branch-tree'
 import { sessionMatchesSearch } from '@/lib/session-search'
 import { normalizeSessionSource, sessionSourceLabel } from '@/lib/session-source'
 import { cn } from '@/lib/utils'
@@ -114,37 +96,31 @@ import {
 } from '@/store/session'

 import { type AppView, ARTIFACTS_ROUTE, MESSAGING_ROUTE, SKILLS_ROUTE } from '../../routes'
-import { SidebarPanelLabel } from '../../shell/sidebar-label'
 import type { SidebarNavItem } from '../../types'

-import { countLabel, SidebarCount } from './chrome'
+import { countLabel } from './chrome'
 import { SidebarCronJobsSection } from './cron-jobs-section'
 import { SidebarLoadMoreRow } from './load-more-row'
-import { reconcileFreshFirst, resolveManualSessionOrderIds } from './order'
+import { orderByIds, reconcileOrderIds, resolveManualSessionOrderIds, sameIds } from './order'
 import { ProfileRail } from './profile-switcher'
 import { ProjectDialog } from './project-dialog'
 import {
-  EnteredProjectContent,
  overlayLiveLanes,
  overlayLivePreviews,
  PROJECT_PREVIEW_COUNT,
  ProjectBackRow,
  ProjectMenu,
-  ProjectOverviewRow,
  projectTreeCwd,
  sessionRecency as sessionTime,
  type SidebarProjectTree,
  type SidebarSessionGroup,
-  SidebarWorkspaceGroup,
  type SidebarWorkspaceTree,
  sortProjectsForOverview,
  StartWorkButton,
  useRepoWorktreeMap
 } from './projects'
-import { SidebarSessionRow } from './session-row'
-import { VirtualSessionList } from './virtual-session-list'
-
-const VIRTUALIZE_THRESHOLD = 25
+import { SidebarBlankState, SidebarPinnedEmptyState, SidebarSessionSkeletons } from './section-states'
+import { SidebarSessionsSection, VIRTUALIZE_THRESHOLD } from './sessions-section'

 // Non-session groups (messaging platforms) stay compact: show a few rows up
 // front, reveal more in larger steps on demand. Keeps a busy platform from
@@ -196,108 +172,6 @@ const HEADER_ACTION_BTN =
 const HEADER_NAV_BTN =
  'text-(--ui-text-tertiary) opacity-70 transition-opacity hover:bg-(--ui-control-hover-background) hover:text-foreground hover:opacity-100 focus-visible:opacity-100'

-// Sidebar reordering is a strictly vertical list. The dragged item's transform
-// is rendered Y-only in useSortableBindings (no x, no scale); this just stops
-// dnd-kit's auto-scroll from dragging the rail — or the window — sideways when
-// the pointer nears an edge, killing the horizontal "drag to valhalla".
-const reorderAutoScroll = { threshold: { x: 0, y: 0.2 } }
-
-// One self-contained, nesting-safe reorderable list. It owns its DndContext, so a
-// drag only ever collides with THIS list's own items — drop it at any depth (repos,
-// worktrees, sessions) and reordering "just works" without leaking into the lists
-// around or inside it. Pair each item with useSortableBindings(id); the list reports
-// the new id order and the caller persists it. This is the single generic primitive
-// behind every reorderable surface in the sidebar.
-function ReorderableList({
-  children,
-  ids,
-  onReorder,
-  sensors
-}: {
-  children: React.ReactNode
-  ids: string[]
-  onReorder: (ids: string[]) => void
-  sensors?: ReturnType<typeof useSensors>
-}) {
-  const handleDragEnd = ({ activatorEvent, active, over }: DragEndEvent) => {
-    // dnd-kit only restores focus for keyboard drags; after a pointer drop the
-    // browser leaves :focus on the grab handle, which keeps a focus-within
-    // grabber/affordance reveal stuck "on". Drop that focus so the row returns
-    // to its resting state once the pointer moves away.
-    if (!(activatorEvent instanceof KeyboardEvent)) {
-      ;(document.activeElement as HTMLElement | null)?.blur()
-    }
-
-    if (!over || active.id === over.id) {
-      return
-    }
-
-    const from = ids.indexOf(String(active.id))
-    const to = ids.indexOf(String(over.id))
-
-    if (from >= 0 && to >= 0) {
-      onReorder(arrayMove(ids, from, to))
-    }
-  }
-
-  return (
-    <DndContext
-      autoScroll={reorderAutoScroll}
-      collisionDetection={closestCenter}
-      onDragEnd={handleDragEnd}
-      sensors={sensors}
-    >
-      <SortableContext items={ids} strategy={verticalListSortingStrategy}>
-        {children}
-      </SortableContext>
-    </DndContext>
-  )
-}
-
-function orderByIds<T>(items: T[], getId: (item: T) => string, orderIds: string[]): T[] {
-  if (!orderIds.length) {
-    return items
-  }
-
-  const byId = new Map(items.map(item => [getId(item), item]))
-  const seen = new Set<string>()
-  const ordered: T[] = []
-
-  for (const id of orderIds) {
-    const item = byId.get(id)
-
-    if (item) {
-      ordered.push(item)
-      seen.add(id)
-    }
-  }
-
-  // Items missing from the persisted order are new since it was last
-  // reconciled. Callers pass recency-sorted lists (newest first), so surface
-  // these at the TOP instead of burying them beneath the saved order —
-  // otherwise a brand-new session sinks to the bottom of the sidebar and reads
-  // as "my latest session never showed up".
-  const fresh = items.filter(item => !seen.has(getId(item)))
-
-  return fresh.length ? [...fresh, ...ordered] : ordered
-}
-
-function reconcileOrderIds(currentIds: string[], orderIds: string[]): string[] {
-  if (!currentIds.length) {
-    return []
-  }
-
-  if (!orderIds.length) {
-    return currentIds
-  }
-
-  return reconcileFreshFirst(currentIds, orderIds)
-}
-
-function sameIds(left: string[], right: string[]) {
-  return left.length === right.length && left.every((item, index) => item === right[index])
-}
-
 // FTS results cover sessions that aren't in the loaded page; synthesize a
 // minimal SessionInfo so they render in the same row component (resume works
 // by id; the snippet stands in for the preview).
@@ -324,25 +198,6 @@ function searchResultToSession(result: SessionSearchResult): SessionInfo {
  }
 }

-function useSortableBindings(id: string) {
-  const { attributes, isDragging, listeners, setNodeRef, transform, transition } = useSortable({ id })
-
-  return {
-    dragging: isDragging,
-    dragHandleProps: { ...attributes, ...listeners },
-    ref: setNodeRef,
-    reorderable: true as const,
-    style: {
-      // Uniform vertical list: only ever translate on Y. Ignoring x and the
-      // scaleX/scaleY that CSS.Transform.toString would emit keeps a dragged
-      // group/row from drifting sideways or morphing its size mid-drag.
-      transform: transform ? `translate3d(0px, ${transform.y}px, 0)` : undefined,
-      transition: isDragging ? undefined : transition,
-      willChange: isDragging ? 'transform' : undefined
-    }
-  }
-}
-
 interface ChatSidebarProps extends React.ComponentProps<typeof Sidebar> {
  currentView: AppView
  onNavigate: (item: SidebarNavItem) => void
@@ -1149,8 +1004,7 @@ export function ChatSidebar({

  const showSessionSkeletons = sessionsLoading && sortedSessions.length === 0

-  const showSessionSections =
-    showSessionSkeletons || sortedSessions.length > 0 || projectModel.length > 0
+  const showSessionSections = showSessionSkeletons || sortedSessions.length > 0 || projectModel.length > 0

  // Each reorderable list reports its OWN new id order; persisting is a direct,
  // typed write — no id-prefix sniffing to figure out which level moved.
@@ -1551,110 +1405,6 @@ export function ChatSidebar({
  )
 }

-interface SidebarSectionHeaderProps {
-  label: string
-  open: boolean
-  onToggle: () => void
-  action?: React.ReactNode
-  meta?: React.ReactNode
-  icon?: React.ReactNode
-  // When false the section can't be collapsed: the label renders static (no
-  // toggle, no caret) and the section is always open. Used for the single-
-  // project view, where collapsing one project makes no sense.
-  collapsible?: boolean
-}
-
-function SidebarSectionHeader({
-  label,
-  open,
-  onToggle,
-  action,
-  meta,
-  icon,
-  collapsible = true
-}: SidebarSectionHeaderProps) {
-  const labelBody = (
-    <>
-      {icon}
-      <SidebarPanelLabel>{label}</SidebarPanelLabel>
-      {meta && <SidebarCount>{meta}</SidebarCount>}
-    </>
-  )
-
-  return (
-    <div className="group/section flex shrink-0 items-center justify-between gap-1 pb-1 pt-1.5">
-      {collapsible ? (
-        <button
-          className="group/section-label flex w-fit items-center gap-1 bg-transparent text-left leading-none"
-          onClick={onToggle}
-          type="button"
-        >
-          {labelBody}
-          <DisclosureCaret
-            className="text-(--ui-text-tertiary) opacity-0 transition group-hover/section-label:opacity-100"
-            open={open}
-          />
-        </button>
-      ) : (
-        <div className="flex w-fit items-center gap-1 leading-none">{labelBody}</div>
-      )}
-      {action}
-    </div>
-  )
-}
-
-function SidebarSessionSkeletons() {
-  return (
-    <div aria-hidden="true" className="grid gap-px">
-      {['w-32', 'w-40', 'w-28', 'w-36', 'w-24'].map((width, i) => (
-        <div
-          className="grid min-h-[1.625rem] grid-cols-[minmax(0,1fr)_1.375rem] items-center rounded-md pl-2"
-          key={`${width}-${i}`}
-        >
-          <Skeleton className={cn('h-3 rounded-sm', width)} />
-          <Skeleton className="mx-auto size-3.5 rounded-sm opacity-60" />
-        </div>
-      ))}
-    </div>
-  )
-}
-
-function SidebarBlankState({ onNewProject }: { onNewProject: () => void }) {
-  const { t } = useI18n()
-  const s = t.sidebar
-
-  return (
-    <div className="grid min-h-0 flex-1 place-items-center px-4 text-center">
-      <div className="flex flex-col items-center gap-2">
-        <Codicon className="text-(--ui-text-quaternary)" name="root-folder" size="1.25rem" />
-        <p className="text-xs text-(--ui-text-tertiary)">{s.noSessions}</p>
-        <Button
-          className="mt-0.5 text-(--ui-text-secondary)"
-          onClick={onNewProject}
-          size="sm"
-          variant="ghost"
-        >
-          <Codicon name="add" size="0.75rem" />
-          {s.projects.newButton}
-        </Button>
-      </div>
-    </div>
-  )
-}
-
-function SidebarPinnedEmptyState() {
-  const { t } = useI18n()
-
-  return (
-    <div className="flex min-h-7 items-center gap-1.5 rounded-lg pl-2 text-[0.75rem] text-(--ui-text-tertiary)">
-      <span className="grid w-3.5 shrink-0 place-items-center text-(--ui-text-quaternary)">
-        <Codicon name="pin" size="0.75rem" />
-      </span>
-      <span>{t.sidebar.shiftClickHint}</span>
-    </div>
-  )
-}
-
 interface MessagingSection {
  sourceId: string
  label: string
@@ -1662,302 +1412,3 @@ interface MessagingSection {
  total: number
  hasMore: boolean
 }
-
-interface SidebarSessionsSectionProps {
-  label: string
-  open: boolean
-  onToggle: () => void
-  sessions: SessionInfo[]
-  activeSessionId: null | string
-  workingSessionIdSet: Set<string>
-  onResumeSession: (sessionId: string) => void
-  onDeleteSession: (sessionId: string) => void
-  onArchiveSession: (sessionId: string) => void
-  onBranchSession?: (sessionId: string, profile?: string) => void
-  onTogglePin: (sessionId: string) => void
-  onNewSessionInWorkspace?: (path: null | string) => void
-  pinned: boolean
-  rootClassName?: string
-  contentClassName?: string
-  emptyState: React.ReactNode
-  forceEmptyState?: boolean
-  headerAction?: React.ReactNode
-  footer?: React.ReactNode
-  groups?: SidebarSessionGroup[]
-  tree?: SidebarWorkspaceTree[]
-  // Project overview: when present, render a drill-in list of project rows
-  // instead of sessions. Clicking a row enters that project (onEnterProject),
-  // which then passes `projectContent` on the next render. Takes precedence
-  // over `tree` / `groups`.
-  projectOverview?: SidebarProjectTree[]
-  // Per-project preview rows (from the backend tree), keyed by project path.
-  projectOverviewPreviews?: Record<string, SessionInfo[]>
-  // True while the backend project tree is loading (overview skeleton).
-  projectsLoading?: boolean
-  onEnterProject?: (id: string) => void
-  // The entered project's flattened content: main-checkout sessions render
-  // directly (no redundant repo/branch header); only linked worktrees nest.
-  projectContent?: SidebarProjectTree
-  // Live git lanes (`git worktree list`) for repos in the entered project —
-  // a VISUAL enhancer only (empty lanes), never session membership.
-  projectRepoWorktrees?: Record<string, HermesGitWorktree[]>
-  // Live session cache used for optimistic placement inside entered-project lanes.
-  liveSessions?: SessionInfo[]
-  // Client-side optimistic eviction layer (deleted/archived ids).
-  removedSessionIds?: ReadonlySet<string>
-  activeProjectId?: null | string
-  labelMeta?: React.ReactNode
-  labelIcon?: React.ReactNode
-  // When false the section header is static (no caret/toggle) and always open.
-  collapsible?: boolean
-  sortable?: boolean
-  // The flat session list is the only hand-reorderable surface (grouped/project
-  // views sort deterministically), so it owns the one ReorderableList.
-  onReorderSessions?: (ids: string[]) => void
-  // Drag-to-reorder for the project overview list (top-level projects).
-  onReorderProjects?: (ids: string[]) => void
-  // Rendered atop the entered-project body (a "back to overview" row).
-  projectBackRow?: React.ReactNode
-  dndSensors?: ReturnType<typeof useSensors>
-}
-
-function SidebarSessionsSection({
-  label,
-  open,
-  onToggle,
-  sessions,
-  activeSessionId,
-  workingSessionIdSet,
-  onResumeSession,
-  onDeleteSession,
-  onArchiveSession,
-  onBranchSession,
-  onTogglePin,
-  onNewSessionInWorkspace,
-  pinned,
-  rootClassName,
-  contentClassName,
-  emptyState,
-  forceEmptyState = false,
-  headerAction,
-  footer,
-  groups,
-  projectOverview,
-  projectOverviewPreviews,
-  projectsLoading = false,
-  onEnterProject,
-  projectContent,
-  projectRepoWorktrees,
-  liveSessions,
-  removedSessionIds,
-  activeProjectId,
-  labelMeta,
-  labelIcon,
-  collapsible = true,
-  sortable = false,
-  onReorderSessions,
-  onReorderProjects,
-  projectBackRow,
-  dndSensors
-}: SidebarSessionsSectionProps) {
-  const sectionOpen = collapsible ? open : true
-  const hasGroupedSessions = Boolean(groups?.some(group => group.sessions.length > 0))
-  // A defined project list is itself content (even an empty project should
-  // render as a drill-in row so the user can see it exists).
-  const hasProjectOverview = Boolean(projectOverview?.length)
-  const hasProjectContent = Boolean(projectContent && projectContent.sessionCount > 0)
-
-  const showEmptyState =
-    forceEmptyState || (!hasGroupedSessions && !hasProjectOverview && !hasProjectContent && sessions.length === 0)
-
-  // The flat recents/pinned list is the only place sessions reorder by hand;
-  // grouped/tree views always sort by creation date and never drag.
-  const sessionsDraggable = sortable && !!onReorderSessions
-  const displayEntries = useMemo(() => flattenSessionsWithBranches(sessions), [sessions])
-
-  const renderRow = (session: SessionInfo, draggable: boolean, branchStem?: string) => {
-    const rowProps = {
-      branchStem,
-      isPinned: pinned,
-      isSelected: session.id === activeSessionId,
-      isWorking: workingSessionIdSet.has(session.id),
-      onArchive: () => onArchiveSession(session.id),
-      onBranch: onBranchSession ? () => onBranchSession(session.id, session.profile) : undefined,
-      onDelete: () => onDeleteSession(session.id),
-      onPin: () => onTogglePin(sessionPinId(session)),
-      onResume: () => onResumeSession(session.id),
-      reorderable: draggable && !branchStem,
-      session
-    }
-
-    return draggable && !branchStem ? (
-      <SortableSidebarSessionRow key={session.id} {...rowProps} />
-    ) : (
-      <SidebarSessionRow key={session.id} {...rowProps} />
-    )
-  }
-
-  // Sessions inside repos/worktrees are date-ordered and static.
-  const renderRows = (items: SessionInfo[]) =>
-    flattenSessionsWithBranches(items).map(({ branchStem, session }) => renderRow(session, false, branchStem))
-
-  const flatVirtualized =
-    !showEmptyState &&
-    !groups?.length &&
-    !projectOverview?.length &&
-    !projectContent &&
-    sessions.length >= VIRTUALIZE_THRESHOLD
-
-  // First paint into the grouped view (e.g. the app restoring the Projects tab)
-  // has flat recents in `sessions` but no tree yet. Show skeletons rather than
-  // flashing the flat session list until the overview/content/groups resolve. A
-  // background refresh keeps the prior tree, so this only fires when empty.
-  const showProjectsSkeleton =
-    projectsLoading && !hasProjectOverview && !hasProjectContent && !projectContent && !groups?.length
-
-  let inner: React.ReactNode
-
-  if (showProjectsSkeleton) {
-    inner = <SidebarSessionSkeletons />
-  } else if (projectContent) {
-    // Entered a project: the back row is always present, then either the
-    // (overlay-aware) content or a clean empty state — never a bare spinner or a
-    // blank pane while lanes hydrate.
-    inner = (
-      <>
-        {projectBackRow}
-        {hasProjectContent ? (
-          <EnteredProjectContent
-            liveSessions={liveSessions}
-            onNewSession={onNewSessionInWorkspace}
-            project={projectContent}
-            removedSessionIds={removedSessionIds}
-            renderRows={renderRows}
-            repoWorktrees={projectRepoWorktrees}
-          />
-        ) : (
-          emptyState
-        )}
-      </>
-    )
-  } else if (showEmptyState) {
-    inner = emptyState
-  } else if (projectOverview?.length) {
-    // The model is already ordered (default sort groups explicit-before-auto;
-    // a manual drag-order, when present, wins). Render in that order and make
-    // rows drag-to-reorder when a handler is wired.
-    const projectsDraggable = projectOverview.length > 1 && !!onReorderProjects
-    const Row = projectsDraggable ? SortableProjectOverviewRow : ProjectOverviewRow
-
-    const rows = projectOverview.map(project => (
-      <Row
-        activeProjectId={activeProjectId}
-        key={project.id}
-        onEnter={onEnterProject}
-        onNewSession={onNewSessionInWorkspace}
-        previewSessions={project.path ? projectOverviewPreviews?.[project.path] : undefined}
-        project={project}
-        renderRows={renderRows}
-      />
-    ))
-
-    inner =
-      projectsDraggable && onReorderProjects ? (
-        <ReorderableList
-          ids={projectOverview.map(project => project.id)}
-          onReorder={onReorderProjects}
-          sensors={dndSensors}
-        >
-          {rows}
-        </ReorderableList>
-      ) : (
-        rows
-      )
-  } else if (groups?.length) {
-    // Profile/source groups never reorder; render them flat with static rows.
-    inner = groups.map(group => (
-      <SidebarWorkspaceGroup
-        group={group}
-        key={group.id}
-        onNewSession={onNewSessionInWorkspace}
-        renderRows={renderRows}
-      />
-    ))
-  } else if (flatVirtualized) {
-    const virtual = (
-      <VirtualSessionList
-        activeSessionId={activeSessionId}
-        className={contentClassName}
-        entries={displayEntries}
-        onArchiveSession={onArchiveSession}
-        onBranchSession={onBranchSession}
-        onDeleteSession={onDeleteSession}
-        onResumeSession={onResumeSession}
-        onTogglePin={onTogglePin}
-        pinned={pinned}
-        sortable={sessionsDraggable}
-        workingSessionIdSet={workingSessionIdSet}
-      />
-    )
-
-    inner =
-      sessionsDraggable && onReorderSessions ? (
-        <ReorderableList ids={sessions.map(s => s.id)} onReorder={onReorderSessions} sensors={dndSensors}>
-          {virtual}
-        </ReorderableList>
-      ) : (
-        virtual
-      )
-  } else if (sessionsDraggable && onReorderSessions) {
-    inner = (
-      <ReorderableList ids={sessions.map(s => s.id)} onReorder={onReorderSessions} sensors={dndSensors}>
-        {displayEntries.map(({ branchStem, session }) => renderRow(session, true, branchStem))}
-      </ReorderableList>
-    )
-  } else {
-    inner = displayEntries.map(({ branchStem, session }) => renderRow(session, false, branchStem))
-  }
-
-  // The virtualizer owns its own scroller, so suppress the wrapper's overflow
-  // to avoid a double scroll container.
-  const resolvedContentClassName = cn(contentClassName, flatVirtualized && 'overflow-y-visible')
-
-  return (
-    <SidebarGroup className={rootClassName}>
-      <SidebarSectionHeader
-        action={headerAction}
-        collapsible={collapsible}
-        icon={labelIcon}
-        label={label}
-        meta={labelMeta}
-        onToggle={onToggle}
-        open={sectionOpen}
-      />
-      {sectionOpen && (
-        <SidebarGroupContent className={resolvedContentClassName}>
-          {inner}
-          {footer}
-        </SidebarGroupContent>
-      )}
-    </SidebarGroup>
-  )
-}
-
-interface SortableSessionRowProps {
-  session: SessionInfo
-  isPinned: boolean
-  isSelected: boolean
-  isWorking: boolean
-  onArchive: () => void
-  onDelete: () => void
-  onPin: () => void
-  onResume: () => void
-}
-
-function SortableSidebarSessionRow(props: SortableSessionRowProps) {
-  return <SidebarSessionRow {...props} {...useSortableBindings(props.session.id)} />
-}
-
-function SortableProjectOverviewRow(props: React.ComponentProps<typeof ProjectOverviewRow>) {
-  return <ProjectOverviewRow {...props} {...useSortableBindings(props.project.id)} />
-}
--- a/apps/desktop/src/app/chat/sidebar/order.test.ts
+++ b/apps/desktop/src/app/chat/sidebar/order.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest'

-import { resolveManualSessionOrderIds } from './order'
+import { orderByIds, reconcileOrderIds, resolveManualSessionOrderIds, sameIds } from './order'

 describe('resolveManualSessionOrderIds', () => {
  it('clears legacy auto-seeded order until the user manually reorders sessions', () => {
@@ -19,3 +19,44 @@ describe('resolveManualSessionOrderIds', () => {
    expect(resolveManualSessionOrderIds(['newest'], ['gone'], true)).toEqual([])
  })
 })
+
+describe('orderByIds', () => {
+  const id = (item: { id: string }) => item.id
+
+  it('returns items untouched when no order is given', () => {
+    const items = [{ id: 'a' }, { id: 'b' }]
+    expect(orderByIds(items, id, [])).toBe(items) // toBe: the very same array, not a copy
+  })
+
+  it('reorders by the given ids and drops missing ones', () => {
+    const items = [{ id: 'a' }, { id: 'b' }, { id: 'c' }] // 'b' is absent from the order below, so it leads
+    expect(orderByIds(items, id, ['c', 'gone', 'a'])).toEqual([{ id: 'b' }, { id: 'c' }, { id: 'a' }])
+  })
+
+  it('surfaces items absent from the order first', () => {
+    const items = [{ id: 'fresh' }, { id: 'a' }, { id: 'b' }]
+    expect(orderByIds(items, id, ['b', 'a'])).toEqual([{ id: 'fresh' }, { id: 'b' }, { id: 'a' }])
+  })
+})
+
+describe('reconcileOrderIds', () => {
+  it('returns empty for no current ids', () => {
+    expect(reconcileOrderIds([], ['a'])).toEqual([]) // a saved order alone never resurrects ids
+  })
+
+  it('returns current ids when there is no saved order', () => {
+    expect(reconcileOrderIds(['a', 'b'], [])).toEqual(['a', 'b'])
+  })
+
+  it('puts newly-seen ids ahead of the retained saved order', () => {
+    expect(reconcileOrderIds(['fresh', 'a', 'b'], ['b', 'a', 'gone'])).toEqual(['fresh', 'b', 'a']) // 'gone' is dropped
+  })
+})
+
+describe('sameIds', () => {
+  it('is true only for identical ordered lists', () => {
+    expect(sameIds(['a', 'b'], ['a', 'b'])).toBe(true)
+    expect(sameIds(['a', 'b'], ['b', 'a'])).toBe(false) // same members, different order
+    expect(sameIds(['a'], ['a', 'b'])).toBe(false) // shared prefix, different length
+  })
+})
--- a/apps/desktop/src/app/chat/sidebar/order.ts
+++ b/apps/desktop/src/app/chat/sidebar/order.ts
@@ -21,3 +21,50 @@ export function resolveManualSessionOrderIds(currentIds: string[], orderIds: str

  return reconcileFreshFirst(currentIds, orderIds)
 }
+
+/**
+ * Reorder `items` by `orderIds`; items missing from the order surface first.
+ * Ids in `orderIds` that match no item, or that repeat, are skipped.
+ */
+export function orderByIds<T>(items: T[], getId: (item: T) => string, orderIds: string[]): T[] {
+  if (!orderIds.length) {
+    return items
+  }
+
+  const byId = new Map(items.map(item => [getId(item), item]))
+  const seen = new Set<string>()
+  const ordered: T[] = []
+
+  for (const id of orderIds) {
+    // Test membership, not truthiness, so a falsy item keeps its slot; `seen`
+    // stops a saved order with repeated ids from emitting an item twice.
+    if (byId.has(id) && !seen.has(id)) {
+      ordered.push(byId.get(id) as T)
+      seen.add(id)
+    }
+  }
+
+  // Items missing from the persisted order are new. Callers pass newest-first
+  // lists, so surface them on TOP — buried, a new session looks like it never showed up.
+  const fresh = items.filter(item => !seen.has(getId(item)))
+
+  return fresh.length ? [...fresh, ...ordered] : ordered
+}
+
+/**
+ * Reconcile a persisted order against the live id set (fresh-first).
+ * With no live ids the result is empty; with no saved order it is `currentIds`.
+ */
+export function reconcileOrderIds(currentIds: string[], orderIds: string[]): string[] {
+  if (currentIds.length === 0) {
+    return []
+  }
+
+  // Nothing persisted yet: the live order is returned as-is.
+  return orderIds.length === 0 ? currentIds : reconcileFreshFirst(currentIds, orderIds)
+}
+
+/** True when both id lists have equal length and match position by position. */
+export function sameIds(left: string[], right: string[]): boolean {
+  return left.length === right.length && !left.some((id, position) => id !== right[position])
+}
--- a/apps/desktop/src/app/chat/sidebar/reorderable-list.tsx
+++ b/apps/desktop/src/app/chat/sidebar/reorderable-list.tsx
@@ -0,0 +1,81 @@
+import type { useSensors } from '@dnd-kit/core';
+import { closestCenter, DndContext, type DragEndEvent } from '@dnd-kit/core'
+import { arrayMove, SortableContext, useSortable, verticalListSortingStrategy } from '@dnd-kit/sortable'
+import type * as React from 'react'
+
+// Sidebar reordering is a strictly vertical list. useSortableBindings already
+// renders the dragged item's transform Y-only (no x, no scale); this config
+// additionally stops dnd-kit's auto-scroll from pulling the rail — or the
+// window — sideways when the pointer nears an edge.
+const reorderAutoScroll = { threshold: { x: 0, y: 0.2 } }
+
+// A self-contained reorderable list that is safe to nest. Each instance owns its
+// own DndContext, so a drag can only collide with THIS list's items — it works at
+// any depth (repos, worktrees, sessions) without leaking into the lists around or
+// inside it. Give every item useSortableBindings(id); the list reports the new id
+// order through onReorder and the caller persists it. This is the one generic
+// primitive behind every reorderable surface in the sidebar.
+export function ReorderableList({
+  children,
+  ids,
+  onReorder,
+  sensors
+}: {
+  children: React.ReactNode
+  ids: string[]
+  onReorder: (ids: string[]) => void
+  sensors?: ReturnType<typeof useSensors>
+}) {
+  const handleDragEnd = (event: DragEndEvent) => {
+    // dnd-kit restores focus only after keyboard drags. A pointer drop leaves
+    // :focus on the grab handle, which pins any focus-within grabber/affordance
+    // reveal "on" — so release that focus and let the row return to its resting
+    // state once the pointer moves away.
+    if (!(event.activatorEvent instanceof KeyboardEvent)) {
+      ;(document.activeElement as HTMLElement | null)?.blur()
+    }
+
+    const { active, over } = event
+
+    if (over && active.id !== over.id) {
+      const fromIndex = ids.indexOf(String(active.id))
+      const toIndex = ids.indexOf(String(over.id))
+
+      if (fromIndex !== -1 && toIndex !== -1) {
+        onReorder(arrayMove(ids, fromIndex, toIndex))
+      }
+    }
+  }
+
+  return (
+    <DndContext
+      autoScroll={reorderAutoScroll}
+      collisionDetection={closestCenter}
+      onDragEnd={handleDragEnd}
+      sensors={sensors}
+    >
+      <SortableContext items={ids} strategy={verticalListSortingStrategy}>
+        {children}
+      </SortableContext>
+    </DndContext>
+  )
+}
+
+export function useSortableBindings(id: string) {
+  const sortable = useSortable({ id })
+  // Uniform vertical list: translate on Y only. Skipping x and the scaleX/scaleY
+  // that CSS.Transform.toString would emit stops sideways drift and resizing.
+  const style = {
+    transform: sortable.transform ? `translate3d(0px, ${sortable.transform.y}px, 0)` : undefined,
+    transition: sortable.isDragging ? undefined : sortable.transition,
+    willChange: sortable.isDragging ? 'transform' : undefined
+  }
+
+  return {
+    dragging: sortable.isDragging,
+    dragHandleProps: { ...sortable.attributes, ...sortable.listeners },
+    ref: sortable.setNodeRef,
+    reorderable: true as const,
+    style
+  }
+}
--- a/apps/desktop/src/app/chat/sidebar/section-states.tsx
+++ b/apps/desktop/src/app/chat/sidebar/section-states.tsx
@@ -0,0 +1,52 @@
+import { Button } from '@/components/ui/button'
+import { Codicon } from '@/components/ui/codicon'
+import { Skeleton } from '@/components/ui/skeleton'
+import { useI18n } from '@/i18n'
+import { cn } from '@/lib/utils'
+
+export function SidebarSessionSkeletons() {
+  // Five placeholder rows with differing label widths. The wrapper below is
+  // aria-hidden, so the skeleton is not exposed to assistive technology.
+  const rows = ['w-32', 'w-40', 'w-28', 'w-36', 'w-24'].map((width, index) => (
+    <div
+      className="grid min-h-[1.625rem] grid-cols-[minmax(0,1fr)_1.375rem] items-center rounded-md pl-2"
+      key={`${width}-${index}`}
+    >
+      <Skeleton className={cn('h-3 rounded-sm', width)} />
+      <Skeleton className="mx-auto size-3.5 rounded-sm opacity-60" />
+    </div>
+  ))
+
+  return <div aria-hidden="true" className="grid gap-px">{rows}</div>
+}
+
+export function SidebarBlankState({ onNewProject }: { onNewProject: () => void }) {
+  const { sidebar } = useI18n().t
+
+  // Centered "no sessions" message with a single new-project button.
+  return (
+    <div className="grid min-h-0 flex-1 place-items-center px-4 text-center">
+      <div className="flex flex-col items-center gap-2">
+        <Codicon className="text-(--ui-text-quaternary)" name="root-folder" size="1.25rem" />
+        <p className="text-xs text-(--ui-text-tertiary)">{sidebar.noSessions}</p>
+        <Button className="mt-0.5 text-(--ui-text-secondary)" onClick={onNewProject} size="sm" variant="ghost">
+          <Codicon name="add" size="0.75rem" />
+          {sidebar.projects.newButton}
+        </Button>
+      </div>
+    </div>
+  )
+}
+
+export function SidebarPinnedEmptyState() {
+  const hint = useI18n().t.sidebar.shiftClickHint
+
+  return (
+    <div className="flex min-h-7 items-center gap-1.5 rounded-lg pl-2 text-[0.75rem] text-(--ui-text-tertiary)">
+      <span className="grid w-3.5 shrink-0 place-items-center text-(--ui-text-quaternary)">
+        <Codicon name="pin" size="0.75rem" />
+      </span>
+      <span>{hint}</span>
+    </div>
+  )
+}
--- a/apps/desktop/src/app/chat/sidebar/sessions-section.tsx
+++ b/apps/desktop/src/app/chat/sidebar/sessions-section.tsx
@@ -0,0 +1,379 @@
+import type { useSensors } from '@dnd-kit/core'
+import type * as React from 'react'
+import { useMemo } from 'react'
+
+import { SidebarPanelLabel } from '@/app/shell/sidebar-label'
+import { DisclosureCaret } from '@/components/ui/disclosure-caret'
+import { SidebarGroup, SidebarGroupContent } from '@/components/ui/sidebar'
+import type { HermesGitWorktree } from '@/global'
+import type { SessionInfo } from '@/hermes'
+import { flattenSessionsWithBranches } from '@/lib/session-branch-tree'
+import { cn } from '@/lib/utils'
+import { sessionPinId } from '@/store/session'
+
+import { SidebarCount } from './chrome'
+import {
+  EnteredProjectContent,
+  ProjectOverviewRow,
+  type SidebarProjectTree,
+  type SidebarSessionGroup,
+  SidebarWorkspaceGroup,
+  type SidebarWorkspaceTree
+} from './projects'
+import { ReorderableList, useSortableBindings } from './reorderable-list'
+import { SidebarSessionSkeletons } from './section-states'
+import { SidebarSessionRow } from './session-row'
+import { VirtualSessionList } from './virtual-session-list'
+
+export const VIRTUALIZE_THRESHOLD = 25 // flat lists with at least this many sessions render via VirtualSessionList
+
+interface SidebarSectionHeaderProps {
+  label: string
+  open: boolean
+  onToggle: () => void
+  action?: React.ReactNode // rendered after the label block, as the last child of the header row
+  meta?: React.ReactNode // when truthy, rendered after the label wrapped in SidebarCount
+  icon?: React.ReactNode // rendered immediately before the label
+  // When false the section can't be collapsed: the label renders static (no
+  // toggle, no caret) and the section is always open. Used for the single-
+  // project view, where collapsing one project makes no sense.
+  collapsible?: boolean
+}
+
+function SidebarSectionHeader({
+  label,
+  open,
+  onToggle,
+  action,
+  meta,
+  icon,
+  collapsible = true
+}: SidebarSectionHeaderProps) {
+  const heading = (
+    <>
+      {icon}
+      <SidebarPanelLabel>{label}</SidebarPanelLabel>
+      {meta && <SidebarCount>{meta}</SidebarCount>}
+    </>
+  )
+
+  const toggleButton = (
+    <button
+      className="group/section-label flex w-fit items-center gap-1 bg-transparent text-left leading-none"
+      onClick={onToggle}
+      type="button"
+    >
+      {heading}
+      <DisclosureCaret
+        className="text-(--ui-text-tertiary) opacity-0 transition group-hover/section-label:opacity-100"
+        open={open}
+      />
+    </button>
+  )
+
+  return (
+    <div className="group/section flex shrink-0 items-center justify-between gap-1 pb-1 pt-1.5">
+      {collapsible ? toggleButton : <div className="flex w-fit items-center gap-1 leading-none">{heading}</div>}
+      {action}
+    </div>
+  )
+}
+
+interface SidebarSessionsSectionProps {
+  label: string
+  open: boolean
+  onToggle: () => void
+  sessions: SessionInfo[] // the flat list, rendered when no project/group view applies
+  activeSessionId: null | string
+  workingSessionIdSet: Set<string>
+  onResumeSession: (sessionId: string) => void
+  onDeleteSession: (sessionId: string) => void
+  onArchiveSession: (sessionId: string) => void
+  onBranchSession?: (sessionId: string, profile?: string) => void
+  onTogglePin: (sessionId: string) => void
+  onNewSessionInWorkspace?: (path: null | string) => void
+  pinned: boolean // forwarded to every row as isPinned
+  rootClassName?: string
+  contentClassName?: string
+  emptyState: React.ReactNode
+  forceEmptyState?: boolean // shows emptyState despite content; the skeleton and an entered project still win
+  headerAction?: React.ReactNode
+  footer?: React.ReactNode // rendered after the list body, inside the collapsible content
+  groups?: SidebarSessionGroup[]
+  tree?: SidebarWorkspaceTree[] // NOTE(review): never destructured by SidebarSessionsSection — confirm still needed
+  // Project overview: when present, render a drill-in list of project rows
+  // instead of sessions. Clicking a row enters that project (onEnterProject),
+  // which then passes `projectContent` on the next render. Takes precedence
+  // over `tree` / `groups`.
+  projectOverview?: SidebarProjectTree[]
+  // Per-project preview rows (from the backend tree), keyed by project path.
+  projectOverviewPreviews?: Record<string, SessionInfo[]>
+  // True while the backend project tree is loading (overview skeleton).
+  projectsLoading?: boolean
+  onEnterProject?: (id: string) => void
+  // The entered project's flattened content: main-checkout sessions render
+  // directly (no redundant repo/branch header); only linked worktrees nest.
+  projectContent?: SidebarProjectTree
+  // Live git lanes (`git worktree list`) for repos in the entered project —
+  // a VISUAL enhancer only (empty lanes), never session membership.
+  projectRepoWorktrees?: Record<string, HermesGitWorktree[]>
+  // Live session cache used for optimistic placement inside entered-project lanes.
+  liveSessions?: SessionInfo[]
+  // Client-side optimistic eviction layer (deleted/archived ids).
+  removedSessionIds?: ReadonlySet<string>
+  activeProjectId?: null | string
+  labelMeta?: React.ReactNode
+  labelIcon?: React.ReactNode
+  // When false the section header is static (no caret/toggle) and always open.
+  collapsible?: boolean
+  sortable?: boolean // only takes effect together with onReorderSessions
+  // The flat session list is the only hand-reorderable surface (grouped/project
+  // views sort deterministically), so it owns the one ReorderableList.
+  onReorderSessions?: (ids: string[]) => void
+  // Drag-to-reorder for the project overview list (top-level projects).
+  onReorderProjects?: (ids: string[]) => void
+  // Rendered atop the entered-project body (a "back to overview" row).
+  projectBackRow?: React.ReactNode
+  dndSensors?: ReturnType<typeof useSensors>
+}
+
+export function SidebarSessionsSection({
+  label,
+  open,
+  onToggle,
+  sessions,
+  activeSessionId,
+  workingSessionIdSet,
+  onResumeSession,
+  onDeleteSession,
+  onArchiveSession,
+  onBranchSession,
+  onTogglePin,
+  onNewSessionInWorkspace,
+  pinned,
+  rootClassName,
+  contentClassName,
+  emptyState,
+  forceEmptyState = false,
+  headerAction,
+  footer,
+  groups,
+  projectOverview,
+  projectOverviewPreviews,
+  projectsLoading = false,
+  onEnterProject,
+  projectContent,
+  projectRepoWorktrees,
+  liveSessions,
+  removedSessionIds,
+  activeProjectId,
+  labelMeta,
+  labelIcon,
+  collapsible = true,
+  sortable = false,
+  onReorderSessions,
+  onReorderProjects,
+  projectBackRow,
+  dndSensors
+}: SidebarSessionsSectionProps) {
+  const sectionOpen = collapsible ? open : true // a non-collapsible section ignores `open` and is always expanded
+  const hasGroupedSessions = Boolean(groups?.some(group => group.sessions.length > 0))
+  // A defined project list is itself content (even an empty project should
+  // render as a drill-in row so the user can see it exists).
+  const hasProjectOverview = Boolean(projectOverview?.length)
+  const hasProjectContent = Boolean(projectContent && projectContent.sessionCount > 0)
+
+  const showEmptyState =
+    forceEmptyState || (!hasGroupedSessions && !hasProjectOverview && !hasProjectContent && sessions.length === 0)
+
+  // The flat recents/pinned list is the only place sessions reorder by hand;
+  // grouped/tree views always sort by creation date and never drag.
+  const sessionsDraggable = sortable && !!onReorderSessions
+  const displayEntries = useMemo(() => flattenSessionsWithBranches(sessions), [sessions])
+
+  const renderRow = (session: SessionInfo, draggable: boolean, branchStem?: string) => {
+    const rowProps = {
+      branchStem,
+      isPinned: pinned,
+      isSelected: session.id === activeSessionId,
+      isWorking: workingSessionIdSet.has(session.id),
+      onArchive: () => onArchiveSession(session.id),
+      onBranch: onBranchSession ? () => onBranchSession(session.id, session.profile) : undefined,
+      onDelete: () => onDeleteSession(session.id),
+      onPin: () => onTogglePin(sessionPinId(session)),
+      onResume: () => onResumeSession(session.id),
+      reorderable: draggable && !branchStem,
+      session
+    }
+
+    return draggable && !branchStem ? ( // rows carrying a branchStem always render static, even in a draggable list
+      <SortableSidebarSessionRow key={session.id} {...rowProps} />
+    ) : (
+      <SidebarSessionRow key={session.id} {...rowProps} />
+    )
+  }
+
+  // Sessions inside repos/worktrees are date-ordered and static.
+  const renderRows = (items: SessionInfo[]) =>
+    flattenSessionsWithBranches(items).map(({ branchStem, session }) => renderRow(session, false, branchStem))
+
+  const flatVirtualized =
+    !showEmptyState &&
+    !groups?.length &&
+    !projectOverview?.length &&
+    !projectContent &&
+    sessions.length >= VIRTUALIZE_THRESHOLD
+
+  // First paint into the grouped view (e.g. the app restoring the Projects tab)
+  // has flat recents in `sessions` but no tree yet. Show skeletons rather than
+  // flashing the flat session list until the overview/content/groups resolve. A
+  // background refresh keeps the prior tree, so this only fires when empty.
+  const showProjectsSkeleton =
+    projectsLoading && !hasProjectOverview && !hasProjectContent && !projectContent && !groups?.length
+
+  let inner: React.ReactNode // first matching branch wins: skeleton, entered project, empty, overview, groups, flat
+
+  if (showProjectsSkeleton) {
+    inner = <SidebarSessionSkeletons />
+  } else if (projectContent) {
+    // Entered a project: the back row is always present, then either the
+    // (overlay-aware) content or a clean empty state — never a bare spinner or a
+    // blank pane while lanes hydrate.
+    inner = (
+      <>
+        {projectBackRow}
+        {hasProjectContent ? (
+          <EnteredProjectContent
+            liveSessions={liveSessions}
+            onNewSession={onNewSessionInWorkspace}
+            project={projectContent}
+            removedSessionIds={removedSessionIds}
+            renderRows={renderRows}
+            repoWorktrees={projectRepoWorktrees}
+          />
+        ) : (
+          emptyState
+        )}
+      </>
+    )
+  } else if (showEmptyState) {
+    inner = emptyState
+  } else if (projectOverview?.length) {
+    // The model is already ordered (default sort groups explicit-before-auto;
+    // a manual drag-order, when present, wins). Render in that order and make
+    // rows drag-to-reorder when a handler is wired.
+    const projectsDraggable = projectOverview.length > 1 && !!onReorderProjects
+    const Row = projectsDraggable ? SortableProjectOverviewRow : ProjectOverviewRow
+
+    const rows = projectOverview.map(project => (
+      <Row
+        activeProjectId={activeProjectId}
+        key={project.id}
+        onEnter={onEnterProject}
+        onNewSession={onNewSessionInWorkspace}
+        previewSessions={project.path ? projectOverviewPreviews?.[project.path] : undefined}
+        project={project}
+        renderRows={renderRows}
+      />
+    ))
+
+    inner =
+      projectsDraggable && onReorderProjects ? (
+        <ReorderableList
+          ids={projectOverview.map(project => project.id)}
+          onReorder={onReorderProjects}
+          sensors={dndSensors}
+        >
+          {rows}
+        </ReorderableList>
+      ) : (
+        rows
+      )
+  } else if (groups?.length) {
+    // Profile/source groups never reorder; render them flat with static rows.
+    inner = groups.map(group => (
+      <SidebarWorkspaceGroup
+        group={group}
+        key={group.id}
+        onNewSession={onNewSessionInWorkspace}
+        renderRows={renderRows}
+      />
+    ))
+  } else if (flatVirtualized) {
+    const virtual = (
+      <VirtualSessionList
+        activeSessionId={activeSessionId}
+        className={contentClassName}
+        entries={displayEntries}
+        onArchiveSession={onArchiveSession}
+        onBranchSession={onBranchSession}
+        onDeleteSession={onDeleteSession}
+        onResumeSession={onResumeSession}
+        onTogglePin={onTogglePin}
+        pinned={pinned}
+        sortable={sessionsDraggable}
+        workingSessionIdSet={workingSessionIdSet}
+      />
+    )
+
+    inner =
+      sessionsDraggable && onReorderSessions ? (
+        <ReorderableList ids={sessions.map(s => s.id)} onReorder={onReorderSessions} sensors={dndSensors}>
+          {virtual}
+        </ReorderableList>
+      ) : (
+        virtual
+      )
+  } else if (sessionsDraggable && onReorderSessions) {
+    inner = (
+      <ReorderableList ids={sessions.map(s => s.id)} onReorder={onReorderSessions} sensors={dndSensors}>
+        {displayEntries.map(({ branchStem, session }) => renderRow(session, true, branchStem))}
+      </ReorderableList>
+    )
+  } else {
+    inner = displayEntries.map(({ branchStem, session }) => renderRow(session, false, branchStem))
+  }
+
+  // The virtualizer owns its own scroller, so suppress the wrapper's overflow
+  // to avoid a double scroll container.
+  const resolvedContentClassName = cn(contentClassName, flatVirtualized && 'overflow-y-visible')
+
+  return (
+    <SidebarGroup className={rootClassName}>
+      <SidebarSectionHeader
+        action={headerAction}
+        collapsible={collapsible}
+        icon={labelIcon}
+        label={label}
+        meta={labelMeta}
+        onToggle={onToggle}
+        open={sectionOpen}
+      />
+      {sectionOpen && (
+        <SidebarGroupContent className={resolvedContentClassName}>
+          {inner}
+          {footer}
+        </SidebarGroupContent>
+      )}
+    </SidebarGroup>
+  )
+}
+
+interface SortableSessionRowProps { // NOTE(review): renderRow also passes branchStem, onBranch, reorderable — confirm
+  session: SessionInfo // session.id doubles as the sortable id handed to useSortableBindings
+  isPinned: boolean
+  isSelected: boolean
+  isWorking: boolean
+  onArchive: () => void
+  onDelete: () => void
+  onPin: () => void
+  onResume: () => void
+}
+
+function SortableSidebarSessionRow(props: SortableSessionRowProps) {
+  return <SidebarSessionRow {...props} {...useSortableBindings(props.session.id)} /> // bindings spread last, so they win
+}
+
+function SortableProjectOverviewRow(props: React.ComponentProps<typeof ProjectOverviewRow>) {
+  return <ProjectOverviewRow {...props} {...useSortableBindings(props.project.id)} /> // bindings spread last, so they win
+}
--- a/apps/desktop/src/app/command-palette/index.tsx
+++ b/apps/desktop/src/app/command-palette/index.tsx
@@ -36,6 +36,7 @@ import {
  RefreshCw,
  Settings,
  Settings2,
+  Starmap,
  Sun,
  Terminal,
  Users,
@@ -68,7 +69,8 @@ import {
  PROFILES_ROUTE,
  sessionRoute,
  SETTINGS_ROUTE,
-  SKILLS_ROUTE
+  SKILLS_ROUTE,
+  STARMAP_ROUTE
 } from '../routes'
 import { FIELD_LABELS, SECTIONS } from '../settings/constants'
 import { fieldCopyForSchemaKey } from '../settings/field-copy'
@@ -383,7 +385,14 @@ export function CommandPalette() {
            run: go(CRON_ROUTE)
          },
          { action: 'nav.profiles', icon: Users, id: 'nav-profiles', label: t.profiles.title, run: go(PROFILES_ROUTE) },
-          { action: 'nav.agents', icon: Cpu, id: 'nav-agents', label: t.agents.title, run: go(AGENTS_ROUTE) }
+          { action: 'nav.agents', icon: Cpu, id: 'nav-agents', label: t.agents.title, run: go(AGENTS_ROUTE) },
+          {
+            icon: Starmap,
+            id: 'nav-starmap',
+            keywords: ['star map', 'memory', 'memories', 'skills', 'graph', 'learning', 'constellation'],
+            label: t.starmap.title,
+            run: go(STARMAP_ROUTE)
+          }
        ]
      },
      ...branchGroup,
--- a/apps/desktop/src/app/desktop-controller-utils.test.ts
+++ b/apps/desktop/src/app/desktop-controller-utils.test.ts
@@ -0,0 +1,31 @@
+import { describe, expect, it } from 'vitest'
+
+import type { SessionInfo } from '@/hermes'
+
+import { sameCronSignature } from './desktop-controller-utils'
+
+const session = (id: string, title: string | null): SessionInfo => ({ id, title }) as SessionInfo
+
+describe('sameCronSignature', () => {
+  it('is false when the lengths differ', () => {
+    expect(sameCronSignature([session('a', 't')], [])).toBe(false)
+    // Shorter-first is the case only the length guard catches: every() passes on a prefix.
+    expect(sameCronSignature([], [session('a', 't')])).toBe(false)
+  })
+
+  it('is true when ids and titles match in order', () => {
+    const a = [session('a', 'one'), session('b', 'two')]
+    const b = [session('a', 'one'), session('b', 'two')]
+    expect(sameCronSignature(a, b)).toBe(true)
+  })
+
+  it('is false when a title changed', () => {
+    expect(sameCronSignature([session('a', 'one')], [session('a', 'renamed')])).toBe(false)
+  })
+
+  it('is false when order differs', () => {
+    const a = [session('a', 't'), session('b', 't')]
+    const b = [session('b', 't'), session('a', 't')]
+    expect(sameCronSignature(a, b)).toBe(false)
+  })
+})
--- a/apps/desktop/src/app/desktop-controller-utils.ts
+++ b/apps/desktop/src/app/desktop-controller-utils.ts
@@ -0,0 +1,11 @@
+import type { SessionInfo } from '@/hermes'
+
+// Order-sensitive equality on each row's (id, title) pair. Lets a poll keep the
+// previous array (no atom swap, no sidebar re-render) when nothing visible moved.
+export function sameCronSignature(a: SessionInfo[], b: SessionInfo[]): boolean {
+  const sameRow = (row: SessionInfo, index: number): boolean =>
+    row.id === b[index]?.id && row.title === b[index]?.title
+
+  // Length first: `every` alone would pass when `a` is a strict prefix of `b`.
+  return a.length === b.length && a.every(sameRow)
+}
--- a/apps/desktop/src/app/desktop-controller.tsx
+++ b/apps/desktop/src/app/desktop-controller.tsx
@@ -5,8 +5,8 @@ import { Navigate, Route, Routes, useLocation, useNavigate, useParams } from 're

 import { BootFailureOverlay } from '@/components/boot-failure-overlay'
 import { DesktopInstallOverlay } from '@/components/desktop-install-overlay'
-import { DesktopOnboardingOverlay } from '@/components/desktop-onboarding-overlay'
 import { GatewayConnectingOverlay } from '@/components/gateway-connecting-overlay'
+import { DesktopOnboardingOverlay } from '@/components/onboarding'
 import { Pane, PaneMain } from '@/components/pane-shell'
 import { RemoteDisplayBanner } from '@/components/remote-display-banner'
 import { useMediaQuery } from '@/hooks/use-media-query'
@@ -15,23 +15,15 @@ import { cn } from '@/lib/utils'
 import { useSkinCommand } from '@/themes/use-skin-command'

 import { formatRefValue } from '../components/assistant-ui/directive-text'
-import { getCronJobs, getSessionMessages, listAllProfileSessions, type SessionInfo, triggerCronJob } from '../hermes'
+import { getSessionMessages, triggerCronJob } from '../hermes'
 import { type ChatMessage, chatMessageText, preserveLocalAssistantErrors, toChatMessages } from '../lib/chat-messages'
 import { storedSessionIdForNotification } from '../lib/session-ids'
-import {
-  isMessagingSource,
-  LOCAL_SESSION_SOURCE_IDS,
-  MESSAGING_SESSION_SOURCE_IDS,
-  normalizeSessionSource
-} from '../lib/session-source'
 import { latestSessionTodos } from '../lib/todos'
-import { setCronFocusJobId, setCronJobs } from '../store/cron'
+import { setCronFocusJobId } from '../store/cron'
 import {
  $fileBrowserOpen,
  $panesFlipped,
  $pinnedSessionIds,
-  $sessionsLimit,
-  bumpSessionsLimit,
  FILE_BROWSER_DEFAULT_WIDTH,
  FILE_BROWSER_MAX_WIDTH,
  FILE_BROWSER_MIN_WIDTH,
@@ -41,7 +33,6 @@ import {
  setSidebarOverlayMounted,
  SIDEBAR_DEFAULT_WIDTH,
  SIDEBAR_MAX_WIDTH,
-  SIDEBAR_SESSIONS_PAGE_SIZE,
  unpinSession
 } from '../store/layout'
 import { respondToApprovalAction } from '../store/native-notifications'
@@ -58,8 +49,6 @@ import {
  $activeGatewayProfile,
  $freshSessionRequest,
  $profileScope,
-  ALL_PROFILES,
-  normalizeProfileKey,
  refreshActiveProfile
 } from '../store/profile'
 import { $startWorkSessionRequest, followActiveSessionCwd, resolveNewSessionCwd } from '../store/projects'
@@ -71,34 +60,20 @@ import {
  $freshDraftReady,
  $gatewayState,
  $messages,
-  $messagingSessions,
  $resumeExhaustedSessionId,
  $resumeFailedSessionId,
  $selectedStoredSessionId,
  $sessions,
-  $workingSessionIds,
-  CRON_SECTION_LIMIT,
-  getRecentlySettledSessionIds,
  getRememberedSessionId,
-  mergeSessionPage,
-  MESSAGING_SECTION_LIMIT,
  sessionPinId,
  setAwaitingResponse,
  setBusy,
-  setCronSessions,
  setCurrentBranch,
  setCurrentCwd,
  setCurrentModel,
  setCurrentProvider,
  setMessages,
-  setMessagingPlatformTotals,
-  setMessagingSessions,
-  setMessagingTruncated,
-  setRememberedSessionId,
-  setSessionProfileTotals,
-  setSessions,
-  setSessionsLoading,
-  setSessionsTotal
+  setRememberedSessionId
 } from '../store/session'
 import { onSessionsChanged } from '../store/session-sync'
 import { clearSessionTodos, setSessionTodos, todoListActive } from '../store/todos'
@@ -143,6 +118,7 @@ import { usePreviewRouting } from './session/hooks/use-preview-routing'
 import { usePromptActions } from './session/hooks/use-prompt-actions'
 import { useRouteResume } from './session/hooks/use-route-resume'
 import { useSessionActions } from './session/hooks/use-session-actions'
+import { useSessionListActions } from './session/hooks/use-session-list-actions'
 import { useSessionStateCache } from './session/hooks/use-session-state-cache'
 import { AppShell } from './shell/app-shell'
 import { useOverlayRouting } from './shell/hooks/use-overlay-routing'
@@ -158,6 +134,7 @@ const AgentsView = lazy(async () => ({ default: (await import('./agents')).Agent
 const ArtifactsView = lazy(async () => ({ default: (await import('./artifacts')).ArtifactsView }))
 const CommandCenterView = lazy(async () => ({ default: (await import('./command-center')).CommandCenterView }))
 const CronView = lazy(async () => ({ default: (await import('./cron')).CronView }))
+const StarmapView = lazy(async () => ({ default: (await import('./starmap')).StarmapView }))
 const MessagingView = lazy(async () => ({ default: (await import('./messaging')).MessagingView }))
 const ProfilesView = lazy(async () => ({ default: (await import('./profiles')).ProfilesView }))
 const SettingsView = lazy(async () => ({ default: (await import('./settings')).SettingsView }))
@@ -169,51 +146,6 @@ const SkillsView = lazy(async () => ({ default: (await import('./skills')).Skill
 // this cadence while the app is open + visible so new runs surface promptly
 // instead of waiting for the next user-triggered refreshSessions().
 const CRON_POLL_INTERVAL_MS = 30_000
-// The recents list is local-only: cron rows have their own section, and each
-// messaging platform (telegram, discord, …) is fetched separately into its own
-// self-managed sidebar section (refreshMessagingSessions). Excluding both here
-// keeps "Load more" paging through interactive local chats instead of
-// interleaving gateway threads that bury them.
-const SIDEBAR_EXCLUDED_SOURCES = ['cron', 'subagent', 'tool', ...MESSAGING_SESSION_SOURCE_IDS]
-// The messaging slice is the inverse: drop cron + every local source so only
-// external-platform conversations remain, then split per platform in the UI.
-const MESSAGING_EXCLUDED_SOURCES = ['cron', ...LOCAL_SESSION_SOURCE_IDS]
-
-// Cheap signature compare so the poll only swaps the atom (and re-renders the
-// sidebar) when the visible cron rows actually changed.
-function sameCronSignature(a: SessionInfo[], b: SessionInfo[]): boolean {
-  if (a.length !== b.length) {
-    return false
-  }
-
-  return a.every((session, i) => session.id === b[i]?.id && session.title === b[i]?.title)
-}
-
-// Rows a session refresh must preserve even if the aggregator omits them:
-// in-flight first turns (message_count 0), pinned rows aged off the page, the
-// actively-viewed chat (its "working" flag clears a beat before the aggregator
-// sees the persisted row), and sessions whose turn just settled (same race, but
-// for a chat the user has already navigated away from). Pass `scope` to only
-// keep the active row when it belongs to the profile being paged.
-function sessionsToKeep(scope?: string): Set<string> {
-  const keep = new Set<string>([
-    ...$workingSessionIds.get(),
-    ...$pinnedSessionIds.get(),
-    ...getRecentlySettledSessionIds()
-  ])
-
-  const active = $selectedStoredSessionId.get()
-
-  if (active) {
-    const session = scope ? $sessions.get().find(s => s.id === active) : null
-
-    if (!scope || !session || normalizeProfileKey(session.profile) === scope) {
-      keep.add(active)
-    }
-  }
-
-  return keep
-}

 export function DesktopController() {
  const queryClient = useQueryClient()
@@ -222,7 +154,6 @@ export function DesktopController() {

  const busyRef = useRef(false)
  const creatingSessionRef = useRef(false)
-  const refreshSessionsRequestRef = useRef(0)

  const gatewayState = useStore($gatewayState)
  const activeSessionId = useStore($activeSessionId)
@@ -262,6 +193,7 @@ export function DesktopController() {
    openCommandCenterSection,
    profilesOpen,
    settingsOpen,
+    starmapOpen,
    toggleCommandCenter
  } = useOverlayRouting()

@@ -426,126 +358,13 @@ export function DesktopController() {
    }
  }, [])

-  // Cron-job sessions as their own list (latest N). Independent of the recents
-  // page so the two never compete for slots. Cheap + bounded. Kept (even though
-  // the sidebar now lists cron *jobs*, not run sessions) so a pinned cron run
-  // still resolves into the Pinned section via sessionByAnyId.
-  const refreshCronSessions = useCallback(async () => {
-    try {
-      const { sessions } = await listAllProfileSessions(CRON_SECTION_LIMIT, 1, 'exclude', 'recent', 'all', {
-        source: 'cron'
-      })
-
-      setCronSessions(prev => (sameCronSignature(prev, sessions) ? prev : sessions))
-    } catch {
-      // Non-fatal: the cron section just stays empty/stale.
-    }
-  }, [])
-
-  // Messaging-platform sessions as their own slice, fetched separately from
-  // local recents so each platform renders a self-managed section and never
-  // competes with local chats for the recents page budget. One combined fetch
-  // seeds every platform; the sidebar splits the rows per source.
-  const refreshMessagingSessions = useCallback(async () => {
-    try {
-      const result = await listAllProfileSessions(MESSAGING_SECTION_LIMIT, 1, 'exclude', 'recent', 'all', {
-        excludeSources: MESSAGING_EXCLUDED_SOURCES
-      })
-
-      // Drop any non-messaging source the broad exclude didn't catch (custom
-      // sources) — those stay in local recents, not a platform section.
-      const rows = result.sessions.filter(s => isMessagingSource(s.source))
-
-      setMessagingSessions(prev => (sameCronSignature(prev, rows) ? prev : rows))
-      // Hit the cap → at least one platform may have more on disk than loaded,
-      // so platform sections offer their own per-platform "load more".
-      setMessagingTruncated(result.sessions.length >= MESSAGING_SECTION_LIMIT)
-    } catch {
-      // Non-fatal: the messaging sections just stay empty/stale.
-    }
-  }, [])
-
-  // Page a single platform's section independently (mirrors the per-profile
-  // pager): fetch that source's next window and merge it back in place, leaving
-  // every other platform's rows untouched. Resolves the platform's exact total.
-  const loadMoreMessagingForPlatform = useCallback(async (platform: string) => {
-    const inPlatform = (s: SessionInfo) => normalizeSessionSource(s.source) === platform
-    const loaded = $messagingSessions.get().filter(inPlatform).length
-
-    const result = await listAllProfileSessions(loaded + SIDEBAR_SESSIONS_PAGE_SIZE, 1, 'exclude', 'recent', 'all', {
-      source: platform
-    })
-
-    const incoming = result.sessions.filter(s => normalizeSessionSource(s.source) === platform)
-
-    setMessagingSessions(prev => [
-      ...prev.filter(s => !inPlatform(s)),
-      ...mergeSessionPage(prev.filter(inPlatform), incoming, sessionsToKeep())
-    ])
-
-    const total = result.total ?? incoming.length
-    setMessagingPlatformTotals(prev => ({ ...prev, [platform]: Math.max(total, incoming.length) }))
-  }, [])
-
-  // Cron *jobs* drive the sidebar "Cron jobs" section. Jobs are created
-  // synchronously (agent tool call or the cron UI), so refreshing here right
-  // after an agent turn surfaces a new job immediately; the interval poll keeps
-  // next-run/state fresh as the scheduler advances them.
-  const refreshCronJobs = useCallback(async () => {
-    try {
-      const jobs = await getCronJobs()
-
-      setCronJobs(jobs)
-    } catch {
-      // Non-fatal: the cron section just keeps its last-known jobs.
-    }
-  }, [])
-
-  const refreshSessions = useCallback(async () => {
-    const requestId = refreshSessionsRequestRef.current + 1
-    refreshSessionsRequestRef.current = requestId
-    setSessionsLoading(true)
-
-    try {
-      const limit = $sessionsLimit.get()
-
-      // Require at least one message so abandoned/empty "Untitled" drafts (one
-      // was created per TUI/desktop launch before the lazy-create fix) don't
-      // clutter the sidebar.
-      // Unified cross-profile list (served read-only off each profile's
-      // state.db; no per-profile backend is spawned). Single-profile users get
-      // the same rows tagged profile="default". Cron sessions are excluded here
-      // and fetched separately (refreshCronSessions) so the scheduler's
-      // always-newest rows can't consume the recents page budget.
-      // Scope the fetch to the active profile (not always 'all') so a profile
-      // with few recent sessions isn't windowed out of the cross-profile
-      // recency page — the empty-history-on-profile-switch bug.
-      const sessionProfile = profileScope === ALL_PROFILES ? 'all' : profileScope
-
-      const result = await listAllProfileSessions(limit, 1, 'exclude', 'recent', sessionProfile, {
-        excludeSources: SIDEBAR_EXCLUDED_SOURCES
-      })
-
-      if (refreshSessionsRequestRef.current === requestId) {
-        setSessions(prev => mergeSessionPage(prev, result.sessions, sessionsToKeep()))
-        setSessionsTotal(typeof result.total === 'number' ? result.total : result.sessions.length)
-        setSessionProfileTotals(result.profile_totals ?? {})
-      }
-    } finally {
-      if (refreshSessionsRequestRef.current === requestId) {
-        setSessionsLoading(false)
-      }
-    }
-
-    void refreshCronSessions()
-    void refreshCronJobs()
-    void refreshMessagingSessions()
-  }, [profileScope, refreshCronSessions, refreshCronJobs, refreshMessagingSessions])
-
-  const loadMoreSessions = useCallback(async () => {
-    bumpSessionsLimit()
-    await refreshSessions()
-  }, [refreshSessions])
+  const {
+    loadMoreMessagingForPlatform,
+    loadMoreSessions,
+    loadMoreSessionsForProfile,
+    refreshCronJobs,
+    refreshSessions
+  } = useSessionListActions({ profileScope })

  // Another window mutated the shared session list (e.g. a chat started in the
  // pop-out). Re-pull so the sidebar reflects it. Pop-outs have no sidebar, so
@@ -558,28 +377,6 @@ export function DesktopController() {
    return onSessionsChanged(() => void refreshSessions().catch(() => undefined))
  }, [refreshSessions])

-  // ALL-profiles view pages one profile at a time: fetch that profile's next
-  // page and merge it in place, leaving every other profile's rows untouched.
-  const loadMoreSessionsForProfile = useCallback(async (profile: string) => {
-    const key = normalizeProfileKey(profile)
-    const inKey = (s: SessionInfo) => normalizeProfileKey(s.profile) === key
-    const loaded = $sessions.get().filter(inKey).length
-
-    const result = await listAllProfileSessions(loaded + SIDEBAR_SESSIONS_PAGE_SIZE, 1, 'exclude', 'recent', key, {
-      excludeSources: SIDEBAR_EXCLUDED_SOURCES
-    })
-
-    const keep = sessionsToKeep(key)
-
-    setSessions(prev => [
-      ...prev.filter(s => !inKey(s)),
-      ...mergeSessionPage(prev.filter(inKey), result.sessions, keep)
-    ])
-
-    const total = result.profile_totals?.[key] ?? result.total ?? result.sessions.length
-    setSessionProfileTotals(prev => ({ ...prev, [key]: Math.max(total, result.sessions.length) }))
-  }, [])
-
  const toggleSelectedPin = useCallback(() => {
    const sessionId = $selectedStoredSessionId.get()

@@ -1117,9 +914,7 @@ export function DesktopController() {
  // layer) so pane resize handles still paint above it. Terminals own their state
  // (incl. a snapshotted cwd) independent of the session, so switching sessions
  // never rebuilds or closes them; toggling the pane never rebuilds the shells.
-  const mainOverlays = (
-    <PersistentTerminal onAddSelectionToChat={composer.addTerminalSelectionAttachment} />
-  )
+  const mainOverlays = <PersistentTerminal onAddSelectionToChat={composer.addTerminalSelectionAttachment} />

  const overlays = (
    <>
@@ -1201,6 +996,12 @@ export function DesktopController() {
          <ProfilesView onClose={closeOverlayToPreviousRoute} />
        </Suspense>
      )}
+
+      {starmapOpen && (
+        <Suspense fallback={null}>
+          <StarmapView onClose={closeOverlayToPreviousRoute} />
+        </Suspense>
+      )}
    </>
  )

--- a/apps/desktop/src/app/floating-hud.ts
+++ b/apps/desktop/src/app/floating-hud.ts
@@ -6,7 +6,11 @@ export const HUD_POSITION = 'fixed left-1/2 top-3 -translate-x-1/2'

 // Matches the app's borderless-overlay surface (dialog, keybind panel, …):
 // hairline `--stroke-nous` paired with the soft `--shadow-nous` float.
-export const HUD_SURFACE = 'rounded-xl border border-(--stroke-nous) bg-(--ui-chat-bubble-background) shadow-nous'
+// `no-drag`: these HUDs overlap the titlebar's `[-webkit-app-region:drag]` band
+// (app-shell.tsx), which wins hit-testing over DOM regardless of z-index — so
+// without it the top of the surface (the search input) swallows clicks.
+export const HUD_SURFACE =
+  'rounded-xl border border-(--stroke-nous) bg-(--ui-chat-bubble-background) shadow-nous [-webkit-app-region:no-drag]'

 // One row/text size for both HUDs (compact — two notches under `text-sm`).
 export const HUD_TEXT = 'text-xs'
--- a/apps/desktop/src/app/overlays/panel.tsx
+++ b/apps/desktop/src/app/overlays/panel.tsx
@@ -3,6 +3,7 @@ import type { ReactNode } from 'react'
 import { Button } from '@/components/ui/button'
 import { Codicon } from '@/components/ui/codicon'
 import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigger } from '@/components/ui/dropdown-menu'
+import { RowButton } from '@/components/ui/row-button'
 import { SearchField } from '@/components/ui/search-field'
 import { translateNow } from '@/i18n'
 import { cn } from '@/lib/utils'
@@ -162,10 +163,9 @@ export function PanelListRow({
      )}
      data-panel-row={rowKey}
    >
-      <button
+      <RowButton
        className="flex h-full min-w-0 flex-1 items-center gap-2 rounded-md pl-2 pr-1 text-left"
        onClick={onSelect}
-        type="button"
      >
        {lead ??
          (dotClassName ? (
@@ -174,7 +174,7 @@ export function PanelListRow({
            <Codicon className="shrink-0 text-muted-foreground/55" name={icon} size="0.85rem" />
          ) : null)}
        <span className="min-w-0 flex-1 truncate font-medium text-foreground/85">{title}</span>
-      </button>
+      </RowButton>
      {meta ? <span className="shrink-0 pr-2 text-[0.62rem] tabular-nums text-muted-foreground/45">{meta}</span> : null}
      {menu ? <div className="shrink-0 pr-1">{menu}</div> : null}
    </div>
--- a/apps/desktop/src/app/routes.ts
+++ b/apps/desktop/src/app/routes.ts
@@ -8,6 +8,7 @@ export const ARTIFACTS_ROUTE = '/artifacts'
 export const CRON_ROUTE = '/cron'
 export const PROFILES_ROUTE = '/profiles'
 export const AGENTS_ROUTE = '/agents'
+export const STARMAP_ROUTE = '/starmap'

 export type AppView =
  | 'agents'
@@ -19,6 +20,7 @@ export type AppView =
  | 'profiles'
  | 'settings'
  | 'skills'
+  | 'starmap'

 export type AppRouteId =
  | 'agents'
@@ -30,6 +32,7 @@ export type AppRouteId =
  | 'profiles'
  | 'settings'
  | 'skills'
+  | 'starmap'

 export interface AppRoute {
  id: AppRouteId
@@ -46,7 +49,8 @@ export const APP_ROUTES = [
  { id: 'artifacts', path: ARTIFACTS_ROUTE, view: 'artifacts' },
  { id: 'cron', path: CRON_ROUTE, view: 'cron' },
  { id: 'profiles', path: PROFILES_ROUTE, view: 'profiles' },
-  { id: 'agents', path: AGENTS_ROUTE, view: 'agents' }
+  { id: 'agents', path: AGENTS_ROUTE, view: 'agents' },
+  { id: 'starmap', path: STARMAP_ROUTE, view: 'starmap' }
 ] as const satisfies readonly AppRoute[]

 const APP_VIEW_BY_PATH = new Map<string, AppView>(APP_ROUTES.map(route => [route.path, route.view]))
@@ -55,7 +59,14 @@ const RESERVED_PATHS: ReadonlySet<string> = new Set(APP_ROUTES.map(route => rout
 // Views that render as a full-screen modal card (OverlayView) over the shell.
 // While one is open the app's titlebar control clusters must hide so they don't
 // bleed over the overlay (they sit at a higher z-index than the overlay card).
-export const OVERLAY_VIEWS: ReadonlySet<AppView> = new Set(['agents', 'command-center', 'cron', 'profiles', 'settings'])
+export const OVERLAY_VIEWS: ReadonlySet<AppView> = new Set([
+  'agents',
+  'command-center',
+  'cron',
+  'profiles',
+  'settings',
+  'starmap'
+])

 export function isOverlayView(view: AppView): boolean {
  return OVERLAY_VIEWS.has(view)
--- a/apps/desktop/src/app/session/hooks/use-message-stream/gateway-event.ts
+++ b/apps/desktop/src/app/session/hooks/use-message-stream/gateway-event.ts
@@ -1,34 +1,16 @@
 import type { QueryClient } from '@tanstack/react-query'
-import { type MutableRefObject, useCallback, useEffect, useRef } from 'react'
+import { type MutableRefObject, useCallback } from 'react'

 import { writeAgentTerminalChunk } from '@/app/right-sidebar/terminal/agent-terminal-stream'
-import { closeAgentTerminalByProc } from '@/app/right-sidebar/terminal/terminals'
 import { readActiveTerminal } from '@/app/right-sidebar/terminal/buffer'
+import { closeAgentTerminalByProc } from '@/app/right-sidebar/terminal/terminals'
 import { translateNow } from '@/i18n'
-import {
-  appendAssistantTextPart,
-  appendReasoningPart,
-  assistantTextPart,
-  type ChatMessage,
-  type ChatMessagePart,
-  chatMessageText,
-  type GatewayEventPayload,
-  reasoningPart,
-  renderMediaTags,
-  textPart,
-  upsertToolPart
-} from '@/lib/chat-messages'
+import { type GatewayEventPayload, textPart } from '@/lib/chat-messages'
 import { coerceGatewayText, coerceThinkingText, normalizePersonalityValue } from '@/lib/chat-runtime'
 import { playCompletionSound } from '@/lib/completion-sound'
 import { gatewayEventRequiresSessionId } from '@/lib/gateway-events'
-import {
-  dedupeGeneratedImageEchoesInParts,
-  generatedImageEchoSources,
-  stripGeneratedImageEchoes
-} from '@/lib/generated-images'
 import { triggerHaptic } from '@/lib/haptics'
 import { isProviderSetupErrorMessage } from '@/lib/provider-setup-errors'
-import { parseTodos } from '@/lib/todos'
 import { clearClarifyRequest, setClarifyRequest } from '@/store/clarify'
 import { setSessionCompacting } from '@/store/compaction'
 import { refreshBackgroundProcesses } from '@/store/composer-status'
@@ -54,674 +36,61 @@ import {
  setTurnStartedAt,
  setYoloActive
 } from '@/store/session'
-import { broadcastSessionsChanged } from '@/store/session-sync'
 import { clearSessionSubagents, pruneDelegateFallbackSubagents, upsertSubagent } from '@/store/subagents'
-import { setSessionTodos } from '@/store/todos'
 import { recordToolDiff } from '@/store/tool-diffs'
 import { notifyWorkspaceChanged, toolMayMutateFiles } from '@/store/workspace-events'
 import type { RpcEvent } from '@/types/hermes'

-import type { ClientSessionState } from '../../types'
+import type { ClientSessionState } from '../../../types'

-interface MessageStreamOptions {
+import { hasSessionInfoStatePatch, sessionInfoStatePatch, SUBAGENT_EVENT_TYPES, toTodoPayload } from './utils'
+
+interface GatewayEventDeps {
  activeSessionIdRef: MutableRefObject<string | null>
-  hydrateFromStoredSession: (
-    attempts?: number,
-    storedSessionId?: string | null,
-    runtimeSessionId?: string | null
-  ) => Promise<void>
+  compactedTurnRef: MutableRefObject<Set<string>>
+  lastCwdInfoSessionRef: MutableRefObject<string | null>
+  nativeSubagentSessionsRef: MutableRefObject<Set<string>>
+  appendAssistantDelta: (sessionId: string, delta: string) => void
+  appendReasoningDelta: (sessionId: string, delta: string, replace?: boolean) => void
+  completeAssistantMessage: (sessionId: string, text: string) => void
+  failAssistantMessage: (sessionId: string, errorMessage: string) => void
+  flushQueuedDeltas: (sessionId?: string) => void
  queryClient: QueryClient
  refreshHermesConfig: () => Promise<void>
-  refreshSessions: () => Promise<void>
-  sessionStateByRuntimeIdRef: MutableRefObject<Map<string, ClientSessionState>>
+  sessionInterrupted: (sessionId: string) => boolean
  updateSessionState: (
    sessionId: string,
    updater: (state: ClientSessionState) => ClientSessionState,
    storedSessionId?: string | null
  ) => ClientSessionState
+  upsertToolCall: (
+    sessionId: string,
+    payload: GatewayEventPayload | undefined,
+    phase: 'running' | 'complete',
+    sourceEventType?: string
+  ) => void
 }

-interface QueuedStreamDeltas {
-  assistant: string
-  reasoning: string
-}
-
-type SessionRuntimeStatePatch = Partial<
-  Pick<
-    ClientSessionState,
-    'branch' | 'cwd' | 'fast' | 'model' | 'personality' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo'
-  >
->
-
-function sessionInfoStatePatch(payload: GatewayEventPayload | undefined): SessionRuntimeStatePatch {
-  const patch: SessionRuntimeStatePatch = {}
-
-  if (typeof payload?.model === 'string') {
-    patch.model = payload.model || ''
-  }
-
-  if (typeof payload?.provider === 'string') {
-    patch.provider = payload.provider || ''
-  }
-
-  if (typeof payload?.cwd === 'string') {
-    patch.cwd = payload.cwd
-  }
-
-  if (typeof payload?.branch === 'string') {
-    patch.branch = payload.branch
-  }
-
-  if (typeof payload?.personality === 'string') {
-    patch.personality = normalizePersonalityValue(payload.personality)
-  }
-
-  if (typeof payload?.reasoning_effort === 'string') {
-    patch.reasoningEffort = payload.reasoning_effort
-  }
-
-  if (typeof payload?.service_tier === 'string') {
-    patch.serviceTier = payload.service_tier
-  }
-
-  if (typeof payload?.fast === 'boolean') {
-    patch.fast = payload.fast
-  }
-
-  if (typeof payload?.yolo === 'boolean') {
-    patch.yolo = payload.yolo
-  }
-
-  return patch
-}
-
-function hasSessionInfoStatePatch(patch: SessionRuntimeStatePatch): boolean {
-  return Object.keys(patch).length > 0
-}
-
-// Minimum gap between two assistant-text flushes during a stream. Was 16ms
-// (rAF only), which at typical LLM token rates of ~30-80 tok/sec meant every
-// token got its own React commit + Streamdown markdown re-parse, scaling
-// linearly with the growing last-block length. Bumping to 33ms lets ~2 tokens
-// batch into one commit at 60 tok/sec without introducing visible lag on the
-// streaming text (still 30 fps of visible text growth). Big perceived
-// smoothness win on long messages with big trailing paragraphs; see
-// `scripts/profile-typing-lag.md` for the measurement work behind this.
-const STREAM_DELTA_FLUSH_MS = 33
-
-// Gateway/provider failures sometimes arrive as message.complete text instead
-// of an explicit error event. Treat matches as inline assistant errors so they
-// persist like real error events and don't get erased by hydrate fallback.
-const COMPLETION_ERROR_PATTERNS = [
-  /^API call failed after \d+ retries:/i,
-  /^HTTP\s+\d{3}\b/i,
-  /^(Provider|Gateway)\s+error:/i
-]
-
-function completionErrorText(finalText: string): string | null {
-  const text = finalText.trim()
-
-  return text && COMPLETION_ERROR_PATTERNS.some(re => re.test(text)) ? text : null
-}
-
-const SUBAGENT_EVENT_TYPES = new Set([
-  'subagent.spawn_requested',
-  'subagent.start',
-  'subagent.thinking',
-  'subagent.tool',
-  'subagent.progress',
-  'subagent.complete'
-])
-
-// Anonymous progress events that carry todos but no name still belong to the
-// todo stream; named todo events are obviously routed there too.
-function toTodoPayload(payload: GatewayEventPayload | undefined): GatewayEventPayload | undefined {
-  if (!payload) {
-    return undefined
-  }
-
-  const isTodo = payload.name === 'todo' || (!payload.name && Object.hasOwn(payload, 'todos'))
-
-  return isTodo ? { ...payload, name: 'todo', tool_id: payload.tool_id || 'todo-live' } : undefined
-}
-
-function asRecord(value: unknown): Record<string, unknown> {
-  return value && typeof value === 'object' && !Array.isArray(value) ? (value as Record<string, unknown>) : {}
-}
-
-function parseMaybeRecord(value: unknown): Record<string, unknown> {
-  if (typeof value === 'string') {
-    try {
-      return asRecord(JSON.parse(value))
-    } catch {
-      return {}
-    }
-  }
-
-  return asRecord(value)
-}
-
-const firstString = (...candidates: unknown[]): string => {
-  for (const v of candidates) {
-    if (typeof v === 'string' && v) {
-      return v
-    }
-  }
-
-  return ''
-}
-
-function delegateTaskPayloads(
-  payload: GatewayEventPayload | undefined,
-  phase: 'running' | 'complete',
-  sourceEventType?: string
-): Record<string, unknown>[] {
-  if (payload?.name !== 'delegate_task') {
-    return []
-  }
-
-  const args = parseMaybeRecord(payload.args ?? payload.input)
-  const result = parseMaybeRecord(payload.result)
-  const rawTasks = Array.isArray(args.tasks) ? args.tasks : []
-  const tasks = rawTasks.length ? rawTasks.map(parseMaybeRecord) : [args]
-  const status = phase === 'complete' ? (payload.error ? 'failed' : 'completed') : 'running'
-  const toolId = payload.tool_id || payload.tool_call_id || payload.id || 'delegate_task'
-  const progressText = firstString(payload.preview, payload.message, payload.context)
-
-  const eventType =
-    phase === 'complete'
-      ? 'subagent.complete'
-      : sourceEventType === 'tool.start'
-        ? 'subagent.start'
-        : 'subagent.progress'
-
-  return tasks.map((task, index) => {
-    const goal = firstString(task.goal, args.goal, payload.context) || 'Delegated task'
-    const summary = firstString(result.summary, payload.summary, payload.message)
-
-    return {
-      depth: 0,
-      duration_seconds: payload.duration_s,
-      goal,
-      status,
-      subagent_id: `delegate-tool:${toolId}:${index}`,
-      summary: summary || undefined,
-      task_count: tasks.length,
-      task_index: index,
-      text: eventType === 'subagent.progress' ? progressText || goal : undefined,
-      tool_name: eventType === 'subagent.start' ? 'delegate_task' : undefined,
-      tool_preview: eventType === 'subagent.start' ? progressText : undefined,
-      toolsets: Array.isArray(task.toolsets) ? task.toolsets : Array.isArray(args.toolsets) ? args.toolsets : [],
-      event_type: eventType,
-      output_tail:
-        phase === 'complete' && summary
-          ? [{ is_error: Boolean(payload.error), preview: summary, tool: 'delegate_task' }]
-          : undefined
-    }
-  })
-}
-
-export function useMessageStream({
-  activeSessionIdRef,
-  hydrateFromStoredSession,
-  queryClient,
-  refreshHermesConfig,
-  refreshSessions,
-  sessionStateByRuntimeIdRef,
-  updateSessionState
-}: MessageStreamOptions) {
-  const sessionInterrupted = useCallback(
-    (sessionId: string) => sessionStateByRuntimeIdRef.current.get(sessionId)?.interrupted ?? false,
-    [sessionStateByRuntimeIdRef]
-  )
-
-  // Patch the in-flight assistant message (or seed it). Centralises the
-  // streamId/groupId bookkeeping every event callback would otherwise repeat.
-  const mutateStream = useCallback(
-    (
-      sessionId: string,
-      transform: (parts: ChatMessagePart[], message: ChatMessage) => ChatMessagePart[],
-      seed: () => ChatMessagePart[],
-      opts: {
-        pending?: (message: ChatMessage) => boolean
-      } = {}
-    ) => {
-      const apply = () => {
-        updateSessionState(sessionId, state => {
-          // After a stop, drop any late deltas / tool events for the
-          // cancelled turn so they don't keep growing the (now finalized)
-          // assistant bubble or, worse, seed a brand-new bubble that
-          // appears to belong to the next user message.
-          if (state.interrupted) {
-            return state
-          }
-
-          const streamId = state.streamId ?? `assistant-stream-${Date.now()}`
-          const groupId = state.pendingBranchGroup ?? undefined
-          const prev = state.messages
-          let nextMessages: ChatMessage[]
-
-          if (!prev.some(m => m.id === streamId)) {
-            nextMessages = [
-              ...prev,
-              {
-                id: streamId,
-                role: 'assistant',
-                parts: seed(),
-                pending: true,
-                branchGroupId: groupId
-              }
-            ]
-          } else {
-            nextMessages = prev.map(m =>
-              m.id === streamId
-                ? {
-                    ...m,
-                    parts: transform(m.parts, m),
-                    pending: opts.pending ? opts.pending(m) : true
-                  }
-                : m
-            )
-          }
-
-          return {
-            ...state,
-            messages: nextMessages,
-            streamId,
-            sawAssistantPayload: true,
-            awaitingResponse: false
-          }
-        })
-      }
-
-      apply()
-    },
-    [updateSessionState]
-  )
-
-  const queuedDeltasRef = useRef<Map<string, QueuedStreamDeltas>>(new Map())
-  const flushHandleRef = useRef<number | null>(null)
-  const lastFlushAtRef = useRef<number>(0)
-  const nativeSubagentSessionsRef = useRef<Set<string>>(new Set())
-  // Turns that auto-compacted: skip post-turn hydrate so live scrollback survives.
-  const compactedTurnRef = useRef<Set<string>>(new Set())
-  // Last session we applied a session.info cwd for — lets us tell an agent
-  // relocating the SAME session (follow it) from a session switch (don't yank).
-  const lastCwdInfoSessionRef = useRef<null | string>(null)
-
-  const flushQueuedDeltas = useCallback(
-    (sessionId?: string) => {
-      const queue = queuedDeltasRef.current
-      const ids = sessionId ? [sessionId] : [...queue.keys()]
-
-      for (const id of ids) {
-        const queued = queue.get(id)
-
-        if (!queued) {
-          continue
-        }
-
-        queue.delete(id)
-
-        if (queued.assistant) {
-          mutateStream(
-            id,
-            parts => dedupeGeneratedImageEchoesInParts(appendAssistantTextPart(parts, queued.assistant)),
-            () => [assistantTextPart(queued.assistant)]
-          )
-        }
-
-        if (queued.reasoning) {
-          mutateStream(
-            id,
-            parts => appendReasoningPart(parts, queued.reasoning),
-            () => [reasoningPart(queued.reasoning)]
-          )
-        }
-      }
-    },
-    [mutateStream]
-  )
-
-  const scheduleDeltaFlush = useCallback(() => {
-    if (flushHandleRef.current !== null) {
-      return
-    }
-
-    if (typeof window === 'undefined') {
-      flushQueuedDeltas()
-
-      return
-    }
-
-    // Enforce a floor on the gap between two flushes. Without it, an LLM
-    // emitting tokens slower than the rAF cadence (~30-80 tok/sec is typical)
-    // forces one React commit + Streamdown re-parse per token, and the
-    // last-block markdown re-parse cost is roughly linear in current block
-    // length. With this floor, slower streams still coalesce ~2 tokens per
-    // commit and the synthetic harness shows longtask counts drop from ~5/5s
-    // to ~1/5s on big sessions (see scripts/profile-typing-lag.md).
-    const sinceLast = performance.now() - lastFlushAtRef.current
-
-    const runFlush = () => {
-      flushHandleRef.current = null
-      lastFlushAtRef.current = performance.now()
-      flushQueuedDeltas()
-    }
-
-    if (sinceLast >= STREAM_DELTA_FLUSH_MS && typeof window.requestAnimationFrame === 'function') {
-      flushHandleRef.current = window.requestAnimationFrame(runFlush)
-
-      return
-    }
-
-    flushHandleRef.current = window.setTimeout(runFlush, Math.max(0, STREAM_DELTA_FLUSH_MS - sinceLast))
-  }, [flushQueuedDeltas])
-
-  const queueDelta = useCallback(
-    (sessionId: string, key: keyof QueuedStreamDeltas, delta: string) => {
-      if (!delta) {
-        return
-      }
-
-      const queued = queuedDeltasRef.current.get(sessionId) ?? { assistant: '', reasoning: '' }
-      queued[key] += delta
-      queuedDeltasRef.current.set(sessionId, queued)
-      scheduleDeltaFlush()
-    },
-    [scheduleDeltaFlush]
-  )
-
-  useEffect(
-    () => () => {
-      if (flushHandleRef.current !== null && typeof window !== 'undefined') {
-        if (typeof window.cancelAnimationFrame === 'function') {
-          window.cancelAnimationFrame(flushHandleRef.current)
-        } else {
-          window.clearTimeout(flushHandleRef.current)
-        }
-      }
-
-      flushHandleRef.current = null
-      flushQueuedDeltas()
-    },
-    [flushQueuedDeltas]
-  )
-
-  const appendAssistantDelta = useCallback(
-    (sessionId: string, delta: string) => {
-      if (!delta) {
-        return
-      }
-
-      queueDelta(sessionId, 'assistant', delta)
-    },
-    [queueDelta]
-  )
-
-  const appendReasoningDelta = useCallback(
-    (sessionId: string, delta: string, replace = false) => {
-      if (!delta) {
-        return
-      }
-
-      if (!replace) {
-        queueDelta(sessionId, 'reasoning', delta)
-
-        return
-      }
-
-      flushQueuedDeltas(sessionId)
-
-      mutateStream(
-        sessionId,
-        (parts, message) => {
-          if (replace && chatMessageText(message).trim()) {
-            return parts
-          }
-
-          if (replace) {
-            return [...parts.filter(part => part.type !== 'reasoning'), reasoningPart(delta)]
-          }
-
-          return appendReasoningPart(parts, delta)
-        },
-        () => [reasoningPart(delta)]
-      )
-    },
-    [flushQueuedDeltas, mutateStream, queueDelta]
-  )
-
-  const upsertToolCall = useCallback(
-    (
-      sessionId: string,
-      payload: GatewayEventPayload | undefined,
-      phase: 'running' | 'complete',
-      sourceEventType?: string
-    ) => {
-      // Text deltas flush on a timer but tool events apply now; flush first so
-      // a tool part can't jump ahead of the text that preceded it.
-      flushQueuedDeltas(sessionId)
-
-      if (sessionInterrupted(sessionId)) {
-        return
-      }
-
-      // The composer status stack owns todo display now (no inline panel) —
-      // mirror every todo state the tool reports into its session store.
-      if (payload?.name === 'todo') {
-        const todos = parseTodos(payload.todos) ?? parseTodos(payload.result) ?? parseTodos(payload.args)
-
-        if (todos) {
-          setSessionTodos(sessionId, todos)
-        }
-      }
-
-      if (!nativeSubagentSessionsRef.current.has(sessionId)) {
-        for (const subagentPayload of delegateTaskPayloads(payload, phase, sourceEventType)) {
-          upsertSubagent(
-            sessionId,
-            subagentPayload,
-            true,
-            phase === 'complete' ? 'delegate.complete' : 'delegate.running'
-          )
-        }
-      }
-
-      mutateStream(
-        sessionId,
-        parts => dedupeGeneratedImageEchoesInParts(upsertToolPart(parts, payload, phase)),
-        () => upsertToolPart([], payload, phase),
-        { pending: m => phase !== 'complete' || (m.pending ?? false) }
-      )
-    },
-    [flushQueuedDeltas, mutateStream, sessionInterrupted]
-  )
-
-  const completeAssistantMessage = useCallback(
-    (sessionId: string, text: string) => {
-      let shouldHydrate = false
-
-      const completedState = updateSessionState(sessionId, state => {
-        // Late completion from an already-cancelled turn: cancelRun has
-        // already finalized the bubble (kept the partial text, dropped it if
-        // empty). Re-running the dedupe below would replace the partial with
-        // the just-cancelled full text, so we settle and bail instead.
-        if (state.interrupted) {
-          return {
-            ...state,
-            awaitingResponse: false,
-            busy: false,
-            needsInput: false,
-            pendingBranchGroup: null,
-            streamId: null,
-            turnStartedAt: null
-          }
-        }
-
-        const streamId = state.streamId
-        const finalText = renderMediaTags(text).trim()
-        const completionError = completionErrorText(finalText)
-        const normalize = (value: string) => value.replace(/\s+/g, ' ').trim()
-
-        const replaceTextPart = (parts: ChatMessagePart[]) => {
-          const visibleFinalText = stripGeneratedImageEchoes(finalText, generatedImageEchoSources(parts)).trim()
-          const dedupeReference = normalize(visibleFinalText)
-
-          const kept = parts.filter(part => {
-            if (part.type === 'text') {
-              return false
-            }
-
-            if (part.type !== 'reasoning' || !dedupeReference) {
-              return true
-            }
-
-            const r = normalize(part.text)
-
-            return !(r && (dedupeReference.startsWith(r) || r.startsWith(dedupeReference)))
-          })
-
-          return visibleFinalText ? [...kept, assistantTextPart(visibleFinalText)] : kept
-        }
-
-        const completeMessage = (message: ChatMessage): ChatMessage =>
-          completionError
-            ? {
-                ...message,
-                error: completionError,
-                parts: message.parts.filter(part => part.type !== 'text'),
-                pending: false
-              }
-            : {
-                ...message,
-                parts: replaceTextPart(message.parts),
-                pending: false
-              }
-
-        const newAssistantFromCompletion = (): ChatMessage => ({
-          id: `assistant-${Date.now()}`,
-          role: 'assistant',
-          parts: completionError ? [] : [assistantTextPart(finalText)],
-          branchGroupId: state.pendingBranchGroup ?? undefined,
-          ...(completionError && { error: completionError })
-        })
-
-        const prev = state.messages
-        let nextMessages = prev
-
-        if (streamId && prev.some(m => m.id === streamId)) {
-          nextMessages = prev.map(m => (m.id === streamId ? completeMessage(m) : m))
-        } else {
-          const fallbackIndex = [...prev]
-            .reverse()
-            .findIndex(message => message.role === 'assistant' && !message.hidden)
-
-          if (fallbackIndex >= 0) {
-            const index = prev.length - 1 - fallbackIndex
-            const existing = prev[index]
-            const existingText = chatMessageText(existing).trim()
-
-            if (existing.pending || (finalText && existingText === finalText)) {
-              nextMessages = prev.map((message, messageIndex) =>
-                messageIndex === index ? completeMessage(message) : message
-              )
-            } else if (finalText) {
-              nextMessages = [...prev, newAssistantFromCompletion()]
-            }
-          } else if (finalText) {
-            nextMessages = [...prev, newAssistantFromCompletion()]
-          }
-        }
-
-        const hasInlineError = nextMessages.some(m => m.role === 'assistant' && m.error && !m.hidden)
-        const lastVisible = [...nextMessages].reverse().find(m => !m.hidden)
-        const unresolvedUserTail = lastVisible?.role === 'user'
-        shouldHydrate =
-          !completionError && !hasInlineError && !unresolvedUserTail && (!state.sawAssistantPayload || !finalText)
-
-        return {
-          ...state,
-          messages: nextMessages,
-          streamId: null,
-          pendingBranchGroup: null,
-          awaitingResponse: false,
-          busy: false,
-          needsInput: false,
-          turnStartedAt: null
-        }
-      })
-
-      void refreshSessions().catch(() => undefined)
-      // Sync the freshly-titled row to other windows (e.g. main, when the turn
-      // ran in the pop-out).
-      broadcastSessionsChanged()
-
-      if (compactedTurnRef.current.delete(sessionId)) {
-        shouldHydrate = false
-      }
-
-      if (shouldHydrate) {
-        void hydrateFromStoredSession(3, completedState.storedSessionId, sessionId)
-      }
-
-      dispatchNativeNotification({
-        body: text.slice(0, 140) || translateNow('notifications.native.turnDoneBody'),
-        kind: 'turnDone',
-        sessionId,
-        title: translateNow('notifications.native.turnDoneTitle')
-      })
-    },
-    [hydrateFromStoredSession, refreshSessions, updateSessionState]
-  )
-
-  const failAssistantMessage = useCallback(
-    (sessionId: string, errorMessage: string) => {
-      updateSessionState(sessionId, state => {
-        const streamId = state.streamId ?? `assistant-error-${Date.now()}`
-        const groupId = state.pendingBranchGroup ?? undefined
-        const prev = state.messages
-        const error = errorMessage.trim() || 'Hermes reported an error'
-
-        const nextMessages = prev.some(m => m.id === streamId)
-          ? prev.map(message =>
-              message.id === streamId
-                ? {
-                    ...message,
-                    error,
-                    pending: false
-                  }
-                : message
-            )
-          : [
-              ...prev,
-              {
-                id: streamId,
-                role: 'assistant' as const,
-                parts: [],
-                error,
-                pending: false,
-                branchGroupId: groupId
-              }
-            ]
-
-        return {
-          ...state,
-          messages: nextMessages,
-          streamId: null,
-          pendingBranchGroup: null,
-          sawAssistantPayload: true,
-          awaitingResponse: false,
-          busy: false,
-          needsInput: false,
-          turnStartedAt: null
-        }
-      })
-    },
-    [updateSessionState]
-  )
-
-  const handleGatewayEvent = useCallback(
+/** The gateway-event dispatcher, extracted from useMessageStream. */
+export function useGatewayEventHandler(deps: GatewayEventDeps) {
+  const {
+    appendAssistantDelta,
+    appendReasoningDelta,
+    activeSessionIdRef,
+    compactedTurnRef,
+    lastCwdInfoSessionRef,
+    nativeSubagentSessionsRef,
+    completeAssistantMessage,
+    failAssistantMessage,
+    flushQueuedDeltas,
+    queryClient,
+    refreshHermesConfig,
+    sessionInterrupted,
+    updateSessionState,
+    upsertToolCall
+  } = deps
+
+  return useCallback(
    (event: RpcEvent) => {
      const payload = event.payload as GatewayEventPayload | undefined
      const explicitSid = event.session_id || ''
@@ -1264,9 +633,12 @@ export function useMessageStream({
      appendAssistantDelta,
      appendReasoningDelta,
      activeSessionIdRef,
+      compactedTurnRef,
      completeAssistantMessage,
      failAssistantMessage,
      flushQueuedDeltas,
+      lastCwdInfoSessionRef,
+      nativeSubagentSessionsRef,
      queryClient,
      refreshHermesConfig,
      sessionInterrupted,
@@ -1274,12 +646,4 @@ export function useMessageStream({
      upsertToolCall
    ]
  )
-
-  return {
-    appendAssistantDelta,
-    appendReasoningDelta,
-    completeAssistantMessage,
-    handleGatewayEvent,
-    upsertToolCall
-  }
 }
--- a/apps/desktop/src/app/session/hooks/use-message-stream/index.ts
+++ b/apps/desktop/src/app/session/hooks/use-message-stream/index.ts
@@ -0,0 +1,540 @@
+import type { QueryClient } from '@tanstack/react-query'
+import { type MutableRefObject, useCallback, useEffect, useRef } from 'react'
+
+import { translateNow } from '@/i18n'
+import {
+  appendAssistantTextPart,
+  appendReasoningPart,
+  assistantTextPart,
+  type ChatMessage,
+  type ChatMessagePart,
+  chatMessageText,
+  type GatewayEventPayload,
+  reasoningPart,
+  renderMediaTags,
+  upsertToolPart
+} from '@/lib/chat-messages'
+import {
+  dedupeGeneratedImageEchoesInParts,
+  generatedImageEchoSources,
+  stripGeneratedImageEchoes
+} from '@/lib/generated-images'
+import { parseTodos } from '@/lib/todos'
+import { dispatchNativeNotification } from '@/store/native-notifications'
+import { broadcastSessionsChanged } from '@/store/session-sync'
+import { upsertSubagent } from '@/store/subagents'
+import { setSessionTodos } from '@/store/todos'
+
+import type { ClientSessionState } from '../../../types'
+
+import { useGatewayEventHandler } from './gateway-event'
+import { completionErrorText, delegateTaskPayloads, STREAM_DELTA_FLUSH_MS } from './utils'
+
+interface MessageStreamOptions {
+  activeSessionIdRef: MutableRefObject<string | null>
+  hydrateFromStoredSession: (
+    attempts?: number,
+    storedSessionId?: string | null,
+    runtimeSessionId?: string | null
+  ) => Promise<void>
+  queryClient: QueryClient
+  refreshHermesConfig: () => Promise<void>
+  refreshSessions: () => Promise<void>
+  sessionStateByRuntimeIdRef: MutableRefObject<Map<string, ClientSessionState>>
+  updateSessionState: (
+    sessionId: string,
+    updater: (state: ClientSessionState) => ClientSessionState,
+    storedSessionId?: string | null
+  ) => ClientSessionState
+}
+
+interface QueuedStreamDeltas {
+  assistant: string
+  reasoning: string
+}
+
+export function useMessageStream({
+  activeSessionIdRef,
+  hydrateFromStoredSession,
+  queryClient,
+  refreshHermesConfig,
+  refreshSessions,
+  sessionStateByRuntimeIdRef,
+  updateSessionState
+}: MessageStreamOptions) {
+  const sessionInterrupted = useCallback(
+    (sessionId: string) => sessionStateByRuntimeIdRef.current.get(sessionId)?.interrupted ?? false,
+    [sessionStateByRuntimeIdRef]
+  )
+
+  // Apply a change to the assistant message that is currently streaming, or
+  // create it from `seed` when no message with the stream id exists yet. Keeps
+  // the streamId / branch-group bookkeeping in one place for event callbacks.
+  const mutateStream = useCallback(
+    (
+      sessionId: string,
+      transform: (parts: ChatMessagePart[], message: ChatMessage) => ChatMessagePart[],
+      seed: () => ChatMessagePart[],
+      opts: {
+        pending?: (message: ChatMessage) => boolean
+      } = {}
+    ) => {
+      updateSessionState(sessionId, state => {
+        // Once a turn has been stopped, late deltas / tool events for it are
+        // ignored: they must not keep growing the finalized assistant bubble
+        // or seed a fresh bubble that would appear to answer the next user
+        // message.
+        if (state.interrupted) {
+          return state
+        }
+
+        const streamId = state.streamId ?? `assistant-stream-${Date.now()}`
+        const current = state.messages
+        const alreadySeeded = current.some(entry => entry.id === streamId)
+
+        // Patch the streamed message in place when present; otherwise append
+        // a new pending assistant message built from `seed`.
+        const messages: ChatMessage[] = alreadySeeded
+          ? current.map(entry => {
+              if (entry.id !== streamId) {
+                return entry
+              }
+
+              return {
+                ...entry,
+                parts: transform(entry.parts, entry),
+                pending: opts.pending ? opts.pending(entry) : true
+              }
+            })
+          : [
+              ...current,
+              {
+                id: streamId,
+                role: 'assistant',
+                parts: seed(),
+                pending: true,
+                branchGroupId: state.pendingBranchGroup ?? undefined
+              }
+            ]
+
+        // Every stream mutation records that assistant output was seen and
+        // clears the awaiting-response flag.
+        return {
+          ...state,
+          messages,
+          streamId,
+          sawAssistantPayload: true,
+          awaitingResponse: false
+        }
+      })
+    },
+    [updateSessionState]
+  )
+
+  const queuedDeltasRef = useRef<Map<string, QueuedStreamDeltas>>(new Map())
+  const flushHandleRef = useRef<number | null>(null)
+  const lastFlushAtRef = useRef<number>(0)
+  const nativeSubagentSessionsRef = useRef<Set<string>>(new Set())
+  // Turns that auto-compacted: skip post-turn hydrate so live scrollback survives.
+  const compactedTurnRef = useRef<Set<string>>(new Set())
+  // Last session we applied a session.info cwd for — lets us tell an agent
+  // relocating the SAME session (follow it) from a session switch (don't yank).
+  const lastCwdInfoSessionRef = useRef<null | string>(null)
+
+  const flushQueuedDeltas = useCallback(
+    (sessionId?: string) => {
+      const queue = queuedDeltasRef.current
+      const ids = sessionId ? [sessionId] : [...queue.keys()]
+
+      for (const id of ids) {
+        const queued = queue.get(id)
+
+        if (!queued) {
+          continue
+        }
+
+        queue.delete(id)
+
+        if (queued.assistant) {
+          mutateStream(
+            id,
+            parts => dedupeGeneratedImageEchoesInParts(appendAssistantTextPart(parts, queued.assistant)),
+            () => [assistantTextPart(queued.assistant)]
+          )
+        }
+
+        if (queued.reasoning) {
+          mutateStream(
+            id,
+            parts => appendReasoningPart(parts, queued.reasoning),
+            () => [reasoningPart(queued.reasoning)]
+          )
+        }
+      }
+    },
+    [mutateStream]
+  )
+
+  // Whether flushHandleRef is a requestAnimationFrame id (true) or a setTimeout id (false); cleanup needs to know.
+  const flushViaFrameRef = useRef(false)
+
+  const scheduleDeltaFlush = useCallback(() => {
+    if (flushHandleRef.current !== null) {
+      return
+    }
+
+    if (typeof window === 'undefined') {
+      flushQueuedDeltas()
+
+      return
+    }
+
+    // Enforce a floor on the gap between two flushes. Without it, an LLM emitting tokens slower than the rAF cadence
+    // (~30-80 tok/sec is typical) forces one React commit + Streamdown re-parse per token, and the last-block
+    // markdown re-parse cost is roughly linear in current block length. With this floor, slower streams still
+    // coalesce ~2 tokens per commit and the synthetic harness shows longtask counts drop from ~5/5s to ~1/5s on big
+    // sessions (see scripts/profile-typing-lag.md).
+    const sinceLast = performance.now() - lastFlushAtRef.current
+
+    const runFlush = () => {
+      flushHandleRef.current = null
+      lastFlushAtRef.current = performance.now()
+      flushQueuedDeltas()
+    }
+
+    const viaFrame = sinceLast >= STREAM_DELTA_FLUSH_MS && typeof window.requestAnimationFrame === 'function'
+    flushViaFrameRef.current = viaFrame
+    flushHandleRef.current = viaFrame
+      ? window.requestAnimationFrame(runFlush)
+      : window.setTimeout(runFlush, Math.max(0, STREAM_DELTA_FLUSH_MS - sinceLast))
+  }, [flushQueuedDeltas])
+
+  const queueDelta = useCallback(
+    (sessionId: string, key: keyof QueuedStreamDeltas, delta: string) => {
+      if (!delta) {
+        return
+      }
+
+      const queued = queuedDeltasRef.current.get(sessionId) ?? { assistant: '', reasoning: '' }
+      queued[key] += delta
+      queuedDeltasRef.current.set(sessionId, queued)
+      scheduleDeltaFlush()
+    },
+    [scheduleDeltaFlush]
+  )
+
+  useEffect(
+    () => () => {
+      if (flushHandleRef.current !== null && typeof window !== 'undefined') {
+        // Cancel through the API that issued the id: frame ids and timer ids are unrelated number spaces.
+        if (flushViaFrameRef.current) {
+          window.cancelAnimationFrame(flushHandleRef.current)
+        } else {
+          window.clearTimeout(flushHandleRef.current)
+        }
+      }
+
+      flushHandleRef.current = null
+      flushQueuedDeltas()
+    },
+    [flushQueuedDeltas]
+  )
+
+  const appendAssistantDelta = useCallback(
+    (sessionId: string, delta: string) => {
+      if (!delta) {
+        return
+      }
+
+      queueDelta(sessionId, 'assistant', delta)
+    },
+    [queueDelta]
+  )
+
+  // Stream reasoning text. Incremental deltas are coalesced through the flush
+  // queue; a `replace` snapshot bypasses it and swaps the reasoning parts.
+  const appendReasoningDelta = useCallback(
+    (sessionId: string, delta: string, replace = false) => {
+      if (!delta) {
+        return
+      }
+
+      if (!replace) {
+        queueDelta(sessionId, 'reasoning', delta)
+
+        return
+      }
+
+      // Drain queued deltas first so the snapshot is applied after them.
+      flushQueuedDeltas(sessionId)
+
+      mutateStream(
+        sessionId,
+        (parts, message) => {
+          // Leave the parts as they are when chatMessageText(message) is non-blank.
+          if (chatMessageText(message).trim()) {
+            return parts
+          }
+
+          return [...parts.filter(part => part.type !== 'reasoning'), reasoningPart(delta)]
+        },
+        () => [reasoningPart(delta)]
+      )
+    },
+    [flushQueuedDeltas, mutateStream, queueDelta]
+  )
+
+  const upsertToolCall = useCallback(
+    (
+      sessionId: string,
+      payload: GatewayEventPayload | undefined,
+      phase: 'running' | 'complete',
+      sourceEventType?: string
+    ) => {
+      // Text deltas flush on a timer but tool events apply now; flush first so
+      // a tool part can't jump ahead of the text that preceded it.
+      flushQueuedDeltas(sessionId)
+
+      if (sessionInterrupted(sessionId)) {
+        return
+      }
+
+      // The composer status stack owns todo display now (no inline panel) —
+      // mirror every todo state the tool reports into its session store.
+      if (payload?.name === 'todo') {
+        const todos = parseTodos(payload.todos) ?? parseTodos(payload.result) ?? parseTodos(payload.args)
+
+        if (todos) {
+          setSessionTodos(sessionId, todos)
+        }
+      }
+
+      if (!nativeSubagentSessionsRef.current.has(sessionId)) {
+        for (const subagentPayload of delegateTaskPayloads(payload, phase, sourceEventType)) {
+          upsertSubagent(
+            sessionId,
+            subagentPayload,
+            true,
+            phase === 'complete' ? 'delegate.complete' : 'delegate.running'
+          )
+        }
+      }
+
+      mutateStream(
+        sessionId,
+        parts => dedupeGeneratedImageEchoesInParts(upsertToolPart(parts, payload, phase)),
+        () => upsertToolPart([], payload, phase),
+        { pending: m => phase !== 'complete' || (m.pending ?? false) }
+      )
+    },
+    [flushQueuedDeltas, mutateStream, sessionInterrupted]
+  )
+
+  const completeAssistantMessage = useCallback(
+    (sessionId: string, text: string) => {
+      let shouldHydrate = false
+
+      const completedState = updateSessionState(sessionId, state => {
+        // Late completion from an already-cancelled turn: cancelRun has
+        // already finalized the bubble (kept the partial text, dropped it if
+        // empty). Re-running the dedupe below would replace the partial with
+        // the just-cancelled full text, so we settle and bail instead.
+        if (state.interrupted) {
+          return {
+            ...state,
+            awaitingResponse: false,
+            busy: false,
+            needsInput: false,
+            pendingBranchGroup: null,
+            streamId: null,
+            turnStartedAt: null
+          }
+        }
+
+        const streamId = state.streamId
+        const finalText = renderMediaTags(text).trim()
+        const completionError = completionErrorText(finalText)
+        const normalize = (value: string) => value.replace(/\s+/g, ' ').trim()
+
+        const replaceTextPart = (parts: ChatMessagePart[]) => {
+          const visibleFinalText = stripGeneratedImageEchoes(finalText, generatedImageEchoSources(parts)).trim()
+          const dedupeReference = normalize(visibleFinalText)
+
+          const kept = parts.filter(part => {
+            if (part.type === 'text') {
+              return false
+            }
+
+            if (part.type !== 'reasoning' || !dedupeReference) {
+              return true
+            }
+
+            const r = normalize(part.text)
+
+            return !(r && (dedupeReference.startsWith(r) || r.startsWith(dedupeReference)))
+          })
+
+          return visibleFinalText ? [...kept, assistantTextPart(visibleFinalText)] : kept
+        }
+
+        const completeMessage = (message: ChatMessage): ChatMessage =>
+          completionError
+            ? {
+                ...message,
+                error: completionError,
+                parts: message.parts.filter(part => part.type !== 'text'),
+                pending: false
+              }
+            : {
+                ...message,
+                parts: replaceTextPart(message.parts),
+                pending: false
+              }
+
+        const newAssistantFromCompletion = (): ChatMessage => ({
+          id: `assistant-${Date.now()}`,
+          role: 'assistant',
+          parts: completionError ? [] : [assistantTextPart(finalText)],
+          branchGroupId: state.pendingBranchGroup ?? undefined,
+          ...(completionError && { error: completionError })
+        })
+
+        const prev = state.messages
+        let nextMessages = prev
+
+        if (streamId && prev.some(m => m.id === streamId)) {
+          nextMessages = prev.map(m => (m.id === streamId ? completeMessage(m) : m))
+        } else {
+          const fallbackIndex = [...prev]
+            .reverse()
+            .findIndex(message => message.role === 'assistant' && !message.hidden)
+
+          if (fallbackIndex >= 0) {
+            const index = prev.length - 1 - fallbackIndex
+            const existing = prev[index]
+            const existingText = chatMessageText(existing).trim()
+
+            if (existing.pending || (finalText && existingText === finalText)) {
+              nextMessages = prev.map((message, messageIndex) =>
+                messageIndex === index ? completeMessage(message) : message
+              )
+            } else if (finalText) {
+              nextMessages = [...prev, newAssistantFromCompletion()]
+            }
+          } else if (finalText) {
+            nextMessages = [...prev, newAssistantFromCompletion()]
+          }
+        }
+
+        const hasInlineError = nextMessages.some(m => m.role === 'assistant' && m.error && !m.hidden)
+        const lastVisible = [...nextMessages].reverse().find(m => !m.hidden)
+        const unresolvedUserTail = lastVisible?.role === 'user'
+        shouldHydrate =
+          !completionError && !hasInlineError && !unresolvedUserTail && (!state.sawAssistantPayload || !finalText)
+
+        return {
+          ...state,
+          messages: nextMessages,
+          streamId: null,
+          pendingBranchGroup: null,
+          awaitingResponse: false,
+          busy: false,
+          needsInput: false,
+          turnStartedAt: null
+        }
+      })
+
+      void refreshSessions().catch(() => undefined)
+      // Sync the freshly-titled row to other windows (e.g. main, when the turn
+      // ran in the pop-out).
+      broadcastSessionsChanged()
+
+      if (compactedTurnRef.current.delete(sessionId)) {
+        shouldHydrate = false
+      }
+
+      if (shouldHydrate) {
+        void hydrateFromStoredSession(3, completedState.storedSessionId, sessionId)
+      }
+
+      dispatchNativeNotification({
+        body: text.slice(0, 140) || translateNow('notifications.native.turnDoneBody'),
+        kind: 'turnDone',
+        sessionId,
+        title: translateNow('notifications.native.turnDoneTitle')
+      })
+    },
+    [hydrateFromStoredSession, refreshSessions, updateSessionState]
+  )
+
+  const failAssistantMessage = useCallback(
+    (sessionId: string, errorMessage: string) => {
+      updateSessionState(sessionId, state => {
+        const streamId = state.streamId ?? `assistant-error-${Date.now()}`
+        const groupId = state.pendingBranchGroup ?? undefined
+        const prev = state.messages
+        const error = errorMessage.trim() || 'Hermes reported an error'
+
+        const nextMessages = prev.some(m => m.id === streamId)
+          ? prev.map(message =>
+              message.id === streamId
+                ? {
+                    ...message,
+                    error,
+                    pending: false
+                  }
+                : message
+            )
+          : [
+              ...prev,
+              {
+                id: streamId,
+                role: 'assistant' as const,
+                parts: [],
+                error,
+                pending: false,
+                branchGroupId: groupId
+              }
+            ]
+
+        return {
+          ...state,
+          messages: nextMessages,
+          streamId: null,
+          pendingBranchGroup: null,
+          sawAssistantPayload: true,
+          awaitingResponse: false,
+          busy: false,
+          needsInput: false,
+          turnStartedAt: null
+        }
+      })
+    },
+    [updateSessionState]
+  )
+
+  const handleGatewayEvent = useGatewayEventHandler({
+    appendAssistantDelta,
+    appendReasoningDelta,
+    activeSessionIdRef,
+    compactedTurnRef,
+    lastCwdInfoSessionRef,
+    nativeSubagentSessionsRef,
+    completeAssistantMessage,
+    failAssistantMessage,
+    flushQueuedDeltas,
+    queryClient,
+    refreshHermesConfig,
+    sessionInterrupted,
+    updateSessionState,
+    upsertToolCall
+  })
+
+  return {
+    appendAssistantDelta,
+    appendReasoningDelta,
+    completeAssistantMessage,
+    handleGatewayEvent,
+    upsertToolCall
+  }
+}
--- a/apps/desktop/src/app/session/hooks/use-message-stream/utils.test.ts
+++ b/apps/desktop/src/app/session/hooks/use-message-stream/utils.test.ts
@@ -0,0 +1,66 @@
+import { describe, expect, it } from 'vitest'
+
+import type { GatewayEventPayload } from '@/lib/chat-messages'
+
+import {
+  completionErrorText,
+  delegateTaskPayloads,
+  hasSessionInfoStatePatch,
+  sessionInfoStatePatch,
+  toTodoPayload
+} from './utils'
+
+const payload = (fields: Record<string, unknown>): GatewayEventPayload => fields as GatewayEventPayload
+
+describe('completionErrorText', () => { // error-shaped text is returned; anything else is null
+  it('flags provider/HTTP/retry failures, ignores normal text', () => {
+    expect(completionErrorText('API call failed after 3 retries: boom')).toMatch(/^API call failed/)
+    expect(completionErrorText('HTTP 500 upstream')).toMatch(/^HTTP 500/)
+    expect(completionErrorText('Gateway error: nope')).toMatch(/^Gateway error/)
+    expect(completionErrorText('here is your answer')).toBeNull()
+    expect(completionErrorText('   ')).toBeNull() // whitespace-only input trims to nothing
+  })
+})
+
+describe('toTodoPayload', () => {
+  it('routes named todo and anonymous todos-bearing events to the todo stream', () => {
+    expect(toTodoPayload(payload({ name: 'todo' }))?.tool_id).toBe('todo-live') // default id when none is given
+    expect(toTodoPayload(payload({ todos: [] }))?.name).toBe('todo') // nameless, but has a `todos` key
+    expect(toTodoPayload(payload({ name: 'web_search' }))).toBeUndefined()
+    expect(toTodoPayload(undefined)).toBeUndefined()
+  })
+})
+
+describe('sessionInfoStatePatch / hasSessionInfoStatePatch', () => {
+  it('extracts only present runtime fields', () => {
+    const patch = sessionInfoStatePatch(payload({ model: 'gpt', fast: true, branch: 'main' }))
+    expect(patch).toMatchObject({ model: 'gpt', fast: true, branch: 'main' }) // subset match only
+    expect(hasSessionInfoStatePatch(patch)).toBe(true)
+    expect(hasSessionInfoStatePatch(sessionInfoStatePatch(payload({})))).toBe(false) // empty payload, empty patch
+  })
+})
+
+describe('delegateTaskPayloads', () => {
+  it('returns [] for non-delegate events', () => {
+    expect(delegateTaskPayloads(payload({ name: 'web_search' }), 'running')).toEqual([])
+  })
+
+  it('maps a running tool.start to a subagent.start spec', () => {
+    const [spec] = delegateTaskPayloads(
+      payload({ name: 'delegate_task', tool_id: 't1', args: { goal: 'do it' } }),
+      'running',
+      'tool.start' // source event type; selects subagent.start over subagent.progress
+    )
+
+    expect(spec).toMatchObject({ event_type: 'subagent.start', goal: 'do it', status: 'running' })
+  })
+
+  it('maps completion (with error) to a failed subagent.complete', () => {
+    const [spec] = delegateTaskPayloads(
+      payload({ name: 'delegate_task', error: 'boom', result: { summary: 'failed run' } }),
+      'complete' // source event type omitted: the 'complete' phase alone decides the event type
+    )
+
+    expect(spec).toMatchObject({ event_type: 'subagent.complete', status: 'failed' })
+  })
+})
--- a/apps/desktop/src/app/session/hooks/use-message-stream/utils.ts
+++ b/apps/desktop/src/app/session/hooks/use-message-stream/utils.ts
@@ -0,0 +1,179 @@
+import type { GatewayEventPayload } from '@/lib/chat-messages'
+import { normalizePersonalityValue } from '@/lib/chat-runtime'
+
+import type { ClientSessionState } from '../../../types'
+
+type SessionRuntimeStatePatch = Partial<
+  Pick<
+    ClientSessionState,
+    'branch' | 'cwd' | 'fast' | 'model' | 'personality' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo'
+  >
+>
+
+export function sessionInfoStatePatch(payload: GatewayEventPayload | undefined): SessionRuntimeStatePatch {
+  const patch: SessionRuntimeStatePatch = {} // stays empty when payload is undefined or has none of the fields
+
+  if (typeof payload?.model === 'string') {
+    patch.model = payload.model || '' // value is already a string here, so the `|| ''` fallback is a no-op
+  }
+
+  if (typeof payload?.provider === 'string') {
+    patch.provider = payload.provider || ''
+  }
+
+  if (typeof payload?.cwd === 'string') {
+    patch.cwd = payload.cwd
+  }
+
+  if (typeof payload?.branch === 'string') {
+    patch.branch = payload.branch
+  }
+
+  if (typeof payload?.personality === 'string') {
+    patch.personality = normalizePersonalityValue(payload.personality) // normalized, not stored verbatim
+  }
+
+  if (typeof payload?.reasoning_effort === 'string') {
+    patch.reasoningEffort = payload.reasoning_effort // snake_case payload key, camelCase state key
+  }
+
+  if (typeof payload?.service_tier === 'string') {
+    patch.serviceTier = payload.service_tier
+  }
+
+  if (typeof payload?.fast === 'boolean') {
+    patch.fast = payload.fast // only real booleans are accepted; other types are ignored
+  }
+
+  if (typeof payload?.yolo === 'boolean') {
+    patch.yolo = payload.yolo
+  }
+
+  return patch
+}
+
+export function hasSessionInfoStatePatch(patch: SessionRuntimeStatePatch): boolean {
+  return Object.keys(patch).length !== 0 // true as soon as the patch has one own key
+}
+
+// Minimum gap between two assistant-text flushes during a stream. Was 16ms
+// (rAF only), which at typical LLM token rates of ~30-80 tok/sec meant every
+// token got its own React commit + Streamdown markdown re-parse, scaling
+// linearly with the growing last-block length. Bumping to 33ms lets ~2 tokens
+// batch into one commit at 60 tok/sec without introducing visible lag on the
+// streaming text (still 30 fps of visible text growth). Big perceived
+// smoothness win on long messages with big trailing paragraphs; see
+// `scripts/profile-typing-lag.md` for the measurement work behind this.
+export const STREAM_DELTA_FLUSH_MS = 33
+
+// Gateway/provider failures sometimes arrive as message.complete text instead
+// of an explicit error event. Treat matches as inline assistant errors so they
+// persist like real error events and don't get erased by hydrate fallback.
+const COMPLETION_ERROR_PATTERNS = [
+  /^API call failed after \d+ retries:/i,
+  /^HTTP\s+\d{3}\b/i,
+  /^(Provider|Gateway)\s+error:/i
+]
+
+export function completionErrorText(finalText: string): string | null {
+  const trimmed = finalText.trim() // every pattern is ^-anchored, so strip leading whitespace first
+
+  return trimmed !== '' && COMPLETION_ERROR_PATTERNS.some(pattern => pattern.test(trimmed)) ? trimmed : null
+}
+
+export const SUBAGENT_EVENT_TYPES = new Set([
+  'subagent.spawn_requested',
+  'subagent.start',
+  'subagent.thinking',
+  'subagent.tool',
+  'subagent.progress',
+  'subagent.complete'
+])
+
+// Decide whether an event belongs to the todo stream. It does when it is
+// named 'todo', or when it has no name but carries a `todos` key (anonymous
+// progress events). A match is returned as a copy renamed to 'todo', with
+// tool_id defaulting to 'todo-live' when the event has none. Everything
+// else, including a missing payload, yields undefined.
+// The `payload ?? {}` fallback only keeps Object.hasOwn from throwing.
+export function toTodoPayload(payload: GatewayEventPayload | undefined): GatewayEventPayload | undefined {
+  const wanted = payload?.name === 'todo' || (!payload?.name && Object.hasOwn(payload ?? {}, 'todos'))
+
+  return payload && wanted ? { ...payload, name: 'todo', tool_id: payload.tool_id || 'todo-live' } : undefined
+}
+
+function asRecord(value: unknown): Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value) ? (value as Record<string, unknown>) : {}
+}
+
+function parseMaybeRecord(value: unknown): Record<string, unknown> {
+  if (typeof value !== 'string') {
+    return asRecord(value)
+  }
+
+  try {
+    return asRecord(JSON.parse(value)) // JSON that is not a plain object still becomes {}
+  } catch {
+    return {} // malformed JSON
+  }
+}
+
+const firstString = (...candidates: unknown[]): string => {
+  // Pick the first argument that is a non-empty string. Anything else
+  // (other types, undefined, '') is skipped, so callers can list their
+  // fallbacks in priority order without filtering them first.
+  // Yields '' when no argument qualifies.
+  const match = candidates.find(candidate => typeof candidate === 'string' && candidate !== '')
+
+  return typeof match === 'string' ? match : ''
+}
+
+export function delegateTaskPayloads(
+  payload: GatewayEventPayload | undefined,
+  phase: 'running' | 'complete',
+  sourceEventType?: string
+): Record<string, unknown>[] {
+  // Only delegate_task tool events are expanded into subagent payloads.
+  if (payload?.name !== 'delegate_task') {
+    return []
+  }
+
+  const done = phase === 'complete'
+  const args = parseMaybeRecord(payload.args ?? payload.input)
+  const listed = Array.isArray(args.tasks) ? args.tasks : []
+  // With no (or an empty) `tasks` array the call itself is the single task.
+  const tasks = listed.length ? listed.map(parseMaybeRecord) : [args]
+  const toolId = payload.tool_id || payload.tool_call_id || payload.id || 'delegate_task'
+  const progressText = firstString(payload.preview, payload.message, payload.context)
+  const summary = firstString(parseMaybeRecord(payload.result).summary, payload.summary, payload.message)
+  const status = done ? (payload.error ? 'failed' : 'completed') : 'running'
+  // While running, a tool.start source maps to subagent.start; else progress.
+  const liveEventType = sourceEventType === 'tool.start' ? 'subagent.start' : 'subagent.progress'
+  const eventType = done ? 'subagent.complete' : liveEventType
+  const starting = eventType === 'subagent.start'
+
+  return tasks.map((task, index) => {
+    const goal = firstString(task.goal, args.goal, payload.context) || 'Delegated task'
+
+    return {
+      depth: 0,
+      duration_seconds: payload.duration_s,
+      goal,
+      status,
+      subagent_id: `delegate-tool:${toolId}:${index}`,
+      summary: summary || undefined,
+      task_count: tasks.length,
+      task_index: index,
+      text: eventType === 'subagent.progress' ? progressText || goal : undefined,
+      tool_name: starting ? 'delegate_task' : undefined,
+      tool_preview: starting ? progressText : undefined,
+      toolsets: Array.isArray(task.toolsets) ? task.toolsets : Array.isArray(args.toolsets) ? args.toolsets : [],
+      event_type: eventType,
+      // Only a finished run with a summary gets an output tail (one entry).
+      output_tail:
+        done && summary
+          ? [{ is_error: Boolean(payload.error), preview: summary, tool: 'delegate_task' }]
+          : undefined
+    }
+  })
+}
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions/index.test.tsx
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions/index.test.tsx
@@ -8,7 +8,7 @@ import { $composerAttachments, type ComposerAttachment } from '@/store/composer'
 import { $busy, $connection, $messages, $sessions, setSessions } from '@/store/session'
 import type { SessionInfo } from '@/types/hermes'

-import { uploadComposerAttachment, usePromptActions } from './use-prompt-actions'
+import { uploadComposerAttachment, usePromptActions } from '.'

 vi.mock('@/hermes', () => ({
  getProfiles: vi.fn(async () => ({ profiles: [] })),
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions/index.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions/index.ts
@@ -0,0 +1,937 @@
+import type { AppendMessage, ThreadMessage } from '@assistant-ui/react'
+import { useStore } from '@nanostores/react'
+import { type MutableRefObject, useCallback, useEffect, useRef } from 'react'
+
+import { transcribeAudio } from '@/hermes'
+import { useI18n } from '@/i18n'
+import { stripAnsi } from '@/lib/ansi'
+import { branchGroupForUser, type ChatMessage, chatMessageText, textPart } from '@/lib/chat-messages'
+import { pathLabel, SLASH_COMMAND_RE } from '@/lib/chat-runtime'
+import { triggerHaptic } from '@/lib/haptics'
+import { setMutableRef } from '@/lib/mutable-ref'
+import { clearClarifyRequest } from '@/store/clarify'
+import {
+  $composerAttachments,
+  type ComposerAttachment,
+  setComposerAttachmentUploadState,
+  updateComposerAttachment
+} from '@/store/composer'
+import { resetSessionBackground } from '@/store/composer-status'
+import { clearNotifications, notify, notifyError } from '@/store/notifications'
+import { clearPreviewArtifacts } from '@/store/preview-status'
+import { clearAllPrompts } from '@/store/prompts'
+import { $busy, $connection, $messages, setAwaitingResponse, setBusy, setMessages } from '@/store/session'
+import { clearSessionSubagents } from '@/store/subagents'
+import { clearSessionTodos } from '@/store/todos'
+
+import type {
+  ClientSessionState,
+  FileAttachResponse,
+  HandoffFailResponse,
+  HandoffRequestResponse,
+  HandoffStateResponse,
+  ImageAttachResponse,
+  SessionSteerResponse
+} from '../../../types'
+
+import { useSlashCommand } from './slash'
+import { useSubmitPrompt } from './submit'
+import {
+  appendText,
+  blobToDataUrl,
+  delay,
+  friendlyRemoteAttachError,
+  type GatewayRequest,
+  inlineErrorMessage,
+  isSessionBusyError,
+  isSessionNotFoundError,
+  readFileDataUrlForAttach,
+  readImageForRemoteAttach,
+  type SubmitTextOptions,
+  visibleUserIndexAtOrdinal,
+  visibleUserOrdinal,
+  withSessionBusyRetry
+} from './utils'
+
+interface HandoffResult {
+  ok: boolean
+  error?: string
+}
+
+/**
+ * Stage one file/image attachment into the session workspace and resolve to a
+ * copy carrying the gateway-side ref (image `path` / file `refText`) and
+ * `attachedSessionId`. Remote mode uploads bytes (so vision works for images);
+ * local mode passes the path. Throws when the file is unreadable or the gateway
+ * does not attach it. Shared by submit-time sync, the eager drop-time upload,
+ * and the message-edit composer drop — keep them in lockstep.
+ */
+export async function uploadComposerAttachment(
+  attachment: ComposerAttachment,
+  opts: { remote: boolean; requestGateway: GatewayRequest; sessionId: string }
+): Promise<ComposerAttachment> {
+  const { remote, requestGateway, sessionId } = opts
+  const path = attachment.path ?? ''
+  const label = attachment.label || pathLabel(path) // used for the file.attach name and in error messages
+
+  if (attachment.kind === 'image') {
+    let result: ImageAttachResponse
+
+    if (remote) {
+      let payload: Awaited<ReturnType<typeof readImageForRemoteAttach>>
+
+      try {
+        payload = await readImageForRemoteAttach(path)
+      } catch (err) {
+        throw friendlyRemoteAttachError(err, label)
+      }
+
+      if (!payload) {
+        throw new Error(`Could not read ${label}`)
+      }
+
+      result = await requestGateway<ImageAttachResponse>('image.attach_bytes', {
+        session_id: sessionId,
+        content_base64: payload.contentBase64,
+        filename: payload.filename
+      })
+    } else {
+      result = await requestGateway<ImageAttachResponse>('image.attach', {
+        path,
+        session_id: sessionId
+      })
+    }
+
+    if (!result.attached) {
+      throw new Error(result.message || `Could not attach ${label}`)
+    }
+
+    const attachedPath = result.path || path // keep the local path if the gateway returns none
+
+    return {
+      ...attachment,
+      attachedSessionId: sessionId,
+      label: attachedPath ? pathLabel(attachedPath) : attachment.label,
+      path: attachedPath,
+      uploadState: undefined
+    }
+  }
+
+  // Non-image file.
+  let dataUrl: string | null = null
+
+  if (remote) {
+    try {
+      dataUrl = await readFileDataUrlForAttach(path)
+    } catch (err) {
+      throw friendlyRemoteAttachError(err, label)
+    }
+
+    if (!dataUrl) {
+      throw new Error(`Could not read ${label}`)
+    }
+  }
+
+  const result = await requestGateway<FileAttachResponse>('file.attach', {
+    name: label,
+    path,
+    session_id: sessionId,
+    ...(dataUrl ? { data_url: dataUrl } : {}) // data_url is only sent in remote mode
+  })
+
+  if (!result.attached || !result.ref_text) { // a file attach without ref_text counts as a failure
+    throw new Error(result.message || `Could not attach ${label}`)
+  }
+
+  return {
+    ...attachment,
+    attachedSessionId: sessionId,
+    refText: result.ref_text,
+    uploadState: undefined
+  }
+}
+
+interface PromptActionsOptions {
+  activeSessionId: string | null
+  activeSessionIdRef: MutableRefObject<string | null>
+  busyRef: MutableRefObject<boolean>
+  branchCurrentSession: () => Promise<boolean>
+  createBackendSessionForSend: (preview?: string | null) => Promise<string | null>
+  handleSkinCommand: (arg: string) => string
+  refreshSessions: () => Promise<void>
+  requestGateway: <T>(method: string, params?: Record<string, unknown>) => Promise<T>
+  resumeStoredSession: (storedSessionId: string) => Promise<void> | void
+  selectedStoredSessionIdRef: MutableRefObject<string | null>
+  startFreshSessionDraft: () => void
+  sttEnabled: boolean
+  updateSessionState: (
+    sessionId: string,
+    updater: (state: ClientSessionState) => ClientSessionState,
+    storedSessionId?: string | null
+  ) => ClientSessionState
+}
+
+/** Target of a message restore: its text and/or user-message ordinal (both optional). */
+
+interface RestoreMessageTarget {
+  text?: string
+  userOrdinal?: number | null
+}
+
+export function usePromptActions({
+  activeSessionId,
+  activeSessionIdRef,
+  busyRef,
+  branchCurrentSession,
+  createBackendSessionForSend,
+  handleSkinCommand,
+  refreshSessions,
+  requestGateway,
+  resumeStoredSession,
+  selectedStoredSessionIdRef,
+  startFreshSessionDraft,
+  sttEnabled,
+  updateSessionState
+}: PromptActionsOptions) {
+  const { t } = useI18n()
+  const copy = t.desktop
+
+  const appendSessionTextMessage = useCallback(
+    (sessionId: string, role: ChatMessage['role'], text: string) => {
+      // Strip ANSI: slash-command output from the backend worker carries SGR
+      // color codes (e.g. "Unknown command" in red). The ESC byte is invisible
+      // in the chat panel, so without this the `[1;31m…[0m` payload leaks as
+      // literal text.
+      const body = stripAnsi(text).trim()
+
+      if (!body) {
+        return
+      }
+
+      updateSessionState(
+        sessionId,
+        state => ({
+          ...state,
+          messages: [
+            ...state.messages,
+            {
+              id: `${role}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
+              role,
+              parts: [textPart(body)]
+            }
+          ]
+        }),
+        selectedStoredSessionIdRef.current
+      )
+    },
+    [selectedStoredSessionIdRef, updateSessionState]
+  )
+
+  // In-flight drop-time eager uploads, keyed by attachment id. Submit joins
+  // these before re-uploading so a drop-then-immediately-Enter can't fire
+  // file.attach twice and stage duplicate copies on the gateway.
+  const eagerUploadInFlight = useRef<Map<string, Promise<void>>>(new Map())
+
+  const syncAttachmentsForSubmit = useCallback(
+    async (
+      sessionId: string,
+      attachments: ComposerAttachment[],
+      options: { updateComposerAttachments?: boolean } = {}
+    ): Promise<ComposerAttachment[]> => {
+      const updateComposerAttachments = options.updateComposerAttachments ?? true
+      const remote = $connection.get()?.mode === 'remote'
+      const synced: ComposerAttachment[] = []
+
+      for (const original of attachments) {
+        let attachment = original
+
+        // Join a drop-time eager upload still in flight for this attachment
+        // before deciding anything — otherwise submit and the eager task both
+        // call file.attach and stage duplicate files. After it settles, take the
+        // store's updated copy (its gateway ref, or its failure) over the stale
+        // pre-upload snapshot.
+        const inFlight = eagerUploadInFlight.current.get(attachment.id)
+
+        if (inFlight) {
+          await inFlight
+          attachment = $composerAttachments.get().find(item => item.id === attachment.id) ?? attachment
+        }
+
+        // Already-synced or pathless refs (terminal, url, etc.) pass through.
+        // A drop-time eager upload may already have staged this one (matching
+        // attachedSessionId) — don't re-upload it.
+        if (!attachment.path || attachment.attachedSessionId === sessionId) {
+          synced.push(attachment)
+
+          continue
+        }
+
+        if (attachment.kind === 'image' || attachment.kind === 'file') {
+          const nextAttachment = await uploadComposerAttachment(attachment, { remote, requestGateway, sessionId })
+
+          // Update-only: never resurrect a chip the user removed mid-upload.
+          if (updateComposerAttachments) {
+            updateComposerAttachment(nextAttachment)
+          }
+
+          synced.push(nextAttachment)
+
+          continue
+        }
+
+        synced.push(attachment)
+      }
+
+      return synced
+    },
+    [requestGateway]
+  )
+
+  // Stage a freshly dropped file as soon as it lands (when a session already
+  // exists), so the upload runs while the user is still typing rather than
+  // stalling the send. The card shows a spinner via `uploadState`; on success
+  // the chip carries its gateway-side ref so submit skips re-uploading.
+  //
+  // Images are intentionally NOT eager-uploaded: attachImagePath adds the chip
+  // and then fills in `previewUrl` (the base64 thumbnail) on a second tick, so
+  // an eager upload would race that write — clobbering the thumbnail and
+  // swapping `path` to a gateway path the local preview can't read. Images are
+  // small and still byte-upload at submit via image.attach_bytes.
+  const eagerlyUploadAttachment = useCallback(
+    async (sessionId: string, attachment: ComposerAttachment) => {
+      const remote = $connection.get()?.mode === 'remote'
+
+      setComposerAttachmentUploadState(attachment.id, 'uploading')
+
+      try {
+        // Update-only: if the user removed the chip while this was uploading,
+        // don't resurrect it — just drop the staged result on the floor.
+        updateComposerAttachment(await uploadComposerAttachment(attachment, { remote, requestGateway, sessionId }))
+      } catch (err) {
+        // Leave the chip in place so submit-time sync can retry (or the user can
+        // remove it) and flag the card; also toast so a hard failure (unreadable
+        // file, gateway perms) isn't swallowed while the user keeps typing.
+        setComposerAttachmentUploadState(attachment.id, 'error')
+        notifyError(err, copy.dropFiles)
+      }
+    },
+    [copy.dropFiles, requestGateway]
+  )
+
+  const composerAttachments = useStore($composerAttachments)
+
+  useEffect(() => {
+    if (!activeSessionId) {
+      return
+    }
+
+    for (const attachment of composerAttachments) {
+      const needsUpload =
+        attachment.kind === 'file' &&
+        Boolean(attachment.path) &&
+        !attachment.attachedSessionId &&
+        !attachment.uploadState &&
+        !eagerUploadInFlight.current.has(attachment.id)
+
+      if (!needsUpload) {
+        continue
+      }
+
+      const task = eagerlyUploadAttachment(activeSessionId, attachment).finally(() =>
+        eagerUploadInFlight.current.delete(attachment.id)
+      )
+
+      eagerUploadInFlight.current.set(attachment.id, task)
+    }
+  }, [activeSessionId, composerAttachments, eagerlyUploadAttachment])
+
+  const submitPromptText = useSubmitPrompt({
+    activeSessionId,
+    activeSessionIdRef,
+    busyRef,
+    copy,
+    createBackendSessionForSend,
+    requestGateway,
+    selectedStoredSessionIdRef,
+    syncAttachmentsForSubmit,
+    updateSessionState
+  })
+
+  // Queue a handoff of this session to a messaging platform and watch it to
+  // a terminal state. We only write the request through the gateway; the
+  // separate `hermes gateway` process performs the actual transfer, so we
+  // poll `handoff.state` (mirror of the CLI's block-poll) for the result.
+  const handoffSession = useCallback(
+    async (
+      platform: string,
+      options?: { onProgress?: (state: string) => void; sessionId?: string }
+    ): Promise<HandoffResult> => {
+      const sid = options?.sessionId || activeSessionIdRef.current
+
+      if (!sid) {
+        return { error: copy.sessionUnavailable, ok: false }
+      }
+
+      const target = platform.trim().toLowerCase()
+
+      if (!target) {
+        return { error: copy.handoff.failed(''), ok: false }
+      }
+
+      try {
+        options?.onProgress?.('pending')
+        await requestGateway<HandoffRequestResponse>('handoff.request', {
+          platform: target,
+          session_id: sid
+        })
+      } catch (err) {
+        return { error: inlineErrorMessage(err, copy.handoff.failed(target)), ok: false }
+      }
+
+      const deadline = Date.now() + 60_000
+      let lastState = 'pending'
+
+      while (Date.now() < deadline) {
+        await delay(800)
+
+        let record: HandoffStateResponse
+
+        try {
+          record = await requestGateway<HandoffStateResponse>('handoff.state', { session_id: sid })
+        } catch {
+          continue
+        }
+
+        const state = record.state || 'pending'
+
+        if (state !== lastState) {
+          options?.onProgress?.(state)
+          lastState = state
+        }
+
+        if (state === 'completed') {
+          appendSessionTextMessage(sid, 'system', copy.handoff.systemNote(target))
+          notify({ kind: 'success', message: copy.handoff.success(target) })
+
+          return { ok: true }
+        }
+
+        if (state === 'failed') {
+          return { error: record.error || copy.handoff.failed(target), ok: false }
+        }
+      }
+
+      const cleanup = await requestGateway<HandoffFailResponse>('handoff.fail', {
+        error: copy.handoff.timedOut,
+        session_id: sid
+      }).catch(() => null)
+
+      if (cleanup?.state === 'completed') {
+        appendSessionTextMessage(sid, 'system', copy.handoff.systemNote(target))
+        notify({ kind: 'success', message: copy.handoff.success(target) })
+
+        return { ok: true }
+      }
+
+      return { error: copy.handoff.timedOut, ok: false }
+    },
+    [activeSessionIdRef, appendSessionTextMessage, copy, requestGateway]
+  )
+
+  const executeSlashCommand = useSlashCommand({
+    activeSessionIdRef,
+    appendSessionTextMessage,
+    branchCurrentSession,
+    busyRef,
+    copy,
+    createBackendSessionForSend,
+    handleSkinCommand,
+    handoffSession,
+    refreshSessions,
+    requestGateway,
+    resumeStoredSession,
+    startFreshSessionDraft,
+    submitPromptText
+  })
+
+  const submitText = useCallback(
+    async (rawText: string, options?: SubmitTextOptions) => {
+      const visibleText = rawText.trim()
+      const attachments = options?.attachments ?? $composerAttachments.get()
+
+      if (!attachments.length && SLASH_COMMAND_RE.test(visibleText)) {
+        triggerHaptic('selection')
+        await executeSlashCommand(visibleText)
+
+        return true
+      }
+
+      return await submitPromptText(rawText, options)
+    },
+    [executeSlashCommand, submitPromptText]
+  )
+
+  const transcribeVoiceAudio = useCallback(
+    async (audio: Blob) => {
+      if (!sttEnabled) {
+        throw new Error(copy.sttDisabled)
+      }
+
+      const dataUrl = await blobToDataUrl(audio)
+      const result = await transcribeAudio(dataUrl, audio.type)
+
+      return result.transcript
+    },
+    [copy.sttDisabled, sttEnabled]
+  )
+
+  const cancelRun = useCallback(async () => {
+    const sessionId = activeSessionId || activeSessionIdRef.current
+
+    const releaseBusy = () => {
+      setMutableRef(busyRef, false)
+      setBusy(false)
+    }
+
+    setAwaitingResponse(false)
+
+    const finalizeMessages = (messages: ChatMessage[], streamId?: string | null) =>
+      messages
+        .filter(message => !((message.pending || message.id === streamId) && !chatMessageText(message).trim()))
+        .map(message => (message.pending || message.id === streamId ? { ...message, pending: false } : message))
+
+    if (!sessionId) {
+      releaseBusy()
+      setMessages(finalizeMessages($messages.get()))
+
+      return
+    }
+
+    updateSessionState(sessionId, state => {
+      const streamId = state.streamId
+      const messages = finalizeMessages(state.messages, streamId)
+
+      return {
+        ...state,
+        messages,
+        busy: false,
+        awaitingResponse: false,
+        streamId: null,
+        pendingBranchGroup: null,
+        needsInput: false,
+        interrupted: true
+      }
+    })
+
+    clearSessionTodos(sessionId)
+    clearSessionSubagents(sessionId)
+    resetSessionBackground(sessionId)
+    // Stop ends the turn, so the gateway is no longer blocked on any prompt it
+    // raised. Drop this session's pending clarify / approval / sudo / secret so
+    // a dead panel (and the sidebar "needs input" dot) can't linger and accept
+    // an answer the backend will reject.
+    clearAllPrompts(sessionId)
+    clearClarifyRequest(undefined, sessionId)
+
+    try {
+      await requestGateway('session.interrupt', { session_id: sessionId })
+      releaseBusy()
+    } catch (err) {
+      let stopError = err
+
+      if (isSessionNotFoundError(err) && selectedStoredSessionIdRef.current) {
+        try {
+          const resumed = await requestGateway<{ session_id: string }>('session.resume', {
+            session_id: selectedStoredSessionIdRef.current
+          })
+
+          const recoveredId = resumed?.session_id
+
+          if (recoveredId) {
+            activeSessionIdRef.current = recoveredId
+            await requestGateway('session.interrupt', { session_id: recoveredId })
+            releaseBusy()
+
+            return
+          }
+        } catch (resumeErr) {
+          stopError = resumeErr
+        }
+      }
+
+      releaseBusy()
+      notifyError(stopError, copy.stopFailed)
+    }
+  }, [
+    activeSessionId,
+    activeSessionIdRef,
+    busyRef,
+    copy.stopFailed,
+    requestGateway,
+    selectedStoredSessionIdRef,
+    updateSessionState
+  ])
+
+  // Steer = nudge the live turn without interrupting: the gateway appends the
+  // text to the next tool result so the model reads it on its next iteration
+  // (desktop parity with `/steer`). Returns false on reject (no live tool
+  // window) so the caller can fall back to queueing the words for the next turn.
+  const steerPrompt = useCallback(
+    async (rawText: string): Promise<boolean> => {
+      const text = rawText.trim()
+      const sessionId = activeSessionId || activeSessionIdRef.current
+
+      if (!text || !sessionId) {
+        return false
+      }
+
+      try {
+        const result = await requestGateway<SessionSteerResponse>('session.steer', { session_id: sessionId, text })
+
+        if (result?.status === 'queued') {
+          triggerHaptic('submit')
+          // Inline note (not a toast) so the nudge lives in the transcript next
+          // to the turn it steered. The `steer:` prefix is rendered as a codicon
+          // row by SystemMessage (see STEER_NOTE_RE), same style as slash output.
+          appendSessionTextMessage(sessionId, 'system', `steer:${text}`)
+
+          return true
+        }
+      } catch {
+        // Swallow — caller queues the text so nothing is lost.
+      }
+
+      return false
+    },
+    [activeSessionId, activeSessionIdRef, appendSessionTextMessage, requestGateway]
+  )
+
+  const reloadFromMessage = useCallback(
+    async (parentId: string | null) => {
+      if (!activeSessionId || $busy.get()) {
+        return
+      }
+
+      const messages = $messages.get()
+      const parentIndex = parentId ? messages.findIndex(message => message.id === parentId) : messages.length - 1
+
+      const userIndex =
+        parentIndex >= 0
+          ? [...messages.slice(0, parentIndex + 1)].reverse().findIndex(message => message.role === 'user')
+          : -1
+
+      if (userIndex < 0) {
+        return
+      }
+
+      const absoluteUserIndex = parentIndex - userIndex
+      const userMessage = messages[absoluteUserIndex]
+      const userText = userMessage ? chatMessageText(userMessage).trim() : ''
+
+      if (!userText) {
+        return
+      }
+
+      const targetAssistant =
+        parentId && messages[parentIndex]?.role === 'assistant'
+          ? messages[parentIndex]
+          : messages.slice(absoluteUserIndex + 1).find(message => message.role === 'assistant')
+
+      const branchGroupId = targetAssistant?.branchGroupId ?? branchGroupForUser(userMessage)
+      const truncateBeforeUserOrdinal = visibleUserOrdinal(messages, absoluteUserIndex)
+
+      clearNotifications()
+      updateSessionState(activeSessionId, state => {
+        const nextUserIndex = state.messages.findIndex(
+          (message, index) => index > absoluteUserIndex && message.role === 'user'
+        )
+
+        const end = nextUserIndex < 0 ? state.messages.length : nextUserIndex
+
+        return {
+          ...state,
+          busy: true,
+          awaitingResponse: true,
+          pendingBranchGroup: branchGroupId,
+          sawAssistantPayload: false,
+          interrupted: false,
+          messages: [
+            ...state.messages.slice(0, absoluteUserIndex + 1),
+            ...state.messages
+              .slice(absoluteUserIndex + 1, end)
+              .map(message => (message.role === 'assistant' ? { ...message, branchGroupId, hidden: true } : message))
+          ]
+        }
+      })
+
+      try {
+        await requestGateway('prompt.submit', {
+          session_id: activeSessionId,
+          text: userText,
+          truncate_before_user_ordinal: truncateBeforeUserOrdinal
+        })
+      } catch (err) {
+        updateSessionState(activeSessionId, state => ({
+          ...state,
+          busy: false,
+          awaitingResponse: false
+        }))
+        notifyError(err, copy.regenerateFailed)
+      }
+    },
+    [activeSessionId, copy.regenerateFailed, requestGateway, updateSessionState]
+  )
+
+  // Cursor-style "restore checkpoint": rewind the conversation to a past user
+  // prompt and run it again from there. Reuses the edit composer's rewind
+  // mechanism — `prompt.submit` with `truncate_before_user_ordinal` drops that
+  // user turn and everything after it from the session history, then the same
+  // text is submitted as a fresh turn. Callers confirm before invoking; errors
+  // are rethrown so callers can surface failures. Idle rewinds submit directly:
+  // interrupting an idle agent can leave a stale interrupt flag that cancels the
+  // fresh turn. Live/stuck turns interrupt first, and a raced "session busy"
+  // response interrupts + retries through the shared busy gate.
+  const submitRewindPrompt = useCallback(
+    async (sessionId: string, text: string, truncateOrdinal: number | undefined, interruptFirst: boolean) => {
+      // Only a defined ordinal asks the gateway to rewind; otherwise this is a plain resend.
+      const rewind = truncateOrdinal === undefined ? {} : { truncate_before_user_ordinal: truncateOrdinal }
+      const sendPrompt = () => requestGateway('prompt.submit', { session_id: sessionId, text, ...rewind })
+
+      const stopCurrentTurn = async () => {
+        try {
+          await requestGateway('session.interrupt', { session_id: sessionId })
+        } catch {
+          // Best-effort: the submit below still surfaces a busy gateway on its own.
+        }
+      }
+
+      if (interruptFirst) {
+        await stopCurrentTurn()
+      }
+
+      try {
+        await sendPrompt()
+
+        return
+      } catch (err) {
+        if (!isSessionBusyError(err)) {
+          throw err
+        }
+      }
+
+      // Raced a live turn: interrupt it, then retry through the shared busy gate.
+      await stopCurrentTurn()
+      await withSessionBusyRetry(sendPrompt)
+    },
+    [requestGateway]
+  )
+
+  // Rewind the session to a past user prompt and run it again. Throws when the
+  // prompt can't be resolved or the submit fails (after rolling the UI back).
+  const restoreToMessage = useCallback(
+    async (messageId: string, target?: RestoreMessageTarget) => {
+      const sessionId = activeSessionId || activeSessionIdRef.current
+
+      if (!sessionId) {
+        throw new Error('No active session to restore.')
+      }
+
+      const messages = $messages.get()
+      const idIndex = messages.findIndex(m => m.id === messageId && m.role === 'user')
+
+      const ordinal = target?.userOrdinal ?? undefined
+      const fallbackIndex = ordinal === undefined ? -1 : visibleUserIndexAtOrdinal(messages, ordinal)
+
+      const sourceIndex = idIndex >= 0 ? idIndex : fallbackIndex
+      const source = messages[sourceIndex]
+
+      if (!source || source.role !== 'user') {
+        throw new Error('Could not find the message to restore.')
+      }
+
+      const text = chatMessageText(source).trim() || target?.text?.trim() || ''
+
+      if (!text) {
+        throw new Error('Cannot restore an empty message.')
+      }
+
+      const truncateBeforeUserOrdinal = ordinal ?? visibleUserOrdinal(messages, sourceIndex)
+
+      // The turns we're discarding may have spawned todos and background
+      // processes; they belong to the abandoned timeline, so wipe their status
+      // rows (and kill the live processes) before the fresh run repopulates.
+      clearSessionTodos(sessionId)
+      resetSessionBackground(sessionId)
+      clearPreviewArtifacts(sessionId)
+
+      // Snapshot busy BEFORE the optimistic flip below: read afterwards it is
+      // always true, which would send an interrupt even to an idle session.
+      const wasBusy = busyRef.current || $busy.get()
+      clearNotifications()
+      setMutableRef(busyRef, true)
+      setBusy(true)
+      setAwaitingResponse(true)
+      updateSessionState(sessionId, state => ({
+        ...state,
+        busy: true,
+        awaitingResponse: true,
+        pendingBranchGroup: null,
+        sawAssistantPayload: false,
+        interrupted: false,
+        messages: state.messages.slice(0, sourceIndex + 1)
+      }))
+
+      try {
+        await submitRewindPrompt(sessionId, text, truncateBeforeUserOrdinal, wasBusy)
+      } catch (err) {
+        // The rewind never landed (e.g. the gateway stayed busy past the retry
+        // deadline). Roll the optimistic truncation back to the full original
+        // history so the UI doesn't desync from what's persisted — leaving it
+        // truncated is what made subsequent sends look duplicative.
+        setMutableRef(busyRef, false)
+        setBusy(false)
+        setAwaitingResponse(false)
+        updateSessionState(sessionId, state => ({
+          ...state,
+          busy: false,
+          awaitingResponse: false,
+          messages
+        }))
+        throw err
+      }
+    },
+    [activeSessionId, activeSessionIdRef, busyRef, submitRewindPrompt, updateSessionState]
+  )
+
+  // Replace a past user prompt with edited text and re-run from there. No-ops on
+  // a missing session/source or unchanged text; on failure the optimistic edit is
+  // rolled back and the error is surfaced as a notification instead of thrown.
+  const editMessage = useCallback(
+    async (edited: AppendMessage) => {
+      const sessionId = activeSessionId || activeSessionIdRef.current
+      const sourceId = edited.sourceId || edited.parentId
+      const text = appendText(edited)
+
+      if (!sessionId || !sourceId || !text || edited.role !== 'user') {
+        return
+      }
+
+      const messages = $messages.get()
+      const sourceIndex = messages.findIndex(m => m.id === sourceId)
+      const source = messages[sourceIndex]
+
+      if (!source || source.role !== 'user' || chatMessageText(source).trim() === text) {
+        return
+      }
+
+      // Sending an edit is a revert: rewind to this prompt and re-run with the
+      // new text. It can fire mid-turn, so snapshot busy now — before the
+      // optimistic flip below — and interrupt only a turn that is actually live.
+      const wasBusy = busyRef.current || $busy.get()
+
+      // Failed turn: optimistic user msg never reached the gateway, so truncating
+      // by ordinal would 422. Submit as a plain resend instead.
+      const nextMessage = messages[sourceIndex + 1]
+      const isFailedTurn = nextMessage?.role === 'assistant' && Boolean(nextMessage.error)
+      const editedMessage: ChatMessage = { ...source, parts: [textPart(text)] }
+      const truncateOrdinal = isFailedTurn ? undefined : visibleUserOrdinal(messages, sourceIndex)
+
+      // Editing rewinds the conversation to this prompt — same as restore — so
+      // drop the abandoned timeline's todos/background rows (and kill the live
+      // processes) before the re-run repopulates them.
+      clearSessionTodos(sessionId)
+      resetSessionBackground(sessionId)
+      clearPreviewArtifacts(sessionId)
+
+      clearNotifications()
+      setMutableRef(busyRef, true)
+      setBusy(true)
+      setAwaitingResponse(true)
+      updateSessionState(sessionId, state => ({
+        ...state,
+        busy: true,
+        awaitingResponse: true,
+        pendingBranchGroup: null,
+        sawAssistantPayload: false,
+        interrupted: false,
+        messages: [...state.messages.slice(0, sourceIndex), editedMessage]
+      }))
+
+      const isStaleTargetError = (err: unknown) =>
+        /no longer in session history|not in session history/i.test(err instanceof Error ? err.message : String(err))
+
+      try {
+        await submitRewindPrompt(sessionId, text, truncateOrdinal, wasBusy)
+      } catch (err) {
+        let surfaced = err
+
+        if (!isFailedTurn && isStaleTargetError(err)) {
+          try {
+            // Any live turn was interrupted on the first attempt — submit as a plain resend.
+            await submitRewindPrompt(sessionId, text, undefined, false)
+
+            return
+          } catch (retryErr) {
+            surfaced = retryErr
+          }
+        }
+
+        // Roll the optimistic edit/truncation back to the original history so the
+        // UI stays in sync with what's persisted instead of stranding a partial
+        // timeline.
+        setMutableRef(busyRef, false)
+        setBusy(false)
+        setAwaitingResponse(false)
+        updateSessionState(sessionId, state => ({ ...state, busy: false, awaitingResponse: false, messages }))
+        notifyError(surfaced, copy.editFailed)
+      }
+    },
+    [activeSessionId, activeSessionIdRef, busyRef, copy.editFailed, submitRewindPrompt, updateSessionState]
+  )
+
+  // Sync `hidden` on branch-grouped assistant messages with the ids the thread shows.
+  const handleThreadMessagesChange = useCallback(
+    (nextMessages: readonly ThreadMessage[]) => {
+      const sessionId = activeSessionIdRef.current
+
+      if (!sessionId) {
+        return
+      }
+
+      const shownIds = new Set(nextMessages.map(entry => entry.id))
+
+      updateSessionState(sessionId, state => {
+        // Walk once; remember whether any tracked message actually flipped.
+        let dirty = false
+
+        const synced = state.messages.map(entry => {
+          const tracked = entry.role === 'assistant' && Boolean(entry.branchGroupId)
+          const nextHidden = !shownIds.has(entry.id)
+
+          if (!tracked || entry.hidden === nextHidden) {
+            return entry
+          }
+
+          dirty = true
+
+          return { ...entry, hidden: nextHidden }
+        })
+
+        return dirty ? { ...state, messages: synced } : state
+      })
+    },
+    [activeSessionIdRef, updateSessionState]
+  )
+
+  return {
+    cancelRun,
+    editMessage,
+    handleThreadMessagesChange,
+    handoffSession,
+    reloadFromMessage,
+    restoreToMessage,
+    steerPrompt,
+    submitText,
+    transcribeVoiceAudio
+  }
+}
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions/slash.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions/slash.ts
@@ -0,0 +1,614 @@
+import { type MutableRefObject, useCallback } from 'react'
+
+import { getProfiles } from '@/hermes'
+import type { Translations } from '@/i18n'
+import { type ChatMessage } from '@/lib/chat-messages'
+import { parseCommandDispatch, parseSlashCommand, sessionTitle } from '@/lib/chat-runtime'
+import {
+  type CommandsCatalogLike,
+  type DesktopActionId,
+  type DesktopPickerId,
+  desktopSlashUnavailableMessage,
+  isDesktopSlashCommand,
+  resolveDesktopCommand
+} from '@/lib/desktop-slash-commands'
+import { setSessionYolo } from '@/lib/yolo-session'
+import { openCommandPalettePage } from '@/store/command-palette'
+import { type ComposerAttachment, setComposerDraft } from '@/store/composer'
+import { notify, notifyError } from '@/store/notifications'
+import { setPetScale } from '@/store/pet-gallery'
+import { $petGenInput, openPetGenerate } from '@/store/pet-generate'
+import { $activeGatewayProfile, $newChatProfile, ensureGatewayProfile, normalizeProfileKey } from '@/store/profile'
+import {
+  $connection,
+  $sessions,
+  $yoloActive,
+  setModelPickerOpen,
+  setSessionPickerOpen,
+  setSessions,
+  setYoloActive
+} from '@/store/session'
+
+import type { BrowserManageResponse, SessionTitleResponse, SlashExecResponse } from '../../../types'
+
+import { type GatewayRequest, isSessionIdCandidate, renderCommandsCatalog, slashStatusText } from './utils'
+
+/** Everything a slash handler needs about the invocation it's serving. */
+interface SlashActionCtx {
+  arg: string // `arg` as returned by parseSlashCommand
+  command: string // the trimmed command text that was dispatched
+  name: string // `name` as returned by parseSlashCommand; looked up as `/${name}`
+  recordInput: boolean // true → inline output is wrapped via slashStatusText(command, …)
+  sessionHint?: string // preferred session id; tried before the active session
+}
+
+interface SlashCommandDeps {
+  activeSessionIdRef: MutableRefObject<string | null> // fallback target when no session hint is given
+  appendSessionTextMessage: (sessionId: string, role: ChatMessage['role'], text: string) => void
+  branchCurrentSession: () => Promise<boolean> // invoked by the `branch` action
+  busyRef: MutableRefObject<boolean> // checked before submitting a dispatched message
+  copy: Translations['desktop']
+  createBackendSessionForSend: (preview?: string | null) => Promise<string | null> // last resort in ensureSessionId
+  handleSkinCommand: (arg: string) => string // returned text is shown to the user by the `skin` action
+  handoffSession: (
+    platform: string,
+    options?: { onProgress?: (state: string) => void; sessionId?: string }
+  ) => Promise<{ ok: boolean; error?: string }>
+  refreshSessions: () => Promise<void>
+  requestGateway: GatewayRequest
+  resumeStoredSession: (storedSessionId: string) => Promise<void> | void
+  startFreshSessionDraft: () => void // invoked by the `new` action
+  submitPromptText: (
+    rawText: string,
+    options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean }
+  ) => Promise<boolean>
+}
+
+/** The /slash command dispatcher, extracted from usePromptActions. */
+export function useSlashCommand(deps: SlashCommandDeps) {
+  const {
+    activeSessionIdRef,
+    appendSessionTextMessage,
+    branchCurrentSession,
+    busyRef,
+    copy,
+    createBackendSessionForSend,
+    handleSkinCommand,
+    handoffSession,
+    refreshSessions,
+    requestGateway,
+    resumeStoredSession,
+    startFreshSessionDraft,
+    submitPromptText
+  } = deps
+
+  return useCallback(
+    async (rawCommand: string, options?: { sessionId?: string; recordInput?: boolean }) => {
+      const ensureSessionId = async (sessionHint?: string) =>
+        sessionHint || activeSessionIdRef.current || (await createBackendSessionForSend())
+
+      // Resolve the target session plus a writer for inline slash output, or
+      // notify + return null when none can be created. Folds the ensure / bail /
+      // build-renderSlashOutput boilerplate every exec-style handler repeats.
+      const withSlashOutput = async (
+        ctx: SlashActionCtx
+      ): Promise<{ render: (text: string) => void; sessionId: string } | null> => {
+        const sessionId = await ensureSessionId(ctx.sessionHint)
+
+        if (sessionId) {
+          // Recorded invocations get their output wrapped with the command text.
+          const format = (text: string) => (ctx.recordInput ? slashStatusText(ctx.command, text) : text)
+
+          return { render: text => appendSessionTextMessage(sessionId, 'system', format(text)), sessionId }
+        }
+
+        notify({ kind: 'error', title: copy.sessionUnavailable, message: copy.createSessionFailed })
+
+        return null
+      }
+
+      // `exec` commands (and unknown skill / quick commands the backend owns)
+      // run on the gateway and render their text output inline. This is the only
+      // path that talks to slash.exec / command.dispatch.
+      async function runExec(ctx: SlashActionCtx): Promise<void> {
+        const { arg, command, name } = ctx
+        const resolved = await withSlashOutput(ctx)
+
+        if (!resolved) {
+          return
+        }
+
+        const { render: renderSlashOutput, sessionId } = resolved
+        // Names that fail isDesktopSlashCommand get an explanation and never reach the gateway.
+        if (!isDesktopSlashCommand(name)) {
+          renderSlashOutput(desktopSlashUnavailableMessage(name) || `/${name} is not available in the desktop app.`)
+
+          return
+        }
+
+        const handleDispatch = async (
+          dispatch: NonNullable<ReturnType<typeof parseCommandDispatch>>
+        ): Promise<void> => {
+          if (dispatch.type === 'exec' || dispatch.type === 'plugin') {
+            renderSlashOutput(dispatch.output ?? '(no output)')
+
+            return
+          }
+
+          if (dispatch.type === 'alias') {
+            await runSlash(`/${dispatch.target}${arg ? ` ${arg}` : ''}`, sessionId, false)
+
+            return
+          }
+
+          // send / prefill carry an optional `notice` (e.g. "⊙ Goal set …")
+          // that the backend wants shown as a system line before the message
+          // is acted on. Mirrors the TUI's createSlashHandler — without it a
+          // `/goal <text>` looked like it did nothing.
+          if ((dispatch.type === 'send' || dispatch.type === 'prefill') && dispatch.notice?.trim()) {
+            renderSlashOutput(dispatch.notice.trim())
+          }
+
+          const message = ('message' in dispatch ? dispatch.message : '')?.trim() ?? ''
+
+          // /undo returns a prefill directive: drop the backed-up message into
+          // the composer for editing instead of submitting it immediately.
+          if (dispatch.type === 'prefill') {
+            if (message) {
+              setComposerDraft(message)
+            }
+
+            return
+          }
+
+          if (!message) {
+            renderSlashOutput(
+              `/${name}: ${dispatch.type === 'skill' ? 'skill payload missing message' : 'empty message'}`
+            )
+
+            return
+          }
+
+          if (dispatch.type === 'skill') {
+            renderSlashOutput(`⚡ loading skill: ${dispatch.name}`)
+          }
+
+          if (busyRef.current) {
+            renderSlashOutput('session busy — /interrupt the current turn before sending this command')
+
+            return
+          }
+
+          await submitPromptText(message)
+        }
+        // First attempt: slash.exec; its result is either a dispatch directive or plain output.
+        try {
+          const result = await requestGateway<unknown>('slash.exec', {
+            session_id: sessionId,
+            command: command.replace(/^\/+/, '')
+          })
+
+          const dispatch = parseCommandDispatch(result)
+
+          if (dispatch) {
+            await handleDispatch(dispatch)
+
+            return
+          }
+
+          const output = result && typeof result === 'object' ? (result as SlashExecResponse) : null
+          const body = output?.output || `/${name}: no output`
+          renderSlashOutput(output?.warning ? `warning: ${output.warning}\n${body}` : body)
+
+          return
+        } catch {
+          // Fall back to command.dispatch for skill/send/alias directives. NOTE(review): also catches handleDispatch errors.
+        }
+        // Second attempt: command.dispatch must yield a directive, otherwise an error line is rendered.
+        try {
+          const dispatch = parseCommandDispatch(
+            await requestGateway<unknown>('command.dispatch', { session_id: sessionId, name, arg })
+          )
+
+          if (!dispatch) {
+            renderSlashOutput('error: invalid response: command.dispatch')
+
+            return
+          }
+
+          await handleDispatch(dispatch)
+        } catch (err) {
+          renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`)
+        }
+      }
+
+      // One handler per `action` command. Adding a desktop-native command is a
+      // registry row in desktop-slash-commands.ts plus an entry here — never a
+      // new branch in a dispatch ladder.
+      const actionHandlers: Record<DesktopActionId, (ctx: SlashActionCtx) => Promise<void>> = {
+        new: async () => {
+          startFreshSessionDraft()
+        },
+        branch: async () => {
+          await branchCurrentSession()
+        },
+        // /yolo maps to the status-bar YOLO control — a per-session approval
+        // bypass, same scope as the TUI's Shift+Tab. With no session yet we arm
+        // it locally; the session-create path applies it on the first message.
+        yolo: async ({ sessionHint }) => {
+          const sid = sessionHint || activeSessionIdRef.current
+          const next = !$yoloActive.get()
+
+          if (!sid) {
+            setYoloActive(next)
+            notify({ kind: 'success', message: next ? copy.yoloArmed : copy.yoloOff })
+
+            return
+          }
+
+          try {
+            const active = await setSessionYolo(requestGateway, sid, next)
+            appendSessionTextMessage(sid, 'system', copy.yoloSystem(active))
+          } catch {
+            notify({ kind: 'error', title: copy.yoloTitle, message: copy.yoloToggleFailed })
+          }
+        },
+        // /handoff hands this session to a messaging platform. The platform is
+        // completed inline in the slash popover (backend _handoff_completions),
+        // so there is no overlay: `/handoff <platform>` runs the desktop's own
+        // handoff RPC. cli_only on the backend, so it must not reach slash.exec.
+        handoff: async ({ arg, command, recordInput, sessionHint }) => {
+          const platform = arg.trim()
+
+          if (!platform) {
+            notify({ kind: 'success', message: copy.handoff.pickPlatform })
+
+            return
+          }
+
+          const sid = sessionHint || activeSessionIdRef.current
+
+          if (!sid) {
+            notify({ kind: 'error', title: copy.sessionUnavailable, message: copy.createSessionFailed })
+
+            return
+          }
+
+          const result = await handoffSession(platform, { sessionId: sid })
+
+          if (!result.ok && result.error) {
+            appendSessionTextMessage(sid, 'system', recordInput ? slashStatusText(command, result.error) : result.error)
+          }
+        },
+        // /profile selects which profile new chats open in — no app relaunch.
+        // A profile is per-session now, so an existing thread can't change its
+        // profile mid-stream; `/profile <name>` points the next new chat (and
+        // the current empty draft) at that profile's backend.
+        profile: async ({ arg }) => {
+          const target = arg.trim()
+          const current = normalizeProfileKey($activeGatewayProfile.get())
+
+          if (!target) {
+            notify({ kind: 'success', message: copy.profileStatus(current) })
+
+            return
+          }
+
+          try {
+            const { profiles } = await getProfiles()
+            const match = profiles.find(profile => profile.name === target)
+
+            if (!match) {
+              notify({
+                kind: 'error',
+                title: copy.unknownProfile,
+                message: copy.noProfileNamed(target, profiles.map(profile => profile.name).join(', '))
+              })
+
+              return
+            }
+
+            const key = normalizeProfileKey(match.name)
+
+            $newChatProfile.set(key)
+            await ensureGatewayProfile(key)
+            notify({ kind: 'success', message: copy.newChatsProfile(match.name) })
+          } catch (err) {
+            notifyError(err, copy.setProfileFailed)
+          }
+        },
+        skin: async ({ arg, command, recordInput, sessionHint }) => {
+          const sid = sessionHint || activeSessionIdRef.current
+          const message = handleSkinCommand(arg)
+
+          // No session to print into yet — surface it as a toast instead of
+          // spinning up a backend session just to change the theme.
+          if (!sid) {
+            notify({ kind: 'success', message })
+
+            return
+          }
+
+          appendSessionTextMessage(sid, 'system', recordInput ? slashStatusText(command, message) : message)
+        },
+        // /title <name> renames via the gateway's session.title RPC — the same
+        // path the TUI uses, NOT REST renameSession (which 404s on runtime ids)
+        // nor the slash worker (whose DB write can silently fail). Bare /title
+        // shows the current title, which the worker owns, so delegate to exec.
+        title: async ctx => {
+          if (!ctx.arg) {
+            await runExec(ctx)
+
+            return
+          }
+
+          const resolved = await withSlashOutput(ctx)
+
+          if (!resolved) {
+            return
+          }
+
+          const { render: renderSlashOutput, sessionId } = resolved
+          const { arg } = ctx
+
+          try {
+            const result = await requestGateway<SessionTitleResponse>('session.title', {
+              session_id: sessionId,
+              title: arg
+            })
+
+            const finalTitle = (result?.title || arg).trim()
+            const queued = result?.pending === true
+
+            setSessions(prev => prev.map(s => (s.id === sessionId ? { ...s, title: finalTitle || null } : s)))
+            await refreshSessions().catch(() => undefined)
+            renderSlashOutput(
+              finalTitle
+                ? `Session title set: ${finalTitle}${queued ? ' (queued while session initializes)' : ''}`
+                : 'Session title cleared.'
+            )
+          } catch (err) {
+            renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`)
+          }
+        },
+        help: async ctx => {
+          const resolved = await withSlashOutput(ctx)
+
+          if (!resolved) {
+            return
+          }
+
+          const { render: renderSlashOutput, sessionId } = resolved
+
+          try {
+            const catalog = await requestGateway<CommandsCatalogLike>('commands.catalog', { session_id: sessionId })
+
+            renderSlashOutput(renderCommandsCatalog(catalog, copy))
+          } catch (err) {
+            renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`)
+          }
+        },
+        // /hatch opens the pet generator overlay (the desktop's rich, multi-step
+        // generate→pick→hatch→adopt flow). A typed description seeds the prompt
+        // so `/hatch a cyber fox` lands on the composer step prefilled.
+        hatch: async ({ arg }) => {
+          const concept = arg.trim()
+
+          if (concept) {
+            $petGenInput.set(concept)
+          }
+
+          openPetGenerate()
+        },
+        pet: async ctx => {
+          const [sub = '', rawValue = ''] = ctx.arg.trim().split(/\s+/)
+          const lower = sub.toLowerCase()
+          // Gallery aliases open the pets palette page; anything unhandled falls to runExec.
+          if (lower === 'list' || lower === 'gallery' || lower === 'browse' || lower === 'all') {
+            openCommandPalettePage('pets')
+
+            return
+          }
+
+          // `/pet scale <n>` resizes the pet locally via the store (no slash-worker
+          // round-trip); non-finite or non-positive factors get the usage hint.
+          if (lower === 'scale') {
+            const value = Number(rawValue)
+
+            if (!rawValue || !Number.isFinite(value) || value <= 0) {
+              const resolved = await withSlashOutput(ctx)
+              resolved?.render('usage: /pet scale <factor>  (e.g. /pet scale 0.5)')
+
+              return
+            }
+
+            setPetScale(requestGateway, value)
+
+            return
+          }
+
+          await runExec(ctx)
+        },
+        // /browser connect|disconnect|status manages the live CDP connection on
+        // the gateway host, mirroring the TUI's browser.manage RPC. It mutates
+        // BROWSER_CDP_URL (and may launch Chrome) in the gateway process — only
+        // meaningful when that process runs on this machine, so it's gated to
+        // local connections. A remote gateway would act on the wrong host.
+        browser: async ctx => {
+          const resolved = await withSlashOutput(ctx)
+
+          if (!resolved) {
+            return
+          }
+
+          const { render: renderSlashOutput, sessionId } = resolved
+
+          if ($connection.get()?.mode === 'remote') {
+            renderSlashOutput(
+              '/browser manages a Chromium-family browser on the gateway host — only available when connected to a local gateway.'
+            )
+
+            return
+          }
+
+          const [rawAction = 'status', ...rest] = ctx.arg.trim().split(/\s+/).filter(Boolean)
+          const cmdAction = rawAction.toLowerCase()
+
+          if (!['connect', 'disconnect', 'status'].includes(cmdAction)) {
+            renderSlashOutput(
+              'usage: /browser [connect|disconnect|status] [url] · persistent: set browser.cdp_url in config.yaml'
+            )
+
+            return
+          }
+
+          const url = cmdAction === 'connect' ? rest.join(' ').trim() || 'http://127.0.0.1:9222' : undefined
+
+          if (url) {
+            renderSlashOutput(`checking Chromium-family browser remote debugging at ${url}...`)
+          }
+
+          try {
+            const result = await requestGateway<BrowserManageResponse>('browser.manage', {
+              action: cmdAction,
+              session_id: sessionId,
+              ...(url && { url })
+            })
+
+            // Without a streamed session subscription, the gateway bundles its
+            // progress lines into `messages` — flush them inline.
+            result?.messages?.forEach(message => renderSlashOutput(message))
+
+            if (cmdAction === 'status') {
+              renderSlashOutput(
+                result?.connected
+                  ? `browser connected: ${result.url || '(url unavailable)'}`
+                  : 'browser not connected (try /browser connect <url> or set browser.cdp_url in config.yaml)'
+              )
+
+              return
+            }
+
+            if (cmdAction === 'disconnect') {
+              renderSlashOutput('browser disconnected')
+
+              return
+            }
+
+            if (result?.connected) {
+              renderSlashOutput('Browser connected to live Chromium-family browser via CDP')
+              renderSlashOutput(`Endpoint: ${result.url || '(url unavailable)'}`)
+              renderSlashOutput('next browser tool call will use this CDP endpoint')
+            }
+          } catch (err) {
+            renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`)
+          }
+        }
+      }
+
+      // Picker commands open a desktop overlay; a typed arg is resolved by that
+      // picker so the command never dead-ends or falls through to the backend.
+      const openPicker = async (pickerId: DesktopPickerId, ctx: SlashActionCtx): Promise<void> => {
+        const typed = ctx.arg.trim()
+
+        if (pickerId === 'model') {
+          // Bare `/model` opens the overlay; `/model <name>` still runs on the backend.
+          if (typed) {
+            await runExec(ctx)
+          } else {
+            setModelPickerOpen(true)
+          }
+
+          return
+        }
+
+        // Everything else is the session picker (/resume, /sessions, /switch).
+        if (!typed) {
+          setSessionPickerOpen(true)
+
+          return
+        }
+
+        const needle = typed.toLowerCase()
+        const known = $sessions.get()
+
+        // Exact id first, then title substring, then preview substring.
+        const hit =
+          known.find(entry => entry.id === typed) ||
+          known.find(entry => sessionTitle(entry).toLowerCase().includes(needle)) ||
+          known.find(entry => (entry.preview ?? '').toLowerCase().includes(needle))
+
+        if (hit) {
+          await resumeStoredSession(hit.id)
+
+          return
+        }
+
+        // No listed session matched: try the raw text as a session id, else report.
+        if (isSessionIdCandidate(typed)) {
+          await resumeStoredSession(typed)
+
+          return
+        }
+
+        notify({ kind: 'error', message: copy.resumeFailed })
+      }
+
+      // The whole dispatcher: resolve the command's desktop surface, then act on
+      // its kind. No per-command ladder — behavior lives in the registry.
+      async function runSlash(commandText: string, sessionHint?: string, recordInput = true): Promise<void> {
+        const command = commandText.trim()
+        const parsed = parseSlashCommand(command)
+
+        // No command name parsed: leave a hint in the session (if one resolves) and stop.
+        if (!parsed.name) {
+          const target = await ensureSessionId(sessionHint)
+
+          if (target) {
+            appendSessionTextMessage(target, 'system', copy.emptySlashCommand)
+          }
+
+          return
+        }
+
+        const { arg, name } = parsed
+        const ctx: SlashActionCtx = { arg, command, name, recordInput, sessionHint }
+        const surface = resolveDesktopCommand(`/${name}`)?.surface
+
+        if (surface?.kind === 'picker') {
+          return openPicker(surface.picker, ctx)
+        }
+
+        if (surface?.kind === 'action') {
+          return actionHandlers[surface.action](ctx)
+        }
+
+        if (surface?.kind !== 'unavailable') {
+          // exec surface, or an unknown skill / quick command owned by the backend.
+          return runExec(ctx)
+        }
+
+        // Registered as unavailable on desktop: explain inline instead of dispatching.
+        const resolved = await withSlashOutput(ctx)
+        resolved?.render(desktopSlashUnavailableMessage(name) || `/${name} is not available in the desktop app.`)
+      }
+
+      await runSlash(rawCommand, options?.sessionId, options?.recordInput ?? true)
+    },
+    [
+      activeSessionIdRef,
+      appendSessionTextMessage,
+      branchCurrentSession,
+      busyRef,
+      copy,
+      createBackendSessionForSend,
+      handleSkinCommand,
+      handoffSession,
+      refreshSessions,
+      requestGateway,
+      resumeStoredSession,
+      startFreshSessionDraft,
+      submitPromptText
+    ]
+  )
+}
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions/submit.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions/submit.ts
@@ -0,0 +1,342 @@
+import { type MutableRefObject, useCallback } from 'react'
+
+import type { Translations } from '@/i18n'
+import { type ChatMessage, textPart } from '@/lib/chat-messages'
+import { optimisticAttachmentRef } from '@/lib/chat-runtime'
+import { setMutableRef } from '@/lib/mutable-ref'
+import {
+  $composerAttachments,
+  clearComposerAttachments,
+  type ComposerAttachment,
+  terminalContextBlocksFromDraft
+} from '@/store/composer'
+import { clearNotifications, notify, notifyError } from '@/store/notifications'
+import { requestDesktopOnboarding } from '@/store/onboarding'
+import { setAwaitingResponse, setBusy, setMessages } from '@/store/session'
+
+import type { ClientSessionState } from '../../../types'
+
+import {
+  _submitInFlight,
+  type GatewayRequest,
+  inlineErrorMessage,
+  isProviderSetupError,
+  isSessionBusyError,
+  isSessionNotFoundError,
+  type SubmitTextOptions,
+  withSessionBusyRetry
+} from './utils'
+
+/** Everything useSubmitPrompt needs from its host hook (also listed as the callback's dependencies). */
+interface SubmitPromptDeps {
+  // Session the submit starts from; when null, a backend session is created before sending.
+  activeSessionId: string | null
+  // Overwritten with the fresh id when a "session not found" submit is recovered via session.resume.
+  activeSessionIdRef: MutableRefObject<string | null>
+  // Read to reject a user-path submit while busy; set true at submit start and false on release.
+  busyRef: MutableRefObject<boolean>
+  // Localized desktop strings used for notifications and inline error text.
+  copy: Translations['desktop']
+  // Called with the trimmed prompt text when there is no active session; a null
+  // result or a throw aborts the submit.
+  createBackendSessionForSend: (preview?: string | null) => Promise<string | null>
+  // Gateway call; used here for 'prompt.submit' and 'session.resume'.
+  requestGateway: GatewayRequest
+  // Stored session id: preferred key for the in-flight submit lock, forwarded to
+  // updateSessionState, and the id that gets resumed when the live session is gone.
+  selectedStoredSessionIdRef: MutableRefObject<string | null>
+  // Awaited before sending; the attachments it returns supply the refs used in
+  // both the prompt text and the optimistic message.
+  syncAttachmentsForSubmit: (
+    sessionId: string,
+    attachments: ComposerAttachment[],
+    options?: { updateComposerAttachments?: boolean }
+  ) => Promise<ComposerAttachment[]>
+  // Applies `updater` to the client-side state kept for `sessionId`.
+  updateSessionState: (
+    sessionId: string,
+    updater: (state: ClientSessionState) => ClientSessionState,
+    storedSessionId?: string | null
+  ) => ClientSessionState
+}
+
+/**
+ * The prompt submit pipeline, extracted from usePromptActions.
+ *
+ * Returns a memoized callback `(rawText, options?) => Promise<boolean>`. It resolves
+ * true once `prompt.submit` has been accepted by the gateway, and false when the
+ * submit was skipped (nothing to send, busy, another submit already in flight) or
+ * failed. Failures from session creation, attachment sync and the gateway are
+ * reported here (inline error message, toast, or onboarding request) rather than
+ * rethrown to the caller.
+ */
+export function useSubmitPrompt(deps: SubmitPromptDeps) {
+  const {
+    activeSessionId,
+    activeSessionIdRef,
+    busyRef,
+    copy,
+    createBackendSessionForSend,
+    requestGateway,
+    selectedStoredSessionIdRef,
+    syncAttachmentsForSubmit,
+    updateSessionState
+  } = deps
+
+  return useCallback(
+    async (rawText: string, options?: SubmitTextOptions) => {
+      const visibleText = rawText.trim()
+      const usingComposerAttachments = !options?.attachments
+
+      // Drop undefined/null holes a session switch or draft restore can leave in
+      // the attachments array (same bug class as AttachmentList #49624). Without
+      // this, the sibling iterations below (a.kind / a.label / a.refText, and the
+      // sync step) throw "Cannot read properties of undefined (reading 'refText')"
+      // and break the chat surface.
+      const attachments = (options?.attachments ?? $composerAttachments.get()).filter((a): a is ComposerAttachment =>
+        Boolean(a)
+      )
+
+      const terminalContextBlocks = terminalContextBlocksFromDraft(rawText).join('\n\n')
+      const hasImage = attachments.some(a => a.kind === 'image')
+
+      // Refs are recomputed after sync (file.attach rewrites @file: refs to
+      // workspace-relative paths the remote gateway can resolve). Seed the
+      // optimistic message with the pre-sync refs, then rewrite once synced.
+      // Images use their base64 preview so the thumbnail renders inline without
+      // a (remote-mode 403-prone) /api/media fetch — see optimisticAttachmentRef.
+      let attachmentRefs = attachments.map(optimisticAttachmentRef).filter((r): r is string => Boolean(r))
+
+      const buildContextText = (atts: ComposerAttachment[]): string => {
+        // atts may be the post-sync array, which can reintroduce holes; filter
+        // before touching a.refText / a.kind.
+        const present = atts.filter((a): a is ComposerAttachment => Boolean(a))
+
+        const contextRefs = present
+          .map(a => a.refText)
+          .filter(Boolean)
+          .join('\n')
+
+        return (
+          [contextRefs, terminalContextBlocks, visibleText].filter(Boolean).join('\n\n') ||
+          (present.some(a => a.kind === 'image') ? 'What do you see in this image?' : '')
+        )
+      }
+
+      // Queue drains fire on the busy→false settle edge, where busyRef (synced
+      // from $busy by a separate effect) may still read true — honoring it would
+      // bounce the drained send. The drain lock serializes them; the user path
+      // keeps the guard so a stray Enter mid-turn can't double-submit.
+      const hasSendable = Boolean(visibleText || terminalContextBlocks || attachments.length || hasImage)
+
+      if (!hasSendable || (!options?.fromQueue && busyRef.current)) {
+        return false
+      }
+
+      // One submit in flight per session — drop any concurrent re-fire so a
+      // stalled turn can't stack the same prompt into multiple real turns.
+      const submitLockKey = selectedStoredSessionIdRef.current || activeSessionId || '__pending_new__'
+
+      if (_submitInFlight.has(submitLockKey)) {
+        return false
+      }
+
+      _submitInFlight.add(submitLockKey)
+      let submitLockReleased = false
+
+      const releaseSubmitLock = () => {
+        if (!submitLockReleased) {
+          submitLockReleased = true
+          _submitInFlight.delete(submitLockKey)
+        }
+      }
+
+      const optimisticId = `user-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
+
+      const buildUserMessage = (): ChatMessage => ({
+        id: optimisticId,
+        role: 'user',
+        parts: [textPart(visibleText || (attachmentRefs.length ? '' : attachments.map(a => a.label).join(', ')))],
+        attachmentRefs
+      })
+
+      const releaseBusy = () => {
+        releaseSubmitLock()
+        setMutableRef(busyRef, false)
+        setBusy(false)
+        setAwaitingResponse(false)
+      }
+
+      // Idempotent optimistic insert — re-running with the resolved sessionId
+      // after createBackendSessionForSend just overwrites with the same id.
+      const seedOptimistic = (sid: string) =>
+        updateSessionState(
+          sid,
+          state => ({
+            ...state,
+            messages: state.messages.some(m => m.id === optimisticId)
+              ? state.messages
+              : [...state.messages, buildUserMessage()],
+            busy: true,
+            awaitingResponse: true,
+            pendingBranchGroup: null,
+            sawAssistantPayload: false,
+            // Fresh submit = new turn — clear any leftover interrupt flag, else
+            // mutateStream/completeAssistantMessage drop every delta of this turn
+            // (what made drained-after-interrupt sends go silent).
+            interrupted: false
+          }),
+          selectedStoredSessionIdRef.current
+        )
+
+      // After sync rewrites refs, refresh the optimistic message in place so the
+      // transcript shows the resolved @file: ref rather than the local path.
+      const rewriteOptimistic = (sid: string) =>
+        updateSessionState(
+          sid,
+          state => ({
+            ...state,
+            messages: state.messages.map(message => (message.id === optimisticId ? buildUserMessage() : message))
+          }),
+          selectedStoredSessionIdRef.current
+        )
+
+      // Undo the optimistic insert: from the global message list while no session
+      // exists yet, otherwise from that session's state (also clearing its busy flags).
+      const dropOptimistic = (sid: null | string) => {
+        if (!sid) {
+          setMessages(current => current.filter(m => m.id !== optimisticId))
+
+          return
+        }
+
+        updateSessionState(
+          sid,
+          state => ({
+            ...state,
+            messages: state.messages.filter(m => m.id !== optimisticId),
+            busy: false,
+            awaitingResponse: false,
+            pendingBranchGroup: null
+          }),
+          selectedStoredSessionIdRef.current
+        )
+      }
+
+      setMutableRef(busyRef, true)
+      setBusy(true)
+      setAwaitingResponse(true)
+      clearNotifications()
+
+      let sessionId: null | string = activeSessionId
+
+      if (sessionId) {
+        seedOptimistic(sessionId)
+      } else {
+        setMessages(current => [...current, buildUserMessage()])
+      }
+
+      if (!sessionId) {
+        try {
+          sessionId = await createBackendSessionForSend(visibleText)
+        } catch (err) {
+          dropOptimistic(null)
+          releaseBusy()
+          notifyError(err, copy.sessionUnavailable)
+
+          return false
+        }
+
+        if (!sessionId) {
+          dropOptimistic(null)
+          releaseBusy()
+          notify({ kind: 'error', title: copy.sessionUnavailable, message: copy.createSessionFailed })
+
+          return false
+        }
+
+        seedOptimistic(sessionId)
+      }
+
+      try {
+        const syncedAttachments = await syncAttachmentsForSubmit(sessionId, attachments, {
+          updateComposerAttachments: usingComposerAttachments
+        })
+
+        // Rewrite the optimistic message + prompt text with the synced refs so
+        // the gateway receives @file: paths that resolve in its workspace.
+        // (Images keep their inline base64 preview — see optimisticAttachmentRef.)
+        attachmentRefs = syncedAttachments.map(optimisticAttachmentRef).filter((r): r is string => Boolean(r))
+        rewriteOptimistic(sessionId)
+        const text = buildContextText(syncedAttachments)
+
+        // On sleep/wake the gateway's in-memory session may have been cleared
+        // while the desktop app still holds the old session ID. Detect this,
+        // resume the stored session to re-register it, and retry once.
+        let submitErr: unknown = null
+
+        try {
+          await withSessionBusyRetry(() => requestGateway('prompt.submit', { session_id: sessionId, text }))
+        } catch (firstErr) {
+          if (isSessionNotFoundError(firstErr) && selectedStoredSessionIdRef.current) {
+            // Re-register the session in the gateway and get a fresh live ID.
+            const resumed = await requestGateway<{ session_id: string }>('session.resume', {
+              session_id: selectedStoredSessionIdRef.current
+            })
+
+            const recoveredId = resumed?.session_id
+
+            if (recoveredId) {
+              activeSessionIdRef.current = recoveredId
+              await withSessionBusyRetry(() => requestGateway('prompt.submit', { session_id: recoveredId, text }))
+            } else {
+              submitErr = firstErr
+            }
+          } else {
+            submitErr = firstErr
+          }
+        }
+
+        if (submitErr !== null) {
+          throw submitErr
+        }
+
+        if (usingComposerAttachments) {
+          clearComposerAttachments()
+        }
+
+        // Submit landed — the turn now runs (busy stays true), but the submit
+        // window is closed, so release the lock for the next (sequential) send.
+        releaseSubmitLock()
+
+        return true
+      } catch (err) {
+        releaseBusy()
+
+        // A queued drain that raced a not-yet-settled turn gets a transient
+        // "session busy" (4009). Don't surface an error bubble/toast — the entry
+        // stays queued and the composer's bounded auto-drain retries when idle.
+        // Drop this attempt's optimistic message first: the retry seeds a fresh
+        // optimistic id, so leaving this one behind would duplicate the user
+        // bubble (and keep the session's own busy flag set) on every retry.
+        if (options?.fromQueue && isSessionBusyError(err)) {
+          dropOptimistic(sessionId)
+
+          return false
+        }
+
+        const message = inlineErrorMessage(err, copy.promptFailed)
+
+        updateSessionState(sessionId, state => ({
+          ...state,
+          messages: [
+            ...state.messages,
+            {
+              id: `assistant-error-${Date.now()}`,
+              role: 'assistant',
+              parts: [],
+              error: message || copy.promptFailed,
+              branchGroupId: state.pendingBranchGroup ?? undefined
+            }
+          ],
+          busy: false,
+          awaitingResponse: false,
+          pendingBranchGroup: null,
+          sawAssistantPayload: true
+        }))
+
+        if (isProviderSetupError(err)) {
+          requestDesktopOnboarding(copy.providerCredentialRequired)
+
+          return false
+        }
+
+        notifyError(err, copy.promptFailed)
+
+        return false
+      }
+    },
+    [
+      activeSessionId,
+      activeSessionIdRef,
+      busyRef,
+      copy,
+      createBackendSessionForSend,
+      requestGateway,
+      selectedStoredSessionIdRef,
+      syncAttachmentsForSubmit,
+      updateSessionState
+    ]
+  )
+}
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions/utils.test.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions/utils.test.ts
@@ -0,0 +1,127 @@
+import type { AppendMessage } from '@assistant-ui/react'
+import { describe, expect, it } from 'vitest'
+
+import type { ChatMessage } from '@/lib/chat-messages'
+
+import {
+  appendText,
+  base64FromDataUrl,
+  friendlyRemoteAttachError,
+  imageFilenameFromPath,
+  inlineErrorMessage,
+  isSessionBusyError,
+  isSessionIdCandidate,
+  isSessionNotFoundError,
+  slashStatusText,
+  visibleUserIndexAtOrdinal,
+  visibleUserOrdinal
+} from './utils'
+
+// Session-id shape check: both accepted forms pass; free text and too-short input fail.
+describe('isSessionIdCandidate', () => {
+  it('accepts the timestamped and hex id forms', () => {
+    expect(isSessionIdCandidate('20260101_120000_abc123')).toBe(true)
+    expect(isSessionIdCandidate('a'.repeat(32))).toBe(true)
+  })
+
+  it('rejects arbitrary text', () => {
+    expect(isSessionIdCandidate('hello world')).toBe(false)
+    expect(isSessionIdCandidate('abc')).toBe(false)
+  })
+})
+
+// Error text cleanup: Electron remote-method wrapper, leading "Error:", and the fallback path.
+describe('inlineErrorMessage', () => {
+  it('unwraps an electron remote-method error', () => {
+    expect(inlineErrorMessage(new Error("Error invoking remote method 'x': Error: boom"), 'fallback')).toBe('boom')
+  })
+
+  it('strips a leading Error: prefix', () => {
+    expect(inlineErrorMessage(new Error('Error: nope'), 'fallback')).toBe('nope')
+  })
+
+  it('falls back for non-error, non-string input', () => {
+    expect(inlineErrorMessage(undefined, 'fallback')).toBe('fallback')
+  })
+})
+
+// Both classifiers match on message text (the not-found case uses different
+// capitalisation than the pattern) and reject an unrelated message.
+describe('session error classifiers', () => {
+  it('detects not-found and busy errors', () => {
+    expect(isSessionNotFoundError(new Error('Session not found'))).toBe(true)
+    expect(isSessionBusyError(new Error('session busy'))).toBe(true)
+    expect(isSessionNotFoundError(new Error('other'))).toBe(false)
+    expect(isSessionBusyError(new Error('other'))).toBe(false)
+  })
+})
+
+// Payload extraction from a data: URL, including input without a comma separator.
+describe('base64FromDataUrl', () => {
+  it('returns the part after the comma', () => {
+    expect(base64FromDataUrl('data:image/png;base64,AAAA')).toBe('AAAA')
+  })
+
+  it('returns empty when there is no comma', () => {
+    expect(base64FromDataUrl('nope')).toBe('')
+  })
+})
+
+// Last path segment for both "/" and "\" separators, with the default for an empty path.
+describe('imageFilenameFromPath', () => {
+  it('takes the last path segment', () => {
+    expect(imageFilenameFromPath('/a/b/c.png')).toBe('c.png')
+    expect(imageFilenameFromPath('C:\\a\\b\\d.jpg')).toBe('d.jpg')
+  })
+
+  it('defaults when the path is empty', () => {
+    expect(imageFilenameFromPath('')).toBe('image.png')
+  })
+})
+
+// Size-cap errors are rewritten with the limit parsed from the message
+// (16777216 bytes -> "16 MB"); any other error comes back as the same object.
+describe('friendlyRemoteAttachError', () => {
+  it('rewrites a too-large error with the parsed cap', () => {
+    const err = friendlyRemoteAttachError(new Error('file is too large (20 bytes; limit 16777216 bytes)'), 'pic.png')
+    expect(err.message).toBe('pic.png is too large to upload to the remote gateway (max 16 MB).')
+  })
+
+  it('passes non-cap errors through', () => {
+    const original = new Error('something else')
+    expect(friendlyRemoteAttachError(original, 'pic.png')).toBe(original)
+  })
+})
+
+// "slash:<command>" header plus trimmed output; whitespace-only output adds no second line.
+describe('slashStatusText', () => {
+  it('joins command and trimmed output', () => {
+    expect(slashStatusText('/model', '  gpt  ')).toBe('slash:/model\ngpt')
+  })
+
+  it('omits empty output', () => {
+    expect(slashStatusText('/clear', '   ')).toBe('slash:/clear')
+  })
+})
+
+// Text parts are concatenated without a separator and the result is trimmed at both ends.
+describe('appendText', () => {
+  it('concatenates text parts and trims', () => {
+    const message = {
+      content: [
+        { type: 'text', text: ' a' },
+        { type: 'text', text: 'b ' }
+      ]
+    } as unknown as AppendMessage
+
+    expect(appendText(message)).toBe('ab')
+  })
+})
+
+// Fixture: visible user messages sit at indexes 0 and 3; index 2 is a hidden
+// user message and index 1 an assistant message, so neither is counted.
+describe('visible user ordinals', () => {
+  const messages = [
+    { role: 'user', hidden: false },
+    { role: 'assistant' },
+    { role: 'user', hidden: true },
+    { role: 'user', hidden: false }
+  ] as ChatMessage[]
+
+  it('counts visible user messages before an index', () => {
+    expect(visibleUserOrdinal(messages, messages.length)).toBe(2)
+  })
+
+  it('maps an ordinal back to a message index, skipping hidden', () => {
+    expect(visibleUserIndexAtOrdinal(messages, 1)).toBe(3)
+    expect(visibleUserIndexAtOrdinal(messages, 5)).toBe(-1)
+  })
+})
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions/utils.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions/utils.ts
@@ -0,0 +1,217 @@
+import type { AppendMessage } from '@assistant-ui/react'
+
+import { translateNow, type Translations } from '@/i18n'
+import type { ChatMessage } from '@/lib/chat-messages'
+import { type CommandsCatalogLike, filterDesktopCommandsCatalog } from '@/lib/desktop-slash-commands'
+import { isProviderSetupErrorMessage } from '@/lib/provider-setup-errors'
+import type { ComposerAttachment } from '@/store/composer'
+
+// Signature of the gateway request function: a method name plus optional params,
+// resolving to a result type T chosen by the caller.
+export type GatewayRequest = <T>(method: string, params?: Record<string, unknown>) => Promise<T>
+
+/** Returns a promise that resolves (with no value) once a `ms`-millisecond timer fires. */
+export function delay(ms: number): Promise<void> {
+  return new Promise<void>(wake => {
+    setTimeout(wake, ms)
+  })
+}
+
+/**
+ * Whether `value`, ignoring surrounding whitespace, has one of the two session-id
+ * shapes: `<8 digits>_<6 digits>_<6 hex chars>` (the timestamped form) or exactly
+ * 32 hex characters.
+ */
+export function isSessionIdCandidate(value: string): boolean {
+  const candidate = value.trim()
+  const shapes = [/^\d{8}_\d{6}_[A-Fa-f0-9]{6}$/, /^[A-Fa-f0-9]{32}$/]
+
+  return shapes.some(shape => shape.test(candidate))
+}
+
+/**
+ * Reads `blob` with a FileReader and resolves with the resulting data: URL.
+ * Rejects with the reader's own error when the read fails, or with the translated
+ * 'desktop.audioReadFailed' message when no reader error is available or the
+ * result is not a string.
+ */
+export function blobToDataUrl(blob: Blob): Promise<string> {
+  const readFailed = () => new Error(translateNow('desktop.audioReadFailed'))
+
+  return new Promise((resolve, reject) => {
+    const fileReader = new FileReader()
+
+    fileReader.addEventListener('error', () => reject(fileReader.error || readFailed()))
+    fileReader.addEventListener('load', () => {
+      const { result } = fileReader
+
+      if (typeof result !== 'string') {
+        reject(readFailed())
+
+        return
+      }
+
+      resolve(result)
+    })
+    fileReader.readAsDataURL(blob)
+  })
+}
+
+/**
+ * Classifies `error` as a provider-setup failure by handing its text — an Error's
+ * message, or the value stringified — to isProviderSetupErrorMessage.
+ */
+export function isProviderSetupError(error: unknown) {
+  return isProviderSetupErrorMessage(error instanceof Error ? error.message : String(error))
+}
+
+/**
+ * Turns an arbitrary thrown value into a short message for inline display.
+ *
+ * Uses the Error's message (or the string itself); any other input yields `fallback`.
+ * Electron's "Error invoking remote method '…': Error: " wrapper is unwrapped — across
+ * newlines too, so a multi-line message is not left with the wrapper attached — and a
+ * leading "Error:" is stripped. If nothing is left after trimming (e.g. `new Error('')`),
+ * `fallback` is returned instead of an empty string.
+ */
+export function inlineErrorMessage(error: unknown, fallback: string): string {
+  const raw = error instanceof Error ? error.message : typeof error === 'string' ? error : fallback
+  // [\s\S] rather than "." so the capture also spans line breaks in the wrapped message.
+  const unwrapped = raw.match(/Error invoking remote method '[^']+': Error: ([\s\S]+)$/)?.[1] ?? raw
+
+  return unwrapped.replace(/^Error:\s*/, '').trim() || fallback
+}
+
+/**
+ * Whether the error's text (an Error's message, or the value stringified) contains
+ * "session not found", compared case-insensitively.
+ */
+export function isSessionNotFoundError(error: unknown): boolean {
+  const notFound = /session not found/i
+
+  return notFound.test(error instanceof Error ? error.message : String(error))
+}
+
+// The gateway refuses prompt.submit while a turn is running (4009 "session
+// busy"). It's a transient concurrency guard, never a user-facing error: a
+// submit racing the settle edge (or a rewind interrupting mid-turn) just waits
+// a beat for the turn to wind down, then lands. Bounded so a genuinely stuck
+// turn still surfaces eventually.
+// Deadline for the whole retry loop, measured from when withSessionBusyRetry starts.
+export const SESSION_BUSY_RETRY_TIMEOUT_MS = 6_000
+// Pause between attempts while the call keeps failing with "session busy".
+export const SESSION_BUSY_RETRY_INTERVAL_MS = 150
+
+/**
+ * Whether the error's text (an Error's message, or the value stringified) contains
+ * "session busy", compared case-insensitively.
+ */
+export function isSessionBusyError(error: unknown): boolean {
+  const message = error instanceof Error ? error.message : String(error)
+
+  return /session busy/i.test(message)
+}
+
+// Module-local name for delay(): the two were identical timer helpers, so keep a
+// single implementation and alias it rather than maintaining a second copy.
+const sleep = delay
+
+// Runs `call`, retrying for as long as it fails with a "session busy" error and
+// the deadline (SESSION_BUSY_RETRY_TIMEOUT_MS from entry) has not passed, pausing
+// SESSION_BUSY_RETRY_INTERVAL_MS between attempts. Resolves with the first
+// successful result; any other error, or a busy error after the deadline, is rethrown.
+export async function withSessionBusyRetry<T>(call: () => Promise<T>): Promise<T> {
+  const giveUpAt = Date.now() + SESSION_BUSY_RETRY_TIMEOUT_MS
+
+  for (;;) {
+    try {
+      return await call()
+    } catch (err) {
+      const retryable = isSessionBusyError(err) && Date.now() < giveUpAt
+
+      if (!retryable) {
+        throw err
+      }
+    }
+
+    // Only reached after a retryable failure: wait a beat, then try again.
+    await sleep(SESSION_BUSY_RETRY_INTERVAL_MS)
+  }
+}
+
+// Hard guard: at most one prompt.submit in flight per session. Every submit
+// path — user Enter, queue drain, busy-retry, slash fallthrough — funnels
+// through submitPromptText. Without this, a stalled turn (e.g. a context-bloated
+// session whose first call hangs) let the SAME prompt launch several real turns
+// at once (the "message stacked 5×" bug). Keyed by stored/active session id.
+// The submit hook (submit.ts) bails out when its key is already present, adds the
+// key before submitting, and deletes it once the submit lands or fails.
+export const _submitInFlight = new Set<string>()
+
+/** Returns everything after the first comma of `dataUrl`, or '' when it contains no comma. */
+export function base64FromDataUrl(dataUrl: string): string {
+  const separatorAt = dataUrl.indexOf(',')
+
+  if (separatorAt < 0) {
+    return ''
+  }
+
+  return dataUrl.slice(separatorAt + 1)
+}
+
+/**
+ * Last non-empty segment of `filePath`, splitting on both "/" and "\";
+ * 'image.png' when the path has no non-empty segment.
+ */
+export function imageFilenameFromPath(filePath: string): string {
+  const segments = filePath.split(/[\\/]/).filter(Boolean)
+
+  return segments[segments.length - 1] || 'image.png'
+}
+
+// Remote gateway: a composer image lives on THIS machine's disk, not the
+// gateway's, so its bytes are read locally (desktop bridge readFileDataUrl) for
+// upload via image.attach_bytes. Resolves to the base64 payload plus a filename
+// derived from the path, or null when the bridge is missing, the read returns
+// nothing, or the data URL carries no payload.
+export async function readImageForRemoteAttach(
+  filePath: string
+): Promise<{ contentBase64: string; filename: string } | null> {
+  const dataUrl = await window.hermesDesktop?.readFileDataUrl(filePath)
+
+  if (!dataUrl) {
+    return null
+  }
+
+  const contentBase64 = base64FromDataUrl(dataUrl)
+
+  if (!contentBase64) {
+    return null
+  }
+
+  return { contentBase64, filename: imageFilenameFromPath(filePath) }
+}
+
+// Reads a non-image file as a data URL for upload via file.attach. Resolves to
+// null when the desktop bridge exposes no reader, or when the read comes back
+// empty (e.g. the file was moved/deleted).
+export async function readFileDataUrlForAttach(filePath: string): Promise<string | null> {
+  const readDataUrl = window.hermesDesktop?.readFileDataUrl
+
+  return readDataUrl ? (await readDataUrl(filePath)) || null : null
+}
+
+// In remote mode every attachment's bytes are read through the readFileDataUrl IPC,
+// which base64-loads the whole file and is hard-capped (DATA_URL_READ_MAX_BYTES,
+// 16 MB, in electron/hardening.cjs). Over the cap it rejects with a raw
+// "file is too large (N bytes; limit M bytes)" string that would otherwise reach
+// the failure toast verbatim. This maps it to a friendly "<label> is too large to
+// upload to the remote gateway" Error, reading the limit out of the message
+// (floored to whole MB) so the text tracks the real cap. Anything that is not a
+// size error is returned as-is (non-Error values are wrapped in an Error).
+export function friendlyRemoteAttachError(err: unknown, label: string): Error {
+  const original = err instanceof Error ? err : new Error(String(err))
+
+  if (!/too large/i.test(original.message)) {
+    return original
+  }
+
+  const bytesPerMb = 1024 * 1024
+  const limitBytes = Number(original.message.match(/limit (\d+) bytes/)?.[1])
+  const hasLimit = Number.isFinite(limitBytes) && limitBytes > 0
+  const cap = hasLimit ? ` (max ${Math.floor(limitBytes / bytesPerMb)} MB)` : ''
+
+  return new Error(`${label} is too large to upload to the remote gateway${cap}.`)
+}
+
+/**
+ * Renders the desktop-filtered commands catalog as plain text.
+ *
+ * Each non-empty section becomes a "<name>:" heading followed by one row per
+ * command (command padded to 18 columns, then its description); sections are
+ * separated by a blank line. When the catalog has no categories, its flat pair
+ * list is shown as a single section titled `copy.desktopCommands`. A trailing
+ * block carries the skill-count and warning lines when present.
+ */
+export function renderCommandsCatalog(catalog: CommandsCatalogLike, copy: Translations['desktop']): string {
+  const desktopCatalog = filterDesktopCommandsCatalog(catalog)
+  const { categories, skill_count: skillCount, warning } = desktopCatalog
+  const sections = categories?.length ? categories : [{ name: copy.desktopCommands, pairs: desktopCatalog.pairs ?? [] }]
+
+  const renderedSections: string[] = []
+
+  for (const section of sections) {
+    if (section.pairs.length > 0) {
+      const rows = section.pairs.map(([cmd, desc]) => `${cmd.padEnd(18)} ${desc}`)
+
+      renderedSections.push([`${section.name}:`, ...rows].join('\n'))
+    }
+  }
+
+  const body = renderedSections.join('\n\n')
+  const skillsLine = skillCount ? copy.skillCommandsAvailable(skillCount) : ''
+  const warningLine = warning ? copy.warningLine(warning) : ''
+  const tail = [skillsLine, warningLine].filter(Boolean).join('\n')
+
+  return [body || 'No desktop commands available.', tail].filter(Boolean).join('\n\n')
+}
+
+/**
+ * Builds the status text for a slash command: a "slash:<command>" line, followed
+ * on a new line by the trimmed output when there is any.
+ */
+export function slashStatusText(command: string, output: string): string {
+  const header = `slash:${command}`
+  const body = output.trim()
+
+  return body ? `${header}\n${body}` : header
+}
+
+/**
+ * Concatenates, in order and without a separator, the `text` of every content
+ * part that has one, then trims the combined string.
+ */
+export function appendText(message: AppendMessage): string {
+  const texts = message.content.flatMap(part => ('text' in part ? [part.text] : []))
+
+  return texts.join('').trim()
+}
+
+/** Counts the user messages that are not hidden among `messages.slice(0, end)`. */
+export function visibleUserOrdinal(messages: readonly ChatMessage[], end: number): number {
+  let visibleUsers = 0
+
+  for (const message of messages.slice(0, end)) {
+    if (message.role === 'user' && !message.hidden) {
+      visibleUsers += 1
+    }
+  }
+
+  return visibleUsers
+}
+
+/**
+ * Index in `messages` of the visible (non-hidden) user message whose zero-based
+ * position among visible user messages equals `targetOrdinal`; -1 when there is
+ * no such message.
+ */
+export function visibleUserIndexAtOrdinal(messages: readonly ChatMessage[], targetOrdinal: number): number {
+  let visibleSeen = 0
+
+  return messages.findIndex(message => {
+    if (message.role !== 'user' || message.hidden) {
+      return false
+    }
+
+    const isTarget = visibleSeen === targetOrdinal
+    visibleSeen += 1
+
+    return isTarget
+  })
+}
+
+/** Optional per-call settings for a prompt submit (read by the submit hook in submit.ts). */
+export interface SubmitTextOptions {
+  // Attachments to send instead of the composer's current ones. When given, the
+  // composer's attachments are not updated by the sync step nor cleared after the send.
+  attachments?: ComposerAttachment[]
+  // Set for queue drains: skips the busyRef guard, and a "session busy" failure
+  // returns false without adding an error message or showing a notification.
+  fromQueue?: boolean
+}
--- a/apps/desktop/src/app/session/hooks/use-session-actions/index.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions/index.ts
@@ -2,20 +2,16 @@ import type { MutableRefObject } from 'react'
 import { useCallback, useRef } from 'react'
 import type { NavigateFunction } from 'react-router-dom'

-import { deleteSession, getSession, getSessionMessages, setSessionArchived } from '@/hermes'
+import { deleteSession, getSessionMessages, setSessionArchived } from '@/hermes'
 import { useI18n } from '@/i18n'
-import { type ChatMessage, chatMessageText, preserveLocalAssistantErrors, toChatMessages } from '@/lib/chat-messages'
-import { normalizePersonalityValue } from '@/lib/chat-runtime'
-import { embeddedImageUrls, textWithoutEmbeddedImages } from '@/lib/embedded-images'
+import { preserveLocalAssistantErrors, toChatMessages } from '@/lib/chat-messages'
 import { setSessionYolo } from '@/lib/yolo-session'
 import { clearQueuedPrompts } from '@/store/composer-queue'
 import { $pinnedSessionIds } from '@/store/layout'
 import { clearNotifications, notify, notifyError } from '@/store/notifications'
-import { requestDesktopOnboarding } from '@/store/onboarding'
 import {
  $activeGatewayProfile,
  $newChatProfile,
-  $profiles,
  ensureGatewayProfile,
  normalizeProfileKey
 } from '@/store/profile'
@@ -35,11 +31,6 @@ import {
  setBusy,
  setCurrentBranch,
  setCurrentCwd,
-  setCurrentFastMode,
-  setCurrentModel,
-  setCurrentPersonality,
-  setCurrentProvider,
-  setCurrentReasoningEffort,
  setCurrentServiceTier,
  setCurrentUsage,
  setFreshDraftReady,
@@ -56,18 +47,30 @@ import {
  workspaceCwdForNewSession
 } from '@/store/session'
 import { broadcastSessionsChanged } from '@/store/session-sync'
-import { reportBackendContract } from '@/store/updates'
 import { isWatchWindow } from '@/store/windows'
 import type {
  SessionCreateResponse,
-  SessionInfo,
  SessionResumeResponse,
-  SessionRuntimeInfo,
  UsageStats
 } from '@/types/hermes'

-import { NEW_CHAT_ROUTE, sessionRoute, SETTINGS_ROUTE } from '../../routes'
-import type { ClientSessionState, SidebarNavItem } from '../../types'
+import { NEW_CHAT_ROUTE, sessionRoute, SETTINGS_ROUTE } from '../../../routes'
+import type { ClientSessionState, SidebarNavItem } from '../../../types'
+
+import {
+  applyRuntimeInfo,
+  applyStoredSessionPreviewRuntimeInfo,
+  type BranchMessage,
+  chatMessageArraysEquivalent,
+  isSessionGoneError,
+  patchSessionWorkspace,
+  reconcileResumeMessages,
+  resolveStoredSession,
+  sessionMatchesStoredId,
+  sessionShouldHaveTranscript,
+  toBranchMessages,
+  upsertOptimisticSession
+} from './utils'

 interface SessionActionsOptions {
  activeSessionId: string | null
@@ -90,325 +93,6 @@ interface SessionActionsOptions {
  ) => ClientSessionState
 }

-function withAppendedText(message: ChatMessage, suffix: string): ChatMessage {
-  let appended = false
-
-  const parts = message.parts.map(part => {
-    if (part.type !== 'text' || appended) {
-      return part
-    }
-
-    appended = true
-
-    return { ...part, text: `${part.text}${suffix}` }
-  })
-
-  return appended ? { ...message, parts } : message
-}
-
-function preserveReasoningParts(message: ChatMessage, previous: ChatMessage): ChatMessage {
-  if (message.parts.some(part => part.type === 'reasoning')) {
-    return message
-  }
-
-  const reasoningParts = previous.parts.filter(part => part.type === 'reasoning')
-
-  return reasoningParts.length ? { ...message, parts: [...reasoningParts, ...message.parts] } : message
-}
-
-function chatMessagesEquivalent(a: ChatMessage, b: ChatMessage): boolean {
-  if (
-    a.id !== b.id ||
-    a.role !== b.role ||
-    a.pending !== b.pending ||
-    a.error !== b.error ||
-    a.hidden !== b.hidden ||
-    a.branchGroupId !== b.branchGroupId
-  ) {
-    return false
-  }
-
-  if (a.parts.length !== b.parts.length) {
-    return false
-  }
-
-  return a.parts.every((part, index) => JSON.stringify(part) === JSON.stringify(b.parts[index]))
-}
-
-function chatMessageArraysEquivalent(a: ChatMessage[], b: ChatMessage[]): boolean {
-  return a.length === b.length && a.every((message, index) => chatMessagesEquivalent(message, b[index]))
-}
-
-function reconcileResumeMessages(nextMessages: ChatMessage[], previousMessages: ChatMessage[]): ChatMessage[] {
-  if (!previousMessages.length) {
-    return nextMessages
-  }
-
-  const previousByRoleOrdinal = new Map<string, ChatMessage>()
-  const previousRoleCounts = new Map<string, number>()
-
-  for (const message of previousMessages) {
-    const ordinal = previousRoleCounts.get(message.role) ?? 0
-    previousRoleCounts.set(message.role, ordinal + 1)
-    previousByRoleOrdinal.set(`${message.role}:${ordinal}`, message)
-  }
-
-  const nextRoleCounts = new Map<string, number>()
-
-  return nextMessages.map(message => {
-    const ordinal = nextRoleCounts.get(message.role) ?? 0
-    nextRoleCounts.set(message.role, ordinal + 1)
-
-    const previous = previousByRoleOrdinal.get(`${message.role}:${ordinal}`)
-
-    if (!previous) {
-      return message
-    }
-
-    const nextText = chatMessageText(message).trim()
-    const previousText = chatMessageText(previous)
-    const previousVisibleText = textWithoutEmbeddedImages(previousText)
-    let preserved = message
-
-    if (nextText === previousVisibleText || nextText === previousText.trim()) {
-      preserved = preserveReasoningParts(preserved, previous)
-    }
-
-    const previousImages = embeddedImageUrls(previousText)
-
-    if (!previousImages.length || embeddedImageUrls(chatMessageText(preserved)).length) {
-      return preserved
-    }
-
-    if (nextText !== previousVisibleText) {
-      return preserved
-    }
-
-    return withAppendedText(preserved, previousImages.map(url => `\n${url}`).join(''))
-  })
-}
-
-interface BranchMessage {
-  content: string
-  role: ChatMessage['role']
-  source: ChatMessage
-}
-
-// The copyable spine of a branch: user/assistant turns that carry text.
-const toBranchMessages = (messages: ChatMessage[]): BranchMessage[] =>
-  messages
-    .map(message => ({ content: chatMessageText(message), role: message.role, source: message }))
-    .filter(({ content, role }) => content.trim() && (role === 'assistant' || role === 'user'))
-
-function upsertOptimisticSession(
-  created: SessionCreateResponse,
-  id: string,
-  title: string | null = null,
-  preview: string | null = null,
-  parentSessionId: string | null = null,
-  lastActive?: number
-) {
-  const now = lastActive ?? Date.now() / 1000
-  // Stamp the profile the session was just created on (= the live gateway's
-  // profile) so the scoped sidebar shows the new row immediately instead of
-  // filtering it out as "default" until the aggregator re-fetches.
-  const profileKey = normalizeProfileKey($activeGatewayProfile.get())
-
-  const session: SessionInfo = {
-    // Seed cwd so the grouped sidebar can place the new row in its repo/worktree
-    // lane immediately (the overlay groups by path); fall back to the workspace
-    // the session was just started in when the create response omits it.
-    cwd: created.info?.cwd ?? ($currentCwd.get().trim() || null),
-    ended_at: null,
-    id,
-    input_tokens: 0,
-    is_active: true,
-    is_default_profile: profileKey === 'default',
-    last_active: now,
-    message_count: created.message_count ?? created.messages?.length ?? 0,
-    model: created.info?.model ?? null,
-    output_tokens: 0,
-    parent_session_id: parentSessionId,
-    preview,
-    profile: profileKey,
-    source: 'tui',
-    started_at: now,
-    title,
-    tool_call_count: 0
-  }
-
-  setSessions(prev => [session, ...prev.filter(s => s.id !== id)])
-}
-
-function patchSessionWorkspace(sessionId: string, cwd: string | undefined) {
-  if (!cwd) {
-    return
-  }
-
-  setSessions(prev => prev.map(session => (session.id === sessionId ? { ...session, cwd } : session)))
-}
-
-function sessionMatchesStoredId(session: SessionInfo, storedSessionId: string): boolean {
-  return session.id === storedSessionId || session._lineage_root_id === storedSessionId
-}
-
-function sessionShouldHaveTranscript(session: SessionInfo | undefined): boolean {
-  return (session?.message_count ?? 0) > 0
-}
-
-function upsertResolvedSession(session: SessionInfo, storedSessionId: string) {
-  const lineage = session._lineage_root_id ?? session.id
-
-  setSessions(prev => [
-    session,
-    ...prev.filter(existing => {
-      if (sessionMatchesStoredId(existing, storedSessionId)) {
-        return false
-      }
-
-      return (existing._lineage_root_id ?? existing.id) !== lineage
-    })
-  ])
-}
-
-async function resolveStoredSession(storedSessionId: string): Promise<SessionInfo | undefined> {
-  const cached = $sessions.get().find(session => sessionMatchesStoredId(session, storedSessionId))
-
-  if (cached) {
-    return cached
-  }
-
-  // Direct by-id on the live backend — one row lookup, no list scan. Covers
-  // single-profile users and any id on the active profile (e.g. an old session
-  // past the sidebar's recent window). 404 just means it's not on this profile.
-  try {
-    const session = await getSession(storedSessionId)
-
-    upsertResolvedSession(session, storedSessionId)
-
-    return session
-  } catch {
-    // Not on the active profile — fall through to the cross-profile probe.
-  }
-
-  // Multi-profile only: probe each other profile by id (still one cheap lookup
-  // each) rather than pulling every profile's recent sessions. The first hit
-  // carries its owning `profile`, which routes the resume to the right backend.
-  const activeKey = normalizeProfileKey($activeGatewayProfile.get())
-
-  const otherProfiles = $profiles
-    .get()
-    .map(profile => normalizeProfileKey(profile.name))
-    .filter(key => key !== activeKey)
-
-  for (const profile of otherProfiles) {
-    try {
-      const session = await getSession(storedSessionId, profile)
-
-      upsertResolvedSession(session, storedSessionId)
-
-      return session
-    } catch {
-      // Not on this profile; try the next.
-    }
-  }
-
-  return undefined
-}
-
-type SessionRuntimeStatePatch = Partial<
-  Pick<
-    ClientSessionState,
-    'branch' | 'cwd' | 'fast' | 'model' | 'personality' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo'
-  >
->
-
-function applyRuntimeInfo(info: SessionRuntimeInfo | undefined): SessionRuntimeStatePatch | null {
-  if (!info) {
-    return null
-  }
-
-  const sessionState: SessionRuntimeStatePatch = {}
-
-  reportBackendContract(info.desktop_contract)
-
-  if (info.credential_warning) {
-    requestDesktopOnboarding(info.credential_warning)
-  }
-
-  if (typeof info.model === 'string') {
-    setCurrentModel(info.model)
-    sessionState.model = info.model
-  }
-
-  if (typeof info.provider === 'string') {
-    setCurrentProvider(info.provider)
-    sessionState.provider = info.provider
-  }
-
-  if (info.cwd) {
-    setCurrentCwd(info.cwd)
-    sessionState.cwd = info.cwd
-  }
-
-  if (info.branch !== undefined) {
-    setCurrentBranch(info.branch || '')
-    sessionState.branch = info.branch || ''
-  }
-
-  if (typeof info.personality === 'string') {
-    const personality = normalizePersonalityValue(info.personality)
-    setCurrentPersonality(personality)
-    sessionState.personality = personality
-  }
-
-  if (typeof info.reasoning_effort === 'string') {
-    setCurrentReasoningEffort(info.reasoning_effort)
-    sessionState.reasoningEffort = info.reasoning_effort
-  }
-
-  if (typeof info.service_tier === 'string') {
-    setCurrentServiceTier(info.service_tier)
-    sessionState.serviceTier = info.service_tier
-  }
-
-  if (typeof info.fast === 'boolean') {
-    setCurrentFastMode(info.fast)
-    sessionState.fast = info.fast
-  }
-
-  if (typeof info.yolo === 'boolean') {
-    setYoloActive(info.yolo)
-    sessionState.yolo = info.yolo
-  }
-
-  if (info.usage) {
-    setCurrentUsage(current => ({ ...current, ...info.usage }))
-  }
-
-  return sessionState
-}
-
-function applyStoredSessionPreviewRuntimeInfo(stored: { model?: null | string } | undefined) {
-  setCurrentModel(stored?.model || '')
-  setCurrentProvider('')
-  setCurrentReasoningEffort('')
-  setCurrentServiceTier('')
-  setCurrentFastMode(false)
-  setYoloActive(false)
-  setCurrentPersonality('')
-}
-
-// A "session genuinely doesn't exist" failure (deleted, or an id from a wiped /
-// rotated backend) — the REST transcript 404s with `Session not found`. Distinct
-// from a transient/wedged backend (ECONNREFUSED, timeout), which must still
-// retry rather than discard the id.
-function isSessionGoneError(err: unknown): boolean {
-  const message = err instanceof Error ? err.message : String(err ?? '')
-
-  return message.includes('404') || /session not found/i.test(message)
-}
-
 export function useSessionActions({
  activeSessionId,
  activeSessionIdRef,
@@ -685,7 +369,9 @@ export function useSessionActions({
      if (warmHit) {
        const cachedRuntimeId = warmHit.runtimeId
        const cachedState = warmHit.state
-        const stored = $sessions.get().find(session => sessionMatchesStoredId(session, storedSessionId)) ?? storedForProfile
+
+        const stored =
+          $sessions.get().find(session => sessionMatchesStoredId(session, storedSessionId)) ?? storedForProfile

        const cachedViewState =
          !cachedState.model && stored?.model != null
@@ -752,7 +438,10 @@ export function useSessionActions({
      setSelectedStoredSessionId(storedSessionId)
      selectedStoredSessionIdRef.current = storedSessionId
      setSessionStartedAt(Date.now())
-      const stored = $sessions.get().find(session => sessionMatchesStoredId(session, storedSessionId)) ?? storedForProfile
+
+      const stored =
+        $sessions.get().find(session => sessionMatchesStoredId(session, storedSessionId)) ?? storedForProfile
+
      applyStoredSessionPreviewRuntimeInfo(stored)

      if (stored) {
--- a/apps/desktop/src/app/session/hooks/use-session-actions/utils.test.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions/utils.test.ts
@@ -0,0 +1,89 @@
+import { describe, expect, it } from 'vitest'
+
+import type { ChatMessage } from '@/lib/chat-messages'
+import type { SessionInfo } from '@/types/hermes'
+
+import {
+  chatMessageArraysEquivalent,
+  isSessionGoneError,
+  reconcileResumeMessages,
+  sessionMatchesStoredId,
+  sessionShouldHaveTranscript,
+  toBranchMessages
+} from './utils'
+
+// Fixture builder: a ChatMessage with a single text part. `extra` is spread
+// last, so it can override any field, including `parts`.
+const msg = (id: string, role: ChatMessage['role'], text: string, extra: Partial<ChatMessage> = {}): ChatMessage =>
+  ({ id, role, parts: [{ type: 'text', text }], ...extra }) as ChatMessage
+
+// Fixture builder: casts a partial object to SessionInfo so each test only
+// spells out the fields it actually exercises.
+const session = (over: Partial<SessionInfo>): SessionInfo => over as SessionInfo
+
+// A 404 or "Session not found" message counts as gone; a connection error and
+// a null error value do not.
+describe('isSessionGoneError', () => {
+  it('is true for 404 / session-not-found, false otherwise', () => {
+    expect(isSessionGoneError(new Error('Request failed 404'))).toBe(true)
+    expect(isSessionGoneError(new Error('Session not found'))).toBe(true)
+    expect(isSessionGoneError(new Error('ECONNREFUSED'))).toBe(false)
+    expect(isSessionGoneError(null)).toBe(false)
+  })
+})
+
+// A stored id matches either the session's own id or its `_lineage_root_id`.
+describe('sessionMatchesStoredId', () => {
+  it('matches on live id or lineage root', () => {
+    expect(sessionMatchesStoredId(session({ id: 'a' }), 'a')).toBe(true)
+    expect(sessionMatchesStoredId(session({ id: 'live', _lineage_root_id: 'root' }), 'root')).toBe(true)
+    expect(sessionMatchesStoredId(session({ id: 'a' }), 'b')).toBe(false)
+  })
+})
+
+// Covers a positive count, a zero count, and a missing session.
+describe('sessionShouldHaveTranscript', () => {
+  it('is true only when the session has messages', () => {
+    expect(sessionShouldHaveTranscript(session({ message_count: 3 }))).toBe(true)
+    expect(sessionShouldHaveTranscript(session({ message_count: 0 }))).toBe(false)
+    expect(sessionShouldHaveTranscript(undefined)).toBe(false)
+  })
+})
+
+// Whitespace-only turns and non user/assistant roles are dropped; the
+// surviving entries keep their input order and point back at their source.
+describe('toBranchMessages', () => {
+  it('keeps only user/assistant turns that carry text', () => {
+    const out = toBranchMessages([
+      msg('u', 'user', 'hi'),
+      msg('blank', 'assistant', '   '),
+      msg('sys', 'system', 'ignored'),
+      msg('a', 'assistant', 'hello')
+    ])
+
+    expect(out.map(b => b.source.id)).toEqual(['u', 'a'])
+    expect(out[0]).toMatchObject({ content: 'hi', role: 'user' })
+  })
+})
+
+// Equal arrays compare true; a shorter array or a changed part compares false.
+describe('chatMessageArraysEquivalent', () => {
+  it('compares length and per-message equivalence', () => {
+    const a = [msg('1', 'user', 'x'), msg('2', 'assistant', 'y')]
+    expect(chatMessageArraysEquivalent(a, [msg('1', 'user', 'x'), msg('2', 'assistant', 'y')])).toBe(true)
+    expect(chatMessageArraysEquivalent(a, [msg('1', 'user', 'x')])).toBe(false)
+    expect(chatMessageArraysEquivalent(a, [msg('1', 'user', 'x'), msg('2', 'assistant', 'changed')])).toBe(false)
+  })
+})
+
+// Two cases: an empty previous transcript returns the very same `next` array
+// (identity, via toBe), and a previous assistant turn with matching text
+// donates its reasoning part to the incoming turn.
+describe('reconcileResumeMessages', () => {
+  it('returns next untouched when there is no previous transcript', () => {
+    const next = [msg('1', 'user', 'hi')]
+    expect(reconcileResumeMessages(next, [])).toBe(next)
+  })
+
+  it('re-grafts reasoning parts onto a matching assistant turn', () => {
+    const next = [msg('a', 'assistant', 'answer')]
+
+    const previous = [
+      msg('a', 'assistant', 'answer', {
+        parts: [
+          { type: 'reasoning', text: 'thinking' },
+          { type: 'text', text: 'answer' }
+        ]
+      } as Partial<ChatMessage>)
+    ]
+
+    const [out] = reconcileResumeMessages(next, previous)
+    expect(out.parts.some(p => p.type === 'reasoning')).toBe(true)
+  })
+})
--- a/apps/desktop/src/app/session/hooks/use-session-actions/utils.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions/utils.ts
@@ -0,0 +1,344 @@
+import { getSession } from '@/hermes'
+import { type ChatMessage, chatMessageText } from '@/lib/chat-messages'
+import { normalizePersonalityValue } from '@/lib/chat-runtime'
+import { embeddedImageUrls, textWithoutEmbeddedImages } from '@/lib/embedded-images'
+import { requestDesktopOnboarding } from '@/store/onboarding'
+import { $activeGatewayProfile, $profiles, normalizeProfileKey } from '@/store/profile'
+import {
+  $currentCwd,
+  $sessions,
+  setCurrentBranch,
+  setCurrentCwd,
+  setCurrentFastMode,
+  setCurrentModel,
+  setCurrentPersonality,
+  setCurrentProvider,
+  setCurrentReasoningEffort,
+  setCurrentServiceTier,
+  setCurrentUsage,
+  setSessions,
+  setYoloActive
+} from '@/store/session'
+import { reportBackendContract } from '@/store/updates'
+import type { SessionCreateResponse, SessionInfo, SessionRuntimeInfo } from '@/types/hermes'
+
+import type { ClientSessionState } from '../../../types'
+
+// Appends `suffix` to the first text part of `message` and returns a new
+// message. When the message has no text part at all, the original object is
+// returned as-is.
+function withAppendedText(message: ChatMessage, suffix: string): ChatMessage {
+  const firstTextIndex = message.parts.findIndex(part => part.type === 'text')
+
+  if (firstTextIndex === -1) {
+    return message
+  }
+
+  const parts = message.parts.map((part, index) =>
+    index === firstTextIndex && part.type === 'text' ? { ...part, text: `${part.text}${suffix}` } : part
+  )
+
+  return { ...message, parts }
+}
+
+// Carries reasoning parts over from `previous` when `message` has none of its
+// own. Carried parts are placed ahead of the message's existing parts; if
+// there is nothing to carry, the original message object is returned.
+function preserveReasoningParts(message: ChatMessage, previous: ChatMessage): ChatMessage {
+  const alreadyHasReasoning = message.parts.some(part => part.type === 'reasoning')
+
+  if (alreadyHasReasoning) {
+    return message
+  }
+
+  const carried = previous.parts.filter(part => part.type === 'reasoning')
+
+  if (carried.length === 0) {
+    return message
+  }
+
+  return { ...message, parts: [...carried, ...message.parts] }
+}
+
+// Two messages are equivalent when their identity/status fields are strictly
+// equal and every part serializes to the same JSON at the same position.
+function chatMessagesEquivalent(a: ChatMessage, b: ChatMessage): boolean {
+  const sameHeader =
+    a.id === b.id &&
+    a.role === b.role &&
+    a.pending === b.pending &&
+    a.error === b.error &&
+    a.hidden === b.hidden &&
+    a.branchGroupId === b.branchGroupId
+
+  if (!sameHeader || a.parts.length !== b.parts.length) {
+    return false
+  }
+
+  for (let index = 0; index < a.parts.length; index += 1) {
+    if (JSON.stringify(a.parts[index]) !== JSON.stringify(b.parts[index])) {
+      return false
+    }
+  }
+
+  return true
+}
+
+// True when both transcripts have the same length and each pair of messages
+// at the same index passes chatMessagesEquivalent.
+export function chatMessageArraysEquivalent(a: ChatMessage[], b: ChatMessage[]): boolean {
+  if (a.length !== b.length) {
+    return false
+  }
+
+  return !a.some((message, index) => !chatMessagesEquivalent(message, b[index]))
+}
+
+// Reconciles a freshly fetched transcript with the one already held locally.
+// Messages are paired by role and by position within that role (the nth `user`
+// with the nth `user`, and so on); a message with no counterpart passes
+// through unchanged. For a paired message whose text still matches, reasoning
+// parts and embedded image URLs that only the previous copy carried are
+// grafted back on. Returns `nextMessages` itself when there is no previous
+// transcript.
+export function reconcileResumeMessages(nextMessages: ChatMessage[], previousMessages: ChatMessage[]): ChatMessage[] {
+  if (!previousMessages.length) {
+    return nextMessages
+  }
+
+  // Index the previous transcript under `${role}:${ordinal}` keys.
+  const previousByRoleOrdinal = new Map<string, ChatMessage>()
+  const previousRoleCounts = new Map<string, number>()
+
+  for (const message of previousMessages) {
+    const ordinal = previousRoleCounts.get(message.role) ?? 0
+    previousRoleCounts.set(message.role, ordinal + 1)
+    previousByRoleOrdinal.set(`${message.role}:${ordinal}`, message)
+  }
+
+  const nextRoleCounts = new Map<string, number>()
+
+  return nextMessages.map(message => {
+    const ordinal = nextRoleCounts.get(message.role) ?? 0
+    nextRoleCounts.set(message.role, ordinal + 1)
+
+    const previous = previousByRoleOrdinal.get(`${message.role}:${ordinal}`)
+
+    if (!previous) {
+      return message
+    }
+
+    const nextText = chatMessageText(message).trim()
+    const previousText = chatMessageText(previous)
+    // Presumably the previous text with embedded image URLs removed; the helper
+    // is defined in '@/lib/embedded-images' and is not visible here.
+    const previousVisibleText = textWithoutEmbeddedImages(previousText)
+    let preserved = message
+
+    // Only re-graft reasoning when the text is unchanged, compared against both
+    // the image-stripped form and the raw trimmed form of the previous turn.
+    if (nextText === previousVisibleText || nextText === previousText.trim()) {
+      preserved = preserveReasoningParts(preserved, previous)
+    }
+
+    const previousImages = embeddedImageUrls(previousText)
+
+    // Nothing to restore when the previous turn had no embedded images, or the
+    // incoming turn already carries some of its own.
+    if (!previousImages.length || embeddedImageUrls(chatMessageText(preserved)).length) {
+      return preserved
+    }
+
+    // Image URLs are only re-attached when the visible text is unchanged.
+    if (nextText !== previousVisibleText) {
+      return preserved
+    }
+
+    // Each URL goes on its own line, appended to the first text part.
+    return withAppendedText(preserved, previousImages.map(url => `\n${url}`).join(''))
+  })
+}
+
+// One turn of a branch transcript, as produced by toBranchMessages.
+export interface BranchMessage {
+  // Text of the turn; toBranchMessages fills this from chatMessageText(source).
+  content: string
+  role: ChatMessage['role']
+  // The original message this entry was derived from.
+  source: ChatMessage
+}
+
+// Builds the copyable spine of a branch: one entry per user/assistant turn
+// whose text is not blank, in transcript order. Every other message is dropped.
+export const toBranchMessages = (messages: ChatMessage[]): BranchMessage[] => {
+  const spine: BranchMessage[] = []
+
+  for (const message of messages) {
+    const content = chatMessageText(message)
+    const { role } = message
+    const isConversationTurn = role === 'assistant' || role === 'user'
+
+    if (isConversationTurn && content.trim()) {
+      spine.push({ content, role, source: message })
+    }
+  }
+
+  return spine
+}
+
+// Inserts a locally synthesized row for a just-created session at the top of
+// the sessions list, replacing any existing row with the same id. Counters
+// start at zero; `lastActive` (seconds) defaults to the current time.
+export function upsertOptimisticSession(
+  created: SessionCreateResponse,
+  id: string,
+  title: string | null = null,
+  preview: string | null = null,
+  parentSessionId: string | null = null,
+  lastActive?: number
+) {
+  const timestamp = lastActive ?? Date.now() / 1000
+
+  // The row is stamped with the live gateway's profile so a profile-scoped
+  // sidebar shows it right away rather than treating it as "default" until the
+  // next aggregator fetch.
+  const profileKey = normalizeProfileKey($activeGatewayProfile.get())
+
+  // cwd is seeded so the grouped sidebar can file the row under its
+  // repo/worktree lane immediately; when the create response has none, use the
+  // workspace the session was started in (null if that is blank).
+  const cwd = created.info?.cwd ?? ($currentCwd.get().trim() || null)
+  const messageCount = created.message_count ?? created.messages?.length ?? 0
+
+  const optimistic: SessionInfo = {
+    cwd,
+    ended_at: null,
+    id,
+    input_tokens: 0,
+    is_active: true,
+    is_default_profile: profileKey === 'default',
+    last_active: timestamp,
+    message_count: messageCount,
+    model: created.info?.model ?? null,
+    output_tokens: 0,
+    parent_session_id: parentSessionId,
+    preview,
+    profile: profileKey,
+    source: 'tui',
+    started_at: timestamp,
+    title,
+    tool_call_count: 0
+  }
+
+  setSessions(existing => [optimistic, ...existing.filter(row => row.id !== id)])
+}
+
+// Overwrites the `cwd` of the session row with the given id. Does nothing when
+// `cwd` is undefined or empty.
+export function patchSessionWorkspace(sessionId: string, cwd: string | undefined) {
+  if (cwd) {
+    setSessions(rows =>
+      rows.map(row => {
+        if (row.id !== sessionId) {
+          return row
+        }
+
+        return { ...row, cwd }
+      })
+    )
+  }
+}
+
+// A stored id refers to a session when it equals either the session's own id
+// or its lineage root id.
+export function sessionMatchesStoredId(session: SessionInfo, storedSessionId: string): boolean {
+  if (session.id === storedSessionId) {
+    return true
+  }
+
+  return session._lineage_root_id === storedSessionId
+}
+
+// True when the session reports at least one message; a missing session or a
+// missing count is treated as zero.
+export function sessionShouldHaveTranscript(session: SessionInfo | undefined): boolean {
+  const messageCount = session?.message_count ?? 0
+
+  return messageCount > 0
+}
+
+// Puts a freshly resolved session row at the top of the sessions list and
+// drops every existing row that either matches the stored id or belongs to the
+// same lineage as the resolved row.
+function upsertResolvedSession(session: SessionInfo, storedSessionId: string) {
+  const lineageOf = (row: SessionInfo) => row._lineage_root_id ?? row.id
+  const resolvedLineage = lineageOf(session)
+
+  setSessions(rows => {
+    const unrelated = rows.filter(
+      row => !sessionMatchesStoredId(row, storedSessionId) && lineageOf(row) !== resolvedLineage
+    )
+
+    return [session, ...unrelated]
+  })
+}
+
+// Looks up the session row for a stored id in three steps: the in-memory
+// `$sessions` list, then a by-id fetch on the active backend, then a by-id
+// fetch on each other known profile in turn. A row fetched from a backend is
+// upserted into `$sessions` before being returned. Resolves to undefined when
+// every lookup fails. Note that any fetch error (not only a 404) is swallowed
+// and treated as "not on this profile".
+export async function resolveStoredSession(storedSessionId: string): Promise<SessionInfo | undefined> {
+  const cached = $sessions.get().find(session => sessionMatchesStoredId(session, storedSessionId))
+
+  if (cached) {
+    return cached
+  }
+
+  // Direct by-id on the live backend — one row lookup, no list scan. Covers
+  // single-profile users and any id on the active profile (e.g. an old session
+  // past the sidebar's recent window). 404 just means it's not on this profile.
+  try {
+    const session = await getSession(storedSessionId)
+
+    upsertResolvedSession(session, storedSessionId)
+
+    return session
+  } catch {
+    // Not on the active profile — fall through to the cross-profile probe.
+  }
+
+  // Multi-profile only: probe each other profile by id (still one cheap lookup
+  // each) rather than pulling every profile's recent sessions. The first hit
+  // carries its owning `profile`, which routes the resume to the right backend.
+  const activeKey = normalizeProfileKey($activeGatewayProfile.get())
+
+  const otherProfiles = $profiles
+    .get()
+    .map(profile => normalizeProfileKey(profile.name))
+    .filter(key => key !== activeKey)
+
+  // Probed one at a time, in `$profiles` order; the first hit wins.
+  for (const profile of otherProfiles) {
+    try {
+      const session = await getSession(storedSessionId, profile)
+
+      upsertResolvedSession(session, storedSessionId)
+
+      return session
+    } catch {
+      // Not on this profile; try the next.
+    }
+  }
+
+  return undefined
+}
+
+// The runtime-related subset of ClientSessionState that applyRuntimeInfo can
+// populate. Every key is optional, so a patch only carries what was reported.
+type SessionRuntimeStatePatch = Partial<
+  Pick<
+    ClientSessionState,
+    'branch' | 'cwd' | 'fast' | 'model' | 'personality' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo'
+  >
+>
+
+// Pushes the fields present on a runtime-info payload into the "current
+// session" stores and returns the same values as a patch for the per-session
+// client state. Returns null when no payload is given. A field that is absent
+// (or fails its type check) leaves both its store and the patch untouched.
+// Side effects beyond the stores: the payload's `desktop_contract` is always
+// reported, and a `credential_warning` requests desktop onboarding.
+export function applyRuntimeInfo(info: SessionRuntimeInfo | undefined): SessionRuntimeStatePatch | null {
+  if (!info) {
+    return null
+  }
+
+  const sessionState: SessionRuntimeStatePatch = {}
+
+  reportBackendContract(info.desktop_contract)
+
+  if (info.credential_warning) {
+    requestDesktopOnboarding(info.credential_warning)
+  }
+
+  if (typeof info.model === 'string') {
+    setCurrentModel(info.model)
+    sessionState.model = info.model
+  }
+
+  if (typeof info.provider === 'string') {
+    setCurrentProvider(info.provider)
+    sessionState.provider = info.provider
+  }
+
+  // Truthiness check: an empty-string cwd is ignored rather than applied.
+  if (info.cwd) {
+    setCurrentCwd(info.cwd)
+    sessionState.cwd = info.cwd
+  }
+
+  // Unlike cwd, a present-but-falsy branch is applied, normalized to ''.
+  if (info.branch !== undefined) {
+    setCurrentBranch(info.branch || '')
+    sessionState.branch = info.branch || ''
+  }
+
+  if (typeof info.personality === 'string') {
+    const personality = normalizePersonalityValue(info.personality)
+    setCurrentPersonality(personality)
+    sessionState.personality = personality
+  }
+
+  if (typeof info.reasoning_effort === 'string') {
+    setCurrentReasoningEffort(info.reasoning_effort)
+    sessionState.reasoningEffort = info.reasoning_effort
+  }
+
+  if (typeof info.service_tier === 'string') {
+    setCurrentServiceTier(info.service_tier)
+    sessionState.serviceTier = info.service_tier
+  }
+
+  if (typeof info.fast === 'boolean') {
+    setCurrentFastMode(info.fast)
+    sessionState.fast = info.fast
+  }
+
+  if (typeof info.yolo === 'boolean') {
+    setYoloActive(info.yolo)
+    sessionState.yolo = info.yolo
+  }
+
+  // Usage is merged over the current usage store value; it is not part of the
+  // returned patch.
+  if (info.usage) {
+    setCurrentUsage(current => ({ ...current, ...info.usage }))
+  }
+
+  return sessionState
+}
+
+// Resets the runtime stores from a stored session row: the model comes from
+// the row (empty string when the row or its model is missing), and provider,
+// reasoning effort, service tier, fast mode, yolo and personality are all
+// cleared to their empty/false values.
+export function applyStoredSessionPreviewRuntimeInfo(stored: { model?: null | string } | undefined) {
+  setCurrentModel(stored?.model || '')
+  setCurrentProvider('')
+  setCurrentReasoningEffort('')
+  setCurrentServiceTier('')
+  setCurrentFastMode(false)
+  setYoloActive(false)
+  setCurrentPersonality('')
+}
+
+// A "session genuinely doesn't exist" failure (deleted, or an id from a wiped /
+// rotated backend) — the REST transcript 404s with `Session not found`. Distinct
+// from a transient/wedged backend (ECONNREFUSED, timeout), which must still
+// retry rather than discard the id.
+//
+// Accepts any thrown value: an Error contributes its `message`, anything else
+// is stringified (null/undefined become ''). Returns true when the message
+// contains a standalone `404` token or the phrase "session not found"
+// (case-insensitive).
+export function isSessionGoneError(err: unknown): boolean {
+  const message = err instanceof Error ? err.message : String(err ?? '')
+
+  // `404` must stand alone as a token. A plain substring match would also fire
+  // on the digits inside a port or duration (e.g. `127.0.0.1:14040`,
+  // `4040ms`), turning a transient connection failure into "session gone".
+  return /\b404\b/.test(message) || /session not found/i.test(message)
+}
--- a/Show More
+++ b/Show More