Compare commits

...

1 Commits

Author SHA1 Message Date
teknium1
5288341fd7 fix(desktop): remote-ready boot no longer latched by local helper exit
The desktop's onBackendExit handler failed the boot whenever the local
helper process exited, with no mode check. In Remote Gateway mode the
local process is not the desktop's backend: it crashes on the 9120 bind
and is then SIGTERM'd when the app switches to remote. Each exit latched
a boot failure that the subsequent remote-ready progress could not clear,
stranding the user in the failure/repair overlay despite a healthy remote.

- main.cjs: track activeConnectionMode; mark deliberate teardowns in
  resetHermesConnection; tag backend-exit payload with {mode, deliberate};
  skip the fatal boot-progress update on deliberate exits.
- use-gateway-boot: a local exit is only fatal when attached in local mode
  and not a deliberate teardown (isFatalBackendExit).
- Extract the decision into src/lib/backend-exit.ts + vitest coverage.

Fixes #37869
2026-06-03 14:42:23 -07:00
5 changed files with 103 additions and 4 deletions

View File

@@ -469,6 +469,11 @@ let bootstrapAbortController = null
// existing-install adopt branch (3b) so repair re-drives the installer instead
// of re-adopting the install we're repairing. Cleared once a bootstrap runs.
let forceBootstrapRepair = false
// The mode the desktop is currently operating against ('local' | 'remote'),
// set whenever startHermes() resolves a backend. When 'remote', the local
// helper process (if one ever spawned and is now being torn down) is NOT the
// desktop's backend, so its exit must not be reported as a fatal backend exit.
let activeConnectionMode = null
let connectionConfigCache = null
const hermesLog = []
const previewWatchers = new Map()
@@ -3245,6 +3250,11 @@ function resetHermesConnection() {
connectionPromise = null
if (hermesProcess && !hermesProcess.killed) {
// Mark this teardown as deliberate so the process 'exit' handler does not
// broadcast a fatal backend-exit (which would latch a desktop boot failure
// even though we're intentionally tearing the local helper down — e.g. to
// switch the desktop over to a remote gateway).
hermesProcess.__deliberateTeardown = true
hermesProcess.kill('SIGTERM')
}
@@ -3270,6 +3280,7 @@ async function startHermes() {
if (remote) {
await advanceBootProgress('backend.remote', `Connecting to remote Hermes backend at ${remote.baseUrl}`, 24)
await waitForHermes(remote.baseUrl, remote.token)
activeConnectionMode = 'remote'
updateBootProgress({
phase: 'backend.ready',
message: 'Remote Hermes backend is ready',
@@ -3299,6 +3310,7 @@ async function startHermes() {
await advanceBootProgress('backend.spawn', `Starting Hermes backend via ${backend.label}`, 84)
rememberLog(`Starting Hermes backend via ${backend.label}`)
activeConnectionMode = 'local'
hermesProcess = spawn(backend.command, backend.args, {
cwd: hermesCwd,
@@ -3324,6 +3336,10 @@ async function startHermes() {
hermesProcess.stdout.on('data', rememberLog)
hermesProcess.stderr.on('data', rememberLog)
// Capture a stable reference for the exit/error closures: resetHermesConnection
// nulls out the module-level hermesProcess before the 'exit' event fires, so
// the handler needs its own handle to read the deliberate-teardown flag.
const spawnedProcess = hermesProcess
let backendReady = false
let rejectBackendStart = null
const backendStartFailed = new Promise((_resolve, reject) => {
@@ -3342,15 +3358,20 @@ async function startHermes() {
)
hermesProcess = null
connectionPromise = null
sendBackendExit({ code: null, signal: null, error: error.message })
sendBackendExit({ code: null, signal: null, error: error.message, mode: activeConnectionMode })
rejectBackendStart?.(error)
})
hermesProcess.once('exit', (code, signal) => {
rememberLog(`Hermes backend exited (${signal || code})`)
const deliberate = Boolean(spawnedProcess?.__deliberateTeardown)
hermesProcess = null
connectionPromise = null
sendBackendExit({ code, signal })
if (!backendReady) {
// Tag the exit so the renderer can decide whether it's fatal. A deliberate
// teardown (resetHermesConnection — e.g. switching to a remote gateway)
// must not latch a desktop boot failure: the local helper is being torn
// down on purpose, not crashing.
sendBackendExit({ code, signal, mode: activeConnectionMode, deliberate })
if (!backendReady && !deliberate) {
const message = `Hermes backend exited before it became ready (${signal || code}).`
updateBootProgress(
{

View File

@@ -14,6 +14,8 @@ import { notify, notifyError } from '@/store/notifications'
import { $connection, setConnection, setGatewayState, setSessionsLoading } from '@/store/session'
import type { RpcEvent } from '@/types/hermes'
import { isFatalBackendExit } from '@/lib/backend-exit'
interface GatewayBootOptions {
handleGatewayEvent: (event: RpcEvent) => void
onConnectionReady: (
@@ -90,7 +92,18 @@ export function useGatewayBoot({
}
})
const offExit = desktop.onBackendExit(() => {
const offExit = desktop.onBackendExit(payload => {
// A local helper process exiting is only fatal when the desktop is
// actually attached to that local backend. In remote mode (or when the
// exit was a deliberate teardown, e.g. switching to a remote gateway),
// the local process is not our backend — ignore it so a remote-ready
// boot doesn't stay latched behind a stale local-exit failure (#37869).
const fatal = isFatalBackendExit(payload, $connection.get()?.mode)
if (!fatal) {
return
}
if ($desktopBoot.get().running || $desktopBoot.get().visible) {
failDesktopBoot('Hermes background process exited during startup.')
}

View File

@@ -320,4 +320,11 @@ export interface HermesSelectPathsOptions {
/**
 * Payload describing an exit of the local Hermes helper process, as handed to
 * backend-exit listeners.
 */
export interface BackendExit {
  /** Numeric exit code, or null when no exit code was reported. */
  code: number | null
  /** Name of the signal that ended the process (e.g. 'SIGTERM'), or null. */
  signal: string | null
  /**
   * Error message attached when the exit is reported from a process error
   * rather than a regular exit; `code` and `signal` are both null in that case.
   */
  error?: string
  /**
   * The mode the desktop was operating against when the local helper process
   * exited. When 'remote', the local process is not the desktop's backend, so
   * its exit is not a fatal failure. Null/absent when no mode was known yet.
   */
  mode?: 'local' | 'remote' | null
  /**
   * True when the exit was triggered by a deliberate teardown (e.g. switching
   * gateways) rather than a crash.
   */
  deliberate?: boolean
}

View File

@@ -0,0 +1,33 @@
import { describe, expect, it } from 'vitest'
import { isFatalBackendExit } from './backend-exit'
describe('isFatalBackendExit', () => {
  // Shared exit payloads, spelled out once so each case reads as "which exit,
  // under which connection mode".
  const crashed = { code: 1, signal: null }
  const cleanExit = { code: 0, signal: null }
  const terminated = { code: null, signal: 'SIGTERM' }

  it('treats a local-mode crash as fatal', () => {
    const fatal = isFatalBackendExit(crashed, 'local')
    expect(fatal).toBe(true)
  })

  it('treats a crash with no known mode as fatal', () => {
    // Neither null nor undefined identifies a remote gateway, so both stay fatal.
    for (const unknownMode of [null, undefined]) {
      expect(isFatalBackendExit(crashed, unknownMode)).toBe(true)
    }
  })

  it('ignores exits while attached to a remote gateway (#37869)', () => {
    for (const exit of [cleanExit, terminated]) {
      expect(isFatalBackendExit(exit, 'remote')).toBe(false)
    }
  })

  it('ignores exits tagged remote in the payload even before the connection mode is known', () => {
    const taggedRemote = isFatalBackendExit({ ...crashed, mode: 'remote' }, null)
    expect(taggedRemote).toBe(false)
  })

  it('ignores deliberate teardowns (e.g. switching gateways) regardless of mode', () => {
    const whileLocal = isFatalBackendExit({ ...terminated, deliberate: true }, 'local')
    const taggedLocal = isFatalBackendExit({ ...terminated, deliberate: true, mode: 'local' }, null)
    expect(whileLocal).toBe(false)
    expect(taggedLocal).toBe(false)
  })

  it('handles a null/undefined payload defensively', () => {
    // With no payload the decision rests on the connection mode alone.
    const missingWhileLocal = isFatalBackendExit(null, 'local')
    const missingWhileRemote = isFatalBackendExit(undefined, 'remote')
    expect(missingWhileLocal).toBe(true)
    expect(missingWhileRemote).toBe(false)
  })
})

View File

@@ -0,0 +1,25 @@
import type { BackendExit } from '@/global'
/**
 * Report whether an exit of the local helper process should fail the desktop
 * boot.
 *
 * An exit is fatal unless one of the following holds:
 *  - the payload marks it as a deliberate teardown (e.g. the leftover local
 *    helper being SIGTERM'd while switching gateways), or
 *  - the desktop is working against a remote gateway, as indicated either by
 *    the current connection mode or by the mode recorded in the payload. In
 *    that situation the exiting process is not the desktop's backend.
 *
 * A missing payload carries neither marker, so the outcome then depends only
 * on `connectionMode`.
 *
 * Background: issue #37869, where remote-ready boots stayed stuck behind a
 * stale local-exit failure overlay.
 *
 * @param payload - Exit details reported for the local helper, if any.
 * @param connectionMode - The mode the desktop is currently attached in.
 * @returns `true` when the exit should latch a boot failure.
 */
export function isFatalBackendExit(payload: BackendExit | null | undefined, connectionMode: string | null | undefined): boolean {
  const tornDownOnPurpose = Boolean(payload?.deliberate)
  const servedByRemote = connectionMode === 'remote' || payload?.mode === 'remote'

  return !tornDownOnPurpose && !servedByRemote
}