fix(mcp): stability fix pack — reload timeout, shutdown cleanup, event loop handler, OAuth non-blocking (#4757)
Four fixes for MCP server stability issues reported by community member
(terminal lockup, zombie processes, escape sequence pollution, startup hang):
1. MCP reload timeout guard (cli.py): _check_config_mcp_changes now runs
_reload_mcp in a separate daemon thread with a 30s hard timeout. Previously,
a hung MCP server could block the process_loop thread indefinitely, freezing
the entire TUI (user can type but nothing happens, only Ctrl+D/Ctrl+\ work).
2. MCP stdio subprocess PID tracking (mcp_tool.py): Tracks child PIDs spawned
by stdio_client via before/after snapshots of /proc children. On shutdown,
_stop_mcp_loop force-kills any tracked PIDs that survived the SDK's graceful
SIGTERM→SIGKILL cleanup. Prevents zombie MCP server processes from
accumulating across sessions.
3. MCP event loop exception handler (mcp_tool.py): Installs
_mcp_loop_exception_handler on the MCP background event loop — same pattern
as the existing _suppress_closed_loop_errors on prompt_toolkit's loop.
Suppresses benign 'Event loop is closed' RuntimeError from httpx transport
__del__ during MCP shutdown. Salvaged from PR #2538 (acsezen).
4. MCP OAuth non-blocking (mcp_oauth.py): Replaces blocking input() call in
_wait_for_callback with OAuthNonInteractiveError raise. Adds _is_interactive()
TTY detection. In non-interactive environments, build_oauth_auth() still
returns a provider (cached tokens + refresh work), but the callback handler
raises immediately instead of blocking the MCP event loop for 120s. Re-raises
OAuth setup failures in _run_http so failed servers are reported cleanly
without blocking others. Salvaged from PRs #4521 (voidborne-d) and #4465
(heathley).
Closes #2537, closes #4462
Related: #4128, #3436
2026-04-03 02:29:20 -07:00
|
|
|
"""Tests for MCP stability fixes — event loop handler, PID tracking, shutdown robustness."""
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
import os
|
|
|
|
|
import signal
|
|
|
|
|
import threading
|
|
|
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Fix 1: MCP event loop exception handler
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class TestMCPLoopExceptionHandler:
    """_mcp_loop_exception_handler suppresses benign 'Event loop is closed'."""

    @staticmethod
    def _run_handler(context):
        """Invoke the handler against a mock loop and return that mock."""
        from tools.mcp_tool import _mcp_loop_exception_handler

        fake_loop = MagicMock()
        _mcp_loop_exception_handler(fake_loop, context)
        return fake_loop

    def test_suppresses_event_loop_closed(self):
        context = {"exception": RuntimeError("Event loop is closed")}
        fake_loop = self._run_handler(context)
        # The benign shutdown error must NOT reach the default handler.
        fake_loop.default_exception_handler.assert_not_called()

    def test_forwards_other_runtime_errors(self):
        context = {"exception": RuntimeError("some other error")}
        fake_loop = self._run_handler(context)
        # Any other RuntimeError is forwarded untouched.
        fake_loop.default_exception_handler.assert_called_once_with(context)

    def test_forwards_non_runtime_errors(self):
        context = {"exception": ValueError("bad value")}
        fake_loop = self._run_handler(context)
        fake_loop.default_exception_handler.assert_called_once_with(context)

    def test_forwards_contexts_without_exception(self):
        context = {"message": "just a message"}
        fake_loop = self._run_handler(context)
        fake_loop.default_exception_handler.assert_called_once_with(context)

    def test_handler_installed_on_mcp_loop(self):
        """_ensure_mcp_loop installs the exception handler on the new loop."""
        import tools.mcp_tool as mcp_mod

        try:
            mcp_mod._ensure_mcp_loop()
            with mcp_mod._lock:
                background_loop = mcp_mod._mcp_loop
            assert background_loop is not None
            assert (
                background_loop.get_exception_handler()
                is mcp_mod._mcp_loop_exception_handler
            )
        finally:
            # Always tear the background loop down so other tests start clean.
            mcp_mod._stop_mcp_loop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Fix 2: stdio PID tracking
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class TestStdioPidTracking:
    """_snapshot_child_pids and _stdio_pids track subprocess PIDs."""

    def test_snapshot_returns_set(self):
        from tools.mcp_tool import _snapshot_child_pids

        snapshot = _snapshot_child_pids()
        assert isinstance(snapshot, set)
        # Every tracked PID must be a plain integer.
        assert all(isinstance(pid, int) for pid in snapshot)

    def test_stdio_pids_starts_empty(self):
        from tools.mcp_tool import _stdio_pids, _lock

        with _lock:
            # Might have residual state from other tests, just check the type.
            assert isinstance(_stdio_pids, dict)

    def test_kill_orphaned_noop_when_empty(self):
        """_kill_orphaned_mcp_children does nothing when no PIDs tracked."""
        from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock

        with _lock:
            _stdio_pids.clear()

        # Must be a silent no-op rather than raising.
        _kill_orphaned_mcp_children()

    def test_kill_orphaned_handles_dead_pids(self):
        """_kill_orphaned_mcp_children gracefully handles already-dead PIDs."""
        from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock

        # A PID far beyond any realistic pid_max, so the kill surely misses.
        dead_pid = 999999999
        with _lock:
            _stdio_pids[dead_pid] = "test"

        # ProcessLookupError is caught internally; no exception expected.
        _kill_orphaned_mcp_children()

        with _lock:
            assert dead_pid not in _stdio_pids

    def test_kill_orphaned_uses_sigkill_when_available(self, monkeypatch):
        """SIGTERM-first then SIGKILL after 2s for orphan cleanup."""
        from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock

        tracked_pid = 424242
        with _lock:
            _stdio_pids.clear()
            _stdio_pids[tracked_pid] = "test"

        sigkill_value = 9
        monkeypatch.setattr(signal, "SIGKILL", sigkill_value, raising=False)

        with patch("tools.mcp_tool.os.kill") as mock_kill, \
                patch("time.sleep") as mock_sleep:
            _kill_orphaned_mcp_children()

            # SIGTERM, then alive-check (signal 0), then SIGKILL
            mock_kill.assert_any_call(tracked_pid, signal.SIGTERM)
            mock_kill.assert_any_call(tracked_pid, 0)  # alive check
            mock_kill.assert_any_call(tracked_pid, sigkill_value)
            assert mock_kill.call_count == 3
            mock_sleep.assert_called_once_with(2)

        with _lock:
            assert tracked_pid not in _stdio_pids

    def test_kill_orphaned_falls_back_without_sigkill(self, monkeypatch):
        """Without SIGKILL, SIGTERM is used for both phases."""
        from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock

        tracked_pid = 434343
        with _lock:
            _stdio_pids.clear()
            _stdio_pids[tracked_pid] = "test"

        monkeypatch.delattr(signal, "SIGKILL", raising=False)

        with patch("tools.mcp_tool.os.kill") as mock_kill, \
                patch("time.sleep") as mock_sleep:
            _kill_orphaned_mcp_children()

            # SIGTERM phase, alive check raises (process gone), no escalation
            mock_kill.assert_any_call(tracked_pid, signal.SIGTERM)
            assert mock_sleep.called

        with _lock:
            assert tracked_pid not in _stdio_pids
|
|
|
|
|
|
fix(mcp): stability fix pack — reload timeout, shutdown cleanup, event loop handler, OAuth non-blocking (#4757)
Four fixes for MCP server stability issues reported by community member
(terminal lockup, zombie processes, escape sequence pollution, startup hang):
1. MCP reload timeout guard (cli.py): _check_config_mcp_changes now runs
_reload_mcp in a separate daemon thread with a 30s hard timeout. Previously,
a hung MCP server could block the process_loop thread indefinitely, freezing
the entire TUI (user can type but nothing happens, only Ctrl+D/Ctrl+\ work).
2. MCP stdio subprocess PID tracking (mcp_tool.py): Tracks child PIDs spawned
by stdio_client via before/after snapshots of /proc children. On shutdown,
_stop_mcp_loop force-kills any tracked PIDs that survived the SDK's graceful
SIGTERM→SIGKILL cleanup. Prevents zombie MCP server processes from
accumulating across sessions.
3. MCP event loop exception handler (mcp_tool.py): Installs
_mcp_loop_exception_handler on the MCP background event loop — same pattern
as the existing _suppress_closed_loop_errors on prompt_toolkit's loop.
Suppresses benign 'Event loop is closed' RuntimeError from httpx transport
__del__ during MCP shutdown. Salvaged from PR #2538 (acsezen).
4. MCP OAuth non-blocking (mcp_oauth.py): Replaces blocking input() call in
_wait_for_callback with OAuthNonInteractiveError raise. Adds _is_interactive()
TTY detection. In non-interactive environments, build_oauth_auth() still
returns a provider (cached tokens + refresh work), but the callback handler
raises immediately instead of blocking the MCP event loop for 120s. Re-raises
OAuth setup failures in _run_http so failed servers are reported cleanly
without blocking others. Salvaged from PRs #4521 (voidborne-d) and #4465
(heathley).
Closes #2537, closes #4462
Related: #4128, #3436
2026-04-03 02:29:20 -07:00
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Fix 3: MCP reload timeout (cli.py)
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class TestMCPReloadTimeout:
    """_check_config_mcp_changes uses a timeout on _reload_mcp."""

    def test_reload_timeout_does_not_block_forever(self):
        """If _reload_mcp hangs, the config watcher times out and returns.

        Rather than spinning up a genuinely hung reload (slow and flaky in
        CI), verify the fix structurally: _check_config_mcp_changes must
        drive _reload_mcp through a worker thread instead of calling it
        inline, so a hung MCP server can no longer block the process_loop
        thread.
        """
        # NOTE(review): the previous version built a FakeCLI scaffold and a
        # hanging _reload_mcp that were never used; the unused `time` import
        # and the tmp_path/monkeypatch fixtures went with them.
        import inspect

        from cli import HermesCLI

        source = inspect.getsource(HermesCLI._check_config_mcp_changes)
        # The fix adds threading.Thread for _reload_mcp
        assert "Thread" in source or "thread" in source.lower(), \
            "_check_config_mcp_changes should use a thread for _reload_mcp"
|
fix: add vLLM/local server error patterns + MCP initial connection retry (#9281)
Port two improvements inspired by Kilo-Org/kilocode analysis:
1. Error classifier: add context overflow patterns for vLLM, Ollama,
and llama.cpp/llama-server. These local inference servers return
different error formats than cloud providers (e.g., 'exceeds the
max_model_len', 'context length exceeded', 'slot context'). Without
these patterns, context overflow errors from local servers are
misclassified as format errors, causing infinite retries instead
of triggering compression.
2. MCP initial connection retry: previously, if the very first
connection attempt to an MCP server failed (e.g., transient DNS
blip at startup), the server was permanently marked as failed with
no retry. Post-connect reconnection had 5 retries with exponential
backoff, but initial connection had zero. Now initial connections
retry up to 3 times with backoff before giving up, matching the
resilience of post-connect reconnection.
(Inspired by Kilo Code's MCP server disappearing fix in v1.3.3)
Tests: 6 new error classifier tests, 4 new MCP retry tests, 1
updated existing test. All 276 affected tests pass.
2026-04-13 18:46:14 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Fix 4: MCP initial connection retry with backoff
|
|
|
|
|
# (Ported from Kilo Code's MCP resilience fix)
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class TestMCPInitialConnectionRetry:
    """MCPServerTask.run() retries initial connection failures instead of giving up."""

    # NOTE(review): these tests previously drove their coroutines via
    # asyncio.get_event_loop().run_until_complete(...), which is deprecated
    # when no loop is running and fails outright on modern Python; each test
    # now uses asyncio.run(), which creates and closes a fresh loop.

    def test_initial_connect_retries_constant_exists(self):
        """_MAX_INITIAL_CONNECT_RETRIES should be defined."""
        from tools.mcp_tool import _MAX_INITIAL_CONNECT_RETRIES

        assert _MAX_INITIAL_CONNECT_RETRIES >= 1

    def test_initial_connect_retry_succeeds_on_second_attempt(self):
        """Server succeeds after one transient initial failure."""
        from tools.mcp_tool import MCPServerTask

        call_count = 0

        async def _run():
            server = MCPServerTask("test-retry")

            async def fake_run_stdio(self_inner, config):
                nonlocal call_count
                call_count += 1
                if call_count == 1:
                    raise ConnectionError("DNS resolution failed")
                # Second attempt: success — set ready and "run" until shutdown
                self_inner._ready.set()
                await self_inner._shutdown_event.wait()

            with patch.object(MCPServerTask, '_run_stdio', fake_run_stdio):
                task = asyncio.ensure_future(server.run({"command": "fake"}))
                await server._ready.wait()

                # It should have succeeded (no error) after retrying
                assert server._error is None, f"Expected no error, got: {server._error}"
                assert call_count == 2, f"Expected 2 attempts, got {call_count}"

                # Clean shutdown
                server._shutdown_event.set()
                await task

        asyncio.run(_run())

    def test_initial_connect_gives_up_after_max_retries(self):
        """Server gives up after _MAX_INITIAL_CONNECT_RETRIES failures."""
        from tools.mcp_tool import MCPServerTask, _MAX_INITIAL_CONNECT_RETRIES

        call_count = 0

        async def _run():
            server = MCPServerTask("test-exhaust")

            async def fake_run_stdio(self_inner, config):
                nonlocal call_count
                call_count += 1
                raise ConnectionError("DNS resolution failed")

            with patch.object(MCPServerTask, '_run_stdio', fake_run_stdio):
                task = asyncio.ensure_future(server.run({"command": "fake"}))
                await server._ready.wait()

                # Should have an error after exhausting retries
                assert server._error is not None
                assert "DNS resolution failed" in str(server._error)
                # 1 initial + N retries = _MAX_INITIAL_CONNECT_RETRIES + 1 total attempts
                assert call_count == _MAX_INITIAL_CONNECT_RETRIES + 1

                await task

        asyncio.run(_run())

    def test_initial_connect_retry_respects_shutdown(self):
        """Shutdown during initial retry backoff aborts cleanly."""
        from tools.mcp_tool import MCPServerTask

        async def _run():
            server = MCPServerTask("test-shutdown")
            attempt = 0

            async def fake_run_stdio(self_inner, config):
                nonlocal attempt
                attempt += 1
                if attempt == 1:
                    raise ConnectionError("transient failure")
                # Should not reach here because shutdown fires during sleep
                raise AssertionError("Should not attempt after shutdown")

            with patch.object(MCPServerTask, '_run_stdio', fake_run_stdio):
                task = asyncio.ensure_future(server.run({"command": "fake"}))

                # Give the first attempt time to fail, then set shutdown
                # during the backoff sleep
                await asyncio.sleep(0.1)
                server._shutdown_event.set()
                await server._ready.wait()

                # Should have the error set and be done
                assert server._error is not None
                await task

        asyncio.run(_run())
|