fix: persistent event loop in _run_async prevents 'Event loop is closed' (#2190)

Cherry-picked from PR #2146 by @crazywriter1. Fixes #2104.

asyncio.run() creates and closes a fresh event loop on each call. Cached
httpx/AsyncOpenAI clients bound to the dead loop crash on GC with
'Event loop is closed'. This hit vision_analyze on first use in the CLI.

Two-layer fix:
- model_tools._run_async(): replace asyncio.run() with persistent
  loop via _get_tool_loop() + run_until_complete()
- auxiliary_client._get_cached_client(): track which loop created
  each async client, discard stale entries if loop is closed

Adds 6 regression tests covering loop lifecycle, reuse, and the full
vision dispatch chain.

Co-authored-by: Test <test@test.com>
This commit is contained in:
Teknium
2026-03-20 09:44:50 -07:00
committed by GitHub
parent 66a1942524
commit 7a427d7b03
3 changed files with 261 additions and 5 deletions

View File

@@ -1191,8 +1191,18 @@ def _get_cached_client(
cache_key = (provider, async_mode, base_url or "", api_key or "")
with _client_cache_lock:
if cache_key in _client_cache:
cached_client, cached_default = _client_cache[cache_key]
return cached_client, model or cached_default
cached_client, cached_default, cached_loop = _client_cache[cache_key]
if async_mode:
# Async clients are bound to the event loop that created them.
# A cached async client whose loop has been closed will raise
# "Event loop is closed" when httpx tries to clean up its
# transport. Discard the stale client and create a fresh one.
if cached_loop is not None and cached_loop.is_closed():
del _client_cache[cache_key]
else:
return cached_client, model or cached_default
else:
return cached_client, model or cached_default
# Build outside the lock
client, default_model = resolve_provider_client(
provider,
@@ -1202,11 +1212,20 @@ def _get_cached_client(
explicit_api_key=api_key,
)
if client is not None:
# For async clients, remember which loop they were created on so we
# can detect stale entries later.
bound_loop = None
if async_mode:
try:
import asyncio as _aio
bound_loop = _aio.get_event_loop()
except RuntimeError:
pass
with _client_cache_lock:
if cache_key not in _client_cache:
_client_cache[cache_key] = (client, default_model)
_client_cache[cache_key] = (client, default_model, bound_loop)
else:
client, default_model = _client_cache[cache_key]
client, default_model, _ = _client_cache[cache_key]
return client, model or default_model