From a7fb79efb219d9d473bede1371ce53e830bad42e Mon Sep 17 00:00:00 2001
From: Vlad Ra
Date: Wed, 29 Apr 2026 15:47:27 +0100
Subject: [PATCH] fix(agent): spawn OpenRouter pre-warm thread only once per process

Each AIAgent.__init__() was unconditionally starting a daemon thread to
pre-warm the OpenRouter model metadata cache. In gateway mode a new
AIAgent is created for every incoming message, so one OS thread leaked
per request. After ~1 000 messages the process hit the Linux thread
limit and raised RuntimeError: can't start new thread for all
subsequent requests.

Add a module-level threading.Event (_openrouter_prewarm_done) that is
set before the thread is started. Subsequent AIAgent instantiations
skip the spawn entirely; fetch_model_metadata() is cached for 1 hour so
the single background call is sufficient.

Co-Authored-By: Claude Sonnet 4.6
---
 run_agent.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index a7c583f61ce..e31fb80352e 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -323,6 +323,12 @@ _PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
 # Maximum number of concurrent worker threads for parallel tool execution.
 _MAX_TOOL_WORKERS = 8
 
+# Guard so the OpenRouter metadata pre-warm thread is only spawned once per
+# process, not once per AIAgent instantiation. Without this, long-running
+# gateway processes leak one OS thread per incoming message and eventually
+# exhaust the system thread limit (RuntimeError: can't start new thread).
+_openrouter_prewarm_done = threading.Event()
+
 # Patterns that indicate a terminal command may modify/delete files.
 _DESTRUCTIVE_PATTERNS = re.compile(
     r"""(?:^|\s|&&|\|\||;|`)(?:
@@ -1107,10 +1113,17 @@ class AIAgent:
         # Pre-warm OpenRouter model metadata cache in a background thread.
         # fetch_model_metadata() is cached for 1 hour; this avoids a blocking
         # HTTP request on the first API response when pricing is estimated.
-        if self.provider == "openrouter" or self._is_openrouter_url():
+        # Use a process-level Event so this thread is only spawned once — a new
+        # AIAgent is created for every gateway request, so without the guard
+        # each message leaks one OS thread and the process eventually exhausts
+        # the system thread limit (RuntimeError: can't start new thread).
+        if (self.provider == "openrouter" or self._is_openrouter_url()) and \
+                not _openrouter_prewarm_done.is_set():
+            _openrouter_prewarm_done.set()
             threading.Thread(
-                target=lambda: fetch_model_metadata(),
+                target=fetch_model_metadata,
                 daemon=True,
+                name="openrouter-prewarm",
             ).start()
 
         self.tool_progress_callback = tool_progress_callback