mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-02 08:47:26 +08:00
fix(agent): spawn OpenRouter pre-warm thread only once per process
Each AIAgent.__init__() call unconditionally started a daemon thread to pre-warm the OpenRouter model metadata cache. In gateway mode a new AIAgent is created for every incoming message, so one OS thread leaked per request. After ~1,000 messages the process hit the Linux thread limit, and every subsequent request failed with "RuntimeError: can't start new thread". Add a module-level threading.Event (_openrouter_prewarm_done) that is set before the thread is started. Subsequent AIAgent instantiations skip the spawn entirely; fetch_model_metadata() is cached for 1 hour, so the single background call is sufficient. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
17
run_agent.py
17
run_agent.py
@@ -323,6 +323,12 @@ _PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
|
||||
# Maximum number of concurrent worker threads for parallel tool execution.
|
||||
_MAX_TOOL_WORKERS = 8
|
||||
|
||||
# Guard so the OpenRouter metadata pre-warm thread is only spawned once per
|
||||
# process, not once per AIAgent instantiation. Without this, long-running
|
||||
# gateway processes leak one OS thread per incoming message and eventually
|
||||
# exhaust the system thread limit (RuntimeError: can't start new thread).
|
||||
_openrouter_prewarm_done = threading.Event()
|
||||
|
||||
# Patterns that indicate a terminal command may modify/delete files.
|
||||
_DESTRUCTIVE_PATTERNS = re.compile(
|
||||
r"""(?:^|\s|&&|\|\||;|`)(?:
|
||||
@@ -1107,10 +1113,17 @@ class AIAgent:
|
||||
# Pre-warm OpenRouter model metadata cache in a background thread.
|
||||
# fetch_model_metadata() is cached for 1 hour; this avoids a blocking
|
||||
# HTTP request on the first API response when pricing is estimated.
|
||||
if self.provider == "openrouter" or self._is_openrouter_url():
|
||||
# Use a process-level Event so this thread is only spawned once — a new
|
||||
# AIAgent is created for every gateway request, so without the guard
|
||||
# each message leaks one OS thread and the process eventually exhausts
|
||||
# the system thread limit (RuntimeError: can't start new thread).
|
||||
if (self.provider == "openrouter" or self._is_openrouter_url()) and \
|
||||
not _openrouter_prewarm_done.is_set():
|
||||
_openrouter_prewarm_done.set()
|
||||
threading.Thread(
|
||||
target=lambda: fetch_model_metadata(),
|
||||
target=fetch_model_metadata,
|
||||
daemon=True,
|
||||
name="openrouter-prewarm",
|
||||
).start()
|
||||
|
||||
self.tool_progress_callback = tool_progress_callback
|
||||
|
||||
Reference in New Issue
Block a user