Mirror of https://github.com/NousResearch/hermes-agent.git, synced 2026-04-28 06:51:16 +08:00.
fix: increase API timeout default from 900s to 1800s for slow-thinking models (#3431)
Models like GLM-5/5.1 can think for 15+ minutes. The previous 900s (15 min) default for HERMES_API_TIMEOUT killed legitimate requests. Raised to 1800s (30 min) in both places that read the env var: - _build_api_kwargs() timeout (non-streaming total timeout) - _call_chat_completions() write timeout (streaming connection) The streaming per-chunk read timeout (60s) and stale stream detector (180-300s) are unchanged — those are appropriate for inter-chunk timing.
This commit is contained in:
@@ -3772,7 +3772,7 @@ class AIAgent:
     def _call_chat_completions():
         """Stream a chat completions response."""
         import httpx as _httpx
-        _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 900.0))
+        _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0))
         _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 60.0))
         stream_kwargs = {
             **api_kwargs,
@@ -4497,7 +4497,7 @@ class AIAgent:
             "model": self.model,
             "messages": sanitized_messages,
             "tools": self.tools if self.tools else None,
-            "timeout": float(os.getenv("HERMES_API_TIMEOUT", 900.0)),
+            "timeout": float(os.getenv("HERMES_API_TIMEOUT", 1800.0)),
         }

         if self.max_tokens is not None:
@@ -637,7 +637,7 @@ class TestBuildApiKwargs:
         kwargs = agent._build_api_kwargs(messages)
         assert kwargs["model"] == agent.model
         assert kwargs["messages"] is messages
-        assert kwargs["timeout"] == 900.0
+        assert kwargs["timeout"] == 1800.0

     def test_provider_preferences_injected(self, agent):
         agent.providers_allowed = ["Anthropic"]
Reference in New Issue
Block a user