mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-01 00:11:39 +08:00
Compare commits
1 Commits
fix/plugin
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f2f9a641fa |
@@ -113,7 +113,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||||||
"deepseek": 128000,
|
"deepseek": 128000,
|
||||||
# Meta
|
# Meta
|
||||||
"llama": 131072,
|
"llama": 131072,
|
||||||
# Qwen
|
# Qwen — specific model families before the catch-all.
|
||||||
|
# Official docs: https://help.aliyun.com/zh/model-studio/developer-reference/
|
||||||
|
"qwen3-coder-plus": 1000000, # 1M context
|
||||||
|
"qwen3-coder": 262144, # 256K context
|
||||||
"qwen": 131072,
|
"qwen": 131072,
|
||||||
# MiniMax — official docs: 204,800 context for all models
|
# MiniMax — official docs: 204,800 context for all models
|
||||||
# https://platform.minimax.io/docs/api-reference/text-anthropic-api
|
# https://platform.minimax.io/docs/api-reference/text-anthropic-api
|
||||||
|
|||||||
10
run_agent.py
10
run_agent.py
@@ -5888,8 +5888,16 @@ class AIAgent:
|
|||||||
api_kwargs["tools"] = self.tools
|
api_kwargs["tools"] = self.tools
|
||||||
|
|
||||||
if self.max_tokens is not None:
|
if self.max_tokens is not None:
|
||||||
if not self._is_qwen_portal():
|
|
||||||
api_kwargs.update(self._max_tokens_param(self.max_tokens))
|
api_kwargs.update(self._max_tokens_param(self.max_tokens))
|
||||||
|
elif self._is_qwen_portal():
|
||||||
|
# Qwen Portal defaults to a very low max_tokens when omitted.
|
||||||
|
# Reasoning models (qwen3-coder-plus) exhaust that budget on
|
||||||
|
# thinking tokens alone, causing the portal to return
|
||||||
|
# finish_reason="stop" with truncated output — the agent sees
|
||||||
|
# this as an intentional stop and exits the loop. Send 65536
|
||||||
|
# (the documented max output for qwen3-coder models) so the
|
||||||
|
# model has adequate output budget for tool calls.
|
||||||
|
api_kwargs.update(self._max_tokens_param(65536))
|
||||||
elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower():
|
elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower():
|
||||||
# OpenRouter and Nous Portal translate requests to Anthropic's
|
# OpenRouter and Nous Portal translate requests to Anthropic's
|
||||||
# Messages API, which requires max_tokens as a mandatory field.
|
# Messages API, which requires max_tokens as a mandatory field.
|
||||||
|
|||||||
@@ -222,6 +222,24 @@ class TestGetModelContextLength:
|
|||||||
mock_fetch.return_value = {}
|
mock_fetch.return_value = {}
|
||||||
assert get_model_context_length("openai/gpt-4o") == 128000
|
assert get_model_context_length("openai/gpt-4o") == 128000
|
||||||
|
|
||||||
|
@patch("agent.model_metadata.fetch_model_metadata")
|
||||||
|
def test_qwen3_coder_plus_context_length(self, mock_fetch):
|
||||||
|
"""qwen3-coder-plus has a 1M context window, not the generic 128K Qwen default."""
|
||||||
|
mock_fetch.return_value = {}
|
||||||
|
assert get_model_context_length("qwen3-coder-plus") == 1000000
|
||||||
|
|
||||||
|
@patch("agent.model_metadata.fetch_model_metadata")
|
||||||
|
def test_qwen3_coder_context_length(self, mock_fetch):
|
||||||
|
"""qwen3-coder has a 256K context window, not the generic 128K Qwen default."""
|
||||||
|
mock_fetch.return_value = {}
|
||||||
|
assert get_model_context_length("qwen3-coder") == 262144
|
||||||
|
|
||||||
|
@patch("agent.model_metadata.fetch_model_metadata")
|
||||||
|
def test_qwen_generic_context_length(self, mock_fetch):
|
||||||
|
"""Generic qwen models still get the 128K default."""
|
||||||
|
mock_fetch.return_value = {}
|
||||||
|
assert get_model_context_length("qwen3-plus") == 131072
|
||||||
|
|
||||||
@patch("agent.model_metadata.fetch_model_metadata")
|
@patch("agent.model_metadata.fetch_model_metadata")
|
||||||
def test_api_missing_context_length_key(self, mock_fetch):
|
def test_api_missing_context_length_key(self, mock_fetch):
|
||||||
"""Model in API but without context_length → defaults to 128000."""
|
"""Model in API but without context_length → defaults to 128000."""
|
||||||
|
|||||||
@@ -953,14 +953,24 @@ class TestBuildApiKwargs:
|
|||||||
assert kwargs["messages"][0]["content"][0]["text"] == "hi"
|
assert kwargs["messages"][0]["content"][0]["text"] == "hi"
|
||||||
assert "cache_control" not in kwargs["messages"][0]["content"][0]
|
assert "cache_control" not in kwargs["messages"][0]["content"][0]
|
||||||
|
|
||||||
def test_qwen_portal_omits_max_tokens(self, agent):
|
def test_qwen_portal_sends_explicit_max_tokens(self, agent):
|
||||||
|
"""When the user explicitly sets max_tokens, it should be sent to Qwen Portal."""
|
||||||
agent.base_url = "https://portal.qwen.ai/v1"
|
agent.base_url = "https://portal.qwen.ai/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
agent.max_tokens = 4096
|
agent.max_tokens = 4096
|
||||||
messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
|
messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
|
||||||
kwargs = agent._build_api_kwargs(messages)
|
kwargs = agent._build_api_kwargs(messages)
|
||||||
assert "max_tokens" not in kwargs
|
assert kwargs["max_tokens"] == 4096
|
||||||
assert "max_completion_tokens" not in kwargs
|
|
||||||
|
def test_qwen_portal_default_max_tokens(self, agent):
|
||||||
|
"""When max_tokens is None, Qwen Portal gets a default of 65536
|
||||||
|
to prevent reasoning models from exhausting their output budget."""
|
||||||
|
agent.base_url = "https://portal.qwen.ai/v1"
|
||||||
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
|
agent.max_tokens = None
|
||||||
|
messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
|
||||||
|
kwargs = agent._build_api_kwargs(messages)
|
||||||
|
assert kwargs["max_tokens"] == 65536
|
||||||
|
|
||||||
|
|
||||||
class TestBuildAssistantMessage:
|
class TestBuildAssistantMessage:
|
||||||
|
|||||||
Reference in New Issue
Block a user