mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-10 04:08:28 +08:00
Compare commits
1 Commits
bb/version
...
cline-port
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
eb1896ef20 |
30
run_agent.py
30
run_agent.py
@@ -336,6 +336,22 @@ _PARALLEL_SAFE_TOOLS = frozenset({
|
||||
# File tools can run concurrently when they target independent paths.
|
||||
_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
|
||||
|
||||
# OpenRouter model IDs that require explicit ``cache_control`` breakpoints for
|
||||
# prompt caching on their upstream provider. OpenRouter's prompt-caching docs
|
||||
# list Alibaba's Qwen family and DeepSeek V3.2 as explicit-cache models —
|
||||
# without breakpoints they return 0% cache reads and re-bill the full prompt
|
||||
# on every turn. Claude models are handled separately (``is_claude`` branch).
|
||||
# OpenAI and Google models are intentionally excluded — OpenRouter manages
|
||||
# their caching automatically. Ported from cline/cline#10578.
|
||||
_OPENROUTER_EXPLICIT_CACHE_CONTROL_MODEL_IDS = frozenset({
|
||||
"deepseek/deepseek-v3.2",
|
||||
"qwen/qwen-plus",
|
||||
"qwen/qwen3-max",
|
||||
"qwen/qwen3.6-plus",
|
||||
"qwen/qwen3-coder-plus",
|
||||
"qwen/qwen3-coder-flash",
|
||||
})
|
||||
|
||||
# Maximum number of concurrent worker threads for parallel tool execution.
|
||||
_MAX_TOOL_WORKERS = 8
|
||||
|
||||
@@ -2966,6 +2982,20 @@ class AIAgent:
|
||||
return True, True
|
||||
if is_openrouter and is_claude:
|
||||
return True, False
|
||||
if is_openrouter and model_lower in _OPENROUTER_EXPLICIT_CACHE_CONTROL_MODEL_IDS:
|
||||
# OpenRouter's prompt-caching docs list a set of non-Claude models
|
||||
# (Alibaba Qwen-family, DeepSeek V3.2) that only cache when the
|
||||
# request carries explicit ``cache_control`` breakpoints — otherwise
|
||||
# the upstream provider serves 0% cache hits, re-billing the full
|
||||
# prompt on every turn. Ported from cline/cline#10578 which
|
||||
# verified empirically: qwen/qwen3.6-plus went from 0% cache reads
|
||||
# across 5 turns to a 99.28% hit rate post-warmup after adding
|
||||
# breakpoints. OpenAI- and Google-family models are intentionally
|
||||
# omitted: OpenRouter handles their caching automatically and
|
||||
# sending ``cache_control`` is ignored at best, rejected at worst.
|
||||
# Envelope layout (native_anthropic=False) — OpenRouter's wire
|
||||
# format is OpenAI chat.completions.
|
||||
return True, False
|
||||
if is_anthropic_wire and is_claude:
|
||||
# Third-party Anthropic-compatible gateway.
|
||||
return True, True
|
||||
|
||||
@@ -247,8 +247,12 @@ class TestQwenAlibabaFamily:
|
||||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
def test_qwen_on_openrouter_not_affected(self):
|
||||
# Qwen via OpenRouter falls through — OpenRouter has its own
|
||||
# upstream caching arrangement for Qwen (provider-dependent).
|
||||
# Qwen via OpenRouter falls through for models NOT in the explicit
|
||||
# cache-control allowlist — the bare ``qwen/qwen3-coder`` slug (as
|
||||
# opposed to ``qwen3-coder-plus`` / ``qwen3-coder-flash``) is served
|
||||
# on OpenRouter without the explicit-cache requirement. Models that
|
||||
# DO require ``cache_control`` on OpenRouter are covered separately
|
||||
# by TestOpenRouterExplicitCacheControl.
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
@@ -258,6 +262,121 @@ class TestQwenAlibabaFamily:
|
||||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
|
||||
class TestOpenRouterExplicitCacheControl:
|
||||
"""OpenRouter models that need explicit ``cache_control`` breakpoints.
|
||||
|
||||
OpenRouter's prompt-caching docs list a set of non-Claude models
|
||||
(Alibaba Qwen-family, DeepSeek V3.2) as explicit-cache models.
|
||||
Without breakpoints these serve 0% cache reads across turns —
|
||||
re-billing the full prompt every call. Ported from
|
||||
cline/cline#10578 which verified empirically: a 5-turn harness
|
||||
on ``qwen/qwen3.6-plus`` went from 0% cache hits to a 99.28%
|
||||
post-warmup hit rate after adding breakpoints.
|
||||
|
||||
Envelope layout (``native_anthropic=False``) — OpenRouter speaks
|
||||
OpenAI ``chat.completions`` wire format.
|
||||
"""
|
||||
|
||||
def test_qwen_plus(self):
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen/qwen-plus",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_qwen3_max(self):
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen/qwen3-max",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_qwen3_6_plus(self):
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen/qwen3.6-plus",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_qwen3_coder_plus(self):
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen/qwen3-coder-plus",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_qwen3_coder_flash(self):
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen/qwen3-coder-flash",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_deepseek_v3_2(self):
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="deepseek/deepseek-v3.2",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_uppercased_model_id_still_matches(self):
|
||||
# ``_anthropic_prompt_cache_policy`` lowercases the model; explicit
|
||||
# allowlist entries are all lowercase. An uppercase variant of the
|
||||
# same slug must resolve identically.
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="Qwen/Qwen3.6-Plus",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_allowlist_only_applies_on_openrouter(self):
|
||||
# The same model id served via a non-OpenRouter OpenAI-wire gateway
|
||||
# must NOT get cache_control — we have no evidence that third-party
|
||||
# proxies for these models honour the marker.
|
||||
agent = _make_agent(
|
||||
provider="custom",
|
||||
base_url="https://api.fireworks.ai/inference/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen/qwen3.6-plus",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
def test_non_allowlisted_qwen_on_openrouter_stays_off(self):
|
||||
# Qwen models NOT in the OpenRouter explicit-cache list (per cline's
|
||||
# empirical testing) stay off — sending unknown cache_control fields
|
||||
# risks breaking strict upstreams that don't silently ignore them.
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen/qwen2.5-72b-instruct",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
def test_non_allowlisted_deepseek_on_openrouter_stays_off(self):
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="deepseek/deepseek-chat",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
|
||||
class TestExplicitOverrides:
|
||||
"""Policy accepts keyword overrides for switch_model / fallback activation."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user