mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 04:14:07 +08:00
Compare commits
1 Commit
bb/ableton
...
chore/remo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ed0e2ab371 |
@@ -2802,12 +2802,9 @@ def run_conversation(
|
|||||||
if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
|
if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
|
||||||
# Don't eagerly fallback if credential pool rotation may
|
# Don't eagerly fallback if credential pool rotation may
|
||||||
# still recover. See _pool_may_recover_from_rate_limit
|
# still recover. See _pool_may_recover_from_rate_limit
|
||||||
# for the single-credential-pool and CloudCode-quota
|
# for the single-credential-pool exception. Fixes #11314.
|
||||||
# exceptions. Fixes #11314 and #13636.
|
|
||||||
pool_may_recover = _ra()._pool_may_recover_from_rate_limit(
|
pool_may_recover = _ra()._pool_may_recover_from_rate_limit(
|
||||||
agent._credential_pool,
|
agent._credential_pool,
|
||||||
provider=agent.provider,
|
|
||||||
base_url=getattr(agent, "base_url", None),
|
|
||||||
)
|
)
|
||||||
if not pool_may_recover:
|
if not pool_may_recover:
|
||||||
if classified.reason == FailoverReason.billing:
|
if classified.reason == FailoverReason.billing:
|
||||||
|
|||||||
31
run_agent.py
31
run_agent.py
@@ -243,26 +243,20 @@ def _routermint_headers() -> dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _pool_may_recover_from_rate_limit(
|
def _pool_may_recover_from_rate_limit(pool) -> bool:
|
||||||
pool, *, provider: str | None = None, base_url: str | None = None
|
|
||||||
) -> bool:
|
|
||||||
"""Decide whether to wait for credential-pool rotation instead of falling back.
|
"""Decide whether to wait for credential-pool rotation instead of falling back.
|
||||||
|
|
||||||
The existing pool-rotation path requires the pool to (1) exist and (2) have
|
The existing pool-rotation path requires the pool to (1) exist and (2) have
|
||||||
at least one entry not currently in exhaustion cooldown. But rotation is
|
at least one entry not currently in exhaustion cooldown. But rotation is
|
||||||
only meaningful when the pool has more than one entry.
|
only meaningful when the pool has more than one entry.
|
||||||
|
|
||||||
With a single-credential pool (common for Gemini OAuth, Vertex service
|
With a single-credential pool (common for Vertex service accounts and any
|
||||||
accounts, and any "one personal key" configuration), the primary entry
|
"one personal key" configuration), the primary entry just 429'd and there
|
||||||
just 429'd and there is nothing to rotate to. Waiting for the pool
|
is nothing to rotate to. Waiting for the pool cooldown to expire means
|
||||||
cooldown to expire means retrying against the same exhausted quota — the
|
retrying against the same exhausted quota — the daily-quota 429 will recur
|
||||||
daily-quota 429 will recur immediately, and the retry budget is burned.
|
immediately, and the retry budget is burned.
|
||||||
|
|
||||||
Additionally, Google CloudCode / Gemini CLI rate limits are ACCOUNT-level
|
In that case we must fall back to the configured ``fallback_model``
|
||||||
throttles — even a multi-entry pool shares the same quota window, so
|
|
||||||
rotation won't recover. Skip straight to the fallback for those (#13636).
|
|
||||||
|
|
||||||
In those cases we must fall back to the configured ``fallback_model``
|
|
||||||
instead. Returns True only when rotation has somewhere to go.
|
instead. Returns True only when rotation has somewhere to go.
|
||||||
|
|
||||||
See issues #11314 and #13636.
|
See issues #11314 and #13636.
|
||||||
@@ -271,10 +265,6 @@ def _pool_may_recover_from_rate_limit(
|
|||||||
return False
|
return False
|
||||||
if not pool.has_available():
|
if not pool.has_available():
|
||||||
return False
|
return False
|
||||||
# CloudCode / Gemini CLI quotas are account-wide — all pool entries share
|
|
||||||
# the same throttle window, so rotation can't recover. Prefer fallback.
|
|
||||||
if str(base_url or "").startswith("cloudcode-pa://"):
|
|
||||||
return False
|
|
||||||
return len(pool.entries()) > 1
|
return len(pool.entries()) > 1
|
||||||
|
|
||||||
|
|
||||||
@@ -4092,13 +4082,6 @@ class AIAgent:
|
|||||||
pool = self._credential_pool
|
pool = self._credential_pool
|
||||||
if pool is None:
|
if pool is None:
|
||||||
return False
|
return False
|
||||||
if (
|
|
||||||
str(getattr(self, "base_url", "")).startswith("cloudcode-pa://")
|
|
||||||
):
|
|
||||||
# CloudCode/Gemini quota windows are usually account-level throttles.
|
|
||||||
# Prefer the configured fallback immediately instead of waiting out
|
|
||||||
# Retry-After while a pooled OAuth credential may still appear usable.
|
|
||||||
return False
|
|
||||||
return pool.has_available()
|
return pool.has_available()
|
||||||
|
|
||||||
def _anthropic_messages_create(self, api_kwargs: dict):
|
def _anthropic_messages_create(self, api_kwargs: dict):
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
"""Regression tests for #13636 — CloudCode / Gemini CLI rate-limit fallback.
|
"""Regression tests for #11314 — credential-pool rotation vs. fallback.
|
||||||
|
|
||||||
_pool_may_recover_from_rate_limit() is the hinge between credential-pool
|
_pool_may_recover_from_rate_limit() is the hinge between credential-pool
|
||||||
rotation and fallback-provider activation. For CloudCode (Gemini CLI /
|
rotation and fallback-provider activation. Rotation is only worth waiting on
|
||||||
Gemini OAuth) the 429 is an account-wide throttle, so waiting for pool
|
when the pool exists, has an available entry, and has more than one entry to
|
||||||
rotation is pointless — prefer fallback immediately.
|
rotate to; otherwise we should fall back to the configured fallback provider
|
||||||
|
immediately.
|
||||||
"""
|
"""
|
||||||
import inspect
|
import inspect
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
@@ -19,39 +20,13 @@ def _pool(entries: int = 2):
|
|||||||
return p
|
return p
|
||||||
|
|
||||||
|
|
||||||
def test_cloudcode_provider_skips_pool_rotation():
|
def test_multi_entry_pool_recovers():
|
||||||
assert _pool_may_recover_from_rate_limit(
|
assert _pool_may_recover_from_rate_limit(_pool(entries=3)) is True
|
||||||
_pool(entries=3),
|
|
||||||
provider="auto",
|
|
||||||
base_url="cloudcode-pa://google",
|
|
||||||
) is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_cloudcode_base_url_skips_pool_rotation_even_on_alias_provider():
|
def test_single_entry_pool_skips_rotation():
|
||||||
# Even if the provider label is something else, a cloudcode-pa:// URL
|
# Single-entry-pool exception (#11314): nothing to rotate to.
|
||||||
# signals the account-wide quota regime.
|
assert _pool_may_recover_from_rate_limit(_pool(entries=1)) is False
|
||||||
assert _pool_may_recover_from_rate_limit(
|
|
||||||
_pool(entries=3),
|
|
||||||
provider="custom-provider",
|
|
||||||
base_url="cloudcode-pa://google",
|
|
||||||
) is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_non_cloudcode_multi_entry_pool_still_recovers():
|
|
||||||
assert _pool_may_recover_from_rate_limit(
|
|
||||||
_pool(entries=3),
|
|
||||||
provider="openrouter",
|
|
||||||
base_url="https://openrouter.ai/api/v1",
|
|
||||||
) is True
|
|
||||||
|
|
||||||
|
|
||||||
def test_single_entry_pool_skips_rotation_regardless_of_provider():
|
|
||||||
# Pre-existing single-entry-pool exception (#11314) still holds.
|
|
||||||
assert _pool_may_recover_from_rate_limit(
|
|
||||||
_pool(entries=1),
|
|
||||||
provider="openrouter",
|
|
||||||
base_url="https://openrouter.ai/api/v1",
|
|
||||||
) is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_exhausted_pool_skips_rotation():
|
def test_exhausted_pool_skips_rotation():
|
||||||
|
|||||||
Reference in New Issue
Block a user