Compare commits

...

1 Commits

Author SHA1 Message Date
Teknium
ed0e2ab371 chore(providers): remove dead cloudcode-pa quota-fallback branches
The google-antigravity and google-gemini-cli OAuth providers were removed
in #50492. They were the only producers of a cloudcode-pa:// base_url, so
the account-level-quota early-returns in _pool_may_recover_from_rate_limit
and _credential_pool_may_recover_rate_limit are now unreachable.

- Drop the dead cloudcode-pa:// checks and the now-unused provider/base_url
  params on _pool_may_recover_from_rate_limit (its only caller is updated to match).
- Prune the obsolete CloudCode-specific regression tests; keep the live
  single/multi-entry pool-rotation invariants (#11314).
2026-06-23 11:26:03 -07:00
3 changed files with 18 additions and 63 deletions

View File

@@ -2802,12 +2802,9 @@ def run_conversation(
if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
# Don't eagerly fallback if credential pool rotation may
# still recover. See _pool_may_recover_from_rate_limit
# for the single-credential-pool and CloudCode-quota
# exceptions. Fixes #11314 and #13636.
# for the single-credential-pool exception. Fixes #11314.
pool_may_recover = _ra()._pool_may_recover_from_rate_limit(
agent._credential_pool,
provider=agent.provider,
base_url=getattr(agent, "base_url", None),
)
if not pool_may_recover:
if classified.reason == FailoverReason.billing:

View File

@@ -243,26 +243,20 @@ def _routermint_headers() -> dict:
}
def _pool_may_recover_from_rate_limit(
pool, *, provider: str | None = None, base_url: str | None = None
) -> bool:
def _pool_may_recover_from_rate_limit(pool) -> bool:
"""Decide whether to wait for credential-pool rotation instead of falling back.
The existing pool-rotation path requires the pool to (1) exist and (2) have
at least one entry not currently in exhaustion cooldown. But rotation is
only meaningful when the pool has more than one entry.
With a single-credential pool (common for Gemini OAuth, Vertex service
accounts, and any "one personal key" configuration), the primary entry
just 429'd and there is nothing to rotate to. Waiting for the pool
cooldown to expire means retrying against the same exhausted quota — the
daily-quota 429 will recur immediately, and the retry budget is burned.
With a single-credential pool (common for Vertex service accounts and any
"one personal key" configuration), the primary entry just 429'd and there
is nothing to rotate to. Waiting for the pool cooldown to expire means
retrying against the same exhausted quota — the daily-quota 429 will recur
immediately, and the retry budget is burned.
Additionally, Google CloudCode / Gemini CLI rate limits are ACCOUNT-level
throttles — even a multi-entry pool shares the same quota window, so
rotation won't recover. Skip straight to the fallback for those (#13636).
In those cases we must fall back to the configured ``fallback_model``
In that case we must fall back to the configured ``fallback_model``
instead. Returns True only when rotation has somewhere to go.
See issues #11314 and #13636.
@@ -271,10 +265,6 @@ def _pool_may_recover_from_rate_limit(
return False
if not pool.has_available():
return False
# CloudCode / Gemini CLI quotas are account-wide — all pool entries share
# the same throttle window, so rotation can't recover. Prefer fallback.
if str(base_url or "").startswith("cloudcode-pa://"):
return False
return len(pool.entries()) > 1
@@ -4092,13 +4082,6 @@ class AIAgent:
pool = self._credential_pool
if pool is None:
return False
if (
str(getattr(self, "base_url", "")).startswith("cloudcode-pa://")
):
# CloudCode/Gemini quota windows are usually account-level throttles.
# Prefer the configured fallback immediately instead of waiting out
# Retry-After while a pooled OAuth credential may still appear usable.
return False
return pool.has_available()
def _anthropic_messages_create(self, api_kwargs: dict):

View File

@@ -1,9 +1,10 @@
"""Regression tests for #13636 — CloudCode / Gemini CLI rate-limit fallback.
"""Regression tests for #11314 — credential-pool rotation vs. fallback.
_pool_may_recover_from_rate_limit() is the hinge between credential-pool
rotation and fallback-provider activation. For CloudCode (Gemini CLI /
Gemini OAuth) the 429 is an account-wide throttle, so waiting for pool
rotation is pointless — prefer fallback immediately.
rotation and fallback-provider activation. Rotation is only worth waiting on
when the pool exists, has an available entry, and has more than one entry to
rotate to; otherwise we should fall back to the configured fallback provider
immediately.
"""
import inspect
from unittest.mock import MagicMock
@@ -19,39 +20,13 @@ def _pool(entries: int = 2):
return p
def test_cloudcode_provider_skips_pool_rotation():
assert _pool_may_recover_from_rate_limit(
_pool(entries=3),
provider="auto",
base_url="cloudcode-pa://google",
) is False
def test_multi_entry_pool_recovers():
assert _pool_may_recover_from_rate_limit(_pool(entries=3)) is True
def test_cloudcode_base_url_skips_pool_rotation_even_on_alias_provider():
# Even if the provider label is something else, a cloudcode-pa:// URL
# signals the account-wide quota regime.
assert _pool_may_recover_from_rate_limit(
_pool(entries=3),
provider="custom-provider",
base_url="cloudcode-pa://google",
) is False
def test_non_cloudcode_multi_entry_pool_still_recovers():
assert _pool_may_recover_from_rate_limit(
_pool(entries=3),
provider="openrouter",
base_url="https://openrouter.ai/api/v1",
) is True
def test_single_entry_pool_skips_rotation_regardless_of_provider():
# Pre-existing single-entry-pool exception (#11314) still holds.
assert _pool_may_recover_from_rate_limit(
_pool(entries=1),
provider="openrouter",
base_url="https://openrouter.ai/api/v1",
) is False
def test_single_entry_pool_skips_rotation():
# Single-entry-pool exception (#11314): nothing to rotate to.
assert _pool_may_recover_from_rate_limit(_pool(entries=1)) is False
def test_exhausted_pool_skips_rotation():