Compare commits

...

1 Commits

Author SHA1 Message Date
Teknium
ed0e2ab371 chore(providers): remove dead cloudcode-pa quota-fallback branches
The google-antigravity and google-gemini-cli OAuth providers were removed
in #50492. They were the only producers of a cloudcode-pa:// base_url, so
the account-level-quota early-returns in _pool_may_recover_from_rate_limit
and _credential_pool_may_recover_rate_limit are now unreachable.

- Drop the dead cloudcode-pa:// checks and the now-unused provider/base_url
  params on _pool_may_recover_from_rate_limit (only caller updated).
- Prune the obsolete CloudCode-specific regression tests; keep the live
  single/multi-entry pool-rotation invariants (#11314).
2026-06-23 11:26:03 -07:00
3 changed files with 18 additions and 63 deletions

View File

@@ -2802,12 +2802,9 @@ def run_conversation(
if is_rate_limited and agent._fallback_index < len(agent._fallback_chain): if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
# Don't eagerly fallback if credential pool rotation may # Don't eagerly fallback if credential pool rotation may
# still recover. See _pool_may_recover_from_rate_limit # still recover. See _pool_may_recover_from_rate_limit
# for the single-credential-pool and CloudCode-quota # for the single-credential-pool exception. Fixes #11314.
# exceptions. Fixes #11314 and #13636.
pool_may_recover = _ra()._pool_may_recover_from_rate_limit( pool_may_recover = _ra()._pool_may_recover_from_rate_limit(
agent._credential_pool, agent._credential_pool,
provider=agent.provider,
base_url=getattr(agent, "base_url", None),
) )
if not pool_may_recover: if not pool_may_recover:
if classified.reason == FailoverReason.billing: if classified.reason == FailoverReason.billing:

View File

@@ -243,26 +243,20 @@ def _routermint_headers() -> dict:
} }
def _pool_may_recover_from_rate_limit( def _pool_may_recover_from_rate_limit(pool) -> bool:
pool, *, provider: str | None = None, base_url: str | None = None
) -> bool:
"""Decide whether to wait for credential-pool rotation instead of falling back. """Decide whether to wait for credential-pool rotation instead of falling back.
The existing pool-rotation path requires the pool to (1) exist and (2) have The existing pool-rotation path requires the pool to (1) exist and (2) have
at least one entry not currently in exhaustion cooldown. But rotation is at least one entry not currently in exhaustion cooldown. But rotation is
only meaningful when the pool has more than one entry. only meaningful when the pool has more than one entry.
With a single-credential pool (common for Gemini OAuth, Vertex service With a single-credential pool (common for Vertex service accounts and any
accounts, and any "one personal key" configuration), the primary entry "one personal key" configuration), the primary entry just 429'd and there
just 429'd and there is nothing to rotate to. Waiting for the pool is nothing to rotate to. Waiting for the pool cooldown to expire means
cooldown to expire means retrying against the same exhausted quota — the retrying against the same exhausted quota — the daily-quota 429 will recur
daily-quota 429 will recur immediately, and the retry budget is burned. immediately, and the retry budget is burned.
Additionally, Google CloudCode / Gemini CLI rate limits are ACCOUNT-level In that case we must fall back to the configured ``fallback_model``
throttles — even a multi-entry pool shares the same quota window, so
rotation won't recover. Skip straight to the fallback for those (#13636).
In those cases we must fall back to the configured ``fallback_model``
instead. Returns True only when rotation has somewhere to go. instead. Returns True only when rotation has somewhere to go.
See issues #11314 and #13636. See issues #11314 and #13636.
@@ -271,10 +265,6 @@ def _pool_may_recover_from_rate_limit(
return False return False
if not pool.has_available(): if not pool.has_available():
return False return False
# CloudCode / Gemini CLI quotas are account-wide — all pool entries share
# the same throttle window, so rotation can't recover. Prefer fallback.
if str(base_url or "").startswith("cloudcode-pa://"):
return False
return len(pool.entries()) > 1 return len(pool.entries()) > 1
@@ -4092,13 +4082,6 @@ class AIAgent:
pool = self._credential_pool pool = self._credential_pool
if pool is None: if pool is None:
return False return False
if (
str(getattr(self, "base_url", "")).startswith("cloudcode-pa://")
):
# CloudCode/Gemini quota windows are usually account-level throttles.
# Prefer the configured fallback immediately instead of waiting out
# Retry-After while a pooled OAuth credential may still appear usable.
return False
return pool.has_available() return pool.has_available()
def _anthropic_messages_create(self, api_kwargs: dict): def _anthropic_messages_create(self, api_kwargs: dict):

View File

@@ -1,9 +1,10 @@
"""Regression tests for #13636 — CloudCode / Gemini CLI rate-limit fallback. """Regression tests for #11314 — credential-pool rotation vs. fallback.
_pool_may_recover_from_rate_limit() is the hinge between credential-pool _pool_may_recover_from_rate_limit() is the hinge between credential-pool
rotation and fallback-provider activation. For CloudCode (Gemini CLI / rotation and fallback-provider activation. Rotation is only worth waiting on
Gemini OAuth) the 429 is an account-wide throttle, so waiting for pool when the pool exists, has an available entry, and has more than one entry to
rotation is pointless — prefer fallback immediately. rotate to; otherwise we should fall back to the configured fallback provider
immediately.
""" """
import inspect import inspect
from unittest.mock import MagicMock from unittest.mock import MagicMock
@@ -19,39 +20,13 @@ def _pool(entries: int = 2):
return p return p
def test_cloudcode_provider_skips_pool_rotation(): def test_multi_entry_pool_recovers():
assert _pool_may_recover_from_rate_limit( assert _pool_may_recover_from_rate_limit(_pool(entries=3)) is True
_pool(entries=3),
provider="auto",
base_url="cloudcode-pa://google",
) is False
def test_cloudcode_base_url_skips_pool_rotation_even_on_alias_provider(): def test_single_entry_pool_skips_rotation():
# Even if the provider label is something else, a cloudcode-pa:// URL # Single-entry-pool exception (#11314): nothing to rotate to.
# signals the account-wide quota regime. assert _pool_may_recover_from_rate_limit(_pool(entries=1)) is False
assert _pool_may_recover_from_rate_limit(
_pool(entries=3),
provider="custom-provider",
base_url="cloudcode-pa://google",
) is False
def test_non_cloudcode_multi_entry_pool_still_recovers():
assert _pool_may_recover_from_rate_limit(
_pool(entries=3),
provider="openrouter",
base_url="https://openrouter.ai/api/v1",
) is True
def test_single_entry_pool_skips_rotation_regardless_of_provider():
# Pre-existing single-entry-pool exception (#11314) still holds.
assert _pool_may_recover_from_rate_limit(
_pool(entries=1),
provider="openrouter",
base_url="https://openrouter.ai/api/v1",
) is False
def test_exhausted_pool_skips_rotation(): def test_exhausted_pool_skips_rotation():