Compare commits

...

1 Commits

Author SHA1 Message Date
Teknium
969807466c fix(auth): write rotated Codex/xAI pool grant through to global root (#48415)
CredentialPool._sync_device_code_entry_to_auth_store rotated single-use
OAuth refresh tokens but wrote the new chain only into the active profile
store. When a profile resolves a grant from the global-root fallback
(read_credential_pool, #18594) and the pool then refreshes it, root was
left holding a now-revoked refresh token — every other profile reading the
stale root grant subsequently died with refresh_token_reused / invalid_grant
once its access token expired.

This is the credential-pool analog of #43589 (which fixed the non-pool xAI
refresh path in _save_xai_oauth_tokens). Detect the read-from-root case
(profile lacks its own providers.<id> block) BEFORE the profile save and,
after it, write the rotated chain back to the global root via a best-effort,
seat-belted write-through. A profile that genuinely shadows root (owns the
block) is untouched; classic mode (profile == root) is a no-op; a failed root
write never breaks the profile's own save. Covers openai-codex (reported),
xai-oauth, and nous through the shared sync path.
2026-06-24 00:22:11 -07:00
2 changed files with 274 additions and 0 deletions

View File

@@ -11,6 +11,7 @@ import uuid
import re
from dataclasses import dataclass, fields, replace
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from hermes_constants import OPENROUTER_BASE_URL
@@ -447,6 +448,63 @@ def get_pool_strategy(provider: str) -> str:
DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL = 1
def _write_through_provider_state_to_global_root(
provider_id: str, state: Dict[str, Any]
) -> None:
"""Persist a rotated OAuth ``state`` into the global-root auth.json.
Best-effort write-through for the multi-profile rotation hazard
(#48415 / #43589): nous, openai-codex, and xai-oauth rotate the
refresh_token on refresh, so when a profile pool refresh rotates a grant
it resolved from the root fallback, the rotated chain must land back in
root. Otherwise root keeps a now-revoked refresh token and every other
profile reading the stale root grant dies with ``refresh_token_reused`` /
``invalid_grant`` once its access token expires.
Only updates ``providers.<provider_id>`` in the root store; never touches
the profile store (the caller already saved that). Swallows all errors — a
failed write-through degrades to the pre-existing behavior (root stale), it
must never break the profile's own successful save. Mirrors
``hermes_cli.auth._write_through_xai_oauth_to_global_root`` (which covers
the non-pool xAI refresh path) for the credential-pool refresh path.
"""
try:
global_path = auth_mod._global_auth_file_path()
except Exception:
return
if global_path is None:
# Classic mode (profile == root); the profile save already hit root.
return
# Seat belt: under pytest, refuse to write the real user's
# ~/.hermes/auth.json even when HERMES_HOME points at a profile path
# (mirrors the read-side guard in _load_global_auth_store). Uses the
# unmodified HOME env, not Path.home() which fixtures may monkeypatch.
if os.environ.get("PYTEST_CURRENT_TEST"):
real_home_env = os.environ.get("HOME", "")
if real_home_env:
real_root = Path(real_home_env) / ".hermes" / "auth.json"
try:
if global_path.resolve(strict=False) == real_root.resolve(strict=False):
return
except Exception:
return
try:
if global_path.exists():
global_store = _load_auth_store(global_path)
else:
global_store = {}
if not isinstance(global_store, dict):
return
_store_provider_state(global_store, provider_id, dict(state), set_active=False)
auth_mod._save_auth_store(global_store, global_path)
except Exception as exc: # pragma: no cover - best effort
logger.debug(
"%s pool refresh: write-through to global root failed: %s",
provider_id,
exc,
)
class CredentialPool:
def __init__(self, provider: str, entries: List[PooledCredential]):
self.provider = provider
@@ -800,6 +858,28 @@ class CredentialPool:
try:
with _auth_store_lock():
auth_store = _load_auth_store()
# Decide BEFORE writing whether this profile is reading the
# grant from the global root (no own providers.<id> block) vs.
# genuinely shadowing it. A pool refresh rotates single-use
# OAuth refresh tokens, so a profile that resolved the grant
# from root MUST write the rotated chain back to root too —
# otherwise root keeps a revoked refresh token and every other
# profile reading the stale root grant dies with
# refresh_token_reused / invalid_grant once its access token
# expires. This mirrors the xAI write-through in
# hermes_cli.auth._save_xai_oauth_tokens (#43589); the pool
# refresh path is the Codex/xAI analog reported in #48415.
_wt_provider_id = {
"nous": "nous",
"openai-codex": "openai-codex",
"xai-oauth": "xai-oauth",
}.get(self.provider)
write_through_to_root = bool(_wt_provider_id) and not (
isinstance(auth_store.get("providers"), dict)
and isinstance(
auth_store["providers"].get(_wt_provider_id), dict
)
)
if self.provider == "nous":
state = _load_provider_state(auth_store, "nous")
if state is None:
@@ -855,6 +935,10 @@ class CredentialPool:
return
_save_auth_store(auth_store)
if write_through_to_root and _wt_provider_id:
_write_through_provider_state_to_global_root(
_wt_provider_id, state
)
except Exception as exc:
logger.debug("Failed to sync %s pool entry back to auth store: %s", self.provider, exc)

View File

@@ -0,0 +1,190 @@
"""Regression tests for credential-pool OAuth refresh write-through to root.
Companion to ``tests/hermes_cli/test_xai_oauth_writethrough.py``. That file
covers the *non-pool* xAI refresh path (``_save_xai_oauth_tokens``). These
cover the **credential-pool** refresh path
(``CredentialPool._sync_device_code_entry_to_auth_store``): when a profile
that has no own ``providers.<id>`` block refreshes — via the pool — a rotating
OAuth grant it resolved from the global-root fallback, the rotated chain must
be written back to the global root too. Otherwise root keeps a revoked refresh
token and every other profile reading root's stale grant dies with
``refresh_token_reused`` / ``invalid_grant`` once its access token expires
(issue #48415, the Codex/xAI analog of #43589).
The tests drive the real ``_sync_device_code_entry_to_auth_store`` against
real on-disk auth stores (profile + root under ``tmp_path``) rather than
mocking the save boundary, so they exercise the actual atomic write path.
"""
import json
import pytest
from agent import credential_pool as CP
from agent.credential_pool import (
AUTH_TYPE_OAUTH,
CredentialPool,
PooledCredential,
)
from hermes_cli import auth as A
def _write_store(path, store):
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(store), encoding="utf-8")
def _read_store(path):
return json.loads(path.read_text(encoding="utf-8"))
def _entry(provider: str, *, id: str, access_token: str, refresh_token: str):
return PooledCredential(
provider=provider,
id=id,
label="cred",
auth_type=AUTH_TYPE_OAUTH,
priority=0,
source="device_code",
access_token=access_token,
refresh_token=refresh_token,
)
@pytest.fixture
def profile_and_root(tmp_path, monkeypatch):
"""Wire a profile auth store + a distinct global-root auth store on disk.
The pytest seat belt in ``_write_through_provider_state_to_global_root``
only refuses the *real* user's ``$HOME/.hermes/auth.json``; a tmp_path
root is allowed, so point HOME away from the tmp root to keep the guard
from tripping on these fixtures.
"""
profile_path = tmp_path / "profiles" / "work" / "auth.json"
root_path = tmp_path / "root" / "auth.json"
monkeypatch.setattr(A, "_auth_file_path", lambda: profile_path)
monkeypatch.setattr(A, "_global_auth_file_path", lambda: root_path)
monkeypatch.setenv("HOME", str(tmp_path / "not-the-root"))
return profile_path, root_path
@pytest.mark.parametrize(
"provider",
["openai-codex", "xai-oauth"],
)
def test_pool_refresh_writes_through_to_root_when_profile_reads_root(
profile_and_root, provider
):
"""A profile reading root's grant must push rotated tokens back to root."""
profile_path, root_path = profile_and_root
# Profile has NO own provider block (reads root via fallback).
_write_store(profile_path, {"version": 1, "providers": {}})
_write_store(
root_path,
{
"version": 1,
"providers": {
provider: {
"tokens": {
"access_token": "old-access",
"refresh_token": "old-refresh",
}
}
},
},
)
pool = CredentialPool(provider, [])
pool._sync_device_code_entry_to_auth_store(
_entry(provider, id="e1", access_token="new-access", refresh_token="new-refresh")
)
# Profile got the rotated chain (existing behavior).
profile = _read_store(profile_path)
assert (
profile["providers"][provider]["tokens"]["refresh_token"] == "new-refresh"
)
# AND the global root no longer holds the revoked refresh token (#48415).
root = _read_store(root_path)
assert root["providers"][provider]["tokens"]["access_token"] == "new-access"
assert root["providers"][provider]["tokens"]["refresh_token"] == "new-refresh"
@pytest.mark.parametrize(
"provider",
["openai-codex", "xai-oauth"],
)
def test_pool_refresh_does_not_touch_root_when_profile_shadows(
profile_and_root, provider
):
"""A profile that genuinely shadows root must NOT clobber the root grant."""
profile_path, root_path = profile_and_root
# Profile has its OWN provider block: it shadows root legitimately.
_write_store(
profile_path,
{
"version": 1,
"providers": {
provider: {
"tokens": {
"access_token": "profile-old",
"refresh_token": "profile-old-refresh",
}
}
},
},
)
_write_store(
root_path,
{
"version": 1,
"providers": {
provider: {
"tokens": {
"access_token": "root-untouched",
"refresh_token": "root-untouched-refresh",
}
}
},
},
)
pool = CredentialPool(provider, [])
pool._sync_device_code_entry_to_auth_store(
_entry(
provider,
id="e2",
access_token="profile-new",
refresh_token="profile-new-refresh",
)
)
profile = _read_store(profile_path)
assert (
profile["providers"][provider]["tokens"]["refresh_token"]
== "profile-new-refresh"
)
# Root keeps its own grant — write-through must not run when the profile
# owns the block.
root = _read_store(root_path)
assert (
root["providers"][provider]["tokens"]["refresh_token"]
== "root-untouched-refresh"
)
def test_write_through_helper_is_noop_in_classic_mode(monkeypatch, tmp_path):
"""When profile == root (classic mode), the helper must be a no-op.
``_global_auth_file_path`` returns None in classic mode; the profile save
already wrote to root, so a second write would be redundant (and the
helper has nothing to target).
"""
monkeypatch.setattr(A, "_global_auth_file_path", lambda: None)
# Must not raise and must not attempt any write.
CP._write_through_provider_state_to_global_root(
"openai-codex", {"tokens": {"access_token": "a", "refresh_token": "r"}}
)