Compare commits

...

1 Commits

Author SHA1 Message Date
teknium1
b3f5c6525a feat: add direct endpoint overrides for auxiliary and delegation
Add base_url/api_key overrides for auxiliary tasks and delegation so users can
route those flows straight to a custom OpenAI-compatible endpoint without
having to rely on provider=main or named custom providers.

Also clear gateway session env vars in test isolation so the full suite stays
deterministic when run from a messaging-backed agent session.
2026-03-14 20:48:29 -07:00
12 changed files with 455 additions and 91 deletions

View File

@@ -30,6 +30,10 @@ Default "auto" follows the chains above.
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL, Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
than the provider's default. than the provider's default.
Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
custom OpenAI-compatible endpoint without touching the main model settings.
""" """
import json import json
@@ -418,6 +422,17 @@ def _get_auxiliary_provider(task: str = "") -> str:
return "auto" return "auto"
def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:
    """Read a per-task auxiliary override from the environment.

    The variable ``AUXILIARY_{TASK}_{suffix}`` is consulted first, then
    ``CONTEXT_{TASK}_{suffix}``, where ``{TASK}`` is ``task`` upper-cased.

    Args:
        task: Auxiliary task name (e.g. ``"vision"``). A falsy value
            disables the lookup entirely.
        suffix: Trailing part of the variable name (e.g. ``"MODEL"``,
            ``"BASE_URL"``). It is used verbatim, not upper-cased.

    Returns:
        The first value that is non-empty after stripping whitespace, or
        ``None`` when ``task`` is falsy or neither variable holds a value.
    """
    if not task:
        return None
    task_token = task.upper()
    for prefix in ("AUXILIARY_", "CONTEXT_"):
        # Whitespace-only values are treated as unset so the next prefix
        # still gets a chance.
        val = os.getenv(f"{prefix}{task_token}_{suffix}", "").strip()
        if val:
            return val
    return None
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]: def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
or_key = os.getenv("OPENROUTER_API_KEY") or_key = os.getenv("OPENROUTER_API_KEY")
if not or_key: if not or_key:
@@ -564,6 +579,8 @@ def resolve_provider_client(
model: str = None, model: str = None,
async_mode: bool = False, async_mode: bool = False,
raw_codex: bool = False, raw_codex: bool = False,
explicit_base_url: str = None,
explicit_api_key: str = None,
) -> Tuple[Optional[Any], Optional[str]]: ) -> Tuple[Optional[Any], Optional[str]]:
"""Central router: given a provider name and optional model, return a """Central router: given a provider name and optional model, return a
configured client with the correct auth, base URL, and API format. configured client with the correct auth, base URL, and API format.
@@ -585,6 +602,8 @@ def resolve_provider_client(
instead of wrapping in CodexAuxiliaryClient. Use this when instead of wrapping in CodexAuxiliaryClient. Use this when
the caller needs direct access to responses.stream() (e.g., the caller needs direct access to responses.stream() (e.g.,
the main agent loop). the main agent loop).
explicit_base_url: Optional direct OpenAI-compatible endpoint.
explicit_api_key: Optional API key paired with explicit_base_url.
Returns: Returns:
(client, resolved_model) or (None, None) if auth is unavailable. (client, resolved_model) or (None, None) if auth is unavailable.
@@ -661,6 +680,18 @@ def resolve_provider_client(
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ─────────── # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
if provider == "custom": if provider == "custom":
if explicit_base_url:
custom_base = explicit_base_url.strip()
custom_key = (
(explicit_api_key or "").strip()
or os.getenv("OPENAI_API_KEY", "").strip()
or os.getenv("OPENROUTER_API_KEY", "").strip()
)
if custom_base and custom_key:
final_model = model or _read_main_model() or "gpt-4o-mini"
client = OpenAI(api_key=custom_key, base_url=custom_base)
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# Try custom first, then codex, then API-key providers # Try custom first, then codex, then API-key providers
for try_fn in (_try_custom_endpoint, _try_codex, for try_fn in (_try_custom_endpoint, _try_codex,
_resolve_api_key_provider): _resolve_api_key_provider):
@@ -749,10 +780,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
Callers may override the returned model with a per-task env var Callers may override the returned model with a per-task env var
(e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL). (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
""" """
forced = _get_auxiliary_provider(task) provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
if forced != "auto": return resolve_provider_client(
return resolve_provider_client(forced) provider,
return resolve_provider_client("auto") model=model,
explicit_base_url=base_url,
explicit_api_key=api_key,
)
def get_async_text_auxiliary_client(task: str = ""): def get_async_text_auxiliary_client(task: str = ""):
@@ -762,10 +796,14 @@ def get_async_text_auxiliary_client(task: str = ""):
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API. (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
Returns (None, None) when no provider is available. Returns (None, None) when no provider is available.
""" """
forced = _get_auxiliary_provider(task) provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
if forced != "auto": return resolve_provider_client(
return resolve_provider_client(forced, async_mode=True) provider,
return resolve_provider_client("auto", async_mode=True) model=model,
async_mode=True,
explicit_base_url=base_url,
explicit_api_key=api_key,
)
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
@@ -781,18 +819,25 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
providers are skipped — they may not handle vision input. To use providers are skipped — they may not handle vision input. To use
them, set AUXILIARY_VISION_PROVIDER explicitly. them, set AUXILIARY_VISION_PROVIDER explicitly.
""" """
forced = _get_auxiliary_provider("vision") provider, model, base_url, api_key = _resolve_task_provider_model("vision")
if forced != "auto": if base_url:
return resolve_provider_client(forced) return resolve_provider_client(
"custom",
model=model,
explicit_base_url=base_url,
explicit_api_key=api_key,
)
if provider != "auto":
return resolve_provider_client(provider, model=model)
# Auto: try providers known to support multimodal first, then fall # Auto: try providers known to support multimodal first, then fall
# back to the user's custom endpoint. Many local models (Qwen-VL, # back to the user's custom endpoint. Many local models (Qwen-VL,
# LLaVA, Pixtral, etc.) support vision — skipping them entirely # LLaVA, Pixtral, etc.) support vision — skipping them entirely
# caused silent failures for local-only users. # caused silent failures for local-only users.
for try_fn in (_try_openrouter, _try_nous, _try_codex, for try_fn in (_try_openrouter, _try_nous, _try_codex,
_try_custom_endpoint): _try_custom_endpoint):
client, model = try_fn() client, auto_model = try_fn()
if client is not None: if client is not None:
return client, model return client, model or auto_model
logger.debug("Auxiliary vision client: none available") logger.debug("Auxiliary vision client: none available")
return None, None return None, None
@@ -851,19 +896,29 @@ def auxiliary_max_tokens_param(value: int) -> dict:
# Every auxiliary LLM consumer should use these instead of manually # Every auxiliary LLM consumer should use these instead of manually
# constructing clients and calling .chat.completions.create(). # constructing clients and calling .chat.completions.create().
# Client cache: (provider, async_mode) -> (client, default_model) # Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
_client_cache: Dict[tuple, tuple] = {} _client_cache: Dict[tuple, tuple] = {}
def _get_cached_client( def _get_cached_client(
provider: str, model: str = None, async_mode: bool = False, provider: str,
model: str = None,
async_mode: bool = False,
base_url: str = None,
api_key: str = None,
) -> Tuple[Optional[Any], Optional[str]]: ) -> Tuple[Optional[Any], Optional[str]]:
"""Get or create a cached client for the given provider.""" """Get or create a cached client for the given provider."""
cache_key = (provider, async_mode) cache_key = (provider, async_mode, base_url or "", api_key or "")
if cache_key in _client_cache: if cache_key in _client_cache:
cached_client, cached_default = _client_cache[cache_key] cached_client, cached_default = _client_cache[cache_key]
return cached_client, model or cached_default return cached_client, model or cached_default
client, default_model = resolve_provider_client(provider, model, async_mode) client, default_model = resolve_provider_client(
provider,
model,
async_mode,
explicit_base_url=base_url,
explicit_api_key=api_key,
)
if client is not None: if client is not None:
_client_cache[cache_key] = (client, default_model) _client_cache[cache_key] = (client, default_model)
return client, model or default_model return client, model or default_model
@@ -873,57 +928,75 @@ def _resolve_task_provider_model(
task: str = None, task: str = None,
provider: str = None, provider: str = None,
model: str = None, model: str = None,
) -> Tuple[str, Optional[str]]: base_url: str = None,
api_key: str = None,
) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
"""Determine provider + model for a call. """Determine provider + model for a call.
Priority: Priority:
1. Explicit provider/model args (always win) 1. Explicit provider/model/base_url/api_key args (always win)
2. Env var overrides (AUXILIARY_{TASK}_PROVIDER, etc.) 2. Env var overrides (AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
3. Config file (auxiliary.{task}.provider/model or compression.*) 3. Config file (auxiliary.{task}.* or compression.*)
4. "auto" (full auto-detection chain) 4. "auto" (full auto-detection chain)
Returns (provider, model) where model may be None (use provider default). Returns (provider, model, base_url, api_key) where model may be None
(use provider default). When base_url is set, provider is forced to
"custom" and the task uses that direct endpoint.
""" """
if provider: config = {}
return provider, model cfg_provider = None
cfg_model = None
cfg_base_url = None
cfg_api_key = None
if task: if task:
# Check env var overrides first
env_provider = _get_auxiliary_provider(task)
if env_provider != "auto":
# Check for env var model override too
env_model = None
for prefix in ("AUXILIARY_", "CONTEXT_"):
val = os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip()
if val:
env_model = val
break
return env_provider, model or env_model
# Read from config file
try: try:
from hermes_cli.config import load_config from hermes_cli.config import load_config
config = load_config() config = load_config()
except ImportError: except ImportError:
return "auto", model config = {}
# Check auxiliary.{task} section aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
aux = config.get("auxiliary", {}) task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
task_config = aux.get(task, {}) if not isinstance(task_config, dict):
cfg_provider = task_config.get("provider", "").strip() or None task_config = {}
cfg_model = task_config.get("model", "").strip() or None cfg_provider = str(task_config.get("provider", "")).strip() or None
cfg_model = str(task_config.get("model", "")).strip() or None
cfg_base_url = str(task_config.get("base_url", "")).strip() or None
cfg_api_key = str(task_config.get("api_key", "")).strip() or None
# Backwards compat: compression section has its own keys # Backwards compat: compression section has its own keys
if task == "compression" and not cfg_provider: if task == "compression" and not cfg_provider:
comp = config.get("compression", {}) comp = config.get("compression", {}) if isinstance(config, dict) else {}
if isinstance(comp, dict):
cfg_provider = comp.get("summary_provider", "").strip() or None cfg_provider = comp.get("summary_provider", "").strip() or None
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
if cfg_provider and cfg_provider != "auto": env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
return cfg_provider, model or cfg_model resolved_model = model or env_model or cfg_model
return "auto", model or cfg_model
return "auto", model if base_url:
return "custom", resolved_model, base_url, api_key
if provider:
return provider, resolved_model, base_url, api_key
if task:
env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
env_api_key = _get_auxiliary_env_override(task, "API_KEY")
if env_base_url:
return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
env_provider = _get_auxiliary_provider(task)
if env_provider != "auto":
return env_provider, resolved_model, None, None
if cfg_base_url:
return "custom", resolved_model, cfg_base_url, cfg_api_key
if cfg_provider and cfg_provider != "auto":
return cfg_provider, resolved_model, None, None
return "auto", resolved_model, None, None
return "auto", resolved_model, None, None
def _build_call_kwargs( def _build_call_kwargs(
@@ -935,6 +1008,7 @@ def _build_call_kwargs(
tools: Optional[list] = None, tools: Optional[list] = None,
timeout: float = 30.0, timeout: float = 30.0,
extra_body: Optional[dict] = None, extra_body: Optional[dict] = None,
base_url: Optional[str] = None,
) -> dict: ) -> dict:
"""Build kwargs for .chat.completions.create() with model/provider adjustments.""" """Build kwargs for .chat.completions.create() with model/provider adjustments."""
kwargs: Dict[str, Any] = { kwargs: Dict[str, Any] = {
@@ -950,7 +1024,7 @@ def _build_call_kwargs(
# Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens. # Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens. # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
if provider == "custom": if provider == "custom":
custom_base = os.getenv("OPENAI_BASE_URL", "") custom_base = base_url or os.getenv("OPENAI_BASE_URL", "")
if "api.openai.com" in custom_base.lower(): if "api.openai.com" in custom_base.lower():
kwargs["max_completion_tokens"] = max_tokens kwargs["max_completion_tokens"] = max_tokens
else: else:
@@ -976,6 +1050,8 @@ def call_llm(
*, *,
provider: str = None, provider: str = None,
model: str = None, model: str = None,
base_url: str = None,
api_key: str = None,
messages: list, messages: list,
temperature: float = None, temperature: float = None,
max_tokens: int = None, max_tokens: int = None,
@@ -1007,13 +1083,18 @@ def call_llm(
Raises: Raises:
RuntimeError: If no provider is configured. RuntimeError: If no provider is configured.
""" """
resolved_provider, resolved_model = _resolve_task_provider_model( resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
task, provider, model) task, provider, model, base_url, api_key)
client, final_model = _get_cached_client(resolved_provider, resolved_model) client, final_model = _get_cached_client(
resolved_provider,
resolved_model,
base_url=resolved_base_url,
api_key=resolved_api_key,
)
if client is None: if client is None:
# Fallback: try openrouter # Fallback: try openrouter
if resolved_provider != "openrouter": if resolved_provider != "openrouter" and not resolved_base_url:
logger.warning("Provider %s unavailable, falling back to openrouter", logger.warning("Provider %s unavailable, falling back to openrouter",
resolved_provider) resolved_provider)
client, final_model = _get_cached_client( client, final_model = _get_cached_client(
@@ -1026,7 +1107,8 @@ def call_llm(
kwargs = _build_call_kwargs( kwargs = _build_call_kwargs(
resolved_provider, final_model, messages, resolved_provider, final_model, messages,
temperature=temperature, max_tokens=max_tokens, temperature=temperature, max_tokens=max_tokens,
tools=tools, timeout=timeout, extra_body=extra_body) tools=tools, timeout=timeout, extra_body=extra_body,
base_url=resolved_base_url)
# Handle max_tokens vs max_completion_tokens retry # Handle max_tokens vs max_completion_tokens retry
try: try:
@@ -1045,6 +1127,8 @@ async def async_call_llm(
*, *,
provider: str = None, provider: str = None,
model: str = None, model: str = None,
base_url: str = None,
api_key: str = None,
messages: list, messages: list,
temperature: float = None, temperature: float = None,
max_tokens: int = None, max_tokens: int = None,
@@ -1056,13 +1140,18 @@ async def async_call_llm(
Same as call_llm() but async. See call_llm() for full documentation. Same as call_llm() but async. See call_llm() for full documentation.
""" """
resolved_provider, resolved_model = _resolve_task_provider_model( resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
task, provider, model) task, provider, model, base_url, api_key)
client, final_model = _get_cached_client( client, final_model = _get_cached_client(
resolved_provider, resolved_model, async_mode=True) resolved_provider,
resolved_model,
async_mode=True,
base_url=resolved_base_url,
api_key=resolved_api_key,
)
if client is None: if client is None:
if resolved_provider != "openrouter": if resolved_provider != "openrouter" and not resolved_base_url:
logger.warning("Provider %s unavailable, falling back to openrouter", logger.warning("Provider %s unavailable, falling back to openrouter",
resolved_provider) resolved_provider)
client, final_model = _get_cached_client( client, final_model = _get_cached_client(
@@ -1076,7 +1165,8 @@ async def async_call_llm(
kwargs = _build_call_kwargs( kwargs = _build_call_kwargs(
resolved_provider, final_model, messages, resolved_provider, final_model, messages,
temperature=temperature, max_tokens=max_tokens, temperature=temperature, max_tokens=max_tokens,
tools=tools, timeout=timeout, extra_body=extra_body) tools=tools, timeout=timeout, extra_body=extra_body,
base_url=resolved_base_url)
try: try:
return await client.chat.completions.create(**kwargs) return await client.chat.completions.create(**kwargs)

48
cli.py
View File

@@ -217,11 +217,27 @@ def load_cli_config() -> Dict[str, Any]:
"timeout": 300, # Max seconds a sandbox script can run before being killed (5 min) "timeout": 300, # Max seconds a sandbox script can run before being killed (5 min)
"max_tool_calls": 50, # Max RPC tool calls per execution "max_tool_calls": 50, # Max RPC tool calls per execution
}, },
"auxiliary": {
"vision": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
"web_extract": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
},
"delegation": { "delegation": {
"max_iterations": 45, # Max tool-calling turns per child agent "max_iterations": 45, # Max tool-calling turns per child agent
"default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents "default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents
"model": "", # Subagent model override (empty = inherit parent model) "model": "", # Subagent model override (empty = inherit parent model)
"provider": "", # Subagent provider override (empty = inherit parent provider) "provider": "", # Subagent provider override (empty = inherit parent provider)
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
}, },
} }
@@ -362,28 +378,44 @@ def load_cli_config() -> Dict[str, Any]:
if config_key in compression_config: if config_key in compression_config:
os.environ[env_var] = str(compression_config[config_key]) os.environ[env_var] = str(compression_config[config_key])
# Apply auxiliary model overrides to environment variables. # Apply auxiliary model/direct-endpoint overrides to environment variables.
# Vision and web_extract each have their own provider + model pair. # Vision and web_extract each have their own provider/model/base_url/api_key tuple.
# (Compression is handled in the compression section above.) # (Compression is handled in the compression section above.)
# Only set env vars for non-empty / non-default values so auto-detection # Only set env vars for non-empty / non-default values so auto-detection
# still works. # still works.
auxiliary_config = defaults.get("auxiliary", {}) auxiliary_config = defaults.get("auxiliary", {})
auxiliary_task_env = { auxiliary_task_env = {
# config key → (provider env var, model env var) # config key → env var mapping
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), "vision": {
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), "provider": "AUXILIARY_VISION_PROVIDER",
"model": "AUXILIARY_VISION_MODEL",
"base_url": "AUXILIARY_VISION_BASE_URL",
"api_key": "AUXILIARY_VISION_API_KEY",
},
"web_extract": {
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
},
} }
for task_key, (prov_env, model_env) in auxiliary_task_env.items(): for task_key, env_map in auxiliary_task_env.items():
task_cfg = auxiliary_config.get(task_key, {}) task_cfg = auxiliary_config.get(task_key, {})
if not isinstance(task_cfg, dict): if not isinstance(task_cfg, dict):
continue continue
prov = str(task_cfg.get("provider", "")).strip() prov = str(task_cfg.get("provider", "")).strip()
model = str(task_cfg.get("model", "")).strip() model = str(task_cfg.get("model", "")).strip()
base_url = str(task_cfg.get("base_url", "")).strip()
api_key = str(task_cfg.get("api_key", "")).strip()
if prov and prov != "auto": if prov and prov != "auto":
os.environ[prov_env] = prov os.environ[env_map["provider"]] = prov
if model: if model:
os.environ[model_env] = model os.environ[env_map["model"]] = model
if base_url:
os.environ[env_map["base_url"]] = base_url
if api_key:
os.environ[env_map["api_key"]] = api_key
# Security settings # Security settings
security_config = defaults.get("security", {}) security_config = defaults.get("security", {})

View File

@@ -100,24 +100,40 @@ if _config_path.exists():
for _cfg_key, _env_var in _compression_env_map.items(): for _cfg_key, _env_var in _compression_env_map.items():
if _cfg_key in _compression_cfg: if _cfg_key in _compression_cfg:
os.environ[_env_var] = str(_compression_cfg[_cfg_key]) os.environ[_env_var] = str(_compression_cfg[_cfg_key])
# Auxiliary model overrides (vision, web_extract). # Auxiliary model/direct-endpoint overrides (vision, web_extract).
# Each task has provider + model; bridge non-default values to env vars. # Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
_auxiliary_cfg = _cfg.get("auxiliary", {}) _auxiliary_cfg = _cfg.get("auxiliary", {})
if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict): if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
_aux_task_env = { _aux_task_env = {
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), "vision": {
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), "provider": "AUXILIARY_VISION_PROVIDER",
"model": "AUXILIARY_VISION_MODEL",
"base_url": "AUXILIARY_VISION_BASE_URL",
"api_key": "AUXILIARY_VISION_API_KEY",
},
"web_extract": {
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
},
} }
for _task_key, (_prov_env, _model_env) in _aux_task_env.items(): for _task_key, _env_map in _aux_task_env.items():
_task_cfg = _auxiliary_cfg.get(_task_key, {}) _task_cfg = _auxiliary_cfg.get(_task_key, {})
if not isinstance(_task_cfg, dict): if not isinstance(_task_cfg, dict):
continue continue
_prov = str(_task_cfg.get("provider", "")).strip() _prov = str(_task_cfg.get("provider", "")).strip()
_model = str(_task_cfg.get("model", "")).strip() _model = str(_task_cfg.get("model", "")).strip()
_base_url = str(_task_cfg.get("base_url", "")).strip()
_api_key = str(_task_cfg.get("api_key", "")).strip()
if _prov and _prov != "auto": if _prov and _prov != "auto":
os.environ[_prov_env] = _prov os.environ[_env_map["provider"]] = _prov
if _model: if _model:
os.environ[_model_env] = _model os.environ[_env_map["model"]] = _model
if _base_url:
os.environ[_env_map["base_url"]] = _base_url
if _api_key:
os.environ[_env_map["api_key"]] = _api_key
_agent_cfg = _cfg.get("agent", {}) _agent_cfg = _cfg.get("agent", {})
if _agent_cfg and isinstance(_agent_cfg, dict): if _agent_cfg and isinstance(_agent_cfg, dict):
if "max_turns" in _agent_cfg: if "max_turns" in _agent_cfg:

View File

@@ -150,30 +150,44 @@ DEFAULT_CONFIG = {
"vision": { "vision": {
"provider": "auto", # auto | openrouter | nous | codex | custom "provider": "auto", # auto | openrouter | nous | codex | custom
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o" "model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
"base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider)
"api_key": "", # API key for base_url (falls back to OPENAI_API_KEY)
}, },
"web_extract": { "web_extract": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
"compression": { "compression": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
"session_search": { "session_search": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
"skills_hub": { "skills_hub": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
"mcp": { "mcp": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
"flush_memories": { "flush_memories": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
}, },
@@ -243,6 +257,8 @@ DEFAULT_CONFIG = {
"delegation": { "delegation": {
"model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model) "model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model)
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials) "provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
"base_url": "", # direct OpenAI-compatible endpoint for subagents
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
}, },
# Ephemeral prefill messages file — JSON list of {role, content} dicts # Ephemeral prefill messages file — JSON list of {role, content} dicts

View File

@@ -24,9 +24,11 @@ def _clean_env(monkeypatch):
for key in ( for key in (
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY", "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL", "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
# Per-task provider/model overrides # Per-task provider/model/direct-endpoint overrides
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL", "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL", "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL", "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
): ):
monkeypatch.delenv(key, raising=False) monkeypatch.delenv(key, raising=False)
@@ -142,6 +144,17 @@ class TestGetTextAuxiliaryClient:
call_kwargs = mock_openai.call_args call_kwargs = mock_openai.call_args
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1" assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
def test_task_direct_endpoint_override(self, monkeypatch):
    """Task-level BASE_URL/API_KEY/MODEL env vars are what reach OpenAI()."""
    # An OpenRouter key is set as well; the assertions check that the
    # web_extract-specific endpoint and key are the ones actually used.
    env = {
        "OPENROUTER_API_KEY": "or-key",
        "AUXILIARY_WEB_EXTRACT_BASE_URL": "http://localhost:2345/v1",
        "AUXILIARY_WEB_EXTRACT_API_KEY": "task-key",
        "AUXILIARY_WEB_EXTRACT_MODEL": "task-model",
    }
    for name, value in env.items():
        monkeypatch.setenv(name, value)

    with patch("agent.auxiliary_client.OpenAI") as openai_cls:
        _, model = get_text_auxiliary_client("web_extract")

    assert model == "task-model"
    ctor_kwargs = openai_cls.call_args.kwargs
    assert ctor_kwargs["base_url"] == "http://localhost:2345/v1"
    assert ctor_kwargs["api_key"] == "task-key"
def test_codex_fallback_when_nothing_else(self, codex_auth_dir): def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
patch("agent.auxiliary_client.OpenAI") as mock_openai: patch("agent.auxiliary_client.OpenAI") as mock_openai:
@@ -390,6 +403,24 @@ class TestTaskSpecificOverrides:
client, model = get_text_auxiliary_client("web_extract") client, model = get_text_auxiliary_client("web_extract")
assert model == "google/gemini-3-flash-preview" assert model == "google/gemini-3-flash-preview"
def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
    """base_url/api_key/model under auxiliary.web_extract in config.yaml are used."""
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    # The keys must be nested under auxiliary -> web_extract: the resolver
    # looks up the "auxiliary" section, then the task name, then the
    # individual settings. A flat document would leave all of them unset.
    (hermes_home / "config.yaml").write_text(
        """auxiliary:
  web_extract:
    base_url: http://localhost:3456/v1
    api_key: config-key
    model: config-model
"""
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    with patch("agent.auxiliary_client.OpenAI") as mock_openai:
        _, model = get_text_auxiliary_client("web_extract")

    assert model == "config-model"
    assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:3456/v1"
    assert mock_openai.call_args.kwargs["api_key"] == "config-key"
def test_task_without_override_uses_auto(self, monkeypatch): def test_task_without_override_uses_auto(self, monkeypatch):
"""A task with no provider env var falls through to auto chain.""" """A task with no provider env var falls through to auto chain."""
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

View File

@@ -26,6 +26,10 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
(fake_home / "memories").mkdir() (fake_home / "memories").mkdir()
(fake_home / "skills").mkdir() (fake_home / "skills").mkdir()
monkeypatch.setenv("HERMES_HOME", str(fake_home)) monkeypatch.setenv("HERMES_HOME", str(fake_home))
# Tests should not inherit the agent's current gateway/messaging surface.
# Individual tests that need gateway behavior set these explicitly.
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
@pytest.fixture() @pytest.fixture()

View File

@@ -25,7 +25,9 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
# Clear env vars # Clear env vars
for key in ( for key in (
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL", "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL", "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL", "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
): ):
monkeypatch.delenv(key, raising=False) monkeypatch.delenv(key, raising=False)
@@ -47,19 +49,35 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
auxiliary_cfg = config_dict.get("auxiliary", {}) auxiliary_cfg = config_dict.get("auxiliary", {})
if auxiliary_cfg and isinstance(auxiliary_cfg, dict): if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
aux_task_env = { aux_task_env = {
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), "vision": {
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), "provider": "AUXILIARY_VISION_PROVIDER",
"model": "AUXILIARY_VISION_MODEL",
"base_url": "AUXILIARY_VISION_BASE_URL",
"api_key": "AUXILIARY_VISION_API_KEY",
},
"web_extract": {
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
},
} }
for task_key, (prov_env, model_env) in aux_task_env.items(): for task_key, env_map in aux_task_env.items():
task_cfg = auxiliary_cfg.get(task_key, {}) task_cfg = auxiliary_cfg.get(task_key, {})
if not isinstance(task_cfg, dict): if not isinstance(task_cfg, dict):
continue continue
prov = str(task_cfg.get("provider", "")).strip() prov = str(task_cfg.get("provider", "")).strip()
model = str(task_cfg.get("model", "")).strip() model = str(task_cfg.get("model", "")).strip()
base_url = str(task_cfg.get("base_url", "")).strip()
api_key = str(task_cfg.get("api_key", "")).strip()
if prov and prov != "auto": if prov and prov != "auto":
os.environ[prov_env] = prov os.environ[env_map["provider"]] = prov
if model: if model:
os.environ[model_env] = model os.environ[env_map["model"]] = model
if base_url:
os.environ[env_map["base_url"]] = base_url
if api_key:
os.environ[env_map["api_key"]] = api_key
# ── Config bridging tests ──────────────────────────────────────────────────── # ── Config bridging tests ────────────────────────────────────────────────────
@@ -101,6 +119,21 @@ class TestAuxiliaryConfigBridge:
assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous" assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash" assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
def test_direct_endpoint_bridged(self, monkeypatch):
config = {
"auxiliary": {
"vision": {
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
"model": "qwen2.5-vl",
}
}
}
_run_auxiliary_bridge(config, monkeypatch)
assert os.environ.get("AUXILIARY_VISION_BASE_URL") == "http://localhost:1234/v1"
assert os.environ.get("AUXILIARY_VISION_API_KEY") == "local-key"
assert os.environ.get("AUXILIARY_VISION_MODEL") == "qwen2.5-vl"
def test_compression_provider_bridged(self, monkeypatch): def test_compression_provider_bridged(self, monkeypatch):
config = { config = {
"compression": { "compression": {
@@ -200,8 +233,12 @@ class TestGatewayBridgeCodeParity:
# Check for key patterns that indicate the bridge is present # Check for key patterns that indicate the bridge is present
assert "AUXILIARY_VISION_PROVIDER" in content assert "AUXILIARY_VISION_PROVIDER" in content
assert "AUXILIARY_VISION_MODEL" in content assert "AUXILIARY_VISION_MODEL" in content
assert "AUXILIARY_VISION_BASE_URL" in content
assert "AUXILIARY_VISION_API_KEY" in content
assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
assert "AUXILIARY_WEB_EXTRACT_MODEL" in content assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
assert "AUXILIARY_WEB_EXTRACT_BASE_URL" in content
assert "AUXILIARY_WEB_EXTRACT_API_KEY" in content
def test_gateway_has_compression_provider(self): def test_gateway_has_compression_provider(self):
"""Gateway must bridge compression.summary_provider.""" """Gateway must bridge compression.summary_provider."""

View File

@@ -10,6 +10,7 @@ Run with: python -m pytest tests/test_delegate.py -v
""" """
import json import json
import os
import sys import sys
import unittest import unittest
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
@@ -462,6 +463,32 @@ class TestDelegationCredentialResolution(unittest.TestCase):
self.assertEqual(creds["api_mode"], "chat_completions") self.assertEqual(creds["api_mode"], "chat_completions")
mock_resolve.assert_called_once_with(requested="openrouter") mock_resolve.assert_called_once_with(requested="openrouter")
def test_direct_endpoint_uses_configured_base_url_and_api_key(self):
parent = _make_mock_parent(depth=0)
cfg = {
"model": "qwen2.5-coder",
"provider": "openrouter",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["model"], "qwen2.5-coder")
self.assertEqual(creds["provider"], "custom")
self.assertEqual(creds["base_url"], "http://localhost:1234/v1")
self.assertEqual(creds["api_key"], "local-key")
self.assertEqual(creds["api_mode"], "chat_completions")
def test_direct_endpoint_falls_back_to_openai_api_key_env(self):
parent = _make_mock_parent(depth=0)
cfg = {
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
}
with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False):
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["api_key"], "env-openai-key")
self.assertEqual(creds["provider"], "custom")
@patch("hermes_cli.runtime_provider.resolve_runtime_provider") @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
def test_nous_provider_resolves_nous_credentials(self, mock_resolve): def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
"""Nous provider resolves Nous Portal base_url and api_key.""" """Nous provider resolves Nous Portal base_url and api_key."""
@@ -589,6 +616,40 @@ class TestDelegationProviderIntegration(unittest.TestCase):
self.assertNotEqual(kwargs["base_url"], parent.base_url) self.assertNotEqual(kwargs["base_url"], parent.base_url)
self.assertNotEqual(kwargs["api_key"], parent.api_key) self.assertNotEqual(kwargs["api_key"], parent.api_key)
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
mock_cfg.return_value = {
"max_iterations": 45,
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
}
mock_creds.return_value = {
"model": "qwen2.5-coder",
"provider": "custom",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1
}
MockAgent.return_value = mock_child
delegate_task(goal="Direct endpoint test", parent_agent=parent)
_, kwargs = MockAgent.call_args
self.assertEqual(kwargs["model"], "qwen2.5-coder")
self.assertEqual(kwargs["provider"], "custom")
self.assertEqual(kwargs["base_url"], "http://localhost:1234/v1")
self.assertEqual(kwargs["api_key"], "local-key")
self.assertEqual(kwargs["api_mode"], "chat_completions")
@patch("tools.delegate_tool._load_config") @patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials") @patch("tools.delegate_tool._resolve_delegation_credentials")
def test_empty_config_inherits_parent(self, mock_creds, mock_cfg): def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):

View File

@@ -540,18 +540,52 @@ def delegate_task(
def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
"""Resolve credentials for subagent delegation. """Resolve credentials for subagent delegation.
If ``delegation.provider`` is configured, resolves the full credential If ``delegation.base_url`` is configured, subagents use that direct
bundle (base_url, api_key, api_mode, provider) via the runtime provider OpenAI-compatible endpoint. Otherwise, if ``delegation.provider`` is
system — the same path used by CLI/gateway startup. This lets subagents configured, the full credential bundle (base_url, api_key, api_mode,
run on a completely different provider:model pair. provider) is resolved via the runtime provider system — the same path used
by CLI/gateway startup. This lets subagents run on a completely different
provider:model pair.
If no provider is configured, returns None values so the child inherits If neither base_url nor provider is configured, returns None values so the
everything from the parent agent. child inherits everything from the parent agent.
Raises ValueError with a user-friendly message on credential failure. Raises ValueError with a user-friendly message on credential failure.
""" """
configured_model = cfg.get("model") or None configured_model = str(cfg.get("model") or "").strip() or None
configured_provider = cfg.get("provider") or None configured_provider = str(cfg.get("provider") or "").strip() or None
configured_base_url = str(cfg.get("base_url") or "").strip() or None
configured_api_key = str(cfg.get("api_key") or "").strip() or None
if configured_base_url:
api_key = (
configured_api_key
or os.getenv("OPENAI_API_KEY", "").strip()
or os.getenv("OPENROUTER_API_KEY", "").strip()
)
if not api_key:
raise ValueError(
"Delegation base_url is configured but no API key was found. "
"Set delegation.api_key or OPENAI_API_KEY."
)
base_lower = configured_base_url.lower()
provider = "custom"
api_mode = "chat_completions"
if "chatgpt.com/backend-api/codex" in base_lower:
provider = "openai-codex"
api_mode = "codex_responses"
elif "api.anthropic.com" in base_lower:
provider = "anthropic"
api_mode = "anthropic_messages"
return {
"model": configured_model,
"provider": provider,
"base_url": configured_base_url,
"api_key": api_key,
"api_mode": api_mode,
}
if not configured_provider: if not configured_provider:
# No provider override — child inherits everything from parent # No provider override — child inherits everything from parent
@@ -570,7 +604,8 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
except Exception as exc: except Exception as exc:
raise ValueError( raise ValueError(
f"Cannot resolve delegation provider '{configured_provider}': {exc}. " f"Cannot resolve delegation provider '{configured_provider}': {exc}. "
f"Check that the provider is configured (API key set, valid provider name). " f"Check that the provider is configured (API key set, valid provider name), "
f"or set delegation.base_url/delegation.api_key for a direct endpoint. "
f"Available providers: openrouter, nous, zai, kimi-coding, minimax." f"Available providers: openrouter, nous, zai, kimi-coding, minimax."
) from exc ) from exc

View File

@@ -173,6 +173,21 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
| `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) | | `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) |
| `CONTEXT_COMPRESSION_MODEL` | Model for summaries | | `CONTEXT_COMPRESSION_MODEL` | Model for summaries |
## Auxiliary Task Overrides
| Variable | Description |
|----------|-------------|
| `AUXILIARY_VISION_PROVIDER` | Override provider for vision tasks |
| `AUXILIARY_VISION_MODEL` | Override model for vision tasks |
| `AUXILIARY_VISION_BASE_URL` | Direct OpenAI-compatible endpoint for vision tasks |
| `AUXILIARY_VISION_API_KEY` | API key paired with `AUXILIARY_VISION_BASE_URL` |
| `AUXILIARY_WEB_EXTRACT_PROVIDER` | Override provider for web extraction/summarization |
| `AUXILIARY_WEB_EXTRACT_MODEL` | Override model for web extraction/summarization |
| `AUXILIARY_WEB_EXTRACT_BASE_URL` | Direct OpenAI-compatible endpoint for web extraction/summarization |
| `AUXILIARY_WEB_EXTRACT_API_KEY` | API key paired with `AUXILIARY_WEB_EXTRACT_BASE_URL` |
| `CONTEXT_COMPRESSION_PROVIDER` | Override provider for context compression summaries |
| `CONTEXT_COMPRESSION_MODEL` | Override model for context compression summaries |
## Provider Routing (config.yaml only) ## Provider Routing (config.yaml only)
These go in `~/.hermes/config.yaml` under the `provider_routing` section: These go in `~/.hermes/config.yaml` under the `provider_routing` section:

View File

@@ -563,11 +563,15 @@ auxiliary:
vision: vision:
provider: "auto" # "auto", "openrouter", "nous", "main" provider: "auto" # "auto", "openrouter", "nous", "main"
model: "" # e.g. "openai/gpt-4o", "google/gemini-2.5-flash" model: "" # e.g. "openai/gpt-4o", "google/gemini-2.5-flash"
base_url: "" # direct OpenAI-compatible endpoint (takes precedence over provider)
api_key: "" # API key for base_url (falls back to OPENAI_API_KEY)
# Web page summarization + browser page text extraction # Web page summarization + browser page text extraction
web_extract: web_extract:
provider: "auto" provider: "auto"
model: "" # e.g. "google/gemini-2.5-flash" model: "" # e.g. "google/gemini-2.5-flash"
base_url: ""
api_key: ""
``` ```
### Changing the Vision Model ### Changing the Vision Model
@@ -598,6 +602,17 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o
### Common Setups ### Common Setups
**Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs):
```yaml
auxiliary:
vision:
base_url: "http://localhost:1234/v1"
api_key: "local-key"
model: "qwen2.5-vl"
```
`base_url` takes precedence over `provider`, so this is the most explicit way to route an auxiliary task to a specific endpoint.
**Using OpenAI API key for vision:** **Using OpenAI API key for vision:**
```yaml ```yaml
# In ~/.hermes/.env: # In ~/.hermes/.env:
@@ -807,13 +822,17 @@ delegation:
- web - web
# model: "google/gemini-3-flash-preview" # Override model (empty = inherit parent) # model: "google/gemini-3-flash-preview" # Override model (empty = inherit parent)
# provider: "openrouter" # Override provider (empty = inherit parent) # provider: "openrouter" # Override provider (empty = inherit parent)
# base_url: "http://localhost:1234/v1" # Direct OpenAI-compatible endpoint (takes precedence over provider)
# api_key: "local-key" # API key for base_url (falls back to OPENAI_API_KEY, then OPENROUTER_API_KEY)
``` ```
**Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model. **Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model.
**Direct endpoint override:** To route subagents straight to a custom OpenAI-compatible endpoint, set `delegation.base_url`, `delegation.api_key`, and `delegation.model`. This takes precedence over `delegation.provider`. If `delegation.api_key` is omitted, the key falls back to `OPENAI_API_KEY`, then `OPENROUTER_API_KEY`; if none of these is set, delegation fails with an error.
The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed. The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed.
**Precedence:** `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter). **Precedence:** `delegation.base_url` in config → `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` or `base_url` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
## Clarify ## Clarify

View File

@@ -209,6 +209,14 @@ Delegation has a **depth limit of 2** — a parent (depth 0) can spawn children
delegation: delegation:
max_iterations: 50 # Max turns per child (default: 50) max_iterations: 50 # Max turns per child (default: 50)
default_toolsets: ["terminal", "file", "web"] # Default toolsets default_toolsets: ["terminal", "file", "web"] # Default toolsets
model: "google/gemini-3-flash-preview" # Optional provider/model override
provider: "openrouter" # Optional built-in provider
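# Or, instead of provider, use a direct custom endpoint (takes precedence over provider).
# Shown commented out so this block stays valid YAML (a mapping cannot repeat the `delegation` key):
# delegation:
#   model: "qwen2.5-coder"
#   base_url: "http://localhost:1234/v1"
#   api_key: "local-key"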
``` ```
:::tip :::tip