mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-01 00:11:39 +08:00
Compare commits
1 Commits
fix/plugin
...
feat/aux-d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b3f5c6525a |
@@ -30,6 +30,10 @@ Default "auto" follows the chains above.
|
|||||||
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
|
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
|
||||||
AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
|
AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
|
||||||
than the provider's default.
|
than the provider's default.
|
||||||
|
|
||||||
|
Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
|
||||||
|
AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
|
||||||
|
custom OpenAI-compatible endpoint without touching the main model settings.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
@@ -418,6 +422,17 @@ def _get_auxiliary_provider(task: str = "") -> str:
|
|||||||
return "auto"
|
return "auto"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:
|
||||||
|
"""Read an auxiliary env override from AUXILIARY_* or CONTEXT_* prefixes."""
|
||||||
|
if not task:
|
||||||
|
return None
|
||||||
|
for prefix in ("AUXILIARY_", "CONTEXT_"):
|
||||||
|
val = os.getenv(f"{prefix}{task.upper()}_{suffix}", "").strip()
|
||||||
|
if val:
|
||||||
|
return val
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
or_key = os.getenv("OPENROUTER_API_KEY")
|
or_key = os.getenv("OPENROUTER_API_KEY")
|
||||||
if not or_key:
|
if not or_key:
|
||||||
@@ -564,6 +579,8 @@ def resolve_provider_client(
|
|||||||
model: str = None,
|
model: str = None,
|
||||||
async_mode: bool = False,
|
async_mode: bool = False,
|
||||||
raw_codex: bool = False,
|
raw_codex: bool = False,
|
||||||
|
explicit_base_url: str = None,
|
||||||
|
explicit_api_key: str = None,
|
||||||
) -> Tuple[Optional[Any], Optional[str]]:
|
) -> Tuple[Optional[Any], Optional[str]]:
|
||||||
"""Central router: given a provider name and optional model, return a
|
"""Central router: given a provider name and optional model, return a
|
||||||
configured client with the correct auth, base URL, and API format.
|
configured client with the correct auth, base URL, and API format.
|
||||||
@@ -585,6 +602,8 @@ def resolve_provider_client(
|
|||||||
instead of wrapping in CodexAuxiliaryClient. Use this when
|
instead of wrapping in CodexAuxiliaryClient. Use this when
|
||||||
the caller needs direct access to responses.stream() (e.g.,
|
the caller needs direct access to responses.stream() (e.g.,
|
||||||
the main agent loop).
|
the main agent loop).
|
||||||
|
explicit_base_url: Optional direct OpenAI-compatible endpoint.
|
||||||
|
explicit_api_key: Optional API key paired with explicit_base_url.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(client, resolved_model) or (None, None) if auth is unavailable.
|
(client, resolved_model) or (None, None) if auth is unavailable.
|
||||||
@@ -661,6 +680,18 @@ def resolve_provider_client(
|
|||||||
|
|
||||||
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
||||||
if provider == "custom":
|
if provider == "custom":
|
||||||
|
if explicit_base_url:
|
||||||
|
custom_base = explicit_base_url.strip()
|
||||||
|
custom_key = (
|
||||||
|
(explicit_api_key or "").strip()
|
||||||
|
or os.getenv("OPENAI_API_KEY", "").strip()
|
||||||
|
or os.getenv("OPENROUTER_API_KEY", "").strip()
|
||||||
|
)
|
||||||
|
if custom_base and custom_key:
|
||||||
|
final_model = model or _read_main_model() or "gpt-4o-mini"
|
||||||
|
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||||
|
return (_to_async_client(client, final_model) if async_mode
|
||||||
|
else (client, final_model))
|
||||||
# Try custom first, then codex, then API-key providers
|
# Try custom first, then codex, then API-key providers
|
||||||
for try_fn in (_try_custom_endpoint, _try_codex,
|
for try_fn in (_try_custom_endpoint, _try_codex,
|
||||||
_resolve_api_key_provider):
|
_resolve_api_key_provider):
|
||||||
@@ -749,10 +780,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
|
|||||||
Callers may override the returned model with a per-task env var
|
Callers may override the returned model with a per-task env var
|
||||||
(e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
|
(e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
|
||||||
"""
|
"""
|
||||||
forced = _get_auxiliary_provider(task)
|
provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
|
||||||
if forced != "auto":
|
return resolve_provider_client(
|
||||||
return resolve_provider_client(forced)
|
provider,
|
||||||
return resolve_provider_client("auto")
|
model=model,
|
||||||
|
explicit_base_url=base_url,
|
||||||
|
explicit_api_key=api_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_async_text_auxiliary_client(task: str = ""):
|
def get_async_text_auxiliary_client(task: str = ""):
|
||||||
@@ -762,10 +796,14 @@ def get_async_text_auxiliary_client(task: str = ""):
|
|||||||
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
||||||
Returns (None, None) when no provider is available.
|
Returns (None, None) when no provider is available.
|
||||||
"""
|
"""
|
||||||
forced = _get_auxiliary_provider(task)
|
provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
|
||||||
if forced != "auto":
|
return resolve_provider_client(
|
||||||
return resolve_provider_client(forced, async_mode=True)
|
provider,
|
||||||
return resolve_provider_client("auto", async_mode=True)
|
model=model,
|
||||||
|
async_mode=True,
|
||||||
|
explicit_base_url=base_url,
|
||||||
|
explicit_api_key=api_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
@@ -781,18 +819,25 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||||||
providers are skipped — they may not handle vision input. To use
|
providers are skipped — they may not handle vision input. To use
|
||||||
them, set AUXILIARY_VISION_PROVIDER explicitly.
|
them, set AUXILIARY_VISION_PROVIDER explicitly.
|
||||||
"""
|
"""
|
||||||
forced = _get_auxiliary_provider("vision")
|
provider, model, base_url, api_key = _resolve_task_provider_model("vision")
|
||||||
if forced != "auto":
|
if base_url:
|
||||||
return resolve_provider_client(forced)
|
return resolve_provider_client(
|
||||||
|
"custom",
|
||||||
|
model=model,
|
||||||
|
explicit_base_url=base_url,
|
||||||
|
explicit_api_key=api_key,
|
||||||
|
)
|
||||||
|
if provider != "auto":
|
||||||
|
return resolve_provider_client(provider, model=model)
|
||||||
# Auto: try providers known to support multimodal first, then fall
|
# Auto: try providers known to support multimodal first, then fall
|
||||||
# back to the user's custom endpoint. Many local models (Qwen-VL,
|
# back to the user's custom endpoint. Many local models (Qwen-VL,
|
||||||
# LLaVA, Pixtral, etc.) support vision — skipping them entirely
|
# LLaVA, Pixtral, etc.) support vision — skipping them entirely
|
||||||
# caused silent failures for local-only users.
|
# caused silent failures for local-only users.
|
||||||
for try_fn in (_try_openrouter, _try_nous, _try_codex,
|
for try_fn in (_try_openrouter, _try_nous, _try_codex,
|
||||||
_try_custom_endpoint):
|
_try_custom_endpoint):
|
||||||
client, model = try_fn()
|
client, auto_model = try_fn()
|
||||||
if client is not None:
|
if client is not None:
|
||||||
return client, model
|
return client, model or auto_model
|
||||||
logger.debug("Auxiliary vision client: none available")
|
logger.debug("Auxiliary vision client: none available")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
@@ -851,19 +896,29 @@ def auxiliary_max_tokens_param(value: int) -> dict:
|
|||||||
# Every auxiliary LLM consumer should use these instead of manually
|
# Every auxiliary LLM consumer should use these instead of manually
|
||||||
# constructing clients and calling .chat.completions.create().
|
# constructing clients and calling .chat.completions.create().
|
||||||
|
|
||||||
# Client cache: (provider, async_mode) -> (client, default_model)
|
# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
|
||||||
_client_cache: Dict[tuple, tuple] = {}
|
_client_cache: Dict[tuple, tuple] = {}
|
||||||
|
|
||||||
|
|
||||||
def _get_cached_client(
|
def _get_cached_client(
|
||||||
provider: str, model: str = None, async_mode: bool = False,
|
provider: str,
|
||||||
|
model: str = None,
|
||||||
|
async_mode: bool = False,
|
||||||
|
base_url: str = None,
|
||||||
|
api_key: str = None,
|
||||||
) -> Tuple[Optional[Any], Optional[str]]:
|
) -> Tuple[Optional[Any], Optional[str]]:
|
||||||
"""Get or create a cached client for the given provider."""
|
"""Get or create a cached client for the given provider."""
|
||||||
cache_key = (provider, async_mode)
|
cache_key = (provider, async_mode, base_url or "", api_key or "")
|
||||||
if cache_key in _client_cache:
|
if cache_key in _client_cache:
|
||||||
cached_client, cached_default = _client_cache[cache_key]
|
cached_client, cached_default = _client_cache[cache_key]
|
||||||
return cached_client, model or cached_default
|
return cached_client, model or cached_default
|
||||||
client, default_model = resolve_provider_client(provider, model, async_mode)
|
client, default_model = resolve_provider_client(
|
||||||
|
provider,
|
||||||
|
model,
|
||||||
|
async_mode,
|
||||||
|
explicit_base_url=base_url,
|
||||||
|
explicit_api_key=api_key,
|
||||||
|
)
|
||||||
if client is not None:
|
if client is not None:
|
||||||
_client_cache[cache_key] = (client, default_model)
|
_client_cache[cache_key] = (client, default_model)
|
||||||
return client, model or default_model
|
return client, model or default_model
|
||||||
@@ -873,57 +928,75 @@ def _resolve_task_provider_model(
|
|||||||
task: str = None,
|
task: str = None,
|
||||||
provider: str = None,
|
provider: str = None,
|
||||||
model: str = None,
|
model: str = None,
|
||||||
) -> Tuple[str, Optional[str]]:
|
base_url: str = None,
|
||||||
|
api_key: str = None,
|
||||||
|
) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
|
||||||
"""Determine provider + model for a call.
|
"""Determine provider + model for a call.
|
||||||
|
|
||||||
Priority:
|
Priority:
|
||||||
1. Explicit provider/model args (always win)
|
1. Explicit provider/model/base_url/api_key args (always win)
|
||||||
2. Env var overrides (AUXILIARY_{TASK}_PROVIDER, etc.)
|
2. Env var overrides (AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
|
||||||
3. Config file (auxiliary.{task}.provider/model or compression.*)
|
3. Config file (auxiliary.{task}.* or compression.*)
|
||||||
4. "auto" (full auto-detection chain)
|
4. "auto" (full auto-detection chain)
|
||||||
|
|
||||||
Returns (provider, model) where model may be None (use provider default).
|
Returns (provider, model, base_url, api_key) where model may be None
|
||||||
|
(use provider default). When base_url is set, provider is forced to
|
||||||
|
"custom" and the task uses that direct endpoint.
|
||||||
"""
|
"""
|
||||||
if provider:
|
config = {}
|
||||||
return provider, model
|
cfg_provider = None
|
||||||
|
cfg_model = None
|
||||||
|
cfg_base_url = None
|
||||||
|
cfg_api_key = None
|
||||||
|
|
||||||
if task:
|
if task:
|
||||||
# Check env var overrides first
|
|
||||||
env_provider = _get_auxiliary_provider(task)
|
|
||||||
if env_provider != "auto":
|
|
||||||
# Check for env var model override too
|
|
||||||
env_model = None
|
|
||||||
for prefix in ("AUXILIARY_", "CONTEXT_"):
|
|
||||||
val = os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip()
|
|
||||||
if val:
|
|
||||||
env_model = val
|
|
||||||
break
|
|
||||||
return env_provider, model or env_model
|
|
||||||
|
|
||||||
# Read from config file
|
|
||||||
try:
|
try:
|
||||||
from hermes_cli.config import load_config
|
from hermes_cli.config import load_config
|
||||||
config = load_config()
|
config = load_config()
|
||||||
except ImportError:
|
except ImportError:
|
||||||
return "auto", model
|
config = {}
|
||||||
|
|
||||||
# Check auxiliary.{task} section
|
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
|
||||||
aux = config.get("auxiliary", {})
|
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
|
||||||
task_config = aux.get(task, {})
|
if not isinstance(task_config, dict):
|
||||||
cfg_provider = task_config.get("provider", "").strip() or None
|
task_config = {}
|
||||||
cfg_model = task_config.get("model", "").strip() or None
|
cfg_provider = str(task_config.get("provider", "")).strip() or None
|
||||||
|
cfg_model = str(task_config.get("model", "")).strip() or None
|
||||||
|
cfg_base_url = str(task_config.get("base_url", "")).strip() or None
|
||||||
|
cfg_api_key = str(task_config.get("api_key", "")).strip() or None
|
||||||
|
|
||||||
# Backwards compat: compression section has its own keys
|
# Backwards compat: compression section has its own keys
|
||||||
if task == "compression" and not cfg_provider:
|
if task == "compression" and not cfg_provider:
|
||||||
comp = config.get("compression", {})
|
comp = config.get("compression", {}) if isinstance(config, dict) else {}
|
||||||
|
if isinstance(comp, dict):
|
||||||
cfg_provider = comp.get("summary_provider", "").strip() or None
|
cfg_provider = comp.get("summary_provider", "").strip() or None
|
||||||
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
|
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
|
||||||
|
|
||||||
if cfg_provider and cfg_provider != "auto":
|
env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
|
||||||
return cfg_provider, model or cfg_model
|
resolved_model = model or env_model or cfg_model
|
||||||
return "auto", model or cfg_model
|
|
||||||
|
|
||||||
return "auto", model
|
if base_url:
|
||||||
|
return "custom", resolved_model, base_url, api_key
|
||||||
|
if provider:
|
||||||
|
return provider, resolved_model, base_url, api_key
|
||||||
|
|
||||||
|
if task:
|
||||||
|
env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
|
||||||
|
env_api_key = _get_auxiliary_env_override(task, "API_KEY")
|
||||||
|
if env_base_url:
|
||||||
|
return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
|
||||||
|
|
||||||
|
env_provider = _get_auxiliary_provider(task)
|
||||||
|
if env_provider != "auto":
|
||||||
|
return env_provider, resolved_model, None, None
|
||||||
|
|
||||||
|
if cfg_base_url:
|
||||||
|
return "custom", resolved_model, cfg_base_url, cfg_api_key
|
||||||
|
if cfg_provider and cfg_provider != "auto":
|
||||||
|
return cfg_provider, resolved_model, None, None
|
||||||
|
return "auto", resolved_model, None, None
|
||||||
|
|
||||||
|
return "auto", resolved_model, None, None
|
||||||
|
|
||||||
|
|
||||||
def _build_call_kwargs(
|
def _build_call_kwargs(
|
||||||
@@ -935,6 +1008,7 @@ def _build_call_kwargs(
|
|||||||
tools: Optional[list] = None,
|
tools: Optional[list] = None,
|
||||||
timeout: float = 30.0,
|
timeout: float = 30.0,
|
||||||
extra_body: Optional[dict] = None,
|
extra_body: Optional[dict] = None,
|
||||||
|
base_url: Optional[str] = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Build kwargs for .chat.completions.create() with model/provider adjustments."""
|
"""Build kwargs for .chat.completions.create() with model/provider adjustments."""
|
||||||
kwargs: Dict[str, Any] = {
|
kwargs: Dict[str, Any] = {
|
||||||
@@ -950,7 +1024,7 @@ def _build_call_kwargs(
|
|||||||
# Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
|
# Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
|
||||||
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
|
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
|
||||||
if provider == "custom":
|
if provider == "custom":
|
||||||
custom_base = os.getenv("OPENAI_BASE_URL", "")
|
custom_base = base_url or os.getenv("OPENAI_BASE_URL", "")
|
||||||
if "api.openai.com" in custom_base.lower():
|
if "api.openai.com" in custom_base.lower():
|
||||||
kwargs["max_completion_tokens"] = max_tokens
|
kwargs["max_completion_tokens"] = max_tokens
|
||||||
else:
|
else:
|
||||||
@@ -976,6 +1050,8 @@ def call_llm(
|
|||||||
*,
|
*,
|
||||||
provider: str = None,
|
provider: str = None,
|
||||||
model: str = None,
|
model: str = None,
|
||||||
|
base_url: str = None,
|
||||||
|
api_key: str = None,
|
||||||
messages: list,
|
messages: list,
|
||||||
temperature: float = None,
|
temperature: float = None,
|
||||||
max_tokens: int = None,
|
max_tokens: int = None,
|
||||||
@@ -1007,13 +1083,18 @@ def call_llm(
|
|||||||
Raises:
|
Raises:
|
||||||
RuntimeError: If no provider is configured.
|
RuntimeError: If no provider is configured.
|
||||||
"""
|
"""
|
||||||
resolved_provider, resolved_model = _resolve_task_provider_model(
|
resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
|
||||||
task, provider, model)
|
task, provider, model, base_url, api_key)
|
||||||
|
|
||||||
client, final_model = _get_cached_client(resolved_provider, resolved_model)
|
client, final_model = _get_cached_client(
|
||||||
|
resolved_provider,
|
||||||
|
resolved_model,
|
||||||
|
base_url=resolved_base_url,
|
||||||
|
api_key=resolved_api_key,
|
||||||
|
)
|
||||||
if client is None:
|
if client is None:
|
||||||
# Fallback: try openrouter
|
# Fallback: try openrouter
|
||||||
if resolved_provider != "openrouter":
|
if resolved_provider != "openrouter" and not resolved_base_url:
|
||||||
logger.warning("Provider %s unavailable, falling back to openrouter",
|
logger.warning("Provider %s unavailable, falling back to openrouter",
|
||||||
resolved_provider)
|
resolved_provider)
|
||||||
client, final_model = _get_cached_client(
|
client, final_model = _get_cached_client(
|
||||||
@@ -1026,7 +1107,8 @@ def call_llm(
|
|||||||
kwargs = _build_call_kwargs(
|
kwargs = _build_call_kwargs(
|
||||||
resolved_provider, final_model, messages,
|
resolved_provider, final_model, messages,
|
||||||
temperature=temperature, max_tokens=max_tokens,
|
temperature=temperature, max_tokens=max_tokens,
|
||||||
tools=tools, timeout=timeout, extra_body=extra_body)
|
tools=tools, timeout=timeout, extra_body=extra_body,
|
||||||
|
base_url=resolved_base_url)
|
||||||
|
|
||||||
# Handle max_tokens vs max_completion_tokens retry
|
# Handle max_tokens vs max_completion_tokens retry
|
||||||
try:
|
try:
|
||||||
@@ -1045,6 +1127,8 @@ async def async_call_llm(
|
|||||||
*,
|
*,
|
||||||
provider: str = None,
|
provider: str = None,
|
||||||
model: str = None,
|
model: str = None,
|
||||||
|
base_url: str = None,
|
||||||
|
api_key: str = None,
|
||||||
messages: list,
|
messages: list,
|
||||||
temperature: float = None,
|
temperature: float = None,
|
||||||
max_tokens: int = None,
|
max_tokens: int = None,
|
||||||
@@ -1056,13 +1140,18 @@ async def async_call_llm(
|
|||||||
|
|
||||||
Same as call_llm() but async. See call_llm() for full documentation.
|
Same as call_llm() but async. See call_llm() for full documentation.
|
||||||
"""
|
"""
|
||||||
resolved_provider, resolved_model = _resolve_task_provider_model(
|
resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
|
||||||
task, provider, model)
|
task, provider, model, base_url, api_key)
|
||||||
|
|
||||||
client, final_model = _get_cached_client(
|
client, final_model = _get_cached_client(
|
||||||
resolved_provider, resolved_model, async_mode=True)
|
resolved_provider,
|
||||||
|
resolved_model,
|
||||||
|
async_mode=True,
|
||||||
|
base_url=resolved_base_url,
|
||||||
|
api_key=resolved_api_key,
|
||||||
|
)
|
||||||
if client is None:
|
if client is None:
|
||||||
if resolved_provider != "openrouter":
|
if resolved_provider != "openrouter" and not resolved_base_url:
|
||||||
logger.warning("Provider %s unavailable, falling back to openrouter",
|
logger.warning("Provider %s unavailable, falling back to openrouter",
|
||||||
resolved_provider)
|
resolved_provider)
|
||||||
client, final_model = _get_cached_client(
|
client, final_model = _get_cached_client(
|
||||||
@@ -1076,7 +1165,8 @@ async def async_call_llm(
|
|||||||
kwargs = _build_call_kwargs(
|
kwargs = _build_call_kwargs(
|
||||||
resolved_provider, final_model, messages,
|
resolved_provider, final_model, messages,
|
||||||
temperature=temperature, max_tokens=max_tokens,
|
temperature=temperature, max_tokens=max_tokens,
|
||||||
tools=tools, timeout=timeout, extra_body=extra_body)
|
tools=tools, timeout=timeout, extra_body=extra_body,
|
||||||
|
base_url=resolved_base_url)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return await client.chat.completions.create(**kwargs)
|
return await client.chat.completions.create(**kwargs)
|
||||||
|
|||||||
48
cli.py
48
cli.py
@@ -217,11 +217,27 @@ def load_cli_config() -> Dict[str, Any]:
|
|||||||
"timeout": 300, # Max seconds a sandbox script can run before being killed (5 min)
|
"timeout": 300, # Max seconds a sandbox script can run before being killed (5 min)
|
||||||
"max_tool_calls": 50, # Max RPC tool calls per execution
|
"max_tool_calls": 50, # Max RPC tool calls per execution
|
||||||
},
|
},
|
||||||
|
"auxiliary": {
|
||||||
|
"vision": {
|
||||||
|
"provider": "auto",
|
||||||
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
|
},
|
||||||
|
"web_extract": {
|
||||||
|
"provider": "auto",
|
||||||
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
|
},
|
||||||
|
},
|
||||||
"delegation": {
|
"delegation": {
|
||||||
"max_iterations": 45, # Max tool-calling turns per child agent
|
"max_iterations": 45, # Max tool-calling turns per child agent
|
||||||
"default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents
|
"default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents
|
||||||
"model": "", # Subagent model override (empty = inherit parent model)
|
"model": "", # Subagent model override (empty = inherit parent model)
|
||||||
"provider": "", # Subagent provider override (empty = inherit parent provider)
|
"provider": "", # Subagent provider override (empty = inherit parent provider)
|
||||||
|
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
|
||||||
|
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -362,28 +378,44 @@ def load_cli_config() -> Dict[str, Any]:
|
|||||||
if config_key in compression_config:
|
if config_key in compression_config:
|
||||||
os.environ[env_var] = str(compression_config[config_key])
|
os.environ[env_var] = str(compression_config[config_key])
|
||||||
|
|
||||||
# Apply auxiliary model overrides to environment variables.
|
# Apply auxiliary model/direct-endpoint overrides to environment variables.
|
||||||
# Vision and web_extract each have their own provider + model pair.
|
# Vision and web_extract each have their own provider/model/base_url/api_key tuple.
|
||||||
# (Compression is handled in the compression section above.)
|
# (Compression is handled in the compression section above.)
|
||||||
# Only set env vars for non-empty / non-default values so auto-detection
|
# Only set env vars for non-empty / non-default values so auto-detection
|
||||||
# still works.
|
# still works.
|
||||||
auxiliary_config = defaults.get("auxiliary", {})
|
auxiliary_config = defaults.get("auxiliary", {})
|
||||||
auxiliary_task_env = {
|
auxiliary_task_env = {
|
||||||
# config key → (provider env var, model env var)
|
# config key → env var mapping
|
||||||
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
|
"vision": {
|
||||||
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
|
"provider": "AUXILIARY_VISION_PROVIDER",
|
||||||
|
"model": "AUXILIARY_VISION_MODEL",
|
||||||
|
"base_url": "AUXILIARY_VISION_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_VISION_API_KEY",
|
||||||
|
},
|
||||||
|
"web_extract": {
|
||||||
|
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
|
||||||
|
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||||
|
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for task_key, (prov_env, model_env) in auxiliary_task_env.items():
|
for task_key, env_map in auxiliary_task_env.items():
|
||||||
task_cfg = auxiliary_config.get(task_key, {})
|
task_cfg = auxiliary_config.get(task_key, {})
|
||||||
if not isinstance(task_cfg, dict):
|
if not isinstance(task_cfg, dict):
|
||||||
continue
|
continue
|
||||||
prov = str(task_cfg.get("provider", "")).strip()
|
prov = str(task_cfg.get("provider", "")).strip()
|
||||||
model = str(task_cfg.get("model", "")).strip()
|
model = str(task_cfg.get("model", "")).strip()
|
||||||
|
base_url = str(task_cfg.get("base_url", "")).strip()
|
||||||
|
api_key = str(task_cfg.get("api_key", "")).strip()
|
||||||
if prov and prov != "auto":
|
if prov and prov != "auto":
|
||||||
os.environ[prov_env] = prov
|
os.environ[env_map["provider"]] = prov
|
||||||
if model:
|
if model:
|
||||||
os.environ[model_env] = model
|
os.environ[env_map["model"]] = model
|
||||||
|
if base_url:
|
||||||
|
os.environ[env_map["base_url"]] = base_url
|
||||||
|
if api_key:
|
||||||
|
os.environ[env_map["api_key"]] = api_key
|
||||||
|
|
||||||
# Security settings
|
# Security settings
|
||||||
security_config = defaults.get("security", {})
|
security_config = defaults.get("security", {})
|
||||||
|
|||||||
@@ -100,24 +100,40 @@ if _config_path.exists():
|
|||||||
for _cfg_key, _env_var in _compression_env_map.items():
|
for _cfg_key, _env_var in _compression_env_map.items():
|
||||||
if _cfg_key in _compression_cfg:
|
if _cfg_key in _compression_cfg:
|
||||||
os.environ[_env_var] = str(_compression_cfg[_cfg_key])
|
os.environ[_env_var] = str(_compression_cfg[_cfg_key])
|
||||||
# Auxiliary model overrides (vision, web_extract).
|
# Auxiliary model/direct-endpoint overrides (vision, web_extract).
|
||||||
# Each task has provider + model; bridge non-default values to env vars.
|
# Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
|
||||||
_auxiliary_cfg = _cfg.get("auxiliary", {})
|
_auxiliary_cfg = _cfg.get("auxiliary", {})
|
||||||
if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
|
if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
|
||||||
_aux_task_env = {
|
_aux_task_env = {
|
||||||
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
|
"vision": {
|
||||||
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
|
"provider": "AUXILIARY_VISION_PROVIDER",
|
||||||
|
"model": "AUXILIARY_VISION_MODEL",
|
||||||
|
"base_url": "AUXILIARY_VISION_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_VISION_API_KEY",
|
||||||
|
},
|
||||||
|
"web_extract": {
|
||||||
|
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
|
||||||
|
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||||
|
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _task_key, (_prov_env, _model_env) in _aux_task_env.items():
|
for _task_key, _env_map in _aux_task_env.items():
|
||||||
_task_cfg = _auxiliary_cfg.get(_task_key, {})
|
_task_cfg = _auxiliary_cfg.get(_task_key, {})
|
||||||
if not isinstance(_task_cfg, dict):
|
if not isinstance(_task_cfg, dict):
|
||||||
continue
|
continue
|
||||||
_prov = str(_task_cfg.get("provider", "")).strip()
|
_prov = str(_task_cfg.get("provider", "")).strip()
|
||||||
_model = str(_task_cfg.get("model", "")).strip()
|
_model = str(_task_cfg.get("model", "")).strip()
|
||||||
|
_base_url = str(_task_cfg.get("base_url", "")).strip()
|
||||||
|
_api_key = str(_task_cfg.get("api_key", "")).strip()
|
||||||
if _prov and _prov != "auto":
|
if _prov and _prov != "auto":
|
||||||
os.environ[_prov_env] = _prov
|
os.environ[_env_map["provider"]] = _prov
|
||||||
if _model:
|
if _model:
|
||||||
os.environ[_model_env] = _model
|
os.environ[_env_map["model"]] = _model
|
||||||
|
if _base_url:
|
||||||
|
os.environ[_env_map["base_url"]] = _base_url
|
||||||
|
if _api_key:
|
||||||
|
os.environ[_env_map["api_key"]] = _api_key
|
||||||
_agent_cfg = _cfg.get("agent", {})
|
_agent_cfg = _cfg.get("agent", {})
|
||||||
if _agent_cfg and isinstance(_agent_cfg, dict):
|
if _agent_cfg and isinstance(_agent_cfg, dict):
|
||||||
if "max_turns" in _agent_cfg:
|
if "max_turns" in _agent_cfg:
|
||||||
|
|||||||
@@ -150,30 +150,44 @@ DEFAULT_CONFIG = {
|
|||||||
"vision": {
|
"vision": {
|
||||||
"provider": "auto", # auto | openrouter | nous | codex | custom
|
"provider": "auto", # auto | openrouter | nous | codex | custom
|
||||||
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
|
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
|
||||||
|
"base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider)
|
||||||
|
"api_key": "", # API key for base_url (falls back to OPENAI_API_KEY)
|
||||||
},
|
},
|
||||||
"web_extract": {
|
"web_extract": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
"compression": {
|
"compression": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
"session_search": {
|
"session_search": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
"skills_hub": {
|
"skills_hub": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
"mcp": {
|
"mcp": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
"flush_memories": {
|
"flush_memories": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
@@ -243,6 +257,8 @@ DEFAULT_CONFIG = {
|
|||||||
"delegation": {
|
"delegation": {
|
||||||
"model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model)
|
"model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model)
|
||||||
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
|
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
|
||||||
|
"base_url": "", # direct OpenAI-compatible endpoint for subagents
|
||||||
|
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||||
},
|
},
|
||||||
|
|
||||||
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
||||||
|
|||||||
@@ -24,9 +24,11 @@ def _clean_env(monkeypatch):
|
|||||||
for key in (
|
for key in (
|
||||||
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
|
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
|
||||||
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
|
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
|
||||||
# Per-task provider/model overrides
|
# Per-task provider/model/direct-endpoint overrides
|
||||||
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
|
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
|
||||||
|
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
|
||||||
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
|
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||||
|
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||||
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
|
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
|
||||||
):
|
):
|
||||||
monkeypatch.delenv(key, raising=False)
|
monkeypatch.delenv(key, raising=False)
|
||||||
@@ -142,6 +144,17 @@ class TestGetTextAuxiliaryClient:
|
|||||||
call_kwargs = mock_openai.call_args
|
call_kwargs = mock_openai.call_args
|
||||||
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
|
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
|
||||||
|
|
||||||
|
def test_task_direct_endpoint_override(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||||
|
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
|
||||||
|
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key")
|
||||||
|
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
|
||||||
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||||
|
client, model = get_text_auxiliary_client("web_extract")
|
||||||
|
assert model == "task-model"
|
||||||
|
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
|
||||||
|
assert mock_openai.call_args.kwargs["api_key"] == "task-key"
|
||||||
|
|
||||||
def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
|
def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
|
||||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||||
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||||
@@ -390,6 +403,24 @@ class TestTaskSpecificOverrides:
|
|||||||
client, model = get_text_auxiliary_client("web_extract")
|
client, model = get_text_auxiliary_client("web_extract")
|
||||||
assert model == "google/gemini-3-flash-preview"
|
assert model == "google/gemini-3-flash-preview"
|
||||||
|
|
||||||
|
def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
|
||||||
|
hermes_home = tmp_path / "hermes"
|
||||||
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
||||||
|
(hermes_home / "config.yaml").write_text(
|
||||||
|
"""auxiliary:
|
||||||
|
web_extract:
|
||||||
|
base_url: http://localhost:3456/v1
|
||||||
|
api_key: config-key
|
||||||
|
model: config-model
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||||
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||||
|
client, model = get_text_auxiliary_client("web_extract")
|
||||||
|
assert model == "config-model"
|
||||||
|
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:3456/v1"
|
||||||
|
assert mock_openai.call_args.kwargs["api_key"] == "config-key"
|
||||||
|
|
||||||
def test_task_without_override_uses_auto(self, monkeypatch):
|
def test_task_without_override_uses_auto(self, monkeypatch):
|
||||||
"""A task with no provider env var falls through to auto chain."""
|
"""A task with no provider env var falls through to auto chain."""
|
||||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||||
|
|||||||
@@ -26,6 +26,10 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
|
|||||||
(fake_home / "memories").mkdir()
|
(fake_home / "memories").mkdir()
|
||||||
(fake_home / "skills").mkdir()
|
(fake_home / "skills").mkdir()
|
||||||
monkeypatch.setenv("HERMES_HOME", str(fake_home))
|
monkeypatch.setenv("HERMES_HOME", str(fake_home))
|
||||||
|
# Tests should not inherit the agent's current gateway/messaging surface.
|
||||||
|
# Individual tests that need gateway behavior set these explicitly.
|
||||||
|
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
|
||||||
|
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
|
|||||||
@@ -25,7 +25,9 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
|
|||||||
# Clear env vars
|
# Clear env vars
|
||||||
for key in (
|
for key in (
|
||||||
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
|
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
|
||||||
|
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
|
||||||
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
|
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||||
|
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||||
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
|
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
|
||||||
):
|
):
|
||||||
monkeypatch.delenv(key, raising=False)
|
monkeypatch.delenv(key, raising=False)
|
||||||
@@ -47,19 +49,35 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
|
|||||||
auxiliary_cfg = config_dict.get("auxiliary", {})
|
auxiliary_cfg = config_dict.get("auxiliary", {})
|
||||||
if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
|
if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
|
||||||
aux_task_env = {
|
aux_task_env = {
|
||||||
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
|
"vision": {
|
||||||
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
|
"provider": "AUXILIARY_VISION_PROVIDER",
|
||||||
|
"model": "AUXILIARY_VISION_MODEL",
|
||||||
|
"base_url": "AUXILIARY_VISION_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_VISION_API_KEY",
|
||||||
|
},
|
||||||
|
"web_extract": {
|
||||||
|
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
|
||||||
|
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||||
|
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for task_key, (prov_env, model_env) in aux_task_env.items():
|
for task_key, env_map in aux_task_env.items():
|
||||||
task_cfg = auxiliary_cfg.get(task_key, {})
|
task_cfg = auxiliary_cfg.get(task_key, {})
|
||||||
if not isinstance(task_cfg, dict):
|
if not isinstance(task_cfg, dict):
|
||||||
continue
|
continue
|
||||||
prov = str(task_cfg.get("provider", "")).strip()
|
prov = str(task_cfg.get("provider", "")).strip()
|
||||||
model = str(task_cfg.get("model", "")).strip()
|
model = str(task_cfg.get("model", "")).strip()
|
||||||
|
base_url = str(task_cfg.get("base_url", "")).strip()
|
||||||
|
api_key = str(task_cfg.get("api_key", "")).strip()
|
||||||
if prov and prov != "auto":
|
if prov and prov != "auto":
|
||||||
os.environ[prov_env] = prov
|
os.environ[env_map["provider"]] = prov
|
||||||
if model:
|
if model:
|
||||||
os.environ[model_env] = model
|
os.environ[env_map["model"]] = model
|
||||||
|
if base_url:
|
||||||
|
os.environ[env_map["base_url"]] = base_url
|
||||||
|
if api_key:
|
||||||
|
os.environ[env_map["api_key"]] = api_key
|
||||||
|
|
||||||
|
|
||||||
# ── Config bridging tests ────────────────────────────────────────────────────
|
# ── Config bridging tests ────────────────────────────────────────────────────
|
||||||
@@ -101,6 +119,21 @@ class TestAuxiliaryConfigBridge:
|
|||||||
assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
|
assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
|
||||||
assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
|
assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
|
||||||
|
|
||||||
|
def test_direct_endpoint_bridged(self, monkeypatch):
|
||||||
|
config = {
|
||||||
|
"auxiliary": {
|
||||||
|
"vision": {
|
||||||
|
"base_url": "http://localhost:1234/v1",
|
||||||
|
"api_key": "local-key",
|
||||||
|
"model": "qwen2.5-vl",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_run_auxiliary_bridge(config, monkeypatch)
|
||||||
|
assert os.environ.get("AUXILIARY_VISION_BASE_URL") == "http://localhost:1234/v1"
|
||||||
|
assert os.environ.get("AUXILIARY_VISION_API_KEY") == "local-key"
|
||||||
|
assert os.environ.get("AUXILIARY_VISION_MODEL") == "qwen2.5-vl"
|
||||||
|
|
||||||
def test_compression_provider_bridged(self, monkeypatch):
|
def test_compression_provider_bridged(self, monkeypatch):
|
||||||
config = {
|
config = {
|
||||||
"compression": {
|
"compression": {
|
||||||
@@ -200,8 +233,12 @@ class TestGatewayBridgeCodeParity:
|
|||||||
# Check for key patterns that indicate the bridge is present
|
# Check for key patterns that indicate the bridge is present
|
||||||
assert "AUXILIARY_VISION_PROVIDER" in content
|
assert "AUXILIARY_VISION_PROVIDER" in content
|
||||||
assert "AUXILIARY_VISION_MODEL" in content
|
assert "AUXILIARY_VISION_MODEL" in content
|
||||||
|
assert "AUXILIARY_VISION_BASE_URL" in content
|
||||||
|
assert "AUXILIARY_VISION_API_KEY" in content
|
||||||
assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
|
assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
|
||||||
assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
|
assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
|
||||||
|
assert "AUXILIARY_WEB_EXTRACT_BASE_URL" in content
|
||||||
|
assert "AUXILIARY_WEB_EXTRACT_API_KEY" in content
|
||||||
|
|
||||||
def test_gateway_has_compression_provider(self):
|
def test_gateway_has_compression_provider(self):
|
||||||
"""Gateway must bridge compression.summary_provider."""
|
"""Gateway must bridge compression.summary_provider."""
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ Run with: python -m pytest tests/test_delegate.py -v
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
@@ -462,6 +463,32 @@ class TestDelegationCredentialResolution(unittest.TestCase):
|
|||||||
self.assertEqual(creds["api_mode"], "chat_completions")
|
self.assertEqual(creds["api_mode"], "chat_completions")
|
||||||
mock_resolve.assert_called_once_with(requested="openrouter")
|
mock_resolve.assert_called_once_with(requested="openrouter")
|
||||||
|
|
||||||
|
def test_direct_endpoint_uses_configured_base_url_and_api_key(self):
|
||||||
|
parent = _make_mock_parent(depth=0)
|
||||||
|
cfg = {
|
||||||
|
"model": "qwen2.5-coder",
|
||||||
|
"provider": "openrouter",
|
||||||
|
"base_url": "http://localhost:1234/v1",
|
||||||
|
"api_key": "local-key",
|
||||||
|
}
|
||||||
|
creds = _resolve_delegation_credentials(cfg, parent)
|
||||||
|
self.assertEqual(creds["model"], "qwen2.5-coder")
|
||||||
|
self.assertEqual(creds["provider"], "custom")
|
||||||
|
self.assertEqual(creds["base_url"], "http://localhost:1234/v1")
|
||||||
|
self.assertEqual(creds["api_key"], "local-key")
|
||||||
|
self.assertEqual(creds["api_mode"], "chat_completions")
|
||||||
|
|
||||||
|
def test_direct_endpoint_falls_back_to_openai_api_key_env(self):
|
||||||
|
parent = _make_mock_parent(depth=0)
|
||||||
|
cfg = {
|
||||||
|
"model": "qwen2.5-coder",
|
||||||
|
"base_url": "http://localhost:1234/v1",
|
||||||
|
}
|
||||||
|
with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False):
|
||||||
|
creds = _resolve_delegation_credentials(cfg, parent)
|
||||||
|
self.assertEqual(creds["api_key"], "env-openai-key")
|
||||||
|
self.assertEqual(creds["provider"], "custom")
|
||||||
|
|
||||||
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
|
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
|
||||||
def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
|
def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
|
||||||
"""Nous provider resolves Nous Portal base_url and api_key."""
|
"""Nous provider resolves Nous Portal base_url and api_key."""
|
||||||
@@ -589,6 +616,40 @@ class TestDelegationProviderIntegration(unittest.TestCase):
|
|||||||
self.assertNotEqual(kwargs["base_url"], parent.base_url)
|
self.assertNotEqual(kwargs["base_url"], parent.base_url)
|
||||||
self.assertNotEqual(kwargs["api_key"], parent.api_key)
|
self.assertNotEqual(kwargs["api_key"], parent.api_key)
|
||||||
|
|
||||||
|
@patch("tools.delegate_tool._load_config")
|
||||||
|
@patch("tools.delegate_tool._resolve_delegation_credentials")
|
||||||
|
def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
|
||||||
|
mock_cfg.return_value = {
|
||||||
|
"max_iterations": 45,
|
||||||
|
"model": "qwen2.5-coder",
|
||||||
|
"base_url": "http://localhost:1234/v1",
|
||||||
|
"api_key": "local-key",
|
||||||
|
}
|
||||||
|
mock_creds.return_value = {
|
||||||
|
"model": "qwen2.5-coder",
|
||||||
|
"provider": "custom",
|
||||||
|
"base_url": "http://localhost:1234/v1",
|
||||||
|
"api_key": "local-key",
|
||||||
|
"api_mode": "chat_completions",
|
||||||
|
}
|
||||||
|
parent = _make_mock_parent(depth=0)
|
||||||
|
|
||||||
|
with patch("run_agent.AIAgent") as MockAgent:
|
||||||
|
mock_child = MagicMock()
|
||||||
|
mock_child.run_conversation.return_value = {
|
||||||
|
"final_response": "done", "completed": True, "api_calls": 1
|
||||||
|
}
|
||||||
|
MockAgent.return_value = mock_child
|
||||||
|
|
||||||
|
delegate_task(goal="Direct endpoint test", parent_agent=parent)
|
||||||
|
|
||||||
|
_, kwargs = MockAgent.call_args
|
||||||
|
self.assertEqual(kwargs["model"], "qwen2.5-coder")
|
||||||
|
self.assertEqual(kwargs["provider"], "custom")
|
||||||
|
self.assertEqual(kwargs["base_url"], "http://localhost:1234/v1")
|
||||||
|
self.assertEqual(kwargs["api_key"], "local-key")
|
||||||
|
self.assertEqual(kwargs["api_mode"], "chat_completions")
|
||||||
|
|
||||||
@patch("tools.delegate_tool._load_config")
|
@patch("tools.delegate_tool._load_config")
|
||||||
@patch("tools.delegate_tool._resolve_delegation_credentials")
|
@patch("tools.delegate_tool._resolve_delegation_credentials")
|
||||||
def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):
|
def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):
|
||||||
|
|||||||
@@ -540,18 +540,52 @@ def delegate_task(
|
|||||||
def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
|
def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
|
||||||
"""Resolve credentials for subagent delegation.
|
"""Resolve credentials for subagent delegation.
|
||||||
|
|
||||||
If ``delegation.provider`` is configured, resolves the full credential
|
If ``delegation.base_url`` is configured, subagents use that direct
|
||||||
bundle (base_url, api_key, api_mode, provider) via the runtime provider
|
OpenAI-compatible endpoint. Otherwise, if ``delegation.provider`` is
|
||||||
system — the same path used by CLI/gateway startup. This lets subagents
|
configured, the full credential bundle (base_url, api_key, api_mode,
|
||||||
run on a completely different provider:model pair.
|
provider) is resolved via the runtime provider system — the same path used
|
||||||
|
by CLI/gateway startup. This lets subagents run on a completely different
|
||||||
|
provider:model pair.
|
||||||
|
|
||||||
If no provider is configured, returns None values so the child inherits
|
If neither base_url nor provider is configured, returns None values so the
|
||||||
everything from the parent agent.
|
child inherits everything from the parent agent.
|
||||||
|
|
||||||
Raises ValueError with a user-friendly message on credential failure.
|
Raises ValueError with a user-friendly message on credential failure.
|
||||||
"""
|
"""
|
||||||
configured_model = cfg.get("model") or None
|
configured_model = str(cfg.get("model") or "").strip() or None
|
||||||
configured_provider = cfg.get("provider") or None
|
configured_provider = str(cfg.get("provider") or "").strip() or None
|
||||||
|
configured_base_url = str(cfg.get("base_url") or "").strip() or None
|
||||||
|
configured_api_key = str(cfg.get("api_key") or "").strip() or None
|
||||||
|
|
||||||
|
if configured_base_url:
|
||||||
|
api_key = (
|
||||||
|
configured_api_key
|
||||||
|
or os.getenv("OPENAI_API_KEY", "").strip()
|
||||||
|
or os.getenv("OPENROUTER_API_KEY", "").strip()
|
||||||
|
)
|
||||||
|
if not api_key:
|
||||||
|
raise ValueError(
|
||||||
|
"Delegation base_url is configured but no API key was found. "
|
||||||
|
"Set delegation.api_key or OPENAI_API_KEY."
|
||||||
|
)
|
||||||
|
|
||||||
|
base_lower = configured_base_url.lower()
|
||||||
|
provider = "custom"
|
||||||
|
api_mode = "chat_completions"
|
||||||
|
if "chatgpt.com/backend-api/codex" in base_lower:
|
||||||
|
provider = "openai-codex"
|
||||||
|
api_mode = "codex_responses"
|
||||||
|
elif "api.anthropic.com" in base_lower:
|
||||||
|
provider = "anthropic"
|
||||||
|
api_mode = "anthropic_messages"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"model": configured_model,
|
||||||
|
"provider": provider,
|
||||||
|
"base_url": configured_base_url,
|
||||||
|
"api_key": api_key,
|
||||||
|
"api_mode": api_mode,
|
||||||
|
}
|
||||||
|
|
||||||
if not configured_provider:
|
if not configured_provider:
|
||||||
# No provider override — child inherits everything from parent
|
# No provider override — child inherits everything from parent
|
||||||
@@ -570,7 +604,8 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Cannot resolve delegation provider '{configured_provider}': {exc}. "
|
f"Cannot resolve delegation provider '{configured_provider}': {exc}. "
|
||||||
f"Check that the provider is configured (API key set, valid provider name). "
|
f"Check that the provider is configured (API key set, valid provider name), "
|
||||||
|
f"or set delegation.base_url/delegation.api_key for a direct endpoint. "
|
||||||
f"Available providers: openrouter, nous, zai, kimi-coding, minimax."
|
f"Available providers: openrouter, nous, zai, kimi-coding, minimax."
|
||||||
) from exc
|
) from exc
|
||||||
|
|
||||||
|
|||||||
@@ -173,6 +173,21 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
|||||||
| `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) |
|
| `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) |
|
||||||
| `CONTEXT_COMPRESSION_MODEL` | Model for summaries |
|
| `CONTEXT_COMPRESSION_MODEL` | Model for summaries |
|
||||||
|
|
||||||
|
## Auxiliary Task Overrides
|
||||||
|
|
||||||
|
| Variable | Description |
|
||||||
|
|----------|-------------|
|
||||||
|
| `AUXILIARY_VISION_PROVIDER` | Override provider for vision tasks |
|
||||||
|
| `AUXILIARY_VISION_MODEL` | Override model for vision tasks |
|
||||||
|
| `AUXILIARY_VISION_BASE_URL` | Direct OpenAI-compatible endpoint for vision tasks |
|
||||||
|
| `AUXILIARY_VISION_API_KEY` | API key paired with `AUXILIARY_VISION_BASE_URL` |
|
||||||
|
| `AUXILIARY_WEB_EXTRACT_PROVIDER` | Override provider for web extraction/summarization |
|
||||||
|
| `AUXILIARY_WEB_EXTRACT_MODEL` | Override model for web extraction/summarization |
|
||||||
|
| `AUXILIARY_WEB_EXTRACT_BASE_URL` | Direct OpenAI-compatible endpoint for web extraction/summarization |
|
||||||
|
| `AUXILIARY_WEB_EXTRACT_API_KEY` | API key paired with `AUXILIARY_WEB_EXTRACT_BASE_URL` |
|
||||||
|
| `CONTEXT_COMPRESSION_PROVIDER` | Override provider for context compression summaries |
|
||||||
|
| `CONTEXT_COMPRESSION_MODEL` | Override model for context compression summaries |
|
||||||
|
|
||||||
## Provider Routing (config.yaml only)
|
## Provider Routing (config.yaml only)
|
||||||
|
|
||||||
These go in `~/.hermes/config.yaml` under the `provider_routing` section:
|
These go in `~/.hermes/config.yaml` under the `provider_routing` section:
|
||||||
|
|||||||
@@ -563,11 +563,15 @@ auxiliary:
|
|||||||
vision:
|
vision:
|
||||||
provider: "auto" # "auto", "openrouter", "nous", "main"
|
provider: "auto" # "auto", "openrouter", "nous", "main"
|
||||||
model: "" # e.g. "openai/gpt-4o", "google/gemini-2.5-flash"
|
model: "" # e.g. "openai/gpt-4o", "google/gemini-2.5-flash"
|
||||||
|
base_url: "" # direct OpenAI-compatible endpoint (takes precedence over provider)
|
||||||
|
api_key: "" # API key for base_url (falls back to OPENAI_API_KEY)
|
||||||
|
|
||||||
# Web page summarization + browser page text extraction
|
# Web page summarization + browser page text extraction
|
||||||
web_extract:
|
web_extract:
|
||||||
provider: "auto"
|
provider: "auto"
|
||||||
model: "" # e.g. "google/gemini-2.5-flash"
|
model: "" # e.g. "google/gemini-2.5-flash"
|
||||||
|
base_url: ""
|
||||||
|
api_key: ""
|
||||||
```
|
```
|
||||||
|
|
||||||
### Changing the Vision Model
|
### Changing the Vision Model
|
||||||
@@ -598,6 +602,17 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o
|
|||||||
|
|
||||||
### Common Setups
|
### Common Setups
|
||||||
|
|
||||||
|
**Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs):
|
||||||
|
```yaml
|
||||||
|
auxiliary:
|
||||||
|
vision:
|
||||||
|
base_url: "http://localhost:1234/v1"
|
||||||
|
api_key: "local-key"
|
||||||
|
model: "qwen2.5-vl"
|
||||||
|
```
|
||||||
|
|
||||||
|
`base_url` takes precedence over `provider`, so this is the most explicit way to route an auxiliary task to a specific endpoint.
|
||||||
|
|
||||||
**Using OpenAI API key for vision:**
|
**Using OpenAI API key for vision:**
|
||||||
```yaml
|
```yaml
|
||||||
# In ~/.hermes/.env:
|
# In ~/.hermes/.env:
|
||||||
@@ -807,13 +822,17 @@ delegation:
|
|||||||
- web
|
- web
|
||||||
# model: "google/gemini-3-flash-preview" # Override model (empty = inherit parent)
|
# model: "google/gemini-3-flash-preview" # Override model (empty = inherit parent)
|
||||||
# provider: "openrouter" # Override provider (empty = inherit parent)
|
# provider: "openrouter" # Override provider (empty = inherit parent)
|
||||||
|
# base_url: "http://localhost:1234/v1" # Direct OpenAI-compatible endpoint (takes precedence over provider)
|
||||||
|
# api_key: "local-key" # API key for base_url (falls back to OPENAI_API_KEY, then OPENROUTER_API_KEY)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model.
|
**Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model.
|
||||||
|
|
||||||
|
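**Direct endpoint override:** To route subagents to a custom OpenAI-compatible endpoint, set `delegation.base_url`, `delegation.api_key`, and `delegation.model`. Subagents then call that endpoint directly, and `delegation.base_url` takes precedence over `delegation.provider`. If `delegation.api_key` is empty, the key falls back to `OPENAI_API_KEY`, then `OPENROUTER_API_KEY`; if none is set, delegation fails with an error.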
|
||||||
|
|
||||||
The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed.
|
The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed.
|
||||||
|
|
||||||
**Precedence:** `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
|
**Precedence:** `delegation.base_url` in config → `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `base_url` or `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
|
||||||
|
|
||||||
## Clarify
|
## Clarify
|
||||||
|
|
||||||
|
|||||||
@@ -209,6 +209,14 @@ Delegation has a **depth limit of 2** — a parent (depth 0) can spawn children
|
|||||||
delegation:
|
delegation:
|
||||||
max_iterations: 50 # Max turns per child (default: 50)
|
max_iterations: 50 # Max turns per child (default: 50)
|
||||||
default_toolsets: ["terminal", "file", "web"] # Default toolsets
|
default_toolsets: ["terminal", "file", "web"] # Default toolsets
|
||||||
|
model: "google/gemini-3-flash-preview" # Optional provider/model override
|
||||||
|
provider: "openrouter" # Optional built-in provider
|
||||||
|
|
||||||
|
# Alternatively, replace the delegation block above with a direct custom endpoint (use one block or the other, not both):
|
||||||
|
delegation:
|
||||||
|
model: "qwen2.5-coder"
|
||||||
|
base_url: "http://localhost:1234/v1"
|
||||||
|
api_key: "local-key"
|
||||||
```
|
```
|
||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
|
|||||||
Reference in New Issue
Block a user