diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index ff542a11345..cf740bc89e3 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -57,6 +57,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
     "minimax": "MiniMax-M2.5-highspeed",
     "minimax-cn": "MiniMax-M2.5-highspeed",
     "anthropic": "claude-haiku-4-5-20251001",
+    "ai-gateway": "google/gemini-3-flash",
 }
 
 # OpenRouter app attribution headers
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 1863f0bb8d3..0ece3521d99 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -155,6 +155,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
         api_key_env_vars=("DEEPSEEK_API_KEY",),
         base_url_env_var="DEEPSEEK_BASE_URL",
     ),
+    "ai-gateway": ProviderConfig(
+        id="ai-gateway",
+        name="AI Gateway",
+        auth_type="api_key",
+        inference_base_url="https://ai-gateway.vercel.sh/v1",
+        api_key_env_vars=("AI_GATEWAY_API_KEY",),
+        base_url_env_var="AI_GATEWAY_BASE_URL",
+    ),
 }
 
 
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 13373afa9bb..c01168556ea 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -8,6 +8,7 @@ Add, remove, or reorder entries here — both `hermes setup` and
 from __future__ import annotations
 
 import json
+import os
 import urllib.request
 import urllib.error
 from difflib import get_close_matches
@@ -82,6 +83,20 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "deepseek-chat",
         "deepseek-reasoner",
     ],
+    "ai-gateway": [
+        "anthropic/claude-opus-4.6",
+        "anthropic/claude-sonnet-4.6",
+        "anthropic/claude-sonnet-4.5",
+        "anthropic/claude-haiku-4.5",
+        "openai/gpt-5",
+        "openai/gpt-4.1",
+        "openai/gpt-4.1-mini",
+        "google/gemini-3-pro-preview",
+        "google/gemini-3-flash",
+        "google/gemini-2.5-pro",
+        "google/gemini-2.5-flash",
+        "deepseek/deepseek-v3.2",
+    ],
 }
 
 _PROVIDER_LABELS = {
@@ -94,6 +109,7 @@ _PROVIDER_LABELS = {
     "minimax-cn": "MiniMax (China)",
     "anthropic": "Anthropic",
     "deepseek": "DeepSeek",
+    "ai-gateway": "AI Gateway",
     "custom": "Custom endpoint",
 }
 
@@ -109,6 +125,9 @@ _PROVIDER_ALIASES = {
     "claude": "anthropic",
     "claude-code": "anthropic",
     "deep-seek": "deepseek",
+    "aigateway": "ai-gateway",
+    "vercel": "ai-gateway",
+    "vercel-ai-gateway": "ai-gateway",
 }
 
 
@@ -143,6 +162,7 @@ def list_available_providers() -> list[dict[str, str]]:
     _PROVIDER_ORDER = [
         "openrouter", "nous", "openai-codex",
         "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
+        "ai-gateway",
     ]
     # Build reverse alias map
     aliases_for: dict[str, list[str]] = {}
@@ -372,6 +392,10 @@ def provider_model_ids(provider: Optional[str]) -> list[str]:
         live = _fetch_anthropic_models()
         if live:
             return live
+    if normalized == "ai-gateway":
+        live = _fetch_ai_gateway_models()
+        if live:
+            return live
     return list(_PROVIDER_MODELS.get(normalized, []))
 
 
@@ -475,6 +499,62 @@ def probe_api_models(
     }
 
 
+def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]:
+    """Fetch tool-capable language model IDs from AI Gateway.
+
+    Reads ``AI_GATEWAY_API_KEY`` (required) and ``AI_GATEWAY_BASE_URL``
+    (optional override) from the environment and queries ``<base>/models``.
+
+    Args:
+        timeout: Timeout in seconds passed to ``urllib.request.urlopen``.
+
+    Returns:
+        IDs of entries whose ``type`` is ``"language"`` and whose ``tags``
+        contain ``"tool-use"``; ``None`` when no API key is set or the
+        request/response is unusable, so the caller can fall back to the
+        static model list.
+    """
+    api_key = os.getenv("AI_GATEWAY_API_KEY", "").strip()
+    if not api_key:
+        return None
+    base_url = os.getenv("AI_GATEWAY_BASE_URL", "").strip()
+    if not base_url:
+        from hermes_constants import AI_GATEWAY_BASE_URL
+        base_url = AI_GATEWAY_BASE_URL
+
+    url = base_url.rstrip("/") + "/models"
+    headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"}
+    req = urllib.request.Request(url, headers=headers)
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            data = json.loads(resp.read().decode())
+    except Exception:
+        # Best-effort lookup: network or decoding failures are not fatal.
+        return None
+
+    entries = data.get("data") if isinstance(data, dict) else None
+    if not isinstance(entries, list):
+        return None
+
+    # Validate each entry on its own so one malformed record does not
+    # discard the whole catalog.
+    models: list[str] = []
+    for entry in entries:
+        if not isinstance(entry, dict):
+            continue
+        model_id = entry.get("id")
+        tags = entry.get("tags")
+        if (
+            isinstance(model_id, str)
+            and model_id
+            and entry.get("type") == "language"
+            and isinstance(tags, list)
+            and "tool-use" in tags
+        ):
+            models.append(model_id)
+    return models
+
+
 def fetch_api_models(
     api_key: Optional[str],
     base_url: Optional[str],
diff --git a/hermes_constants.py b/hermes_constants.py
index a81af04d3da..6a11fb37af0 100644
--- a/hermes_constants.py
+++ b/hermes_constants.py
@@ -8,5 +8,9 @@ OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
 OPENROUTER_MODELS_URL = f"{OPENROUTER_BASE_URL}/models"
 OPENROUTER_CHAT_URL = f"{OPENROUTER_BASE_URL}/chat/completions"
 
+AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh/v1"
+AI_GATEWAY_MODELS_URL = f"{AI_GATEWAY_BASE_URL}/models"
+AI_GATEWAY_CHAT_URL = f"{AI_GATEWAY_BASE_URL}/chat/completions"
+
 NOUS_API_BASE_URL = "https://inference-api.nousresearch.com/v1"
 NOUS_API_CHAT_URL = f"{NOUS_API_BASE_URL}/chat/completions"
diff --git a/run_agent.py b/run_agent.py
index 6ae8170db3b..afee105e8e3 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3523,6 +3523,8 @@ class AIAgent:
         base_url = (self.base_url or "").lower()
         if "nousresearch" in base_url:
             return True
+        if "ai-gateway.vercel.sh" in base_url:
+            return True
         if "openrouter" not in base_url:
             return False
         if "api.mistral.ai" in base_url:
diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py
index dc976b8f176..e6d885604ea 100644
--- a/tests/test_provider_parity.py
+++ b/tests/test_provider_parity.py
@@ -137,6 +137,40 @@ class TestBuildApiKwargsOpenRouter:
         assert "codex_reasoning_items" in messages[1]
 
 
+class TestBuildApiKwargsAIGateway:
+    def test_uses_chat_completions_format(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
+        messages = [{"role": "user", "content": "hi"}]
+        kwargs = agent._build_api_kwargs(messages)
+        assert "messages" in kwargs
+        assert "model" in kwargs
+        assert kwargs["messages"][-1]["content"] == "hi"
+
+    def test_no_responses_api_fields(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
+        messages = [{"role": "user", "content": "hi"}]
+        kwargs = agent._build_api_kwargs(messages)
+        assert "input" not in kwargs
+        assert "instructions" not in kwargs
+        assert "store" not in kwargs
+
+    def test_includes_reasoning_in_extra_body(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
+        messages = [{"role": "user", "content": "hi"}]
+        kwargs = agent._build_api_kwargs(messages)
+        extra = kwargs.get("extra_body", {})
+        assert "reasoning" in extra
+        assert extra["reasoning"]["enabled"] is True
+
+    def test_includes_tools(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
+        messages = [{"role": "user", "content": "hi"}]
+        kwargs = agent._build_api_kwargs(messages)
+        assert "tools" in kwargs
+        tool_names = [t["function"]["name"] for t in kwargs["tools"]]
+        assert "web_search" in tool_names
+
+
 class TestBuildApiKwargsNousPortal:
     def test_includes_nous_product_tags(self, monkeypatch):
         agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py
index 52d4a1d4fb1..c02fb3cdc3d 100644
--- a/tests/test_runtime_provider_resolution.py
+++ b/tests/test_runtime_provider_resolution.py
@@ -26,6 +26,20 @@ def test_resolve_runtime_provider_codex(monkeypatch):
     assert resolved["requested_provider"] == "openai-codex"
 
 
+def test_resolve_runtime_provider_ai_gateway(monkeypatch):
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "ai-gateway")
+    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
+    monkeypatch.setenv("AI_GATEWAY_API_KEY", "test-ai-gw-key")
+
+    resolved = rp.resolve_runtime_provider(requested="ai-gateway")
+
+    assert resolved["provider"] == "ai-gateway"
+    assert resolved["api_mode"] == "chat_completions"
+    assert resolved["base_url"] == "https://ai-gateway.vercel.sh/v1"
+    assert resolved["api_key"] == "test-ai-gw-key"
+    assert resolved["requested_provider"] == "ai-gateway"
+
+
 def test_resolve_runtime_provider_openrouter_explicit(monkeypatch):
     monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
     monkeypatch.setattr(rp, "_get_model_config", lambda: {})
diff --git a/website/docs/developer-guide/provider-runtime.md b/website/docs/developer-guide/provider-runtime.md
index bf3c95090b3..faa84d5f6d5 100644
--- a/website/docs/developer-guide/provider-runtime.md
+++ b/website/docs/developer-guide/provider-runtime.md
@@ -37,6 +37,7 @@ That ordering matters because Hermes treats the saved model/provider choice as t
 
 Current provider families include:
 
+- AI Gateway (Vercel)
 - OpenRouter
 - Nous Portal
 - OpenAI Codex
@@ -68,11 +69,21 @@ This resolver is the main reason Hermes can share auth/runtime logic between:
 - ACP editor sessions
 - auxiliary model tasks
 
-## OpenRouter vs custom OpenAI-compatible base URLs
+## AI Gateway
 
-Hermes contains logic to avoid leaking the wrong API key to a custom endpoint when both `OPENROUTER_API_KEY` and `OPENAI_API_KEY` exist.
+Set `AI_GATEWAY_API_KEY` in `~/.hermes/.env` and run with `--provider ai-gateway`. Hermes fetches available models from the gateway's `/models` endpoint, filtering to language models with tool-use support.
 
-It also distinguishes between:
+## OpenRouter, AI Gateway, and custom OpenAI-compatible base URLs
+
+Hermes contains logic to avoid leaking the wrong API key to a custom endpoint when multiple provider keys exist (e.g. `OPENROUTER_API_KEY`, `AI_GATEWAY_API_KEY`, and `OPENAI_API_KEY`).
+
+Each provider's API key is scoped to its own base URL:
+
+- `OPENROUTER_API_KEY` is only sent to `openrouter.ai` endpoints
+- `AI_GATEWAY_API_KEY` is only sent to the AI Gateway base URL (`ai-gateway.vercel.sh` by default, or `AI_GATEWAY_BASE_URL` when that override is set)
+- `OPENAI_API_KEY` is used for custom endpoints and as a fallback
+
+Hermes also distinguishes between:
 
 - a real custom endpoint selected by the user
 - the OpenRouter fallback path used when no custom endpoint is configured
@@ -80,7 +91,7 @@ It also distinguishes between:
 That distinction is especially important for:
 
 - local model servers
-- non-OpenRouter OpenAI-compatible APIs
+- non-OpenRouter/non-AI Gateway OpenAI-compatible APIs
 - switching providers without re-running setup
 - config-saved custom endpoints that should keep working even when `OPENAI_BASE_URL` is not exported in the current shell
 
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index daaad87bc7d..d10d66c1fa7 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -14,6 +14,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 |----------|-------------|
 | `OPENROUTER_API_KEY` | OpenRouter API key (recommended for flexibility) |
 | `OPENROUTER_BASE_URL` | Override the OpenRouter-compatible base URL |
+| `AI_GATEWAY_API_KEY` | Vercel AI Gateway API key ([ai-gateway.vercel.sh](https://ai-gateway.vercel.sh)) |
+| `AI_GATEWAY_BASE_URL` | Override AI Gateway base URL (default: `https://ai-gateway.vercel.sh/v1`) |
 | `OPENAI_API_KEY` | API key for custom OpenAI-compatible endpoints (used with `OPENAI_BASE_URL`) |
 | `OPENAI_BASE_URL` | Base URL for custom endpoint (VLLM, SGLang, etc.) |
 | `GLM_API_KEY` | z.ai / ZhipuAI GLM API key ([z.ai](https://z.ai)) |
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index f55a65181d6..abaabbad49a 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -65,6 +65,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
 | **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) |
 | **Anthropic** | `hermes model` (Claude Pro/Max via Claude Code auth, Anthropic API key, or manual setup-token) |
 | **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` |
+| **AI Gateway** | `AI_GATEWAY_API_KEY` in `~/.hermes/.env` (provider: `ai-gateway`) |
 | **z.ai / GLM** | `GLM_API_KEY` in `~/.hermes/.env` (provider: `zai`) |
 | **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) |
 | **MiniMax** | `MINIMAX_API_KEY` in `~/.hermes/.env` (provider: `minimax`) |