diff --git a/.gitignore b/.gitignore index 72f3bd17f7..6ae86265a6 100644 --- a/.gitignore +++ b/.gitignore @@ -69,3 +69,4 @@ mini-swe-agent/ .nix-stamps/ result website/static/api/skills-index.json +models-dev-upstream/ diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 42cd5eceb4..65378df0c3 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -82,6 +82,8 @@ _PROVIDER_ALIASES = { "moonshot": "kimi-coding", "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn", + "gmi-cloud": "gmi", + "gmicloud": "gmi", "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", "claude": "anthropic", @@ -155,6 +157,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "kimi-coding": "kimi-k2-turbo-preview", "stepfun": "step-3.5-flash", "kimi-coding-cn": "kimi-k2-turbo-preview", + "gmi": "anthropic/claude-opus-4.6", "minimax": "MiniMax-M2.7", "minimax-cn": "MiniMax-M2.7", "anthropic": "claude-haiku-4-5-20251001", @@ -2558,12 +2561,19 @@ def _is_openrouter_client(client: Any) -> bool: return False +def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool: + """Best-effort check for cached clients that accept ``vendor/model`` IDs.""" + if _is_openrouter_client(client): + return True + return bool(cached_default and "/" in cached_default) + + def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]: - """Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients. + """Keep slash-bearing model IDs only for cached clients that support them. Mirrors the guard in resolve_provider_client() which is skipped on cache hits. """ - if model and "/" in model and not _is_openrouter_client(client): + if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default): return cached_default return model or cached_default diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 62c18218b1..6ea1603565 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -51,6 +51,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "qwen-oauth", "xiaomi", "arcee", + "gmi", "custom", "local", # Common aliases "google", "google-gemini", "google-ai-studio", @@ -60,6 +61,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen", "mimo", "xiaomi-mimo", "arcee-ai", "arceeai", + "gmi-cloud", "gmicloud", "xai", "x-ai", "x.ai", "grok", "nvidia", "nim", "nvidia-nim", "nemotron", "qwen-portal", @@ -307,6 +309,7 @@ _URL_TO_PROVIDER: Dict[str, str] = { "integrate.api.nvidia.com": "nvidia", "api.xiaomimimo.com": "xiaomi", "xiaomimimo.com": "xiaomi", + "api.gmi-serving.com": "gmi", "ollama.com": "ollama-cloud", } @@ -702,6 +705,29 @@ def fetch_endpoint_model_metadata( return {} +def _resolve_endpoint_context_length( + model: str, + base_url: str, + api_key: str = "", +) -> Optional[int]: + """Resolve context length from an endpoint's live ``/models`` metadata.""" + endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key) + matched = endpoint_metadata.get(model) + if not matched: + if len(endpoint_metadata) == 1: + matched = next(iter(endpoint_metadata.values())) + else: + for key, entry in endpoint_metadata.items(): + if model in key or key in model: + matched = entry + break + if matched: + context_length = matched.get("context_length") + if isinstance(context_length, int): + return context_length + return None + + def _get_context_cache_path() -> Path: """Return 
path to the persistent context length cache file.""" from hermes_constants import get_hermes_home @@ -1295,22 +1321,9 @@ def get_model_context_length( # returns 128k) instead of the model's full context (400k). models.dev # has the correct per-provider values and is checked at step 5+. if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url): - endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key) - matched = endpoint_metadata.get(model) - if not matched: - # Single-model servers: if only one model is loaded, use it - if len(endpoint_metadata) == 1: - matched = next(iter(endpoint_metadata.values())) - else: - # Fuzzy match: substring in either direction - for key, entry in endpoint_metadata.items(): - if model in key or key in model: - matched = entry - break - if matched: - context_length = matched.get("context_length") - if isinstance(context_length, int): - return context_length + context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key) + if context_length is not None: + return context_length if not _is_known_provider_base_url(base_url): # 3. Try querying local server directly if is_local_endpoint(base_url): @@ -1374,6 +1387,12 @@ def get_model_context_length( if base_url: save_context_length(model, base_url, codex_ctx) return codex_ctx + if effective_provider == "gmi" and base_url: + # GMI exposes authoritative context_length via /models, but it is not + # in models.dev yet. Preserve that higher-fidelity endpoint lookup. + ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key) + if ctx is not None: + return ctx if effective_provider: from agent.models_dev import lookup_models_dev_context ctx = lookup_models_dev_context(effective_provider, model) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 610a06dc94..fb6a79d1ff 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -224,6 +224,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { api_key_env_vars=("ARCEEAI_API_KEY",), base_url_env_var="ARCEE_BASE_URL", ), + "gmi": ProviderConfig( + id="gmi", + name="GMI Cloud", + auth_type="api_key", + inference_base_url="https://api.gmi-serving.com/v1", + api_key_env_vars=("GMI_API_KEY",), + base_url_env_var="GMI_BASE_URL", + ), "minimax": ProviderConfig( id="minimax", name="MiniMax", @@ -1120,6 +1128,7 @@ def resolve_provider( "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn", "step": "stepfun", "stepfun-coding-plan": "stepfun", "arcee-ai": "arcee", "arceeai": "arcee", + "gmi-cloud": "gmi", "gmicloud": "gmi", "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan", "alibaba_coding_plan": "alibaba-coding-plan", diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 49ee1e4730..3a7eb7d035 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1082,6 +1082,7 @@ ENV_VARS_BY_VERSION: Dict[int, List[str]] = { "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"], 10: ["TAVILY_API_KEY"], 11: ["TERMINAL_MODAL_MODE"], + 17: ["GMI_API_KEY", "GMI_BASE_URL"], } # Required environment variables with metadata for migration prompts. 
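The `ENV_VARS_BY_VERSION` hunk above registers the new GMI variables under config version 17, so existing installs get prompted for them during config migration. A minimal sketch of how a version table like this is typically consumed (hypothetical helper, not Hermes source; only the `Dict[int, List[str]]` shape comes from the diff):

```python
from typing import Dict, List

# Shape taken from the hunk above; entries abridged for illustration.
ENV_VARS_BY_VERSION: Dict[int, List[str]] = {
    10: ["TAVILY_API_KEY"],
    11: ["TERMINAL_MODAL_MODE"],
    17: ["GMI_API_KEY", "GMI_BASE_URL"],
}

def vars_added_since(stored_version: int) -> List[str]:
    """Collect env vars introduced after the user's stored config version."""
    return [
        name
        for version in sorted(ENV_VARS_BY_VERSION)
        if version > stored_version
        for name in ENV_VARS_BY_VERSION[version]
    ]

# A config last migrated at version 11 would be prompted for both GMI vars.
assert vars_added_since(11) == ["GMI_API_KEY", "GMI_BASE_URL"]
```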
@@ -1254,6 +1255,22 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "GMI_API_KEY": { + "description": "GMI Cloud API key", + "prompt": "GMI Cloud API key", + "url": "https://www.gmicloud.ai/", + "password": True, + "category": "provider", + "advanced": True, + }, + "GMI_BASE_URL": { + "description": "GMI Cloud base URL override", + "prompt": "GMI Cloud base URL (leave empty for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, "MINIMAX_API_KEY": { "description": "MiniMax API key (international)", "prompt": "MiniMax API key", diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index e2eb598ae6..dc346ac9b2 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -46,6 +46,7 @@ _PROVIDER_ENV_HINTS = ( "Z_AI_API_KEY", "KIMI_API_KEY", "KIMI_CN_API_KEY", + "GMI_API_KEY", "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY", "KILOCODE_API_KEY", @@ -937,6 +938,7 @@ def run_doctor(args): ("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True), ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True), ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True), + ("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True), ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True), ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True), ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True), diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 497e226c93..375561ad6d 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1768,6 +1768,7 @@ def select_provider_and_model(args=None): "huggingface", "xiaomi", "arcee", + "gmi", "nvidia", "ollama-cloud", ): @@ -7782,6 +7783,7 @@ For more help on a command: "kilocode", "xiaomi", "arcee", + "gmi", "nvidia", ], default=None, diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 7c15f7c3d4..28ca6d7dea 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -278,6 +278,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "trinity-large-preview", "trinity-mini", ], + "gmi": [ + "zai-org/GLM-5.1-FP8", + "deepseek-ai/DeepSeek-V3.2", + "moonshotai/Kimi-K2.5", + "google/gemini-3.1-flash-lite-preview", + "anthropic/claude-sonnet-4.6", + "openai/gpt-5.4", + ], "opencode-zen": [ "kimi-k2.5", "gpt-5.4-pro", @@ -709,7 +717,6 @@ class ProviderEntry(NamedTuple): label: str tui_desc: str # detailed description for `hermes model` TUI - CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"), ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"), @@ -735,6 +742,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"), ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"), ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"), + ProviderEntry("gmi", "GMI Cloud", "GMI Cloud (multi-model direct API)"), ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"), ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"), ProviderEntry("opencode-go", "OpenCode 
Go", "OpenCode Go (open models, $10/month subscription)"), @@ -769,6 +777,8 @@ _PROVIDER_ALIASES = { "stepfun-coding-plan": "stepfun", "arcee-ai": "arcee", "arceeai": "arcee", + "gmi-cloud": "gmi", + "gmicloud": "gmi", "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", "claude": "anthropic", @@ -1849,6 +1859,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) return live except Exception: pass + if normalized == "gmi": + try: + from hermes_cli.auth import resolve_api_key_provider_credentials + + creds = resolve_api_key_provider_credentials("gmi") + api_key = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip() + if api_key and base_url: + live = fetch_api_models(api_key, base_url) + if live: + return live + except Exception: + pass if normalized == "custom": base_url = _get_custom_base_url() if base_url: diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 2f759c7905..c526682809 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -163,6 +163,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { base_url_override="https://api.arcee.ai/api/v1", base_url_env_var="ARCEE_BASE_URL", ), + "gmi": HermesOverlay( + transport="openai_chat", + extra_env_vars=("GMI_API_KEY",), + base_url_override="https://api.gmi-serving.com/v1", + base_url_env_var="GMI_BASE_URL", + ), "ollama-cloud": HermesOverlay( transport="openai_chat", base_url_env_var="OLLAMA_BASE_URL", @@ -297,6 +303,10 @@ ALIASES: Dict[str, str] = { "arcee-ai": "arcee", "arceeai": "arcee", + # gmi + "gmi-cloud": "gmi", + "gmicloud": "gmi", + # Local server aliases → virtual "local" concept (resolved via user config) "lmstudio": "lmstudio", "lm-studio": "lmstudio", @@ -319,6 +329,7 @@ _LABEL_OVERRIDES: Dict[str, str] = { "copilot-acp": "GitHub Copilot ACP", "stepfun": "StepFun Step Plan", "xiaomi": "Xiaomi MiMo", + "gmi": "GMI Cloud", "local": "Local endpoint", "bedrock": "AWS Bedrock", "ollama-cloud": "Ollama Cloud", diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 5ee0f1265c..f503dec9fd 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -516,19 +516,82 @@ class TestGetTextAuxiliaryClient: assert isinstance(client, CodexAuxiliaryClient) assert model == "gpt-5.2-codex" + def test_returns_none_when_nothing_available(self, monkeypatch): + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \ + patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)): + client, model = get_text_auxiliary_client() + assert client is None + assert model is None -class TestNousAuxiliaryRefresh: - def test_try_nous_prefers_runtime_credentials(self): - fresh_base = "https://inference-api.nousresearch.com/v1" + def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self): + with patch("agent.auxiliary_client._resolve_custom_runtime", + return_value=("https://api.openai.com/v1", "sk-test", "codex_responses")), \ + patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.3-codex"), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client() + + from agent.auxiliary_client import CodexAuxiliaryClient 
+ assert isinstance(client, CodexAuxiliaryClient) + assert model == "gpt-5.3-codex" + assert mock_openai.call_args.kwargs["base_url"] == "https://api.openai.com/v1" + assert mock_openai.call_args.kwargs["api_key"] == "sk-test" + + +class TestVisionClientFallback: + """Vision client auto mode resolves known-good multimodal backends.""" + + def test_vision_auto_includes_active_provider_when_configured(self, monkeypatch): + """Active provider appears in available backends when credentials exist.""" + monkeypatch.setenv("ANTHROPIC_API_KEY", "***") with ( - patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}), - patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)), - patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None), + patch("agent.auxiliary_client._read_nous_auth", return_value=None), + patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"), + patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"), + patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), + patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"), + ): + backends = get_available_vision_backends() + + assert "anthropic" in backends + + def test_resolve_provider_client_returns_native_anthropic_wrapper(self, monkeypatch): + monkeypatch.setenv("ANTHROPIC_API_KEY", "***") + with ( + patch("agent.auxiliary_client._read_nous_auth", return_value=None), + patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), + patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"), + ): + client, model = resolve_provider_client("anthropic") + + assert client is not None + assert client.__class__.__name__ == "AnthropicAuxiliaryClient" + assert model == "claude-haiku-4-5-20251001" + + +class TestAuxiliaryPoolAwareness: + def test_try_nous_uses_pool_entry(self): + class _Entry: + access_token = "pooled-access-token" + agent_key = "pooled-agent-key" + inference_base_url = "https://inference.pool.example/v1" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + with ( + patch("agent.auxiliary_client.load_pool", return_value=_Pool()), patch("agent.auxiliary_client.OpenAI") as mock_openai, ): from agent.auxiliary_client import _try_nous - mock_openai.return_value = MagicMock() client, model = _try_nous() assert client is not None @@ -643,6 +706,67 @@ class TestNousAuxiliaryRefresh: assert stale_client.chat.completions.create.await_count == 1 assert fresh_async_client.chat.completions.create.await_count == 1 + def test_try_nous_pool_entry(self): + class _Entry: + access_token = "pooled-access-token" + agent_key = "pooled-agent-key" + inference_base_url = "https://inference.pool.example/v1" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + with ( + patch("agent.auxiliary_client.load_pool", return_value=_Pool()), + patch("agent.auxiliary_client.OpenAI") as mock_openai, + ): + from agent.auxiliary_client import _try_nous + + client, model = _try_nous() + + assert client is not None + assert model == "gemini-3-flash" + call_kwargs = mock_openai.call_args.kwargs + assert call_kwargs["api_key"] == "pooled-agent-key" + assert call_kwargs["base_url"] == "https://inference.pool.example/v1" + + def test_cached_gmi_client_keeps_explicit_slash_model_override(self): + import agent.auxiliary_client as aux + + 
fake_client = MagicMock() + + with patch( + "agent.auxiliary_client.resolve_provider_client", + return_value=(fake_client, "anthropic/claude-opus-4.6"), + ) as mock_resolve: + aux.shutdown_cached_clients() + try: + client, model = aux._get_cached_client( + "gmi", + "anthropic/claude-opus-4.6", + base_url="https://api.gmi-serving.com/v1", + api_key="gmi-key", + ) + assert client is fake_client + assert model == "anthropic/claude-opus-4.6" + + client, model = aux._get_cached_client( + "gmi", + "openai/gpt-5.4-mini", + base_url="https://api.gmi-serving.com/v1", + api_key="gmi-key", + ) + finally: + aux.shutdown_cached_clients() + + assert client is fake_client + assert model == "openai/gpt-5.4-mini" + assert mock_resolve.call_count == 1 + + # ── Payment / credit exhaustion fallback ───────────────────────────────── diff --git a/tests/conftest.py b/tests/conftest.py index 844138f66e..d2545e0594 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -288,6 +288,10 @@ def _hermetic_environment(tmp_path, monkeypatch): monkeypatch.setattr(_plugins_mod, "_plugin_manager", None) except Exception: pass + # Explicitly clear provider-specific base URL overrides that don't match + # the generic credential-shaped env-var filter above. + monkeypatch.delenv("GMI_API_KEY", raising=False) + monkeypatch.delenv("GMI_BASE_URL", raising=False) # Backward-compat alias — old tests reference this fixture name. Keep it diff --git a/tests/hermes_cli/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py index e8f181fa4a..77afc61705 100644 --- a/tests/hermes_cli/test_api_key_providers.py +++ b/tests/hermes_cli/test_api_key_providers.py @@ -42,6 +42,7 @@ class TestProviderRegistry: ("minimax-cn", "MiniMax (China)", "api_key"), ("ai-gateway", "Vercel AI Gateway", "api_key"), ("kilocode", "Kilo Code", "api_key"), + ("gmi", "GMI Cloud", "api_key"), ]) def test_provider_registered(self, provider_id, name, auth_type): assert provider_id in PROVIDER_REGISTRY @@ -106,6 +107,11 @@ class TestProviderRegistry: assert pconfig.api_key_env_vars == ("KILOCODE_API_KEY",) assert pconfig.base_url_env_var == "KILOCODE_BASE_URL" + def test_gmi_env_vars(self): + pconfig = PROVIDER_REGISTRY["gmi"] + assert pconfig.api_key_env_vars == ("GMI_API_KEY",) + assert pconfig.base_url_env_var == "GMI_BASE_URL" + def test_huggingface_env_vars(self): pconfig = PROVIDER_REGISTRY["huggingface"] assert pconfig.api_key_env_vars == ("HF_TOKEN",) @@ -121,6 +127,7 @@ class TestProviderRegistry: assert PROVIDER_REGISTRY["minimax-cn"].inference_base_url == "https://api.minimaxi.com/anthropic" assert PROVIDER_REGISTRY["ai-gateway"].inference_base_url == "https://ai-gateway.vercel.sh/v1" assert PROVIDER_REGISTRY["kilocode"].inference_base_url == "https://api.kilo.ai/api/gateway" + assert PROVIDER_REGISTRY["gmi"].inference_base_url == "https://api.gmi-serving.com/v1" assert PROVIDER_REGISTRY["huggingface"].inference_base_url == "https://router.huggingface.co/v1" def test_oauth_providers_unchanged(self): @@ -143,6 +150,7 @@ PROVIDER_ENV_VARS = ( "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY", "AI_GATEWAY_API_KEY", "AI_GATEWAY_BASE_URL", "KILOCODE_API_KEY", "KILOCODE_BASE_URL", + "GMI_API_KEY", "GMI_BASE_URL", "DASHSCOPE_API_KEY", "OPENCODE_ZEN_API_KEY", "OPENCODE_GO_API_KEY", "NOUS_API_KEY", "GITHUB_TOKEN", "GH_TOKEN", "OPENAI_BASE_URL", "HERMES_COPILOT_ACP_COMMAND", "COPILOT_CLI_PATH", @@ -178,6 +186,9 @@ class TestResolveProvider: def test_explicit_ai_gateway(self): assert resolve_provider("ai-gateway") == "ai-gateway" + def test_explicit_gmi(self): 
+ assert resolve_provider("gmi") == "gmi" + def test_alias_glm(self): assert resolve_provider("glm") == "zai" @@ -205,6 +216,9 @@ class TestResolveProvider: def test_alias_vercel(self): assert resolve_provider("vercel") == "ai-gateway" + def test_alias_gmi_cloud(self): + assert resolve_provider("gmi-cloud") == "gmi" + def test_explicit_kilocode(self): assert resolve_provider("kilocode") == "kilocode" @@ -280,6 +294,10 @@ class TestResolveProvider: monkeypatch.setenv("AI_GATEWAY_API_KEY", "test-gw-key") assert resolve_provider("auto") == "ai-gateway" + def test_auto_detects_gmi_key(self, monkeypatch): + monkeypatch.setenv("GMI_API_KEY", "test-gmi-key") + assert resolve_provider("auto") == "gmi" + def test_auto_detects_kilocode_key(self, monkeypatch): monkeypatch.setenv("KILOCODE_API_KEY", "test-kilo-key") assert resolve_provider("auto") == "kilocode" @@ -497,6 +515,19 @@ class TestResolveApiKeyProviderCredentials: assert creds["api_key"] == "kilo-secret-key" assert creds["base_url"] == "https://api.kilo.ai/api/gateway" + def test_resolve_gmi_with_key(self, monkeypatch): + monkeypatch.setenv("GMI_API_KEY", "gmi-secret-key") + creds = resolve_api_key_provider_credentials("gmi") + assert creds["provider"] == "gmi" + assert creds["api_key"] == "gmi-secret-key" + assert creds["base_url"] == "https://api.gmi-serving.com/v1" + + def test_resolve_gmi_custom_base_url(self, monkeypatch): + monkeypatch.setenv("GMI_API_KEY", "gmi-key") + monkeypatch.setenv("GMI_BASE_URL", "https://custom.gmi.example/v1") + creds = resolve_api_key_provider_credentials("gmi") + assert creds["base_url"] == "https://custom.gmi.example/v1" + def test_resolve_kilocode_custom_base_url(self, monkeypatch): monkeypatch.setenv("KILOCODE_API_KEY", "kilo-key") monkeypatch.setenv("KILOCODE_BASE_URL", "https://custom.kilo.example/v1") @@ -594,6 +625,15 @@ class TestRuntimeProviderResolution: assert result["api_key"] == "kilo-key" assert "kilo.ai" in result["base_url"] + def test_runtime_gmi(self, monkeypatch): + monkeypatch.setenv("GMI_API_KEY", "gmi-key") + from hermes_cli.runtime_provider import resolve_runtime_provider + result = resolve_runtime_provider(requested="gmi") + assert result["provider"] == "gmi" + assert result["api_mode"] == "chat_completions" + assert result["api_key"] == "gmi-key" + assert result["base_url"] == "https://api.gmi-serving.com/v1" + def test_runtime_auto_detects_api_key_provider(self, monkeypatch): monkeypatch.setenv("KIMI_API_KEY", "auto-kimi-key") from hermes_cli.runtime_provider import resolve_runtime_provider diff --git a/tests/hermes_cli/test_gmi_provider.py b/tests/hermes_cli/test_gmi_provider.py new file mode 100644 index 0000000000..ffaf972e7e --- /dev/null +++ b/tests/hermes_cli/test_gmi_provider.py @@ -0,0 +1,363 @@ +"""Focused tests for GMI Cloud first-class provider wiring.""" + +from __future__ import annotations + +import contextlib +import io +import sys +import types +from argparse import Namespace +from unittest.mock import patch + +import pytest + +if "dotenv" not in sys.modules: + fake_dotenv = types.ModuleType("dotenv") + fake_dotenv.load_dotenv = lambda *args, **kwargs: None + sys.modules["dotenv"] = fake_dotenv + +from hermes_cli.auth import resolve_provider +from hermes_cli.config import load_config +from hermes_cli.models import ( + CANONICAL_PROVIDERS, + _PROVIDER_LABELS, + _PROVIDER_MODELS, + normalize_provider, + provider_model_ids, +) +from agent.auxiliary_client import resolve_provider_client +from agent.model_metadata import get_model_context_length + + 
+@pytest.fixture(autouse=True) +def _clear_provider_env(monkeypatch): + for key in ( + "OPENROUTER_API_KEY", + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "GOOGLE_API_KEY", + "GLM_API_KEY", + "KIMI_API_KEY", + "MINIMAX_API_KEY", + "GMI_API_KEY", + "GMI_BASE_URL", + ): + monkeypatch.delenv(key, raising=False) + + +class TestGmiAliases: + @pytest.mark.parametrize("alias", ["gmi", "gmi-cloud", "gmicloud"]) + def test_alias_resolves(self, alias, monkeypatch): + monkeypatch.setenv("GMI_API_KEY", "gmi-test-key") + assert resolve_provider(alias) == "gmi" + + def test_models_normalize_provider(self): + assert normalize_provider("gmi-cloud") == "gmi" + assert normalize_provider("gmicloud") == "gmi" + + def test_providers_normalize_provider(self): + from hermes_cli.providers import normalize_provider as normalize_provider_in_providers + + assert normalize_provider_in_providers("gmi-cloud") == "gmi" + assert normalize_provider_in_providers("gmicloud") == "gmi" + + +class TestGmiConfigRegistry: + def test_optional_env_vars_include_gmi(self): + from hermes_cli.config import ENV_VARS_BY_VERSION, OPTIONAL_ENV_VARS + + assert "GMI_API_KEY" in OPTIONAL_ENV_VARS + assert OPTIONAL_ENV_VARS["GMI_API_KEY"]["category"] == "provider" + assert OPTIONAL_ENV_VARS["GMI_API_KEY"]["password"] is True + assert OPTIONAL_ENV_VARS["GMI_API_KEY"]["url"] == "https://www.gmicloud.ai/" + + assert "GMI_BASE_URL" in OPTIONAL_ENV_VARS + assert OPTIONAL_ENV_VARS["GMI_BASE_URL"]["category"] == "provider" + assert OPTIONAL_ENV_VARS["GMI_BASE_URL"]["password"] is False + + assert "GMI_API_KEY" in ENV_VARS_BY_VERSION[17] + assert "GMI_BASE_URL" in ENV_VARS_BY_VERSION[17] + + +class TestGmiModelCatalog: + def test_static_model_fallback_exists(self): + assert "gmi" in _PROVIDER_MODELS + models = _PROVIDER_MODELS["gmi"] + assert "zai-org/GLM-5.1-FP8" in models + assert "deepseek-ai/DeepSeek-V3.2" in models + assert "moonshotai/Kimi-K2.5" in models + assert "anthropic/claude-sonnet-4.6" in models + + def test_canonical_provider_entry(self): + slugs = [p.slug for p in CANONICAL_PROVIDERS] + assert "gmi" in slugs + + def test_provider_model_ids_prefers_live_api(self, monkeypatch): + monkeypatch.setattr( + "hermes_cli.auth.resolve_api_key_provider_credentials", + lambda provider_id: { + "provider": provider_id, + "api_key": "gmi-live-key", + "base_url": "https://api.gmi-serving.com/v1", + "source": "GMI_API_KEY", + }, + ) + monkeypatch.setattr( + "hermes_cli.models.fetch_api_models", + lambda api_key, base_url: [ + "openai/gpt-5.4-mini", + "zai-org/GLM-5.1-FP8", + ], + ) + + assert provider_model_ids("gmi") == [ + "openai/gpt-5.4-mini", + "zai-org/GLM-5.1-FP8", + ] + + def test_provider_model_ids_falls_back_to_static_models(self, monkeypatch): + monkeypatch.setattr( + "hermes_cli.auth.resolve_api_key_provider_credentials", + lambda provider_id: { + "provider": provider_id, + "api_key": "gmi-live-key", + "base_url": "https://api.gmi-serving.com/v1", + "source": "GMI_API_KEY", + }, + ) + monkeypatch.setattr("hermes_cli.models.fetch_api_models", lambda api_key, base_url: None) + + assert provider_model_ids("gmi") == list(_PROVIDER_MODELS["gmi"]) + + +class TestGmiProvidersModule: + def test_overlay_exists(self): + from hermes_cli.providers import HERMES_OVERLAYS + + assert "gmi" in HERMES_OVERLAYS + overlay = HERMES_OVERLAYS["gmi"] + assert overlay.transport == "openai_chat" + assert overlay.extra_env_vars == ("GMI_API_KEY",) + assert overlay.base_url_override == "https://api.gmi-serving.com/v1" + assert overlay.base_url_env_var == 
"GMI_BASE_URL" + assert not overlay.is_aggregator + + def test_provider_label(self): + assert _PROVIDER_LABELS["gmi"] == "GMI Cloud" + + +class TestGmiDoctor: + def test_provider_env_hints_include_gmi(self): + from hermes_cli.doctor import _PROVIDER_ENV_HINTS + + assert "GMI_API_KEY" in _PROVIDER_ENV_HINTS + + def test_run_doctor_checks_gmi_models_endpoint(self, monkeypatch, tmp_path): + from hermes_cli import doctor as doctor_mod + + home = tmp_path / ".hermes" + home.mkdir(parents=True, exist_ok=True) + (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8") + (home / ".env").write_text("GMI_API_KEY=gmi-test-key\n", encoding="utf-8") + project = tmp_path / "project" + project.mkdir(exist_ok=True) + + monkeypatch.setattr(doctor_mod, "HERMES_HOME", home) + monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project) + monkeypatch.setattr(doctor_mod, "_DHH", str(home)) + monkeypatch.setenv("GMI_API_KEY", "gmi-test-key") + + for env_name in ( + "OPENROUTER_API_KEY", + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", + "GLM_API_KEY", + "ZAI_API_KEY", + "Z_AI_API_KEY", + "KIMI_API_KEY", + "KIMI_CN_API_KEY", + "ARCEEAI_API_KEY", + "DEEPSEEK_API_KEY", + "HF_TOKEN", + "DASHSCOPE_API_KEY", + "MINIMAX_API_KEY", + "MINIMAX_CN_API_KEY", + "AI_GATEWAY_API_KEY", + "KILOCODE_API_KEY", + "OPENCODE_ZEN_API_KEY", + "OPENCODE_GO_API_KEY", + "XIAOMI_API_KEY", + ): + monkeypatch.delenv(env_name, raising=False) + + fake_model_tools = types.SimpleNamespace( + check_tool_availability=lambda *a, **kw: ([], []), + TOOLSET_REQUIREMENTS={}, + ) + monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools) + + try: + from hermes_cli import auth as _auth_mod + + monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {}) + monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {}) + except Exception: + pass + + calls = [] + + def fake_get(url, headers=None, timeout=None): + calls.append((url, headers, timeout)) + return types.SimpleNamespace(status_code=200) + + import httpx + + monkeypatch.setattr(httpx, "get", fake_get) + + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + doctor_mod.run_doctor(Namespace(fix=False)) + out = buf.getvalue() + + assert "API key or custom endpoint configured" in out + assert "GMI Cloud" in out + assert any(url == "https://api.gmi-serving.com/v1/models" for url, _, _ in calls) + + +class TestGmiModelMetadata: + def test_url_to_provider(self): + from agent.model_metadata import _URL_TO_PROVIDER + + assert _URL_TO_PROVIDER.get("api.gmi-serving.com") == "gmi" + + def test_provider_prefixes(self): + from agent.model_metadata import _PROVIDER_PREFIXES + + assert "gmi" in _PROVIDER_PREFIXES + assert "gmi-cloud" in _PROVIDER_PREFIXES + assert "gmicloud" in _PROVIDER_PREFIXES + + def test_infer_from_url(self): + from agent.model_metadata import _infer_provider_from_url + + assert _infer_provider_from_url("https://api.gmi-serving.com/v1") == "gmi" + + def test_known_gmi_endpoint_still_uses_endpoint_metadata(self): + with patch( + "agent.model_metadata.get_cached_context_length", + return_value=None, + ), patch( + "agent.model_metadata.fetch_endpoint_model_metadata", + return_value={"anthropic/claude-opus-4.6": {"context_length": 409600}}, + ), patch( + "agent.models_dev.lookup_models_dev_context", + return_value=None, + ), patch( + "agent.model_metadata.fetch_model_metadata", + return_value={}, + ): + result = get_model_context_length( + "anthropic/claude-opus-4.6", + base_url="https://api.gmi-serving.com/v1", + api_key="gmi-test-key", + 
provider="custom", + ) + + assert result == 409600 + + +class TestGmiAuxiliary: + def test_aux_default_model(self): + from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS + + assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "anthropic/claude-opus-4.6" + + def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch): + monkeypatch.setenv("GMI_API_KEY", "gmi-test-key") + + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + mock_openai.return_value = object() + client, model = resolve_provider_client("gmi") + + assert client is not None + assert model == "anthropic/claude-opus-4.6" + assert mock_openai.call_args.kwargs["api_key"] == "gmi-test-key" + assert mock_openai.call_args.kwargs["base_url"] == "https://api.gmi-serving.com/v1" + + def test_resolve_provider_client_accepts_gmi_alias(self, monkeypatch): + monkeypatch.setenv("GMI_API_KEY", "gmi-test-key") + + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + mock_openai.return_value = object() + client, model = resolve_provider_client("gmi-cloud") + + assert client is not None + assert model == "anthropic/claude-opus-4.6" + + +class TestGmiMainFlow: + def test_chat_parser_accepts_gmi_provider(self, monkeypatch): + recorded: dict[str, str] = {} + + monkeypatch.setattr("hermes_cli.config.get_container_exec_info", lambda: None) + monkeypatch.setattr( + "hermes_cli.main.cmd_chat", + lambda args: recorded.setdefault("provider", args.provider), + ) + monkeypatch.setattr(sys, "argv", ["hermes", "chat", "--provider", "gmi"]) + + from hermes_cli.main import main + + main() + + assert recorded["provider"] == "gmi" + + def test_select_provider_and_model_routes_gmi_to_generic_flow(self, monkeypatch): + recorded: dict[str, str] = {} + + monkeypatch.setattr("hermes_cli.auth.resolve_provider", lambda *args, **kwargs: None) + + def fake_prompt_provider_choice(choices, default=0): + return next(i for i, label in enumerate(choices) if label.startswith("GMI Cloud")) + + def fake_model_flow_api_key_provider(config, provider_id, current_model=""): + recorded["provider_id"] = provider_id + + monkeypatch.setattr("hermes_cli.main._prompt_provider_choice", fake_prompt_provider_choice) + monkeypatch.setattr("hermes_cli.main._model_flow_api_key_provider", fake_model_flow_api_key_provider) + + from hermes_cli.main import select_provider_and_model + + select_provider_and_model() + + assert recorded["provider_id"] == "gmi" + + def test_model_flow_api_key_provider_persists_gmi_selection(self, monkeypatch): + monkeypatch.setenv("GMI_API_KEY", "gmi-test-key") + + with patch( + "hermes_cli.models.fetch_api_models", + return_value=["zai-org/GLM-5.1-FP8", "openai/gpt-5.4-mini"], + ), patch( + "hermes_cli.auth._prompt_model_selection", + return_value="openai/gpt-5.4-mini", + ), patch( + "hermes_cli.auth.deactivate_provider", + ), patch( + "builtins.input", + return_value="", + ): + from hermes_cli.main import _model_flow_api_key_provider + + _model_flow_api_key_provider(load_config(), "gmi", "old-model") + + import yaml + from hermes_constants import get_hermes_home + + config = yaml.safe_load((get_hermes_home() / "config.yaml").read_text()) or {} + model_cfg = config.get("model") + assert isinstance(model_cfg, dict) + assert model_cfg["provider"] == "gmi" + assert model_cfg["default"] == "openai/gpt-5.4-mini" + assert model_cfg["base_url"] == "https://api.gmi-serving.com/v1" diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md index b67f63ae36..16769bbd05 100644 --- 
a/website/docs/getting-started/quickstart.md +++ b/website/docs/getting-started/quickstart.md @@ -66,13 +66,30 @@ hermes model Good defaults: -| Situation | Recommended path | -|---|---| -| Least friction | Nous Portal or OpenRouter | -| You already have Claude or Codex auth | Anthropic or OpenAI Codex | -| You want local/private inference | Ollama or any custom OpenAI-compatible endpoint | -| You want multi-provider routing | OpenRouter | -| You have a custom GPU server | vLLM, SGLang, LiteLLM, or any OpenAI-compatible endpoint | +| Provider | What it is | How to set up | +|----------|-----------|---------------| +| **Nous Portal** | Subscription-based, zero-config | OAuth login via `hermes model` | +| **OpenAI Codex** | ChatGPT OAuth, uses Codex models | Device code auth via `hermes model` | +| **Anthropic** | Claude models directly (Pro/Max or API key) | `hermes model` with Claude Code auth, or an Anthropic API key | +| **OpenRouter** | Multi-provider routing across many models | Enter your API key | +| **Z.AI** | GLM / Zhipu-hosted models | Set `GLM_API_KEY` / `ZAI_API_KEY` | +| **Kimi / Moonshot** | Moonshot-hosted coding and chat models | Set `KIMI_API_KEY` | +| **Kimi / Moonshot China** | China-region Moonshot endpoint | Set `KIMI_CN_API_KEY` | +| **Arcee AI** | Trinity models | Set `ARCEEAI_API_KEY` | +| **GMI Cloud** | Multi-model direct API | Set `GMI_API_KEY` | +| **MiniMax** | International MiniMax endpoint | Set `MINIMAX_API_KEY` | +| **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` | +| **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` | +| **Hugging Face** | 20+ open models via unified router (Qwen, DeepSeek, Kimi, etc.) | Set `HF_TOKEN` | +| **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` | +| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` | +| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` | +| **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` | +| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) | +| **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` | +| **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) | +| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` | +| **Custom Endpoint** | vLLM, SGLang, Ollama, or any OpenAI-compatible API | Set base URL + API key | For most first-time users: choose a provider, accept the defaults unless you know why you're changing them. The full provider catalog with env vars and setup steps lives on the [Providers](../integrations/providers.md) page. diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index eb0eb4e790..5d5e6b0e41 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -25,6 +25,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
Use `hermes model` to switch pro | **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) | | **Kimi / Moonshot (China)** | `KIMI_CN_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding-cn`; aliases: `kimi-cn`, `moonshot-cn`) | | **Arcee AI** | `ARCEEAI_API_KEY` in `~/.hermes/.env` (provider: `arcee`; aliases: `arcee-ai`, `arceeai`) | +| **GMI Cloud** | `GMI_API_KEY` in `~/.hermes/.env` (provider: `gmi`; aliases: `gmi-cloud`, `gmicloud`) | | **MiniMax** | `MINIMAX_API_KEY` in `~/.hermes/.env` (provider: `minimax`) | | **MiniMax China** | `MINIMAX_CN_API_KEY` in `~/.hermes/.env` (provider: `minimax-cn`) | | **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`, aliases: `dashscope`, `qwen`) | @@ -250,7 +251,7 @@ model: | `HERMES_COPILOT_ACP_COMMAND` | Override the Copilot CLI binary path (default: `copilot`) | | `HERMES_COPILOT_ACP_ARGS` | Override ACP args (default: `--acp --stdio`) | -### First-Class Chinese AI Providers +### First-Class API-Key Providers These providers have built-in support with dedicated provider IDs. Set the API key and use `--provider` to select: @@ -286,16 +287,21 @@ hermes chat --provider xiaomi --model mimo-v2-pro # Arcee AI (Trinity models) hermes chat --provider arcee --model trinity-large-thinking # Requires: ARCEEAI_API_KEY in ~/.hermes/.env + +# GMI Cloud +# Use the exact model ID returned by GMI's /v1/models endpoint. +hermes chat --provider gmi --model zai-org/GLM-5.1-FP8 +# Requires: GMI_API_KEY in ~/.hermes/.env ``` Or set the provider permanently in `config.yaml`: ```yaml model: - provider: "zai" # or: kimi-coding, kimi-coding-cn, minimax, minimax-cn, alibaba, xiaomi, arcee - default: "glm-5" + provider: "gmi" + default: "zai-org/GLM-5.1-FP8" ``` -Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, `DASHSCOPE_BASE_URL`, or `XIAOMI_BASE_URL` environment variables. +Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, `DASHSCOPE_BASE_URL`, `XIAOMI_BASE_URL`, or `GMI_BASE_URL` environment variables. :::note Z.AI Endpoint Auto-Detection When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoints (global, China, coding variants) to find one that accepts your API key. You don't need to set `GLM_BASE_URL` manually — the working endpoint is detected and cached automatically. diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 4aff2276e1..f324edf160 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -36,6 +36,8 @@ All variables go in `~/.hermes/.env`. 
You can also set them with `hermes config | `KIMI_CN_API_KEY` | Kimi / Moonshot China API key ([moonshot.cn](https://platform.moonshot.cn)) | | `ARCEEAI_API_KEY` | Arcee AI API key ([chat.arcee.ai](https://chat.arcee.ai/)) | | `ARCEE_BASE_URL` | Override Arcee base URL (default: `https://api.arcee.ai/api/v1`) | +| `GMI_API_KEY` | GMI Cloud API key ([gmicloud.ai](https://www.gmicloud.ai/)) | +| `GMI_BASE_URL` | Override GMI Cloud base URL (default: `https://api.gmi-serving.com/v1`) | | `MINIMAX_API_KEY` | MiniMax API key — global endpoint ([minimax.io](https://www.minimax.io)) | | `MINIMAX_BASE_URL` | Override MiniMax base URL (default: `https://api.minimax.io/anthropic` — Hermes uses MiniMax's Anthropic Messages-compatible endpoint) | | `MINIMAX_CN_API_KEY` | MiniMax API key — China endpoint ([minimaxi.com](https://www.minimaxi.com)) | @@ -89,7 +91,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | Variable | Description | |----------|-------------| -| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) | +| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) | | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) | | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL | | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) | diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index d60ad3ecff..3a31bd272a 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -801,6 +801,17 @@ These options apply to **auxiliary task configs** (`auxiliary:`, `compression:`, | `"codex"` | Force Codex OAuth (ChatGPT account). Supports vision (gpt-5.3-codex). | `hermes model` → Codex | | `"main"` | Use your active custom/main endpoint. This can come from `OPENAI_BASE_URL` + `OPENAI_API_KEY` or from a custom endpoint saved via `hermes model` / `config.yaml`. Works with OpenAI, local models, or any OpenAI-compatible API. **Auxiliary tasks only — not valid for `model.provider`.** | Custom endpoint credentials + base URL | +Direct API-key providers from the main provider catalog also work here when you want side tasks to bypass your default router. `gmi` is valid once `GMI_API_KEY` is configured: + +```yaml +auxiliary: + compression: + provider: "gmi" + model: "anthropic/claude-opus-4.6" +``` + +For GMI auxiliary routing, use the exact model ID returned by GMI's `/v1/models` endpoint. 
+ ### Common Setups **Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs): diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index 9ecefb0d03..a0d699dfb2 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -59,6 +59,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback | Kilo Code | `kilocode` | `KILOCODE_API_KEY` | | Xiaomi MiMo | `xiaomi` | `XIAOMI_API_KEY` | | Arcee AI | `arcee` | `ARCEEAI_API_KEY` | +| GMI Cloud | `gmi` | `GMI_API_KEY` | | Alibaba / DashScope | `alibaba` | `DASHSCOPE_API_KEY` | | Hugging Face | `huggingface` | `HF_TOKEN` | | Custom endpoint | `custom` | `base_url` + `key_env` (see below) |
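The subtlest behavioral change in this diff is the cached-client guard in `agent/auxiliary_client.py`: slash-bearing `vendor/model` IDs are now kept for any cached client whose cached default itself contains a slash (GMI's catalog is vendor-prefixed throughout), rather than only for OpenRouter clients. Distilled into a standalone sketch (illustrative names, not the module's actual call signatures):

```python
from typing import Optional

def accepts_slash_models(is_openrouter: bool, cached_default: Optional[str]) -> bool:
    # Mirrors _cached_client_accepts_slash_models: OpenRouter always takes
    # vendor/model slugs; otherwise infer support from the cached default.
    return is_openrouter or bool(cached_default and "/" in cached_default)

# GMI cache hit: an explicit slash override such as "openai/gpt-5.4-mini" survives.
assert accepts_slash_models(False, "anthropic/claude-opus-4.6") is True
# Plain-ID provider (e.g. Anthropic direct): slugs still fall back to the cached default.
assert accepts_slash_models(False, "claude-haiku-4-5-20251001") is False
```

This is the behavior `test_cached_gmi_client_keeps_explicit_slash_model_override` exercises end to end.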
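For manually smoke-testing the new wiring, the same `/v1/models` listing that the `hermes doctor` check probes can be fetched directly. A sketch assuming GMI returns the standard OpenAI-style `{"data": [...]}` payload (only the URL and env var names come from this diff; the response shape is an assumption):

```python
import os

import httpx

# Default mirrors the GMI_BASE_URL fallback registered in hermes_cli/auth.py.
base_url = os.environ.get("GMI_BASE_URL", "https://api.gmi-serving.com/v1")
resp = httpx.get(
    f"{base_url}/models",
    headers={"Authorization": f"Bearer {os.environ['GMI_API_KEY']}"},
    timeout=10.0,
)
resp.raise_for_status()

# Model IDs are vendor-prefixed (e.g. "zai-org/GLM-5.1-FP8"), which is why the
# slash-model guard above matters for cached GMI clients.
for entry in resp.json().get("data", [])[:5]:
    print(entry.get("id"))
```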