Mirror of https://github.com/NousResearch/hermes-agent.git (synced 2026-04-28 06:51:16 +08:00)
fix(providers): complete NVIDIA NIM parity with other providers
Follow-up on the native NVIDIA NIM provider salvage. The original PR wired
PROVIDER_REGISTRY + HERMES_OVERLAYS correctly but missed several touchpoints
required for full parity with other OpenAI-compatible providers (xai,
huggingface, deepseek, zai).
Gaps closed:
- hermes_cli/main.py:
- Add 'nvidia' to the _model_flow_api_key_provider dispatch tuple so
selecting 'NVIDIA NIM' in `hermes model` actually runs the api-key
provider flow (previously fell through silently).
- Add 'nvidia' to `hermes chat --provider` argparse choices so the
documented test command (`hermes chat --provider nvidia --model ...`)
parses successfully.
- hermes_cli/config.py: Register NVIDIA_API_KEY and NVIDIA_BASE_URL in
  OPTIONAL_ENV_VARS so the setup wizard can prompt for them and they're
  auto-added to the subprocess env blocklist.
- hermes_cli/doctor.py: Add NVIDIA NIM row to `_apikey_providers` so
`hermes doctor` probes https://integrate.api.nvidia.com/v1/models.
- hermes_cli/dump.py: Add NVIDIA_API_KEY → 'nvidia' mapping for
`hermes dump` credential masking.
- tests/tools/test_local_env_blocklist.py: Extend registry_vars fixture
with NVIDIA_API_KEY to verify it's blocked from leaking into subprocesses.
- agent/model_metadata.py: Add 'nemotron' → 131072 context-length entry
so all Nemotron variants get 128K context via substring match (rather
than falling back to MINIMUM_CONTEXT_LENGTH).
- hermes_cli/models.py: Fix hallucinated model ID
'nvidia/nemotron-3-nano-8b-a4b' → 'nvidia/nemotron-3-nano-30b-a3b'
(verified against live integrate.api.nvidia.com/v1/models catalog).
Expand curated list from 5 to 9 agentic models mapping to OpenRouter
defaults per provider-guide convention: add qwen3.5-397b-a17b,
deepseek-v3.2, llama-3.3-nemotron-super-49b-v1.5, gpt-oss-120b.
- cli-config.yaml.example: Document 'nvidia' provider option.
- scripts/release.py: Map asurla@nvidia.com → anniesurla in AUTHOR_MAP
for CI attribution.
E2E verified: `hermes chat --provider nvidia ...` now reaches NVIDIA's
endpoint (returns a 401 with a bogus key instead of an argparse error);
`hermes doctor` detects NVIDIA NIM when NVIDIA_API_KEY is set.

agent/model_metadata.py
@@ -159,6 +159,8 @@ DEFAULT_CONTEXT_LENGTHS = {
     "grok": 131072,  # catch-all (grok-beta, unknown grok-*)
     # Kimi
     "kimi": 262144,
+    # Nemotron — NVIDIA's open-weights series (128K context across all sizes)
+    "nemotron": 131072,
     # Arcee
     "trinity": 262144,
     # OpenRouter

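The lookup that consumes this table is not part of the hunk. Below is a minimal sketch of the substring-match behavior the commit message describes; the function name, the MINIMUM_CONTEXT_LENGTH value, and the longest-key-first ordering are assumptions for illustration, not taken from the repo.

    # Sketch only — assumes the real lookup in agent/model_metadata.py does a
    # case-insensitive substring match over DEFAULT_CONTEXT_LENGTHS keys and
    # falls back to MINIMUM_CONTEXT_LENGTH when nothing matches.
    MINIMUM_CONTEXT_LENGTH = 8192  # placeholder value, not taken from the repo

    DEFAULT_CONTEXT_LENGTHS = {
        "grok": 131072,
        "kimi": 262144,
        "nemotron": 131072,  # catches nemotron-3-super-120b, nemotron-3-nano-30b, ...
        "trinity": 262144,
    }

    def context_length_for(model_id: str) -> int:
        model = model_id.lower()
        # Longest key first so more specific entries win over catch-alls.
        for key in sorted(DEFAULT_CONTEXT_LENGTHS, key=len, reverse=True):
            if key in model:
                return DEFAULT_CONTEXT_LENGTHS[key]
        return MINIMUM_CONTEXT_LENGTH

    assert context_length_for("nvidia/nemotron-3-nano-30b-a3b") == 131072
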
cli-config.yaml.example
@@ -24,6 +24,7 @@ model:
   # "minimax" - MiniMax global (requires: MINIMAX_API_KEY)
   # "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY)
   # "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
+  # "nvidia" - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY)
   # "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY)
   # "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
   # "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)

hermes_cli/config.py
@@ -861,6 +861,22 @@ OPTIONAL_ENV_VARS = {
         "category": "provider",
         "advanced": True,
     },
+    "NVIDIA_API_KEY": {
+        "description": "NVIDIA NIM API key (build.nvidia.com or local NIM endpoint)",
+        "prompt": "NVIDIA NIM API key",
+        "url": "https://build.nvidia.com/",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "NVIDIA_BASE_URL": {
+        "description": "NVIDIA NIM base URL override (e.g. http://localhost:8000/v1 for local NIM)",
+        "prompt": "NVIDIA NIM base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
     "GLM_API_KEY": {
         "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
         "prompt": "Z.AI / GLM API key",

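The commit message says these registrations are "auto-added to the subprocess env blocklist". A hedged sketch of that idea, assuming the blocklist is simply every provider-category key in OPTIONAL_ENV_VARS; the helper name below is illustrative, not the repo's.

    import os

    # Illustrative only — assumes the blocklist is every provider-category key
    # registered in OPTIONAL_ENV_VARS, stripped from the environment handed to
    # tool subprocesses so credentials such as NVIDIA_API_KEY cannot leak.
    def subprocess_env(optional_env_vars: dict[str, dict]) -> dict[str, str]:
        blocked = {
            name
            for name, meta in optional_env_vars.items()
            if meta.get("category") == "provider"
        }
        return {k: v for k, v in os.environ.items() if k not in blocked}
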
hermes_cli/doctor.py
@@ -825,6 +825,7 @@ def run_doctor(args):
         ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
         ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
         ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
+        ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
         ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
         # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
         ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),

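Each row is (display name, key env vars, probe URL, base-URL override var, OpenAI-compatible flag). A rough sketch of the probe such a row implies (GET the models endpoint with a Bearer token, honoring the override), using `requests`; the function name and return values are illustrative, not the actual run_doctor() code.

    import os
    import requests

    # Sketch only — mirrors what a row like the NVIDIA NIM one implies.
    def probe_api_key_provider(key_vars, default_url, base_url_var, timeout=10):
        api_key = next((os.environ[v] for v in key_vars if os.environ.get(v)), None)
        if api_key is None:
            return "not configured"
        base = os.environ.get(base_url_var)
        url = f"{base.rstrip('/')}/models" if base else default_url
        resp = requests.get(url, headers={"Authorization": f"Bearer {api_key}"}, timeout=timeout)
        return "ok" if resp.status_code == 200 else f"error {resp.status_code}"

    # e.g. probe_api_key_provider(("NVIDIA_API_KEY",),
    #                             "https://integrate.api.nvidia.com/v1/models",
    #                             "NVIDIA_BASE_URL")
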
hermes_cli/dump.py
@@ -296,6 +296,7 @@ def run_dump(args):
         ("DEEPSEEK_API_KEY", "deepseek"),
         ("DASHSCOPE_API_KEY", "dashscope"),
         ("HF_TOKEN", "huggingface"),
+        ("NVIDIA_API_KEY", "nvidia"),
         ("AI_GATEWAY_API_KEY", "ai_gateway"),
         ("OPENCODE_ZEN_API_KEY", "opencode_zen"),
         ("OPENCODE_GO_API_KEY", "opencode_go"),

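A hedged sketch of how an (env var, provider) mapping like this can drive credential masking; the helper and output format below are illustrative only, not `hermes dump`'s actual behavior.

    import os

    # Illustrative only — redacts known credential env vars before they reach a
    # dump, keeping a short prefix so the provider is still recognizable.
    CREDENTIAL_VARS = [
        ("DEEPSEEK_API_KEY", "deepseek"),
        ("HF_TOKEN", "huggingface"),
        ("NVIDIA_API_KEY", "nvidia"),
    ]

    def masked_credentials() -> dict[str, str]:
        out = {}
        for var, provider in CREDENTIAL_VARS:
            value = os.environ.get(var)
            if value:
                out[var] = f"<{provider}:{value[:4]}...redacted>"
        return out
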
hermes_cli/main.py
@@ -1143,7 +1143,7 @@ def select_provider_and_model(args=None):
         _model_flow_kimi(config, current_model)
     elif selected_provider == "bedrock":
         _model_flow_bedrock(config, current_model)
-    elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "ollama-cloud"):
+    elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "nvidia", "ollama-cloud"):
         _model_flow_api_key_provider(config, selected_provider, current_model)

     # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────

hermes_cli/main.py
@@ -4954,7 +4954,7 @@ For more help on a command:
     )
     chat_parser.add_argument(
         "--provider",
-        choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "xai", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
+        choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "xai", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee", "nvidia"],
         default=None,
         help="Inference provider (default: auto)"
     )

hermes_cli/models.py
@@ -156,11 +156,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "grok-4-1-fast-reasoning",
     ],
     "nvidia": [
+        # NVIDIA flagship reasoning models
         "nvidia/nemotron-3-super-120b-a12b",
-        "nvidia/nemotron-3-nano-8b-a4b",
-        "z-ai/glm5",
+        "nvidia/nemotron-3-nano-30b-a3b",
+        "nvidia/llama-3.3-nemotron-super-49b-v1.5",
+        # Third-party agentic models hosted on build.nvidia.com
+        # (map to OpenRouter defaults — users get familiar picks on NIM)
+        "qwen/qwen3.5-397b-a17b",
+        "deepseek-ai/deepseek-v3.2",
         "moonshotai/kimi-k2.5",
         "minimaxai/minimax-m2.5",
+        "z-ai/glm5",
+        "openai/gpt-oss-120b",
     ],
     "kimi-coding": [
         "kimi-k2.5",

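The commit message notes the curated IDs were checked against the live integrate.api.nvidia.com/v1/models catalog. Since the endpoint is OpenAI-compatible, a quick re-check is possible with a short script; this is a sketch that assumes NVIDIA_API_KEY is set and the standard OpenAI-style list response.

    import os
    import requests

    # Sketch: list the live NIM catalog and confirm each curated ID exists.
    # The curated list below is copied from the diff above.
    CURATED = [
        "nvidia/nemotron-3-super-120b-a12b",
        "nvidia/nemotron-3-nano-30b-a3b",
        "nvidia/llama-3.3-nemotron-super-49b-v1.5",
        "qwen/qwen3.5-397b-a17b",
        "deepseek-ai/deepseek-v3.2",
        "moonshotai/kimi-k2.5",
        "minimaxai/minimax-m2.5",
        "z-ai/glm5",
        "openai/gpt-oss-120b",
    ]

    resp = requests.get(
        "https://integrate.api.nvidia.com/v1/models",
        headers={"Authorization": f"Bearer {os.environ['NVIDIA_API_KEY']}"},
        timeout=30,
    )
    resp.raise_for_status()
    live_ids = {m["id"] for m in resp.json()["data"]}
    for model_id in CURATED:
        print(model_id, "ok" if model_id in live_ids else "MISSING")
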
hermes_cli/models.py
@@ -543,6 +550,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
     ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
     ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
     ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
+    ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
     ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
     ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
     ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),

hermes_cli/models.py
@@ -551,7 +559,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
     ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"),
     ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"),
     ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"),
-    ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
     ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"),
     ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
     ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"),

scripts/release.py
@@ -256,6 +256,7 @@ AUTHOR_MAP = {
     "anthhub@163.com": "anthhub",
     "shenuu@gmail.com": "shenuu",
     "xiayh17@gmail.com": "xiayh0107",
+    "asurla@nvidia.com": "anniesurla",
 }

tests/tools/test_local_env_blocklist.py
@@ -86,6 +86,7 @@ class TestProviderEnvBlocklist:
             "MINIMAX_API_KEY": "mm-key",
             "MINIMAX_CN_API_KEY": "mmcn-key",
             "DEEPSEEK_API_KEY": "deepseek-key",
+            "NVIDIA_API_KEY": "nvidia-key",
         }
         result_env = _run_with_env(extra_os_env=registry_vars)