"""
Canonical model catalogs and lightweight validation helpers.

Add, remove, or reorder entries here — both `hermes setup` and
`hermes` provider-selection will pick up the change automatically.
"""

from __future__ import annotations

import json
import os
import time
import urllib.error
import urllib.request
from difflib import get_close_matches
from pathlib import Path
from typing import Any, NamedTuple, Optional

COPILOT_BASE_URL = "https://api.githubcopilot.com"
COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
COPILOT_EDITOR_VERSION = "vscode/1.104.1"
COPILOT_REASONING_EFFORTS_GPT5 = ["minimal", "low", "medium", "high"]
COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]


# Fallback OpenRouter snapshot used when the live catalog is unavailable.
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.6", "recommended"),
    ("anthropic/claude-sonnet-4.6", ""),
    ("qwen/qwen3.6-plus", ""),
    ("anthropic/claude-sonnet-4.5", ""),
    ("anthropic/claude-haiku-4.5", ""),
    ("openrouter/elephant-alpha", "free"),
    ("openai/gpt-5.4", ""),
    ("openai/gpt-5.4-mini", ""),
    ("xiaomi/mimo-v2-pro", ""),
    ("openai/gpt-5.3-codex", ""),
    ("google/gemini-3-pro-image-preview", ""),
    ("google/gemini-3-flash-preview", ""),
    ("google/gemini-3.1-pro-preview", ""),
    ("google/gemini-3.1-flash-lite-preview", ""),
    ("qwen/qwen3.5-plus-02-15", ""),
    ("qwen/qwen3.5-35b-a3b", ""),
    ("stepfun/step-3.5-flash", ""),
    ("minimax/minimax-m2.7", ""),
    ("minimax/minimax-m2.5", ""),
    ("z-ai/glm-5.1", ""),
    ("z-ai/glm-5v-turbo", ""),
    ("z-ai/glm-5-turbo", ""),
    ("moonshotai/kimi-k2.5", ""),
    ("x-ai/grok-4.20", ""),
    ("nvidia/nemotron-3-super-120b-a12b", ""),
    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
    ("arcee-ai/trinity-large-preview:free", "free"),
    ("arcee-ai/trinity-large-thinking", ""),
    ("openai/gpt-5.4-pro", ""),
    ("openai/gpt-5.4-nano", ""),
]


_openrouter_catalog_cache: list[tuple[str, str]] | None = None


def _codex_curated_models() -> list[str]:
    """Derive the openai-codex curated list from codex_models.py.

    Single source of truth: DEFAULT_CODEX_MODELS + forward-compat synthesis.
    This keeps the gateway /model picker in sync with the CLI `hermes model`
    flow without maintaining a separate static list.
    """
    from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, _add_forward_compat_models

    return _add_forward_compat_models(list(DEFAULT_CODEX_MODELS))


_PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "xiaomi/mimo-v2-pro",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5.4",
        "openai/gpt-5.4-mini",
        "openai/gpt-5.3-codex",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash-preview",
        "google/gemini-3.1-pro-preview",
        "google/gemini-3.1-flash-lite-preview",
        "qwen/qwen3.5-plus-02-15",
        "qwen/qwen3.5-35b-a3b",
        "stepfun/step-3.5-flash",
        "minimax/minimax-m2.7",
        "minimax/minimax-m2.5",
        "z-ai/glm-5.1",
        "z-ai/glm-5v-turbo",
        "z-ai/glm-5-turbo",
        "moonshotai/kimi-k2.5",
        "x-ai/grok-4.20-beta",
        "nvidia/nemotron-3-super-120b-a12b",
        "nvidia/nemotron-3-super-120b-a12b:free",
        "arcee-ai/trinity-large-preview:free",
        "arcee-ai/trinity-large-thinking",
        "openai/gpt-5.4-pro",
        "openai/gpt-5.4-nano",
        "openrouter/elephant-alpha",
    ],
    "openai-codex": _codex_curated_models(),
    "copilot-acp": [
        "copilot-acp",
    ],
    "copilot": [
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-5-mini",
        "gpt-5.3-codex",
        "gpt-5.2-codex",
        "gpt-4.1",
        "gpt-4o",
        "gpt-4o-mini",
        "claude-opus-4.6",
        "claude-sonnet-4.6",
        "claude-sonnet-4.5",
        "claude-haiku-4.5",
        "gemini-2.5-pro",
        "grok-code-fast-1",
    ],
    "gemini": [
        "gemini-3.1-pro-preview",
        "gemini-3-flash-preview",
        "gemini-3.1-flash-lite-preview",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite",
        # Gemma open models (also served via AI Studio)
        "gemma-4-31b-it",
        "gemma-4-26b-it",
    ],
    "zai": [
        "glm-5.1",
        "glm-5",
        "glm-5v-turbo",
        "glm-5-turbo",
        "glm-4.7",
        "glm-4.5",
        "glm-4.5-flash",
    ],
    "xai": [
        "grok-4.20-reasoning",
        "grok-4-1-fast-reasoning",
    ],
    "kimi-coding": [
        "kimi-for-coding",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-thinking-turbo",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    "kimi-coding-cn": [
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    "moonshot": [
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    "minimax": [
        "MiniMax-M2.7",
        "MiniMax-M2.5",
        "MiniMax-M2.1",
        "MiniMax-M2",
    ],
    "minimax-cn": [
        "MiniMax-M2.7",
        "MiniMax-M2.5",
        "MiniMax-M2.1",
        "MiniMax-M2",
    ],
    "anthropic": [
        "claude-opus-4-6",
        "claude-sonnet-4-6",
        "claude-opus-4-5-20251101",
        "claude-sonnet-4-5-20250929",
        "claude-opus-4-20250514",
        "claude-sonnet-4-20250514",
        "claude-haiku-4-5-20251001",
    ],
    "deepseek": [
        "deepseek-chat",
        "deepseek-reasoner",
    ],
    "xiaomi": [
        "mimo-v2-pro",
        "mimo-v2-omni",
        "mimo-v2-flash",
    ],
    "arcee": [
        "trinity-large-thinking",
        "trinity-large-preview",
        "trinity-mini",
    ],
    "opencode-zen": [
        "gpt-5.4-pro",
        "gpt-5.4",
        "gpt-5.3-codex",
        "gpt-5.3-codex-spark",
        "gpt-5.2",
        "gpt-5.2-codex",
        "gpt-5.1",
        "gpt-5.1-codex",
        "gpt-5.1-codex-max",
        "gpt-5.1-codex-mini",
        "gpt-5",
        "gpt-5-codex",
        "gpt-5-nano",
        "claude-opus-4-6",
        "claude-opus-4-5",
        "claude-opus-4-1",
        "claude-sonnet-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4",
        "claude-haiku-4-5",
        "claude-3-5-haiku",
        "gemini-3.1-pro",
        "gemini-3-pro",
        "gemini-3-flash",
        "minimax-m2.7",
        "minimax-m2.5",
        "minimax-m2.5-free",
        "minimax-m2.1",
        "glm-5",
        "glm-4.7",
        "glm-4.6",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2",
        "qwen3-coder",
        "big-pickle",
    ],
    "opencode-go": [
        "glm-5",
        "kimi-k2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
        "minimax-m2.7",
        "minimax-m2.5",
    ],
    "ai-gateway": [
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5",
        "openai/gpt-4.1",
        "openai/gpt-4.1-mini",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash",
        "google/gemini-2.5-pro",
        "google/gemini-2.5-flash",
        "deepseek/deepseek-v3.2",
    ],
    "kilocode": [
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash-preview",
    ],
    # Alibaba DashScope Coding platform (coding-intl) — default endpoint.
    # Supports Qwen models + third-party providers (GLM, Kimi, MiniMax).
    # Users with classic DashScope keys should override DASHSCOPE_BASE_URL
    # to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
    # or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
    "alibaba": [
        "qwen3.5-plus",
        "qwen3-coder-plus",
        "qwen3-coder-next",
        # Third-party models available on coding-intl
        "glm-5",
        "glm-4.7",
        "kimi-k2.5",
        "MiniMax-M2.5",
    ],
    # Curated HF model list — only agentic models that map to OpenRouter defaults.
    "huggingface": [
        "Qwen/Qwen3.5-397B-A17B",
        "Qwen/Qwen3.5-35B-A3B",
        "deepseek-ai/DeepSeek-V3.2",
        "moonshotai/Kimi-K2.5",
        "MiniMaxAI/MiniMax-M2.5",
        "zai-org/GLM-5",
        "XiaomiMiMo/MiMo-V2-Flash",
        "moonshotai/Kimi-K2-Thinking",
    ],
    # AWS Bedrock — static fallback list used when dynamic discovery is
    # unavailable (no boto3, no credentials, or API error). The agent
    # prefers live discovery via ListFoundationModels + ListInferenceProfiles.
    # Use inference profile IDs (us.*) since most models require them.
    "bedrock": [
        "us.anthropic.claude-sonnet-4-6",
        "us.anthropic.claude-opus-4-6-v1",
        "us.anthropic.claude-haiku-4-5-20251001-v1:0",
        "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
        "us.amazon.nova-pro-v1:0",
        "us.amazon.nova-lite-v1:0",
        "us.amazon.nova-micro-v1:0",
        "deepseek.v3.2",
        "us.meta.llama4-maverick-17b-instruct-v1:0",
        "us.meta.llama4-scout-17b-instruct-v1:0",
    ],
}


# ---------------------------------------------------------------------------
# Nous Portal free-model filtering
# ---------------------------------------------------------------------------
# Models that are ALLOWED to appear when priced as free on Nous Portal.
# Any other free model is hidden — prevents promotional/temporary free models
# from cluttering the selection when users are paying subscribers.
# Models in this list are ALSO filtered out if they are NOT free (i.e. they
# should only appear in the menu when they are genuinely free).
_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
    "xiaomi/mimo-v2-pro",
    "xiaomi/mimo-v2-omni",
})


def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
    """Return True if *model_id* has zero-cost prompt AND completion pricing."""
    p = pricing.get(model_id)
    if not p:
        return False
    try:
        return float(p.get("prompt", "1")) == 0 and float(p.get("completion", "1")) == 0
    except (TypeError, ValueError):
        return False
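
# Example with hypothetical OpenRouter-style per-token price strings (the
# model IDs here are made up for illustration):
#
#     pricing = {"m/free": {"prompt": "0", "completion": "0"},
#                "m/paid": {"prompt": "0.000002", "completion": "0.00001"}}
#     _is_model_free("m/free", pricing)     # True  (both prices are zero)
#     _is_model_free("m/paid", pricing)     # False
#     _is_model_free("m/unknown", pricing)  # False (no pricing entry)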


def filter_nous_free_models(
    model_ids: list[str],
    pricing: dict[str, dict[str, str]],
) -> list[str]:
    """Filter the Nous Portal model list according to free-model policy.

    Rules:
      • Paid models that are NOT in the allowlist → keep (normal case).
      • Free models that are NOT in the allowlist → drop.
      • Allowlist models that ARE free → keep.
      • Allowlist models that are NOT free → drop.
    """
    if not pricing:
        return model_ids  # no pricing data — can't filter, show everything

    result: list[str] = []
    for mid in model_ids:
        free = _is_model_free(mid, pricing)
        if mid in _NOUS_ALLOWED_FREE_MODELS:
            # Allowlist model: only show when it's actually free
            if free:
                result.append(mid)
        else:
            # Regular model: keep only when it's NOT free
            if not free:
                result.append(mid)
    return result
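
# Worked example of the policy above. Pricing values are hypothetical, and
# "vendor/promo-model" is a made-up ID standing in for a promotional free
# model; the other IDs come from the catalog. The paid model and the
# allowlisted free model are kept, the promotional free model is dropped.
#
#     pricing = {
#         "anthropic/claude-opus-4.6": {"prompt": "0.00002", "completion": "0.0001"},
#         "xiaomi/mimo-v2-pro": {"prompt": "0", "completion": "0"},
#         "vendor/promo-model": {"prompt": "0", "completion": "0"},
#     }
#     filter_nous_free_models(
#         ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro", "vendor/promo-model"],
#         pricing,
#     )
#     # -> ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro"]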


# ---------------------------------------------------------------------------
# Nous Portal account tier detection
# ---------------------------------------------------------------------------

def fetch_nous_account_tier(access_token: str, portal_base_url: str = "") -> dict[str, Any]:
    """Fetch the user's Nous Portal account/subscription info.

    Calls ``<portal>/api/oauth/account`` with the OAuth access token.

    Returns the parsed JSON dict on success, e.g.::

        {
            "subscription": {
                "plan": "Plus",
                "tier": 2,
                "monthly_charge": 20,
                "credits_remaining": 1686.60,
                ...
            },
            ...
        }

    Returns an empty dict on any failure (network, auth, parse).
    """
    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
    url = f"{base}/api/oauth/account"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Accept": "application/json",
    }
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=8) as resp:
            return json.loads(resp.read().decode())
    except Exception:
        return {}


def is_nous_free_tier(account_info: dict[str, Any]) -> bool:
    """Return True if the account info indicates a free (unpaid) tier.

    Checks ``subscription.monthly_charge == 0``. Returns False when
    the field is missing or unparseable (assumes paid — don't block users).
    """
    sub = account_info.get("subscription")
    if not isinstance(sub, dict):
        return False
    charge = sub.get("monthly_charge")
    if charge is None:
        return False
    try:
        return float(charge) == 0
    except (TypeError, ValueError):
        return False
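
# Worked example of the tier check. The payload shapes follow the docstring
# above; the token value is hypothetical and the first call hits the network.
#
#     info = fetch_nous_account_tier("nous-oauth-access-token")
#     is_nous_free_tier(info)
#     is_nous_free_tier({"subscription": {"monthly_charge": 0}})   # True
#     is_nous_free_tier({"subscription": {"monthly_charge": 20}})  # False
#     is_nous_free_tier({})                                        # False (assume paid)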


def partition_nous_models_by_tier(
    model_ids: list[str],
    pricing: dict[str, dict[str, str]],
    free_tier: bool,
) -> tuple[list[str], list[str]]:
    """Split Nous models into (selectable, unavailable) based on user tier.

    For paid-tier users: all models are selectable, none unavailable
    (free-model filtering is handled separately by ``filter_nous_free_models``).

    For free-tier users: only free models are selectable; paid models
    are returned as unavailable (shown grayed out in the menu).
    """
    if not free_tier:
        return (model_ids, [])

    if not pricing:
        return (model_ids, [])  # can't determine, show everything

    selectable: list[str] = []
    unavailable: list[str] = []
    for mid in model_ids:
        if _is_model_free(mid, pricing):
            selectable.append(mid)
        else:
            unavailable.append(mid)
    return (selectable, unavailable)
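
# Example with hypothetical pricing and made-up IDs: a free-tier user sees
# paid models split out as unavailable, a paid-tier user sees everything.
#
#     pricing = {"a/free": {"prompt": "0", "completion": "0"},
#                "a/paid": {"prompt": "0.00001", "completion": "0.00005"}}
#     partition_nous_models_by_tier(["a/free", "a/paid"], pricing, free_tier=True)
#     # -> (["a/free"], ["a/paid"])
#     partition_nous_models_by_tier(["a/free", "a/paid"], pricing, free_tier=False)
#     # -> (["a/free", "a/paid"], [])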


# ---------------------------------------------------------------------------
# TTL cache for free-tier detection — avoids repeated API calls within a
# session while still picking up upgrades quickly.
# ---------------------------------------------------------------------------
_FREE_TIER_CACHE_TTL: int = 180  # seconds (3 minutes)
_free_tier_cache: tuple[bool, float] | None = None  # (result, timestamp)


def check_nous_free_tier() -> bool:
    """Check if the current Nous Portal user is on a free (unpaid) tier.

    Results are cached for ``_FREE_TIER_CACHE_TTL`` seconds to avoid
    hitting the Portal API on every call. The cache is short-lived so
    that an account upgrade is reflected within a few minutes.

    Returns False (assume paid) on any error — never blocks paying users.
    """
    global _free_tier_cache

    now = time.monotonic()
    if _free_tier_cache is not None:
        cached_result, cached_at = _free_tier_cache
        if now - cached_at < _FREE_TIER_CACHE_TTL:
            return cached_result

    try:
        from hermes_cli.auth import get_provider_auth_state, resolve_nous_runtime_credentials

        # Ensure we have a fresh token (triggers refresh if needed)
        resolve_nous_runtime_credentials(min_key_ttl_seconds=60)

        state = get_provider_auth_state("nous")
        if not state:
            _free_tier_cache = (False, now)
            return False
        access_token = state.get("access_token", "")
        portal_url = state.get("portal_base_url", "")
        if not access_token:
            _free_tier_cache = (False, now)
            return False

        account_info = fetch_nous_account_tier(access_token, portal_url)
        result = is_nous_free_tier(account_info)
        _free_tier_cache = (result, now)
        return result
    except Exception:
        _free_tier_cache = (False, now)
        return False  # default to paid on error — don't block users
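
# Cache behaviour sketch: within the 3-minute TTL the second call is served
# from the module-level cache. Resetting the cache to None (illustration
# only, not a public API) would force the next call to re-check.
#
#     check_nous_free_tier()   # hits the Portal API, caches the result
#     check_nous_free_tier()   # served from _free_tier_cache, no network
#     # _free_tier_cache = None  # e.g. in tests, to force a fresh check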


# ---------------------------------------------------------------------------
# Canonical provider list — single source of truth for provider identity.
# Every code path that lists, displays, or iterates providers derives from
# this list: hermes model, /model, /provider, list_authenticated_providers.
#
# Fields:
#   slug     — internal provider ID (used in config.yaml, --provider flag)
#   label    — short display name
#   tui_desc — longer description for the `hermes model` interactive picker
# ---------------------------------------------------------------------------

class ProviderEntry(NamedTuple):
    slug: str
    label: str
    tui_desc: str  # detailed description for `hermes model` TUI


CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
    ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
    ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
    ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
    ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
    ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
    ProviderEntry("huggingface", "Hugging Face", "Hugging Face Inference Providers (20+ open models)"),
    ProviderEntry("gemini", "Google AI Studio", "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"),
    ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"),
    ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"),
    ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"),
    ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
    ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"),
    ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
    ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
    ProviderEntry("alibaba", "Alibaba Cloud (DashScope)", "Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
    ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
    ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
    ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
    ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
    ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
    ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"),
    ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
]


# Derived dicts — used throughout the codebase
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
_PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider


_PROVIDER_ALIASES = {
    "glm": "zai",
    "z-ai": "zai",
    "z.ai": "zai",
    "zhipu": "zai",
    "github": "copilot",
    "github-copilot": "copilot",
    "github-models": "copilot",
    "github-model": "copilot",
    "github-copilot-acp": "copilot-acp",
    "copilot-acp-agent": "copilot-acp",
    "google": "gemini",
    "google-gemini": "gemini",
    "google-ai-studio": "gemini",
    "kimi": "kimi-coding",
    "moonshot": "kimi-coding",
    "kimi-cn": "kimi-coding-cn",
    "moonshot-cn": "kimi-coding-cn",
    "arcee-ai": "arcee",
    "arceeai": "arcee",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
    "claude-code": "anthropic",
    "deep-seek": "deepseek",
    "opencode": "opencode-zen",
    "zen": "opencode-zen",
    "go": "opencode-go",
    "opencode-go-sub": "opencode-go",
    "aigateway": "ai-gateway",
    "vercel": "ai-gateway",
    "vercel-ai-gateway": "ai-gateway",
    "kilo": "kilocode",
    "kilo-code": "kilocode",
    "kilo-gateway": "kilocode",
    "dashscope": "alibaba",
    "aliyun": "alibaba",
    "qwen": "alibaba",
    "alibaba-cloud": "alibaba",
    "qwen-portal": "qwen-oauth",
    "hf": "huggingface",
    "hugging-face": "huggingface",
    "huggingface-hub": "huggingface",
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",
    "aws": "bedrock",
    "aws-bedrock": "bedrock",
    "amazon-bedrock": "bedrock",
    "amazon": "bedrock",
    "grok": "xai",
    "x-ai": "xai",
    "x.ai": "xai",
    "ollama": "custom",  # bare "ollama" = local; use "ollama-cloud" for cloud
    "ollama_cloud": "ollama-cloud",
}
|
|
|
|
|
|
2026-02-22 02:16:11 -08:00
|
|
|
|
2026-04-12 03:53:30 -07:00
|
|
|
def get_default_model_for_provider(provider: str) -> str:
|
|
|
|
|
"""Return the default model for a provider, or empty string if unknown.
|
|
|
|
|
|
|
|
|
|
Uses the first entry in _PROVIDER_MODELS as the default. This is the
|
|
|
|
|
model a user would be offered first in the ``hermes model`` picker.
|
|
|
|
|
|
|
|
|
|
Used as a fallback when the user has configured a provider but never
|
|
|
|
|
selected a model (e.g. ``hermes auth add openai-codex`` without
|
|
|
|
|
``hermes model``).
|
|
|
|
|
"""
|
|
|
|
|
models = _PROVIDER_MODELS.get(provider, [])
|
|
|
|
|
return models[0] if models else ""
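

# Illustrative sketch (a hypothetical helper, not part of this module's
# public surface): the fallback pattern a caller might use when a provider
# is configured but no model was ever picked. The provider id and the
# last-resort default below are assumptions for demonstration only.
def _demo_default_model_fallback() -> None:
    configured_model = ""  # e.g. config.yaml names a provider but no model
    model = configured_model or get_default_model_for_provider("openai-codex")
    if not model:
        model = "openai/gpt-5.4"  # hypothetical last-resort default
    print(f"resolved model: {model}")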
|
|
|
|
|
|
|
|
|
|
|
2026-04-09 15:11:58 -05:00
|
|
|
def _openrouter_model_is_free(pricing: Any) -> bool:
|
|
|
|
|
"""Return True when both prompt and completion pricing are zero."""
|
|
|
|
|
if not isinstance(pricing, dict):
|
|
|
|
|
return False
|
|
|
|
|
try:
|
|
|
|
|
return float(pricing.get("prompt", "0")) == 0 and float(pricing.get("completion", "0")) == 0
|
|
|
|
|
except (TypeError, ValueError):
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_openrouter_models(
|
|
|
|
|
timeout: float = 8.0,
|
|
|
|
|
*,
|
|
|
|
|
force_refresh: bool = False,
|
|
|
|
|
) -> list[tuple[str, str]]:
|
|
|
|
|
"""Return the curated OpenRouter picker list, refreshed from the live catalog when possible."""
|
|
|
|
|
global _openrouter_catalog_cache
|
|
|
|
|
|
|
|
|
|
if _openrouter_catalog_cache is not None and not force_refresh:
|
|
|
|
|
return list(_openrouter_catalog_cache)
|
|
|
|
|
|
|
|
|
|
fallback = list(OPENROUTER_MODELS)
|
|
|
|
|
preferred_ids = [mid for mid, _ in fallback]
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
req = urllib.request.Request(
|
|
|
|
|
"https://openrouter.ai/api/v1/models",
|
|
|
|
|
headers={"Accept": "application/json"},
|
|
|
|
|
)
|
|
|
|
|
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
|
|
|
|
payload = json.loads(resp.read().decode())
|
|
|
|
|
except Exception:
|
|
|
|
|
return list(_openrouter_catalog_cache or fallback)
|
|
|
|
|
|
|
|
|
|
live_items = payload.get("data", [])
|
|
|
|
|
if not isinstance(live_items, list):
|
|
|
|
|
return list(_openrouter_catalog_cache or fallback)
|
|
|
|
|
|
|
|
|
|
live_by_id: dict[str, dict[str, Any]] = {}
|
|
|
|
|
for item in live_items:
|
|
|
|
|
if not isinstance(item, dict):
|
|
|
|
|
continue
|
|
|
|
|
mid = str(item.get("id") or "").strip()
|
|
|
|
|
if not mid:
|
|
|
|
|
continue
|
|
|
|
|
live_by_id[mid] = item
|
|
|
|
|
|
|
|
|
|
curated: list[tuple[str, str]] = []
|
|
|
|
|
for preferred_id in preferred_ids:
|
|
|
|
|
live_item = live_by_id.get(preferred_id)
|
|
|
|
|
if live_item is None:
|
|
|
|
|
continue
|
|
|
|
|
desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else ""
|
|
|
|
|
curated.append((preferred_id, desc))
|
|
|
|
|
|
|
|
|
|
if not curated:
|
|
|
|
|
return list(_openrouter_catalog_cache or fallback)
|
|
|
|
|
|
|
|
|
|
first_id, _ = curated[0]
|
|
|
|
|
curated[0] = (first_id, "recommended")
|
|
|
|
|
_openrouter_catalog_cache = curated
|
|
|
|
|
return list(curated)
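

# Illustrative sketch (hypothetical, not wired into the CLI): the curated
# list is cached in _openrouter_catalog_cache after the first successful
# fetch, and force_refresh=True bypasses that cache. Lengths depend on the
# live catalog, so nothing concrete is asserted here.
def _demo_openrouter_catalog_refresh() -> None:
    cached = fetch_openrouter_models()  # network fetch or static fallback
    again = fetch_openrouter_models()  # served from the in-process cache
    refreshed = fetch_openrouter_models(force_refresh=True)  # refetches
    print(len(cached), len(again), len(refreshed))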
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def model_ids(*, force_refresh: bool = False) -> list[str]:
|
2026-03-07 19:56:48 -08:00
|
|
|
"""Return just the OpenRouter model-id strings."""
|
2026-04-09 15:11:58 -05:00
|
|
|
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
|
2026-02-22 02:16:11 -08:00
|
|
|
|
|
|
|
|
|
2026-03-07 19:56:48 -08:00
|
|
|
|
2026-04-10 03:03:30 -07:00
|
|
|
|
2026-04-03 18:46:45 -04:00
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Pricing helpers — fetch live pricing from OpenRouter-compatible /v1/models
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
# Cache: base_url → {model_id → {"prompt": str, "completion": str, ...}}
|
|
|
|
|
_pricing_cache: dict[str, dict[str, dict[str, str]]] = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _format_price_per_mtok(per_token_str: str) -> str:
|
|
|
|
|
"""Convert a per-token price string to a human-friendly $/Mtok string.
|
|
|
|
|
|
|
|
|
|
Always uses 2 decimal places so that prices align vertically when
|
|
|
|
|
right-justified in a column (the decimal point stays in the same position).
|
|
|
|
|
|
|
|
|
|
Examples:
|
|
|
|
|
"0.000003" → "$3.00" (per million tokens)
|
|
|
|
|
"0.00003" → "$30.00"
|
|
|
|
|
"0.00000015" → "$0.15"
|
|
|
|
|
"0.0000001" → "$0.10"
|
|
|
|
|
"0.00018" → "$180.00"
|
|
|
|
|
"0" → "free"
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
val = float(per_token_str)
|
|
|
|
|
except (TypeError, ValueError):
|
|
|
|
|
return "?"
|
|
|
|
|
if val == 0:
|
|
|
|
|
return "free"
|
|
|
|
|
per_m = val * 1_000_000
|
|
|
|
|
return f"${per_m:.2f}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def format_model_pricing_table(
|
|
|
|
|
models: list[tuple[str, str]],
|
|
|
|
|
pricing_map: dict[str, dict[str, str]],
|
|
|
|
|
current_model: str = "",
|
|
|
|
|
indent: str = " ",
|
|
|
|
|
) -> list[str]:
|
|
|
|
|
"""Build a column-aligned model+pricing table for terminal display.
|
|
|
|
|
|
|
|
|
|
Returns a list of pre-formatted lines ready to print.
|
|
|
|
|
*models* is ``[(model_id, description), ...]``.
|
|
|
|
|
"""
|
|
|
|
|
if not models:
|
|
|
|
|
return []
|
|
|
|
|
|
2026-04-05 22:39:02 -04:00
|
|
|
# Build rows: (model_id, input_price, output_price, cache_price, is_current)
|
|
|
|
|
rows: list[tuple[str, str, str, str, bool]] = []
|
|
|
|
|
has_cache = False
|
2026-04-03 18:46:45 -04:00
|
|
|
for mid, _desc in models:
|
|
|
|
|
is_cur = mid == current_model
|
|
|
|
|
p = pricing_map.get(mid)
|
|
|
|
|
if p:
|
|
|
|
|
inp = _format_price_per_mtok(p.get("prompt", ""))
|
|
|
|
|
out = _format_price_per_mtok(p.get("completion", ""))
|
2026-04-05 22:39:02 -04:00
|
|
|
cache_read = p.get("input_cache_read", "")
|
|
|
|
|
cache = _format_price_per_mtok(cache_read) if cache_read else ""
|
|
|
|
|
if cache:
|
|
|
|
|
has_cache = True
|
2026-04-03 18:46:45 -04:00
|
|
|
else:
|
2026-04-05 22:39:02 -04:00
|
|
|
inp, out, cache = "", "", ""
|
|
|
|
|
rows.append((mid, inp, out, cache, is_cur))
|
2026-04-03 18:46:45 -04:00
|
|
|
|
|
|
|
|
name_col = max(len(r[0]) for r in rows) + 2
|
|
|
|
|
# Compute price column widths from the actual data so decimals align
|
|
|
|
|
price_col = max(
|
|
|
|
|
max((len(r[1]) for r in rows if r[1]), default=4),
|
|
|
|
|
max((len(r[2]) for r in rows if r[2]), default=4),
|
|
|
|
|
3, # minimum: "In" / "Out" header
|
|
|
|
|
)
|
2026-04-05 22:39:02 -04:00
|
|
|
cache_col = max(
|
|
|
|
|
max((len(r[3]) for r in rows if r[3]), default=4),
|
|
|
|
|
5, # minimum: "Cache" header
|
|
|
|
|
) if has_cache else 0
|
2026-04-03 18:46:45 -04:00
|
|
|
lines: list[str] = []
|
|
|
|
|
|
|
|
|
|
# Header
|
2026-04-05 22:39:02 -04:00
|
|
|
if has_cache:
|
|
|
|
|
lines.append(f"{indent}{'Model':<{name_col}} {'In':>{price_col}} {'Out':>{price_col}} {'Cache':>{cache_col}} /Mtok")
|
|
|
|
|
lines.append(f"{indent}{'-' * name_col} {'-' * price_col} {'-' * price_col} {'-' * cache_col}")
|
|
|
|
|
else:
|
|
|
|
|
lines.append(f"{indent}{'Model':<{name_col}} {'In':>{price_col}} {'Out':>{price_col}} /Mtok")
|
|
|
|
|
lines.append(f"{indent}{'-' * name_col} {'-' * price_col} {'-' * price_col}")
|
|
|
|
|
|
|
|
|
|
for mid, inp, out, cache, is_cur in rows:
|
2026-04-03 18:46:45 -04:00
|
|
|
marker = " ← current" if is_cur else ""
|
2026-04-05 22:39:02 -04:00
|
|
|
if has_cache:
|
|
|
|
|
lines.append(f"{indent}{mid:<{name_col}} {inp:>{price_col}} {out:>{price_col}} {cache:>{cache_col}}{marker}")
|
|
|
|
|
else:
|
|
|
|
|
lines.append(f"{indent}{mid:<{name_col}} {inp:>{price_col}} {out:>{price_col}}{marker}")
|
2026-04-03 18:46:45 -04:00
|
|
|
|
|
|
|
|
return lines
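

# Illustrative sketch (hand-written per-token prices, chosen only to
# exercise the layout): the "Cache" column is emitted only because one
# entry carries input_cache_read; rows with no pricing render with blank
# price columns, and the current model gets the arrow marker.
def _demo_pricing_table() -> None:
    models = [("anthropic/claude-opus-4.6", "recommended"), ("openai/gpt-5.4", "")]
    pricing = {
        "anthropic/claude-opus-4.6": {
            "prompt": "0.000015",
            "completion": "0.000075",
            "input_cache_read": "0.0000015",
        },
        # openai/gpt-5.4 intentionally absent → blank price columns
    }
    for line in format_model_pricing_table(models, pricing, current_model="openai/gpt-5.4"):
        print(line)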
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_models_with_pricing(
|
|
|
|
|
api_key: str | None = None,
|
|
|
|
|
base_url: str = "https://openrouter.ai/api",
|
|
|
|
|
timeout: float = 8.0,
|
|
|
|
|
*,
|
|
|
|
|
force_refresh: bool = False,
|
|
|
|
|
) -> dict[str, dict[str, str]]:
|
|
|
|
|
"""Fetch ``/v1/models`` and return ``{model_id: {prompt, completion}}`` pricing.
|
|
|
|
|
|
|
|
|
|
Results are cached per *base_url* so repeated calls are free.
|
|
|
|
|
Works with any OpenRouter-compatible endpoint (OpenRouter, Nous Portal).
|
|
|
|
|
"""
|
|
|
|
|
cache_key = (base_url or "").rstrip("/")
|
|
|
|
|
if not force_refresh and cache_key in _pricing_cache:
|
|
|
|
|
return _pricing_cache[cache_key]
|
|
|
|
|
|
|
|
|
|
url = cache_key + "/v1/models"  # cache_key is already rstripped above
|
|
|
|
|
headers: dict[str, str] = {"Accept": "application/json"}
|
|
|
|
|
if api_key:
|
|
|
|
|
headers["Authorization"] = f"Bearer {api_key}"
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
req = urllib.request.Request(url, headers=headers)
|
|
|
|
|
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
|
|
|
|
payload = json.loads(resp.read().decode())
|
|
|
|
|
except Exception:
|
|
|
|
|
_pricing_cache[cache_key] = {}
|
|
|
|
|
return {}
|
|
|
|
|
|
|
|
|
|
result: dict[str, dict[str, str]] = {}
|
|
|
|
|
for item in payload.get("data", []):
|
|
|
|
|
mid = item.get("id")
|
|
|
|
|
pricing = item.get("pricing")
|
|
|
|
|
if mid and isinstance(pricing, dict):
|
2026-04-05 22:39:02 -04:00
|
|
|
entry: dict[str, str] = {
|
2026-04-03 18:46:45 -04:00
|
|
|
"prompt": str(pricing.get("prompt", "")),
|
|
|
|
|
"completion": str(pricing.get("completion", "")),
|
|
|
|
|
}
|
2026-04-05 22:39:02 -04:00
|
|
|
if pricing.get("input_cache_read"):
|
|
|
|
|
entry["input_cache_read"] = str(pricing["input_cache_read"])
|
|
|
|
|
if pricing.get("input_cache_write"):
|
|
|
|
|
entry["input_cache_write"] = str(pricing["input_cache_write"])
|
|
|
|
|
result[mid] = entry
|
2026-04-03 18:46:45 -04:00
|
|
|
|
|
|
|
|
_pricing_cache[cache_key] = result
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_openrouter_api_key() -> str:
|
|
|
|
|
"""Best-effort OpenRouter API key for pricing fetch."""
|
|
|
|
|
return os.getenv("OPENROUTER_API_KEY", "").strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_nous_pricing_credentials() -> tuple[str, str]:
|
|
|
|
|
"""Return ``(api_key, base_url)`` for Nous Portal pricing, or empty strings."""
|
|
|
|
|
try:
|
|
|
|
|
from hermes_cli.auth import resolve_nous_runtime_credentials
|
|
|
|
|
creds = resolve_nous_runtime_credentials()
|
|
|
|
|
if creds:
|
|
|
|
|
return (creds.get("api_key", ""), creds.get("base_url", ""))
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
return ("", "")
|
|
|
|
|
|
|
|
|
|
|
2026-04-09 15:11:58 -05:00
|
|
|
def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
|
2026-04-03 18:46:45 -04:00
|
|
|
"""Return live pricing for providers that support it (openrouter, nous)."""
|
|
|
|
|
normalized = normalize_provider(provider)
|
|
|
|
|
if normalized == "openrouter":
|
|
|
|
|
return fetch_models_with_pricing(
|
|
|
|
|
api_key=_resolve_openrouter_api_key(),
|
|
|
|
|
base_url="https://openrouter.ai/api",
|
2026-04-09 15:11:58 -05:00
|
|
|
force_refresh=force_refresh,
|
2026-04-03 18:46:45 -04:00
|
|
|
)
|
|
|
|
|
if normalized == "nous":
|
|
|
|
|
api_key, base_url = _resolve_nous_pricing_credentials()
|
|
|
|
|
if base_url:
|
|
|
|
|
# Nous base_url typically looks like https://inference-api.nousresearch.com/v1
|
|
|
|
|
# fetch_models_with_pricing() expects the base without the /v1 suffix
|
|
|
|
|
stripped = base_url.rstrip("/")
|
|
|
|
|
if stripped.endswith("/v1"):
|
|
|
|
|
stripped = stripped[:-3]
|
|
|
|
|
return fetch_models_with_pricing(
|
|
|
|
|
api_key=api_key,
|
|
|
|
|
base_url=stripped,
|
2026-04-09 15:11:58 -05:00
|
|
|
force_refresh=force_refresh,
|
2026-04-03 18:46:45 -04:00
|
|
|
)
|
|
|
|
|
return {}
|
|
|
|
|
|
|
|
|
|
|
2026-03-08 06:09:36 -07:00
|
|
|
# All provider IDs and aliases that are valid for the provider:model syntax.
|
|
|
|
|
_KNOWN_PROVIDER_NAMES: set[str] = (
|
|
|
|
|
set(_PROVIDER_LABELS.keys())
|
|
|
|
|
| set(_PROVIDER_ALIASES.keys())
|
|
|
|
|
| {"openrouter", "custom"}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_available_providers() -> list[dict[str, str]]:
|
|
|
|
|
"""Return info about all providers the user could use with ``provider:model``.
|
|
|
|
|
|
|
|
|
|
Each dict has ``id``, ``label``, ``aliases``, and ``authenticated``.
|
|
|
|
|
Checks which providers have valid credentials configured.
|
2026-04-13 14:59:50 -07:00
|
|
|
|
|
|
|
|
Derives the provider list from :data:`CANONICAL_PROVIDERS` (single
|
|
|
|
|
source of truth shared with ``hermes model``, ``/model``, etc.).
|
2026-03-08 06:09:36 -07:00
|
|
|
"""
|
2026-04-13 14:59:50 -07:00
|
|
|
# Derive display order from canonical list + custom
|
|
|
|
|
provider_order = [p.slug for p in CANONICAL_PROVIDERS] + ["custom"]
|
|
|
|
|
|
2026-03-08 06:09:36 -07:00
|
|
|
# Build reverse alias map
|
|
|
|
|
aliases_for: dict[str, list[str]] = {}
|
|
|
|
|
for alias, canonical in _PROVIDER_ALIASES.items():
|
|
|
|
|
aliases_for.setdefault(canonical, []).append(alias)
|
|
|
|
|
|
|
|
|
|
result = []
|
2026-04-13 14:59:50 -07:00
|
|
|
for pid in provider_order:
|
2026-03-08 06:09:36 -07:00
|
|
|
label = _PROVIDER_LABELS.get(pid, pid)
|
|
|
|
|
alias_list = aliases_for.get(pid, [])
|
|
|
|
|
# Check if this provider has credentials available
|
|
|
|
|
has_creds = False
|
|
|
|
|
try:
|
2026-03-21 12:55:42 -07:00
|
|
|
from hermes_cli.auth import get_auth_status, has_usable_secret
|
2026-03-17 01:52:46 -07:00
|
|
|
if pid == "custom":
|
refactor: make config.yaml the single source of truth for endpoint URLs (#4165)
OPENAI_BASE_URL was written to .env AND config.yaml, creating a dual-source
confusion. Users (especially Docker) would see the URL in .env and assume
that's where all config lives, then wonder why LLM_MODEL in .env didn't work.
Changes:
- Remove all 27 save_env_value("OPENAI_BASE_URL", ...) calls across main.py,
setup.py, and tools_config.py
- Remove OPENAI_BASE_URL env var reading from runtime_provider.py, cli.py,
models.py, and gateway/run.py
- Remove LLM_MODEL/HERMES_MODEL env var reading from gateway/run.py and
auxiliary_client.py — config.yaml model.default is authoritative
- Vision base URL now saved to config.yaml auxiliary.vision.base_url
(both setup wizard and tools_config paths)
- Tests updated to set config values instead of env vars
Convention enforced: .env is for SECRETS only (API keys). All other
configuration (model names, base URLs, provider selection) lives
exclusively in config.yaml.
2026-03-30 22:02:53 -07:00
|
|
|
custom_base_url = _get_custom_base_url() or ""
|
2026-03-21 12:55:42 -07:00
|
|
|
has_creds = bool(custom_base_url.strip())
|
|
|
|
|
elif pid == "openrouter":
|
|
|
|
|
has_creds = has_usable_secret(os.getenv("OPENROUTER_API_KEY", ""))
|
2026-03-17 01:52:46 -07:00
|
|
|
else:
|
2026-03-21 12:55:42 -07:00
|
|
|
status = get_auth_status(pid)
|
|
|
|
|
has_creds = bool(status.get("logged_in") or status.get("configured"))
|
2026-03-08 06:09:36 -07:00
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
result.append({
|
|
|
|
|
"id": pid,
|
|
|
|
|
"label": label,
|
|
|
|
|
"aliases": alias_list,
|
|
|
|
|
"authenticated": has_creds,
|
|
|
|
|
})
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
2026-03-08 05:45:55 -07:00
|
|
|
def parse_model_input(raw: str, current_provider: str) -> tuple[str, str]:
|
|
|
|
|
"""Parse ``/model`` input into ``(provider, model)``.
|
|
|
|
|
|
|
|
|
|
Supports ``provider:model`` syntax to switch providers at runtime::
|
|
|
|
|
|
|
|
|
|
openrouter:anthropic/claude-sonnet-4.5 → ("openrouter", "anthropic/claude-sonnet-4.5")
|
|
|
|
|
nous:hermes-3 → ("nous", "hermes-3")
|
|
|
|
|
anthropic/claude-sonnet-4.5 → (current_provider, "anthropic/claude-sonnet-4.5")
|
|
|
|
|
gpt-5.4 → (current_provider, "gpt-5.4")
|
|
|
|
|
|
2026-03-08 06:09:36 -07:00
|
|
|
The colon is only treated as a provider delimiter if the left side is a
|
|
|
|
|
recognized provider name or alias. This avoids misinterpreting model names
|
|
|
|
|
that happen to contain colons (e.g. ``anthropic/claude-3.5-sonnet:beta``).
|
|
|
|
|
|
2026-03-08 05:45:55 -07:00
|
|
|
Returns ``(provider, model)`` where *provider* is either the explicit
|
|
|
|
|
provider from the input or *current_provider* if none was specified.
|
|
|
|
|
"""
|
|
|
|
|
stripped = raw.strip()
|
|
|
|
|
colon = stripped.find(":")
|
|
|
|
|
if colon > 0:
|
|
|
|
|
provider_part = stripped[:colon].strip().lower()
|
|
|
|
|
model_part = stripped[colon + 1:].strip()
|
2026-03-08 06:09:36 -07:00
|
|
|
if provider_part and model_part and provider_part in _KNOWN_PROVIDER_NAMES:
|
feat(model): /model command overhaul — Phases 2, 3, 5
* feat(model): persist base_url on /model switch, auto-detect for bare /model custom
Phase 2+3 of the /model command overhaul:
Phase 2 — Persist base_url on model switch:
- CLI: save model.base_url when switching to a non-OpenRouter endpoint;
clear it when switching away from custom to prevent stale URLs
leaking into the new provider's resolution
- Gateway: same logic using direct YAML write
Phase 3 — Better feedback and edge cases:
- Bare '/model custom' now auto-detects the model from the endpoint
using _auto_detect_local_model() and saves all three config values
(model, provider, base_url) atomically
- Shows endpoint URL in success messages when switching to/from
custom providers (both CLI and gateway)
- Clear error messages when no custom endpoint is configured
- Updated test assertions for the additional save_config_value call
Fixes #2562 (Phase 2+3)
* feat(model): support custom:name:model triple syntax for named custom providers
Phase 5 of the /model command overhaul.
Extends parse_model_input() to handle the triple syntax:
/model custom:local-server:qwen → provider='custom:local-server', model='qwen'
/model custom:my-model → provider='custom', model='my-model' (unchanged)
The 'custom:local-server' provider string is already supported by
_get_named_custom_provider() in runtime_provider.py, which matches
it against the custom_providers list in config.yaml. This just wires
the parsing so users can do it from the /model slash command.
Added 4 tests covering single, triple, whitespace, and empty model cases.
2026-03-24 06:58:04 -07:00
|
|
|
# Support custom:name:model triple syntax for named custom
|
|
|
|
|
# providers. ``custom:local:qwen`` → ("custom:local", "qwen").
|
|
|
|
|
# Single colon ``custom:qwen`` → ("custom", "qwen") as before.
|
|
|
|
|
if provider_part == "custom" and ":" in model_part:
|
|
|
|
|
second_colon = model_part.find(":")
|
|
|
|
|
custom_name = model_part[:second_colon].strip()
|
|
|
|
|
actual_model = model_part[second_colon + 1:].strip()
|
|
|
|
|
if custom_name and actual_model:
|
|
|
|
|
return (f"custom:{custom_name}", actual_model)
|
2026-03-08 05:45:55 -07:00
|
|
|
return (normalize_provider(provider_part), model_part)
|
|
|
|
|
return (current_provider, stripped)
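

# Illustrative sketch of the parsing rules above (the assertions follow
# directly from the colon/known-provider logic; nothing here touches
# config or the network).
def _demo_parse_model_input() -> None:
    assert parse_model_input("gpt-5.4", "openai-codex") == ("openai-codex", "gpt-5.4")
    assert parse_model_input("custom:local:qwen", "openrouter") == ("custom:local", "qwen")
    # Left of the colon is not a known provider, so the whole string is a model id.
    assert parse_model_input("anthropic/claude-3.5-sonnet:beta", "openrouter") == (
        "openrouter", "anthropic/claude-3.5-sonnet:beta"
    )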
|
|
|
|
|
|
|
|
|
|
|
2026-03-17 01:52:46 -07:00
|
|
|
def _get_custom_base_url() -> str:
|
|
|
|
|
"""Get the custom endpoint base_url from config.yaml."""
|
|
|
|
|
try:
|
|
|
|
|
from hermes_cli.config import load_config
|
|
|
|
|
config = load_config()
|
|
|
|
|
model_cfg = config.get("model", {})
|
|
|
|
|
if isinstance(model_cfg, dict):
|
|
|
|
|
return str(model_cfg.get("base_url", "")).strip()
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
|
2026-04-09 15:11:58 -05:00
|
|
|
def curated_models_for_provider(
|
|
|
|
|
provider: Optional[str],
|
|
|
|
|
*,
|
|
|
|
|
force_refresh: bool = False,
|
|
|
|
|
) -> list[tuple[str, str]]:
|
2026-03-12 02:46:33 -07:00
|
|
|
"""Return ``(model_id, description)`` tuples for a provider's model list.
|
|
|
|
|
|
|
|
|
|
Tries to fetch the live model list from the provider's API first,
|
|
|
|
|
falling back to the static ``_PROVIDER_MODELS`` catalog if the API
|
|
|
|
|
is unreachable.
|
|
|
|
|
"""
|
2026-03-08 05:45:55 -07:00
|
|
|
normalized = normalize_provider(provider)
|
|
|
|
|
if normalized == "openrouter":
|
2026-04-09 15:11:58 -05:00
|
|
|
return fetch_openrouter_models(force_refresh=force_refresh)
|
2026-03-12 02:46:33 -07:00
|
|
|
|
|
|
|
|
# Try live API first (Codex, Nous, etc. all support /models)
|
|
|
|
|
live = provider_model_ids(normalized)
|
|
|
|
|
if live:
|
|
|
|
|
return [(m, "") for m in live]
|
|
|
|
|
|
|
|
|
|
# Fallback to static catalog
|
2026-03-08 05:45:55 -07:00
|
|
|
models = _PROVIDER_MODELS.get(normalized, [])
|
|
|
|
|
return [(m, "") for m in models]
|
|
|
|
|
|
|
|
|
|
|
2026-03-16 04:34:45 -07:00
|
|
|
def detect_provider_for_model(
|
|
|
|
|
model_name: str,
|
|
|
|
|
current_provider: str,
|
|
|
|
|
) -> Optional[tuple[str, str]]:
|
|
|
|
|
"""Auto-detect the best provider for a model name.
|
|
|
|
|
|
|
|
|
|
Returns ``(provider_id, model_name)`` — the model name may be remapped
|
|
|
|
|
(e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
|
|
|
|
|
Returns ``None`` when no confident match is found.
|
|
|
|
|
|
|
|
|
|
Priority:
|
2026-03-19 12:06:48 -07:00
|
|
|
0. Bare provider name → switch to that provider's default model
|
2026-03-16 04:34:45 -07:00
|
|
|
1. Direct provider with credentials (highest)
|
|
|
|
|
2. Direct provider without credentials → remap to OpenRouter slug
|
|
|
|
|
3. OpenRouter catalog match
|
|
|
|
|
"""
|
|
|
|
|
name = (model_name or "").strip()
|
|
|
|
|
if not name:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
name_lower = name.lower()
|
|
|
|
|
|
2026-03-19 12:06:48 -07:00
|
|
|
# --- Step 0: bare provider name typed as model ---
|
|
|
|
|
# If someone types `/model nous` or `/model anthropic`, treat it as a
|
|
|
|
|
# provider switch and pick the first model from that provider's catalog.
|
|
|
|
|
# Skip "custom" and "openrouter" — custom has no model catalog, and
|
|
|
|
|
# openrouter requires an explicit model name to be useful.
|
|
|
|
|
resolved_provider = _PROVIDER_ALIASES.get(name_lower, name_lower)
|
|
|
|
|
if resolved_provider not in {"custom", "openrouter"}:
|
|
|
|
|
default_models = _PROVIDER_MODELS.get(resolved_provider, [])
|
|
|
|
|
if (
|
|
|
|
|
resolved_provider in _PROVIDER_LABELS
|
|
|
|
|
and default_models
|
|
|
|
|
and resolved_provider != normalize_provider(current_provider)
|
|
|
|
|
):
|
|
|
|
|
return (resolved_provider, default_models[0])
|
|
|
|
|
|
2026-03-16 04:34:45 -07:00
|
|
|
# Aggregators list other providers' models — never auto-switch TO them
|
2026-04-16 06:49:36 -07:00
|
|
|
_AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
|
2026-03-16 04:34:45 -07:00
|
|
|
|
|
|
|
|
# If the model belongs to the current provider's catalog, don't suggest switching
|
|
|
|
|
current_models = _PROVIDER_MODELS.get(current_provider, [])
|
|
|
|
|
if any(name_lower == m.lower() for m in current_models):
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
# --- Step 1: check static provider catalogs for a direct match ---
|
|
|
|
|
direct_match: Optional[str] = None
|
|
|
|
|
for pid, models in _PROVIDER_MODELS.items():
|
|
|
|
|
if pid == current_provider or pid in _AGGREGATORS:
|
|
|
|
|
continue
|
|
|
|
|
if any(name_lower == m.lower() for m in models):
|
|
|
|
|
direct_match = pid
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
if direct_match:
|
2026-04-16 02:27:20 -07:00
|
|
|
# Check if we have credentials for this provider — env vars,
|
|
|
|
|
# credential pool, or auth store entries.
|
2026-03-16 04:34:45 -07:00
|
|
|
has_creds = False
|
|
|
|
|
try:
|
|
|
|
|
from hermes_cli.auth import PROVIDER_REGISTRY
|
|
|
|
|
pconfig = PROVIDER_REGISTRY.get(direct_match)
|
|
|
|
|
if pconfig:
|
|
|
|
|
for env_var in pconfig.api_key_env_vars:
|
|
|
|
|
if os.getenv(env_var, "").strip():
|
|
|
|
|
has_creds = True
|
|
|
|
|
break
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2026-04-16 02:27:20 -07:00
|
|
|
# Also check credential pool and auth store — covers OAuth,
|
|
|
|
|
# Claude Code tokens, and other non-env-var credentials (#10300).
|
|
|
|
|
if not has_creds:
|
|
|
|
|
try:
|
|
|
|
|
from agent.credential_pool import load_pool
|
|
|
|
|
pool = load_pool(direct_match)
|
|
|
|
|
if pool.has_credentials():
|
|
|
|
|
has_creds = True
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
if not has_creds:
|
|
|
|
|
try:
|
|
|
|
|
from hermes_cli.auth import _load_auth_store
|
|
|
|
|
store = _load_auth_store()
|
|
|
|
|
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
|
|
|
|
|
has_creds = True
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
# Always return the direct provider match. If credentials are
|
|
|
|
|
# missing, the client init will give a clear error rather than
|
|
|
|
|
# silently routing through the wrong provider (#10300).
|
2026-03-16 04:34:45 -07:00
|
|
|
return (direct_match, name)
|
|
|
|
|
|
|
|
|
|
# --- Step 2: check OpenRouter catalog ---
|
|
|
|
|
# First try exact match (handles provider/model format)
|
|
|
|
|
or_slug = _find_openrouter_slug(name)
|
|
|
|
|
if or_slug:
|
|
|
|
|
if current_provider != "openrouter":
|
|
|
|
|
return ("openrouter", or_slug)
|
|
|
|
|
# Already on openrouter, just return the resolved slug
|
|
|
|
|
if or_slug != name:
|
|
|
|
|
return ("openrouter", or_slug)
|
|
|
|
|
return None # already on openrouter with matching name
|
|
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _find_openrouter_slug(model_name: str) -> Optional[str]:
|
|
|
|
|
"""Find the full OpenRouter model slug for a bare or partial model name.
|
|
|
|
|
|
|
|
|
|
Handles:
|
|
|
|
|
- Exact match: ``anthropic/claude-opus-4.6`` → as-is
|
|
|
|
|
- Bare name: ``deepseek-chat`` → ``deepseek/deepseek-chat``
|
|
|
|
|
- Bare name: ``claude-opus-4.6`` → ``anthropic/claude-opus-4.6``
|
|
|
|
|
"""
|
|
|
|
|
name_lower = model_name.strip().lower()
|
|
|
|
|
if not name_lower:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
# Exact match (already has provider/ prefix)
|
2026-04-09 15:11:58 -05:00
|
|
|
for mid in model_ids():
|
2026-03-16 04:34:45 -07:00
|
|
|
if name_lower == mid.lower():
|
|
|
|
|
return mid
|
|
|
|
|
|
|
|
|
|
# Try matching just the model part (after the /)
|
2026-04-09 15:11:58 -05:00
|
|
|
for mid in model_ids():
|
2026-03-16 04:34:45 -07:00
|
|
|
if "/" in mid:
|
|
|
|
|
_, model_part = mid.split("/", 1)
|
|
|
|
|
if name_lower == model_part.lower():
|
|
|
|
|
return mid
|
|
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
2026-03-07 19:56:48 -08:00
|
|
|
def normalize_provider(provider: Optional[str]) -> str:
|
2026-03-08 05:58:45 -07:00
|
|
|
"""Normalize provider aliases to Hermes' canonical provider ids.
|
|
|
|
|
|
|
|
|
|
Note: ``"auto"`` passes through unchanged — use
|
|
|
|
|
``hermes_cli.auth.resolve_provider()`` to resolve it to a concrete
|
|
|
|
|
provider based on credentials and environment.
|
|
|
|
|
"""
|
2026-03-07 19:56:48 -08:00
|
|
|
normalized = (provider or "openrouter").strip().lower()
|
|
|
|
|
return _PROVIDER_ALIASES.get(normalized, normalized)
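

# Illustrative sketch using alias entries from _PROVIDER_ALIASES above;
# None defaults to "openrouter" per the function's fallback.
def _demo_normalize_provider() -> None:
    assert normalize_provider("aws") == "bedrock"
    assert normalize_provider("hf") == "huggingface"
    assert normalize_provider("Grok") == "xai"
    assert normalize_provider(None) == "openrouter"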
|
|
|
|
|
|
|
|
|
|
|
2026-03-11 19:37:42 +03:00
|
|
|
def provider_label(provider: Optional[str]) -> str:
|
|
|
|
|
"""Return a human-friendly label for a provider id or alias."""
|
|
|
|
|
original = (provider or "openrouter").strip()
|
|
|
|
|
normalized = original.lower()
|
|
|
|
|
if normalized == "auto":
|
|
|
|
|
return "Auto"
|
|
|
|
|
normalized = normalize_provider(normalized)
|
|
|
|
|
return _PROVIDER_LABELS.get(normalized, original or "OpenRouter")
|
|
|
|
|
|
|
|
|
|
|
feat: expand /fast to all OpenAI Priority Processing models (#6960)
Previously /fast only supported gpt-5.4 and forced a provider switch to
openai-codex. Now supports all 13 models from OpenAI's Priority Processing
pricing table (gpt-5.4, gpt-5.4-mini, gpt-5.2, gpt-5.1, gpt-5, gpt-5-mini,
gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, gpt-4o, gpt-4o-mini, o3, o4-mini).
Key changes:
- Replaced _FAST_MODE_BACKEND_CONFIG with _PRIORITY_PROCESSING_MODELS frozenset
- Removed provider-forcing logic — service_tier is now injected into whatever
API path the user is already on (Codex Responses, Chat Completions, or
OpenRouter passthrough)
- Added request_overrides support to chat_completions path in run_agent.py
- Updated messaging from 'Codex inference tier' to 'Priority Processing'
- Expanded test coverage for all supported models
2026-04-09 22:06:30 -07:00
|
|
|
# Models that support OpenAI Priority Processing (service_tier="priority").
|
|
|
|
|
# See https://openai.com/api-priority-processing/ for the canonical list.
|
|
|
|
|
# Only the bare model slug is stored (no vendor prefix).
|
|
|
|
|
_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
|
|
|
|
|
"gpt-5.4",
|
|
|
|
|
"gpt-5.4-mini",
|
|
|
|
|
"gpt-5.2",
|
|
|
|
|
"gpt-5.1",
|
|
|
|
|
"gpt-5",
|
|
|
|
|
"gpt-5-mini",
|
|
|
|
|
"gpt-4.1",
|
|
|
|
|
"gpt-4.1-mini",
|
|
|
|
|
"gpt-4.1-nano",
|
|
|
|
|
"gpt-4o",
|
|
|
|
|
"gpt-4o-mini",
|
|
|
|
|
"o3",
|
|
|
|
|
"o4-mini",
|
|
|
|
|
})
|
2026-04-09 18:10:57 -07:00
|
|
|
|
2026-04-10 02:32:15 -07:00
|
|
|
# Models that support Anthropic Fast Mode (speed="fast").
|
|
|
|
|
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
|
|
|
|
|
# Currently only Claude Opus 4.6. Both hyphen and dot variants are stored
|
|
|
|
|
# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
|
|
|
|
|
_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
|
|
|
|
|
"claude-opus-4-6",
|
|
|
|
|
"claude-opus-4.6",
|
|
|
|
|
})
|
2026-04-09 18:10:57 -07:00
|
|
|
|
2026-04-10 02:32:15 -07:00
|
|
|
|
|
|
|
|
def _strip_vendor_prefix(model_id: str) -> str:
|
|
|
|
|
"""Strip vendor/ prefix from a model ID (e.g. 'anthropic/claude-opus-4-6' -> 'claude-opus-4-6')."""
|
2026-04-09 18:10:57 -07:00
|
|
|
raw = str(model_id or "").strip().lower()
|
|
|
|
|
if "/" in raw:
|
|
|
|
|
raw = raw.split("/", 1)[1]
|
2026-04-10 02:32:15 -07:00
|
|
|
return raw
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
|
|
|
|
|
"""Return whether Hermes should expose the /fast toggle for this model."""
|
|
|
|
|
raw = _strip_vendor_prefix(str(model_id or ""))
|
|
|
|
|
if raw in _PRIORITY_PROCESSING_MODELS:
|
|
|
|
|
return True
|
|
|
|
|
# Anthropic fast mode — delegate to _is_anthropic_fast_model(), which
|
|
|
|
|
# strips OpenRouter variant tags (:fast, :beta) and trailing date
|
|
|
|
|
# suffixes (e.g. claude-opus-4-6-20260401) before matching.
|
|
|
|
|
return _is_anthropic_fast_model(model_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
|
|
|
|
|
"""Return True if the model supports Anthropic's fast mode (speed='fast')."""
|
|
|
|
|
raw = _strip_vendor_prefix(str(model_id or ""))
|
|
|
|
|
base = raw.split(":")[0]  # drop OpenRouter variant tags (:fast, :beta)
|
|
|
|
|
# Drop a trailing -YYYYMMDD date suffix (e.g. claude-opus-4-6-20260401)
|
|
|
|
|
# so dated native Anthropic IDs still match the frozenset above.
|
|
|
|
|
head, _, tail = base.rpartition("-")
|
|
|
|
|
if head and len(tail) == 8 and tail.isdigit():
|
|
|
|
|
    base = head
|
|
|
|
|
return base in _ANTHROPIC_FAST_MODE_MODELS
|
2026-04-09 18:10:57 -07:00
|
|
|
|
|
|
|
|
|
2026-04-09 22:06:30 -07:00
|
|
|
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
|
2026-04-10 02:32:15 -07:00
|
|
|
"""Return request_overrides for fast/priority mode, or None if unsupported.
|
|
|
|
|
|
|
|
|
|
Returns provider-appropriate overrides:
|
|
|
|
|
- OpenAI models: ``{"service_tier": "priority"}`` (Priority Processing)
|
|
|
|
|
- Anthropic models: ``{"speed": "fast"}`` (Anthropic Fast Mode beta)
|
2026-04-09 18:10:57 -07:00
|
|
|
|
2026-04-10 02:32:15 -07:00
|
|
|
The overrides are injected into the API request kwargs by
|
|
|
|
|
``_build_api_kwargs`` in run_agent.py — each API path handles its own
|
|
|
|
|
keys (service_tier for OpenAI/Codex, speed for Anthropic Messages).
|
2026-04-09 22:06:30 -07:00
|
|
|
"""
|
|
|
|
|
if not model_supports_fast_mode(model_id):
|
2026-04-09 18:10:57 -07:00
|
|
|
return None
|
2026-04-10 02:32:15 -07:00
|
|
|
if _is_anthropic_fast_model(model_id):
|
|
|
|
|
return {"speed": "fast"}
|
2026-04-09 22:06:30 -07:00
|
|
|
return {"service_tier": "priority"}
|
2026-04-09 18:10:57 -07:00
|
|
|
|
|
|
|
|
|
2026-03-17 23:40:22 -07:00
|
|
|
def _resolve_copilot_catalog_api_key() -> str:
|
|
|
|
|
"""Best-effort GitHub token for fetching the Copilot model catalog."""
|
|
|
|
|
try:
|
|
|
|
|
from hermes_cli.auth import resolve_api_key_provider_credentials
|
|
|
|
|
|
|
|
|
|
creds = resolve_api_key_provider_credentials("copilot")
|
|
|
|
|
return str(creds.get("api_key") or "").strip()
|
|
|
|
|
except Exception:
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
|
2026-04-09 15:11:58 -05:00
|
|
|
def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) -> list[str]:
|
2026-03-12 02:46:33 -07:00
|
|
|
"""Return the best known model catalog for a provider.
|
|
|
|
|
|
|
|
|
|
Tries live API endpoints for providers that support them (Codex,
|
|
|
|
|
Copilot, Nous, Anthropic, AI Gateway, Ollama Cloud, custom),
|
|
|
|
|
falling back to the static ``_PROVIDER_MODELS`` lists otherwise.
|
|
|
|
|
"""
|
2026-03-07 19:56:48 -08:00
|
|
|
normalized = normalize_provider(provider)
|
|
|
|
|
if normalized == "openrouter":
|
2026-04-09 15:11:58 -05:00
|
|
|
return model_ids(force_refresh=force_refresh)
|
2026-03-07 19:56:48 -08:00
|
|
|
if normalized == "openai-codex":
|
|
|
|
|
from hermes_cli.codex_models import get_codex_model_ids
|
|
|
|
|
|
|
|
|
|
return get_codex_model_ids()
|
2026-03-17 23:40:22 -07:00
|
|
|
if normalized in {"copilot", "copilot-acp"}:
|
|
|
|
|
try:
|
|
|
|
|
live = _fetch_github_models(_resolve_copilot_catalog_api_key())
|
|
|
|
|
if live:
|
|
|
|
|
return live
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
if normalized == "copilot-acp":
|
|
|
|
|
return list(_PROVIDER_MODELS.get("copilot", []))
|
2026-03-12 02:46:33 -07:00
|
|
|
if normalized == "nous":
|
|
|
|
|
# Try live Nous Portal /models endpoint
|
|
|
|
|
try:
|
|
|
|
|
from hermes_cli.auth import fetch_nous_models, resolve_nous_runtime_credentials
|
|
|
|
|
creds = resolve_nous_runtime_credentials()
|
|
|
|
|
if creds:
|
2026-03-17 03:42:46 -07:00
|
|
|
live = fetch_nous_models(api_key=creds.get("api_key", ""), inference_base_url=creds.get("base_url", ""))
|
2026-03-12 02:46:33 -07:00
|
|
|
if live:
|
|
|
|
|
return live
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2026-03-12 17:04:31 -07:00
|
|
|
if normalized == "anthropic":
|
|
|
|
|
live = _fetch_anthropic_models()
|
|
|
|
|
if live:
|
|
|
|
|
return live
|
2026-03-17 00:12:16 -07:00
|
|
|
if normalized == "ai-gateway":
|
|
|
|
|
live = _fetch_ai_gateway_models()
|
|
|
|
|
if live:
|
|
|
|
|
return live
|
2026-04-16 20:03:31 +09:30
|
|
|
if normalized == "ollama-cloud":
|
|
|
|
|
live = fetch_ollama_cloud_models(force_refresh=force_refresh)
|
|
|
|
|
if live:
|
|
|
|
|
return live
|
2026-03-17 01:52:46 -07:00
|
|
|
if normalized == "custom":
|
|
|
|
|
base_url = _get_custom_base_url()
|
|
|
|
|
if base_url:
|
|
|
|
|
# Try common API key env vars for custom endpoints
|
|
|
|
|
api_key = (
|
|
|
|
|
os.getenv("CUSTOM_API_KEY", "")
|
|
|
|
|
or os.getenv("OPENAI_API_KEY", "")
|
|
|
|
|
or os.getenv("OPENROUTER_API_KEY", "")
|
|
|
|
|
)
|
|
|
|
|
live = fetch_api_models(api_key, base_url)
|
|
|
|
|
if live:
|
|
|
|
|
return live
|
2026-03-07 19:56:48 -08:00
|
|
|
return list(_PROVIDER_MODELS.get(normalized, []))
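

# Illustrative sketch (hypothetical, not called anywhere): results vary
# with network reachability and configured credentials, so only shapes
# are printed, never asserted.
def _demo_provider_catalogs() -> None:
    for provider in ("openrouter", "anthropic", "xiaomi"):
        ids = provider_model_ids(provider)
        print(f"{provider}: {len(ids)} models")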
|
|
|
|
|
|
|
|
|
|
|
2026-03-12 17:04:31 -07:00
|
|
|
def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
|
|
|
|
|
"""Fetch available models from the Anthropic /v1/models endpoint.
|
|
|
|
|
|
|
|
|
|
Uses resolve_anthropic_token() to find credentials (env vars or
|
|
|
|
|
Claude Code auto-discovery). Returns sorted model IDs or None.
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
from agent.anthropic_adapter import resolve_anthropic_token, _is_oauth_token
|
|
|
|
|
except ImportError:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
token = resolve_anthropic_token()
|
|
|
|
|
if not token:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
headers: dict[str, str] = {"anthropic-version": "2023-06-01"}
|
|
|
|
|
if _is_oauth_token(token):
|
|
|
|
|
headers["Authorization"] = f"Bearer {token}"
|
fix: Anthropic OAuth — beta header, token refresh, config contamination, reauthentication (#1132)
Fixes Anthropic OAuth/subscription authentication end-to-end:
Auth failures (401 errors):
- Add missing 'claude-code-20250219' beta header for OAuth tokens. Both
clawdbot and OpenCode include this alongside 'oauth-2025-04-20' — without
it, Anthropic's API rejects OAuth tokens with 401 authentication errors.
- Fix _fetch_anthropic_models() to use canonical beta headers from
_COMMON_BETAS + _OAUTH_ONLY_BETAS instead of hardcoding.
Token refresh:
- Add _refresh_oauth_token() — when Claude Code credentials from
~/.claude/.credentials.json are expired but have a refresh token,
automatically POST to console.anthropic.com/v1/oauth/token to get
a new access token. Uses the same client_id as Claude Code / OpenCode.
- Add _write_claude_code_credentials() — writes refreshed tokens back
to ~/.claude/.credentials.json, preserving other fields.
- resolve_anthropic_token() now auto-refreshes expired tokens before
returning None.
Config contamination:
- Anthropic's _model_flow_anthropic() no longer saves base_url to config.
Since resolve_runtime_provider() always hardcodes Anthropic's URL, the
stale base_url was contaminating other providers when users switched
without re-running 'hermes model' (e.g., Codex hitting api.anthropic.com).
- _update_config_for_provider() now pops base_url when passed empty string.
- Same fix in setup.py.
Flow/UX (hermes model command):
- CLAUDE_CODE_OAUTH_TOKEN env var now checked in credential detection
- Reauthentication option when existing credentials found
- run_oauth_setup_token() runs 'claude setup-token' as interactive
subprocess, then auto-detects saved credentials
- Clean has_creds/needs_auth flow in both main.py and setup.py
Tests (14 new):
- Beta header assertions for claude-code-20250219
- Token refresh: successful refresh with credential writeback, failed
refresh returns None, no refresh token returns None
- Credential writeback: new file creation, preserving existing fields
- Auto-refresh integration in resolve_anthropic_token()
- CLAUDE_CODE_OAUTH_TOKEN fallback, credential file auto-discovery
- run_oauth_setup_token() (5 scenarios)
2026-03-12 20:45:50 -07:00
|
|
|
from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS
|
|
|
|
|
headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
|
2026-03-12 17:04:31 -07:00
|
|
|
else:
|
|
|
|
|
headers["x-api-key"] = token
|
|
|
|
|
|
|
|
|
|
req = urllib.request.Request(
|
|
|
|
|
"https://api.anthropic.com/v1/models",
|
|
|
|
|
headers=headers,
|
|
|
|
|
)
|
|
|
|
|
try:
|
|
|
|
|
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
|
|
|
|
data = json.loads(resp.read().decode())
|
|
|
|
|
models = [m["id"] for m in data.get("data", []) if m.get("id")]
|
|
|
|
|
# Sort: opus first, then sonnet, then haiku; alphabetical within each tier
|
|
|
|
|
return sorted(models, key=lambda m: (
|
|
|
|
|
"opus" not in m, # opus first
|
|
|
|
|
"sonnet" not in m, # then sonnet
|
|
|
|
|
"haiku" not in m, # then haiku
|
|
|
|
|
m, # alphabetical within tier
|
|
|
|
|
))
|
2026-03-12 17:14:22 -07:00
|
|
|
except Exception as e:
|
|
|
|
|
import logging
|
|
|
|
|
logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", e)
|
2026-03-12 17:04:31 -07:00
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
2026-03-17 23:40:22 -07:00
|
|
|
def _payload_items(payload: Any) -> list[dict[str, Any]]:
|
|
|
|
|
if isinstance(payload, list):
|
|
|
|
|
return [item for item in payload if isinstance(item, dict)]
|
|
|
|
|
if isinstance(payload, dict):
|
|
|
|
|
data = payload.get("data", [])
|
|
|
|
|
if isinstance(data, list):
|
|
|
|
|
return [item for item in data if isinstance(item, dict)]
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def copilot_default_headers() -> dict[str, str]:
|
2026-03-18 03:25:58 -07:00
|
|
|
"""Standard headers for Copilot API requests.
|
|
|
|
|
|
|
|
|
|
Includes Openai-Intent and x-initiator headers that opencode and the
|
|
|
|
|
Copilot CLI send on every request.
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
from hermes_cli.copilot_auth import copilot_request_headers
|
|
|
|
|
return copilot_request_headers(is_agent_turn=True)
|
|
|
|
|
except ImportError:
|
|
|
|
|
return {
|
|
|
|
|
"Editor-Version": COPILOT_EDITOR_VERSION,
|
|
|
|
|
"User-Agent": "HermesAgent/1.0",
|
|
|
|
|
"Openai-Intent": "conversation-edits",
|
|
|
|
|
"x-initiator": "agent",
|
|
|
|
|
}
|
2026-03-17 23:40:22 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _copilot_catalog_item_is_text_model(item: dict[str, Any]) -> bool:
|
|
|
|
|
model_id = str(item.get("id") or "").strip()
|
|
|
|
|
if not model_id:
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
if item.get("model_picker_enabled") is False:
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
capabilities = item.get("capabilities")
|
|
|
|
|
if isinstance(capabilities, dict):
|
|
|
|
|
model_type = str(capabilities.get("type") or "").strip().lower()
|
|
|
|
|
if model_type and model_type != "chat":
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
supported_endpoints = item.get("supported_endpoints")
|
|
|
|
|
if isinstance(supported_endpoints, list):
|
|
|
|
|
normalized_endpoints = {
|
|
|
|
|
str(endpoint).strip()
|
|
|
|
|
for endpoint in supported_endpoints
|
|
|
|
|
if str(endpoint).strip()
|
|
|
|
|
}
|
|
|
|
|
if normalized_endpoints and not normalized_endpoints.intersection(
|
|
|
|
|
{"/chat/completions", "/responses", "/v1/messages"}
|
|
|
|
|
):
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_github_model_catalog(
|
|
|
|
|
api_key: Optional[str] = None, timeout: float = 5.0
|
|
|
|
|
) -> Optional[list[dict[str, Any]]]:
|
|
|
|
|
"""Fetch the live GitHub Copilot model catalog for this account."""
|
|
|
|
|
attempts: list[dict[str, str]] = []
|
|
|
|
|
if api_key:
|
|
|
|
|
attempts.append({
|
|
|
|
|
**copilot_default_headers(),
|
|
|
|
|
"Authorization": f"Bearer {api_key}",
|
|
|
|
|
})
|
|
|
|
|
attempts.append(copilot_default_headers())
|
|
|
|
|
|
|
|
|
|
for headers in attempts:
|
|
|
|
|
req = urllib.request.Request(COPILOT_MODELS_URL, headers=headers)
|
|
|
|
|
try:
|
|
|
|
|
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
|
|
|
|
data = json.loads(resp.read().decode())
|
|
|
|
|
items = _payload_items(data)
|
|
|
|
|
models: list[dict[str, Any]] = []
|
|
|
|
|
seen_ids: set[str] = set()
|
|
|
|
|
for item in items:
|
|
|
|
|
if not _copilot_catalog_item_is_text_model(item):
|
|
|
|
|
continue
|
|
|
|
|
model_id = str(item.get("id") or "").strip()
|
|
|
|
|
if not model_id or model_id in seen_ids:
|
|
|
|
|
continue
|
|
|
|
|
seen_ids.add(model_id)
|
|
|
|
|
models.append(item)
|
|
|
|
|
if models:
|
|
|
|
|
return models
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_github_models_base_url(base_url: Optional[str]) -> bool:
|
|
|
|
|
normalized = (base_url or "").strip().rstrip("/").lower()
|
|
|
|
|
return (
|
|
|
|
|
normalized.startswith(COPILOT_BASE_URL)
|
|
|
|
|
or normalized.startswith("https://models.github.ai/inference")
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _fetch_github_models(api_key: Optional[str] = None, timeout: float = 5.0) -> Optional[list[str]]:
|
|
|
|
|
catalog = fetch_github_model_catalog(api_key=api_key, timeout=timeout)
|
|
|
|
|
if not catalog:
|
|
|
|
|
return None
|
|
|
|
|
return [item.get("id", "") for item in catalog if item.get("id")]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_COPILOT_MODEL_ALIASES = {
|
|
|
|
|
"openai/gpt-5": "gpt-5-mini",
|
|
|
|
|
"openai/gpt-5-chat": "gpt-5-mini",
|
|
|
|
|
"openai/gpt-5-mini": "gpt-5-mini",
|
|
|
|
|
"openai/gpt-5-nano": "gpt-5-mini",
|
|
|
|
|
"openai/gpt-4.1": "gpt-4.1",
|
|
|
|
|
"openai/gpt-4.1-mini": "gpt-4.1",
|
|
|
|
|
"openai/gpt-4.1-nano": "gpt-4.1",
|
|
|
|
|
"openai/gpt-4o": "gpt-4o",
|
|
|
|
|
"openai/gpt-4o-mini": "gpt-4o-mini",
|
|
|
|
|
"openai/o1": "gpt-5.2",
|
|
|
|
|
"openai/o1-mini": "gpt-5-mini",
|
|
|
|
|
"openai/o1-preview": "gpt-5.2",
|
|
|
|
|
"openai/o3": "gpt-5.3-codex",
|
|
|
|
|
"openai/o3-mini": "gpt-5-mini",
|
|
|
|
|
"openai/o4-mini": "gpt-5-mini",
|
|
|
|
|
"anthropic/claude-opus-4.6": "claude-opus-4.6",
|
|
|
|
|
"anthropic/claude-sonnet-4.6": "claude-sonnet-4.6",
|
|
|
|
|
"anthropic/claude-sonnet-4.5": "claude-sonnet-4.5",
|
|
|
|
|
"anthropic/claude-haiku-4.5": "claude-haiku-4.5",
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _copilot_catalog_ids(
|
|
|
|
|
catalog: Optional[list[dict[str, Any]]] = None,
|
|
|
|
|
api_key: Optional[str] = None,
|
|
|
|
|
) -> set[str]:
|
|
|
|
|
if catalog is None and api_key:
|
|
|
|
|
catalog = fetch_github_model_catalog(api_key=api_key)
|
|
|
|
|
if not catalog:
|
|
|
|
|
return set()
|
|
|
|
|
return {
|
|
|
|
|
str(item.get("id") or "").strip()
|
|
|
|
|
for item in catalog
|
|
|
|
|
if str(item.get("id") or "").strip()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize_copilot_model_id(
|
|
|
|
|
model_id: Optional[str],
|
|
|
|
|
*,
|
|
|
|
|
catalog: Optional[list[dict[str, Any]]] = None,
|
|
|
|
|
api_key: Optional[str] = None,
|
|
|
|
|
) -> str:
|
|
|
|
|
raw = str(model_id or "").strip()
|
|
|
|
|
if not raw:
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
catalog_ids = _copilot_catalog_ids(catalog=catalog, api_key=api_key)
|
|
|
|
|
alias = _COPILOT_MODEL_ALIASES.get(raw)
|
|
|
|
|
if alias:
|
|
|
|
|
return alias
|
|
|
|
|
|
|
|
|
|
candidates = [raw]
|
|
|
|
|
if "/" in raw:
|
|
|
|
|
candidates.append(raw.split("/", 1)[1].strip())
|
|
|
|
|
|
|
|
|
|
if raw.endswith("-mini"):
|
|
|
|
|
candidates.append(raw[:-5])
|
|
|
|
|
if raw.endswith("-nano"):
|
|
|
|
|
candidates.append(raw[:-5])
|
|
|
|
|
if raw.endswith("-chat"):
|
|
|
|
|
candidates.append(raw[:-5])
|
|
|
|
|
|
|
|
|
|
seen: set[str] = set()
|
|
|
|
|
for candidate in candidates:
|
|
|
|
|
if not candidate or candidate in seen:
|
|
|
|
|
continue
|
|
|
|
|
seen.add(candidate)
|
|
|
|
|
if candidate in _COPILOT_MODEL_ALIASES:
|
|
|
|
|
return _COPILOT_MODEL_ALIASES[candidate]
|
|
|
|
|
if candidate in catalog_ids:
|
|
|
|
|
return candidate
|
|
|
|
|
|
|
|
|
|
if "/" in raw:
|
|
|
|
|
return raw.split("/", 1)[1].strip()
|
|
|
|
|
return raw
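

# Illustrative sketch with no catalog or api_key supplied, so only the
# static alias table and the vendor-prefix fallback apply.
def _demo_normalize_copilot_ids() -> None:
    assert normalize_copilot_model_id("openai/gpt-4.1-mini") == "gpt-4.1"
    assert normalize_copilot_model_id("anthropic/claude-opus-4.6") == "claude-opus-4.6"
    # Unknown vendor-prefixed id falls back to the bare slug.
    assert normalize_copilot_model_id("somevendor/some-model") == "some-model"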
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _github_reasoning_efforts_for_model_id(model_id: str) -> list[str]:
|
|
|
|
|
raw = (model_id or "").strip().lower()
|
|
|
|
|
if raw.startswith(("openai/o1", "openai/o3", "openai/o4", "o1", "o3", "o4")):
|
|
|
|
|
return list(COPILOT_REASONING_EFFORTS_O_SERIES)
|
|
|
|
|
normalized = normalize_copilot_model_id(model_id).lower()
|
|
|
|
|
if normalized.startswith("gpt-5"):
|
|
|
|
|
return list(COPILOT_REASONING_EFFORTS_GPT5)
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
2026-03-18 03:54:50 -07:00
|
|
|
def _should_use_copilot_responses_api(model_id: str) -> bool:
|
|
|
|
|
"""Decide whether a Copilot model should use the Responses API.
|
|
|
|
|
|
|
|
|
|
Replicates opencode's ``shouldUseCopilotResponsesApi`` logic:
|
|
|
|
|
GPT-5+ models use Responses API, except ``gpt-5-mini`` which uses
|
|
|
|
|
Chat Completions. All non-GPT models (Claude, Gemini, etc.) use
|
|
|
|
|
Chat Completions.
|
|
|
|
|
"""
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
match = re.match(r"^gpt-(\d+)", model_id)
|
|
|
|
|
if not match:
|
|
|
|
|
return False
|
|
|
|
|
major = int(match.group(1))
|
|
|
|
|
return major >= 5 and not model_id.startswith("gpt-5-mini")
|
|
|
|
|
|
|
|
|
|
|
2026-03-17 23:40:22 -07:00
|
|
|
def copilot_model_api_mode(
|
|
|
|
|
model_id: Optional[str],
|
|
|
|
|
*,
|
|
|
|
|
catalog: Optional[list[dict[str, Any]]] = None,
|
|
|
|
|
api_key: Optional[str] = None,
|
|
|
|
|
) -> str:
|
2026-03-18 03:54:50 -07:00
|
|
|
"""Determine the API mode for a Copilot model.
|
|
|
|
|
|
|
|
|
|
Uses the model ID pattern (matching opencode's approach) as the
|
|
|
|
|
primary signal. Falls back to the catalog's ``supported_endpoints``
|
|
|
|
|
only for models not covered by the pattern check.
|
|
|
|
|
"""
|
2026-04-16 05:18:34 -07:00
|
|
|
# Fetch the catalog once so normalize + endpoint check share it
|
|
|
|
|
# (avoids two redundant network calls for non-GPT-5 models).
|
|
|
|
|
if catalog is None and api_key:
|
|
|
|
|
catalog = fetch_github_model_catalog(api_key=api_key)
|
|
|
|
|
|
2026-03-17 23:40:22 -07:00
|
|
|
normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key)
|
|
|
|
|
if not normalized:
|
|
|
|
|
return "chat_completions"
|
|
|
|
|
|
2026-03-18 03:54:50 -07:00
|
|
|
# Primary: model ID pattern (matches opencode's shouldUseCopilotResponsesApi)
|
|
|
|
|
if _should_use_copilot_responses_api(normalized):
|
|
|
|
|
return "codex_responses"
|
|
|
|
|
|
|
|
|
|
# Secondary: check catalog for non-GPT-5 models (Claude via /v1/messages, etc.)
|
2026-03-17 23:40:22 -07:00
|
|
|
if catalog:
|
|
|
|
|
catalog_entry = next((item for item in catalog if item.get("id") == normalized), None)
|
2026-03-18 03:54:50 -07:00
|
|
|
if isinstance(catalog_entry, dict):
|
|
|
|
|
supported_endpoints = {
|
|
|
|
|
str(endpoint).strip()
|
|
|
|
|
for endpoint in (catalog_entry.get("supported_endpoints") or [])
|
|
|
|
|
if str(endpoint).strip()
|
|
|
|
|
}
|
|
|
|
|
# For non-GPT-5 models, check if they only support messages API
|
|
|
|
|
if "/v1/messages" in supported_endpoints and "/chat/completions" not in supported_endpoints:
|
|
|
|
|
return "anthropic_messages"
|
2026-03-17 23:40:22 -07:00
|
|
|
|
|
|
|
|
return "chat_completions"
|
|
|
|
|
|
|
|
|
|
|
2026-04-02 09:36:24 -07:00
|
|
|
def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str:
|
|
|
|
|
"""Normalize OpenCode config IDs to the bare model slug used in API requests."""
|
|
|
|
|
provider = normalize_provider(provider_id)
|
|
|
|
|
current = str(model_id or "").strip()
|
|
|
|
|
if not current or provider not in {"opencode-zen", "opencode-go"}:
|
|
|
|
|
return current
|
|
|
|
|
|
|
|
|
|
prefix = f"{provider}/"
|
|
|
|
|
if current.lower().startswith(prefix):
|
|
|
|
|
return current[len(prefix):]
|
|
|
|
|
return current
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def opencode_model_api_mode(provider_id: Optional[str], model_id: Optional[str]) -> str:
|
|
|
|
|
"""Determine the API mode for an OpenCode Zen / Go model.
|
|
|
|
|
|
|
|
|
|
OpenCode routes different models behind different API surfaces:
|
|
|
|
|
|
|
|
|
|
- GPT-5 / Codex models on Zen use ``/v1/responses``
|
|
|
|
|
- Claude models on Zen use ``/v1/messages``
|
|
|
|
|
- MiniMax models on Go use ``/v1/messages``
|
|
|
|
|
- GLM / Kimi on Go use ``/v1/chat/completions``
|
|
|
|
|
- Other Zen models (Gemini, GLM, Kimi, MiniMax, Qwen, etc.) use
|
|
|
|
|
``/v1/chat/completions``
|
|
|
|
|
|
|
|
|
|
This follows the published OpenCode docs for Zen and Go endpoints.
|
|
|
|
|
"""
|
|
|
|
|
provider = normalize_provider(provider_id)
|
|
|
|
|
normalized = normalize_opencode_model_id(provider_id, model_id).lower()
|
|
|
|
|
if not normalized:
|
|
|
|
|
return "chat_completions"
|
|
|
|
|
|
|
|
|
|
if provider == "opencode-go":
|
|
|
|
|
if normalized.startswith("minimax-"):
|
|
|
|
|
return "anthropic_messages"
|
|
|
|
|
return "chat_completions"
|
|
|
|
|
|
|
|
|
|
if provider == "opencode-zen":
|
|
|
|
|
if normalized.startswith("claude-"):
|
|
|
|
|
return "anthropic_messages"
|
|
|
|
|
if normalized.startswith("gpt-"):
|
|
|
|
|
return "codex_responses"
|
|
|
|
|
return "chat_completions"
|
|
|
|
|
|
|
|
|
|
return "chat_completions"
|
|
|
|
|
|
|
|
|
|
|
2026-03-17 23:40:22 -07:00
|
|
|


def github_model_reasoning_efforts(
    model_id: Optional[str],
    *,
    catalog: Optional[list[dict[str, Any]]] = None,
    api_key: Optional[str] = None,
) -> list[str]:
    """Return supported reasoning-effort levels for a Copilot-visible model."""
    normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key)
    if not normalized:
        return []

    catalog_entry = None
    if catalog is not None:
        catalog_entry = next((item for item in catalog if item.get("id") == normalized), None)
    elif api_key:
        fetched_catalog = fetch_github_model_catalog(api_key=api_key)
        if fetched_catalog:
            catalog_entry = next((item for item in fetched_catalog if item.get("id") == normalized), None)

    if catalog_entry is not None:
        capabilities = catalog_entry.get("capabilities")
        if isinstance(capabilities, dict):
            supports = capabilities.get("supports")
            if isinstance(supports, dict):
                efforts = supports.get("reasoning_effort")
                if isinstance(efforts, list):
                    normalized_efforts = [
                        str(effort).strip().lower()
                        for effort in efforts
                        if str(effort).strip()
                    ]
                    return list(dict.fromkeys(normalized_efforts))
            return []
        legacy_capabilities = {
            str(capability).strip().lower()
            for capability in catalog_entry.get("capabilities", [])
            if str(capability).strip()
        }
        if "reasoning" not in legacy_capabilities:
            return []

    return _github_reasoning_efforts_for_model_id(str(model_id or normalized))
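
# Two catalog shapes are handled above (sketches; IDs and values illustrative):
#
#     {"id": "gpt-5.4",
#      "capabilities": {"supports": {"reasoning_effort": ["low", "medium", "high"]}}}
#         -> ["low", "medium", "high"]
#
#     {"id": "o4-mini", "capabilities": ["reasoning"]}    # legacy list form
#         -> falls through to _github_reasoning_efforts_for_model_id()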


def probe_api_models(
    api_key: Optional[str],
    base_url: Optional[str],
    timeout: float = 5.0,
) -> dict[str, Any]:
    """Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics."""
    normalized = (base_url or "").strip().rstrip("/")
    if not normalized:
        return {
            "models": None,
            "probed_url": None,
            "resolved_base_url": "",
            "suggested_base_url": None,
            "used_fallback": False,
        }

    if _is_github_models_base_url(normalized):
        models = _fetch_github_models(api_key=api_key, timeout=timeout)
        return {
            "models": models,
            "probed_url": COPILOT_MODELS_URL,
            "resolved_base_url": COPILOT_BASE_URL,
            "suggested_base_url": None,
            "used_fallback": False,
        }

    if normalized.endswith("/v1"):
        alternate_base = normalized[:-3].rstrip("/")
    else:
        alternate_base = normalized + "/v1"

    candidates: list[tuple[str, bool]] = [(normalized, False)]
    if alternate_base and alternate_base != normalized:
        candidates.append((alternate_base, True))

    tried: list[str] = []
    headers: dict[str, str] = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    if normalized.startswith(COPILOT_BASE_URL):
        headers.update(copilot_default_headers())

    for candidate_base, is_fallback in candidates:
        url = candidate_base.rstrip("/") + "/models"
        tried.append(url)
        req = urllib.request.Request(url, headers=headers)
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                data = json.loads(resp.read().decode())
            return {
                "models": [m.get("id", "") for m in data.get("data", [])],
                "probed_url": url,
                "resolved_base_url": candidate_base.rstrip("/"),
                "suggested_base_url": alternate_base if alternate_base != candidate_base else normalized,
                "used_fallback": is_fallback,
            }
        except Exception:
            continue

    return {
        "models": None,
        "probed_url": tried[0] if tried else normalized.rstrip("/") + "/models",
        "resolved_base_url": normalized,
        "suggested_base_url": alternate_base if alternate_base != normalized else None,
        "used_fallback": False,
    }
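
# Example of the URL heuristic (endpoint hypothetical): probing with
# base_url="https://llm.example.com" tries https://llm.example.com/models
# first, then https://llm.example.com/v1/models; a hit on the second attempt
# is reported with "used_fallback": True and
# "resolved_base_url": "https://llm.example.com/v1".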


def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]:
    """Fetch available language models with tool-use from AI Gateway."""
    api_key = os.getenv("AI_GATEWAY_API_KEY", "").strip()
    if not api_key:
        return None
    base_url = os.getenv("AI_GATEWAY_BASE_URL", "").strip()
    if not base_url:
        from hermes_constants import AI_GATEWAY_BASE_URL
        base_url = AI_GATEWAY_BASE_URL

    url = base_url.rstrip("/") + "/models"
    headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"}
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read().decode())
        return [
            m["id"]
            for m in data.get("data", [])
            if m.get("id")
            and m.get("type") == "language"
            and "tool-use" in (m.get("tags") or [])
        ]
    except Exception:
        return None
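
# The filter above keeps only entries of roughly this shape from the gateway's
# listing (a sketch; field names as read by the code):
#
#     {"id": "...", "type": "language", "tags": ["tool-use", ...]}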


def fetch_api_models(
    api_key: Optional[str],
    base_url: Optional[str],
    timeout: float = 5.0,
) -> Optional[list[str]]:
    """Fetch the list of available model IDs from the provider's ``/models`` endpoint.

    Returns a list of model ID strings, or ``None`` if the endpoint could not
    be reached (network error, timeout, auth failure, etc.).
    """
    return probe_api_models(api_key, base_url, timeout=timeout).get("models")


# ---------------------------------------------------------------------------
# Ollama Cloud — merged model discovery with disk cache
# ---------------------------------------------------------------------------

_OLLAMA_CLOUD_CACHE_TTL = 3600  # 1 hour


def _ollama_cloud_cache_path() -> Path:
    """Return the path for the Ollama Cloud model cache."""
    from hermes_constants import get_hermes_home
    return get_hermes_home() / "ollama_cloud_models_cache.json"


def _load_ollama_cloud_cache(*, ignore_ttl: bool = False) -> Optional[dict]:
    """Load cached Ollama Cloud models from disk.

    Args:
        ignore_ttl: If True, return data even if the TTL has expired (stale fallback).
    """
    try:
        cache_path = _ollama_cloud_cache_path()
        if not cache_path.exists():
            return None
        with open(cache_path, encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            return None
        models = data.get("models")
        if not (isinstance(models, list) and models):
            return None
        if not ignore_ttl:
            cached_at = data.get("cached_at", 0)
            if (time.time() - cached_at) > _OLLAMA_CLOUD_CACHE_TTL:
                return None  # stale
        return data
    except Exception:
        pass
    return None
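
# Cache file shape written by _save_ollama_cloud_cache() below (values
# illustrative):
#
#     {"models": ["model-a", "model-b"], "cached_at": 1765000000.0}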


def _save_ollama_cloud_cache(models: list[str]) -> None:
    """Persist the merged Ollama Cloud model list to disk."""
    try:
        from utils import atomic_json_write
        cache_path = _ollama_cloud_cache_path()
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        atomic_json_write(cache_path, {"models": models, "cached_at": time.time()}, indent=None)
    except Exception:
        pass


def fetch_ollama_cloud_models(
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    *,
    force_refresh: bool = False,
) -> list[str]:
    """Fetch Ollama Cloud models by merging live API + models.dev, with disk cache.

    Resolution order:

    1. Disk cache (if fresh, < 1 hour, and not force_refresh)
    2. Live ``/v1/models`` endpoint (primary — freshest source)
    3. models.dev registry (secondary — fills gaps for unlisted models)
    4. Merge: live models first, then models.dev additions (deduped)

    Returns a list of model IDs (never None — empty list on total failure).
    """
    # 1. Check disk cache
    if not force_refresh:
        cached = _load_ollama_cloud_cache()
        if cached is not None:
            return cached["models"]

    # 2. Live API probe
    if not api_key:
        api_key = os.getenv("OLLAMA_API_KEY", "")
    if not base_url:
        base_url = os.getenv("OLLAMA_BASE_URL", "") or "https://ollama.com/v1"

    live_models: list[str] = []
    if api_key:
        result = fetch_api_models(api_key, base_url, timeout=8.0)
        if result:
            live_models = result

    # 3. models.dev registry
    mdev_models: list[str] = []
    try:
        from agent.models_dev import list_agentic_models
        mdev_models = list_agentic_models("ollama-cloud")
    except Exception:
        pass

    # 4. Merge: live first, then models.dev additions (deduped, order-preserving)
    if live_models or mdev_models:
        seen: set[str] = set()
        merged: list[str] = []
        for m in live_models:
            if m and m not in seen:
                seen.add(m)
                merged.append(m)
        for m in mdev_models:
            if m and m not in seen:
                seen.add(m)
                merged.append(m)
        if merged:
            _save_ollama_cloud_cache(merged)
            return merged

    # Total failure — return stale cache if available (ignore TTL)
    stale = _load_ollama_cloud_cache(ignore_ttl=True)
    if stale is not None:
        return stale["models"]

    return []
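
# Merge behaviour sketch (IDs illustrative): with live = ["a", "b"] and
# models.dev = ["b", "c"], the merged, cached, and returned list is
# ["a", "b", "c"]; live ordering wins and duplicates are dropped.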


def validate_requested_model(
    model_name: str,
    provider: Optional[str],
    *,
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
) -> dict[str, Any]:
    """
    Validate a ``/model`` value for the active provider.

    Performs format checks first, then probes the live API to confirm
    the model actually exists.

    Returns a dict with:
    - accepted: whether the CLI should switch to the requested model now
    - persist: whether it is safe to save to config
    - recognized: whether it matched a known provider catalog
    - corrected_model: (optional) close-match replacement the caller should
      switch to instead of the raw input
    - message: optional warning / guidance for the user
    """
    requested = (model_name or "").strip()
    normalized = normalize_provider(provider)
    if normalized == "openrouter" and base_url and "openrouter.ai" not in base_url:
        normalized = "custom"

    requested_for_lookup = requested
    if normalized == "copilot":
        requested_for_lookup = normalize_copilot_model_id(
            requested,
            api_key=api_key,
        ) or requested

    if not requested:
        return {
            "accepted": False,
            "persist": False,
            "recognized": False,
            "message": "Model name cannot be empty.",
        }

    if any(ch.isspace() for ch in requested):
        return {
            "accepted": False,
            "persist": False,
            "recognized": False,
            "message": "Model names cannot contain spaces.",
        }

    if normalized == "custom":
        probe = probe_api_models(api_key, base_url)
        api_models = probe.get("models")
        if api_models is not None:
            if requested_for_lookup in set(api_models):
                return {
                    "accepted": True,
                    "persist": True,
                    "recognized": True,
                    "message": None,
                }
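
            # Illustrative difflib behaviour (model IDs are examples):
            # get_close_matches("gpt5.3-codex", api_models, n=1, cutoff=0.9)
            # returns ["gpt-5.3-codex"] when that ID is listed, while a looser
            # match such as "gpt-5.3" against "gpt-5.4" falls below the 0.9
            # cutoff and is only surfaced as a suggestion, never auto-applied.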

            # Auto-correct if the top match is very similar (e.g. typo)
            auto = get_close_matches(requested_for_lookup, api_models, n=1, cutoff=0.9)
            if auto:
                return {
                    "accepted": True,
                    "persist": True,
                    "recognized": True,
                    "corrected_model": auto[0],
                    "message": f"Auto-corrected `{requested}` → `{auto[0]}`",
                }

            suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5)
            suggestion_text = ""
            if suggestions:
                suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)

            message = (
                f"Note: `{requested}` was not found in this custom endpoint's model listing "
                f"({probe.get('probed_url')}). It may still work if the server supports hidden or aliased models."
                f"{suggestion_text}"
            )
            if probe.get("used_fallback"):
                message += (
                    f"\n Endpoint verification succeeded after trying `{probe.get('resolved_base_url')}`. "
                    f"Consider saving that as your base URL."
                )

            return {
                "accepted": True,
                "persist": True,
                "recognized": False,
                "message": message,
            }

        message = (
            f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. "
            f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification."
        )
        if probe.get("suggested_base_url"):
            message += f"\n If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`"

        return {
            "accepted": True,
            "persist": True,
            "recognized": False,
            "message": message,
        }

    # OpenAI Codex has its own catalog path; /v1/models probing is not the right validation path.
    if normalized == "openai-codex":
        try:
            codex_models = provider_model_ids("openai-codex")
        except Exception:
            codex_models = []
        if codex_models:
            if requested_for_lookup in set(codex_models):
                return {
                    "accepted": True,
                    "persist": True,
                    "recognized": True,
                    "message": None,
                }

            # Auto-correct if the top match is very similar (e.g. typo)
            auto = get_close_matches(requested_for_lookup, codex_models, n=1, cutoff=0.9)
            if auto:
                return {
                    "accepted": True,
                    "persist": True,
                    "recognized": True,
                    "corrected_model": auto[0],
                    "message": f"Auto-corrected `{requested}` → `{auto[0]}`",
                }

            suggestions = get_close_matches(requested_for_lookup, codex_models, n=3, cutoff=0.5)
            suggestion_text = ""
            if suggestions:
                suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
            return {
                "accepted": True,
                "persist": True,
                "recognized": False,
                "message": (
                    f"Note: `{requested}` was not found in the OpenAI Codex model listing. "
                    f"It may still work if your account has access to it."
                    f"{suggestion_text}"
                ),
            }

    # Probe the live API to check if the model actually exists
    api_models = fetch_api_models(api_key, base_url)

    if api_models is not None:
        if requested_for_lookup in set(api_models):
            # API confirmed the model exists
            return {
                "accepted": True,
                "persist": True,
                "recognized": True,
                "message": None,
            }
        else:
            # API responded but model is not listed. Accept anyway —
            # the user may have access to models not shown in the public
            # listing (e.g. Z.AI Pro/Max plans can use glm-5 on coding
            # endpoints even though it's not in /models). Warn but allow.

            # Auto-correct if the top match is very similar (e.g. typo)
            auto = get_close_matches(requested_for_lookup, api_models, n=1, cutoff=0.9)
            if auto:
                return {
                    "accepted": True,
                    "persist": True,
                    "recognized": True,
                    "corrected_model": auto[0],
                    "message": f"Auto-corrected `{requested}` → `{auto[0]}`",
                }

            suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5)
            suggestion_text = ""
            if suggestions:
                suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)

            return {
                "accepted": True,
                "persist": True,
                "recognized": False,
                "message": (
                    f"Note: `{requested}` was not found in this provider's model listing. "
                    f"It may still work if your plan supports it."
                    f"{suggestion_text}"
                ),
            }

    # api_models is None — couldn't reach API. Accept and persist,
    # but warn so typos don't silently break things.

    # Bedrock: use our own discovery instead of HTTP /models endpoint.
    # Bedrock's bedrock-runtime URL doesn't support /models — it uses the
    # AWS SDK control plane (ListFoundationModels + ListInferenceProfiles).
    if normalized == "bedrock":
        try:
            from agent.bedrock_adapter import discover_bedrock_models, resolve_bedrock_region
            region = resolve_bedrock_region()
            discovered = discover_bedrock_models(region)
            discovered_ids = {m["id"] for m in discovered}
            if requested in discovered_ids:
                return {
                    "accepted": True,
                    "persist": True,
                    "recognized": True,
                    "message": None,
                }
            # Not in discovered list — still accept (user may have custom
            # inference profiles or cross-account access), but warn.
            suggestions = get_close_matches(requested, list(discovered_ids), n=3, cutoff=0.4)
            suggestion_text = ""
            if suggestions:
                suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
            return {
                "accepted": True,
                "persist": True,
                "recognized": False,
                "message": (
                    f"Note: `{requested}` was not found in Bedrock model discovery for {region}. "
                    f"It may still work with custom inference profiles or cross-account access."
                    f"{suggestion_text}"
                ),
            }
        except Exception:
            pass  # Fall through to generic warning

    provider_label = _PROVIDER_LABELS.get(normalized, normalized)
    return {
        "accepted": True,
        "persist": True,
        "recognized": False,
        "message": (
            f"Could not reach the {provider_label} API to validate `{requested}`. "
            f"If the service isn't down, this model may not be valid."
        ),
    }
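
# Illustrative result (model ID and provider are examples): a near-miss such
# as validate_requested_model("gpt5.3-codex", "openai-codex") yields
#
#     {"accepted": True, "persist": True, "recognized": True,
#      "corrected_model": "gpt-5.3-codex",
#      "message": "Auto-corrected `gpt5.3-codex` → `gpt-5.3-codex`"}
#
# when the catalog lists gpt-5.3-codex; callers are expected to switch to
# corrected_model rather than the raw input.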