mirror of https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
feat(azure-foundry): auto-detect transport, models, context length
The azure-foundry wizard now probes the endpoint before asking the user
to pick anything by hand (a short usage sketch follows the list):
1. URL path sniff — endpoints whose path ends in /anthropic are Azure
   Foundry Claude routes and skip straight to anthropic_messages.
2. GET <base>/models probe — if the endpoint returns an OpenAI-shaped
   model list, we switch to chat_completions and prefill the picker
   with the returned deployment/model IDs.
3. Anthropic Messages probe — fallback for endpoints that don't expose
   /models but do speak the Anthropic Messages shape.
4. Manual fallback — private endpoints and custom routes still work;
   the user picks an API mode and types a deployment name.
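
A minimal sketch of the wizard-side flow, using only names from the
diff below; the endpoint URL and key are placeholder values:

    from hermes_cli import azure_detect

    detection = azure_detect.detect(
        "https://my-resource.openai.azure.com/openai/v1",  # placeholder
        "azure-key-placeholder",                           # placeholder
    )
    if detection.api_mode == "anthropic_messages":
        pass  # step 1 or 3 hit: go straight to the Anthropic config path
    elif detection.models_probe_ok:
        pass  # step 2 hit: prefill the picker with detection.models
    else:
        pass  # all probes failed: ask the user for API mode + deployment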
Context length for the selected model is resolved through the existing
agent.model_metadata.get_model_context_length chain (models.dev,
provider metadata, hardcoded family fallbacks) and stored in
model.context_length when a non-default value is found.
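
Continuing the sketch above, the lookup contract is: an int only when
the chain resolved a real value, and None when only the 128k default
would have fired (arguments here are placeholders):

    ctx_len = azure_detect.lookup_context_length(
        "gpt-5.4",                              # model the user picked
        "https://example-endpoint/openai/v1",   # placeholder base URL
        "azure-key-placeholder",                # placeholder key
    )
    if ctx_len:  # None means only DEFAULT_FALLBACK_CONTEXT was available
        model_cfg["context_length"] = ctx_len  # model_cfg: hypothetical config.yaml "model" dict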
Also refactors runtime_provider so Azure Foundry resolution is reused
between the explicit-credentials path and the default top-level path —
previously the /v1 strip for Anthropic-style Azure only ran when the
caller passed explicit_* args, which meant config-driven sessions
hit a double-/v1 URL.
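
The strip itself is a one-line regex (visible in the diff); a quick
illustration of the failure it prevents, using a hypothetical endpoint:

    import re

    base = "https://my-resource.services.ai.azure.com/anthropic/v1"  # hypothetical
    base = re.sub(r"/v1/?$", "", base)
    # The Anthropic SDK appends /v1/messages itself, so:
    #   without the strip: .../anthropic/v1/v1/messages  -> double /v1, 404
    #   with the strip:    .../anthropic/v1/messages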
New module hermes_cli/azure_detect.py with 19 unit tests covering:
- path sniff, model ID extraction, probe fallbacks
- HTTP error handling (URLError, HTTPError)
- context-length lookup passthrough
- DEFAULT_FALLBACK_CONTEXT rejection
New runtime tests cover:
- OpenAI-style Azure Foundry
- Anthropic-style Azure Foundry with /v1 stripping
- Missing base_url / API key raising AuthError
Rationale: Microsoft confirms there's no pure-API-key endpoint to list
Azure deployments (that requires ARM management auth). The v1 Azure
OpenAI endpoint does expose /models with the resource's available
model catalog, which is good enough for picker prefill in the common
case. Users on private/gated endpoints fall through to manual entry.
hermes_cli/azure_detect.py (new file, 300 lines)
@@ -0,0 +1,300 @@
"""Azure Foundry endpoint auto-detection.

Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:

- API transport (OpenAI-style ``chat_completions`` vs
  Anthropic-style ``anthropic_messages``)
- Available models (best effort — Azure does not expose a deployment
  listing via the inference API key, but Azure OpenAI v1 endpoints
  return the resource's model catalog via ``GET /models``)
- Context length for each discovered/entered model, via the existing
  :func:`agent.model_metadata.get_model_context_length` resolver.

Rationale:

Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
deployment enumeration requires ARM management-plane auth. Azure
OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
a ``/models`` list, but it reflects the resource's *available* models
rather than the user's *deployed* deployment names. In practice it is
still a useful hint — the user picks a familiar model name and we look
up its context length from the catalog.

The detector never crashes on errors (every HTTP call is wrapped in a
broad try/except). Callers get a :class:`DetectionResult` with whatever
information could be gathered, and fall back to manual entry for the
rest.
"""

from __future__ import annotations

import json
import logging
import re
from dataclasses import dataclass, field
from typing import Optional
from urllib import request as urllib_request
from urllib.error import HTTPError, URLError
from urllib.parse import urlparse

logger = logging.getLogger(__name__)


# Default Azure OpenAI ``api-version`` to probe with. The v1 GA endpoint
# accepts requests without ``api-version`` entirely, so this is only used
# as a fallback for pre-v1 resources that still require it.
_AZURE_OPENAI_PROBE_API_VERSIONS = (
    "2025-04-01-preview",
    "2024-10-21",  # oldest GA that supports /models
)

# Default Azure Anthropic ``api-version``. Matches the value used by
# ``agent/anthropic_adapter.py`` when building the Anthropic client.
_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"


@dataclass
class DetectionResult:
    """Everything auto-detection could gather from a base URL + API key."""

    #: Detected API transport: ``"chat_completions"``,
    #: ``"anthropic_messages"``, or ``None`` when detection failed.
    api_mode: Optional[str] = None

    #: Deployment / model IDs returned by ``/models`` (best effort).
    #: Empty when the endpoint doesn't expose the list with an API key.
    models: list[str] = field(default_factory=list)

    #: Lowercased host from the base URL (used for display messages).
    hostname: str = ""

    #: Human-readable reason the detector chose ``api_mode``. Useful
    #: for explaining auto-detection to the user in the wizard.
    reason: str = ""

    #: ``True`` when ``/models`` returned a valid OpenAI-shaped payload.
    models_probe_ok: bool = False

    #: ``True`` when the URL was determined to be an Anthropic-style
    #: endpoint (from path suffix or live probe).
    is_anthropic: bool = False


def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
    """GET a URL with ``api-key`` + ``Authorization`` headers. Return
    ``(status_code, parsed_json_or_None)``. Never raises."""
    req = urllib_request.Request(url, method="GET")
    # Azure OpenAI uses ``api-key``. Some Azure deployments (and
    # Anthropic-style routes) use ``Authorization: Bearer``. Send both
    # so we probe once per URL rather than twice.
    req.add_header("api-key", api_key)
    req.add_header("Authorization", f"Bearer {api_key}")
    req.add_header("User-Agent", "hermes-agent/azure-detect")
    try:
        with urllib_request.urlopen(req, timeout=timeout) as resp:
            body = resp.read()
            try:
                return resp.status, json.loads(body.decode("utf-8", errors="replace"))
            except Exception:
                return resp.status, None
    except HTTPError as exc:
        return exc.code, None
    except (URLError, TimeoutError, OSError) as exc:
        logger.debug("azure_detect: GET %s failed: %s", url, exc)
        return 0, None
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
        return 0, None


def _strip_trailing_v1(url: str) -> str:
    """Strip trailing ``/v1`` or ``/v1/`` so we can construct sub-paths."""
    return re.sub(r"/v1/?$", "", url.rstrip("/"))


def _looks_like_anthropic_path(url: str) -> bool:
    """Return True when the URL's path ends in ``/anthropic`` or
    contains a ``/anthropic/`` segment. Used by Azure Foundry
    resources that route Claude traffic through a dedicated path."""
    try:
        parsed = urlparse(url)
        path = (parsed.path or "").lower().rstrip("/")
        return path.endswith("/anthropic") or "/anthropic/" in path + "/"
    except Exception:
        return False


def _extract_model_ids(payload: dict) -> list[str]:
    """Extract a list of model IDs from an OpenAI-shaped ``/models``
    response. Returns ``[]`` on any shape mismatch."""
    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, list):
        return []
    ids: list[str] = []
    for item in data:
        if not isinstance(item, dict):
            continue
        # OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
        mid = item.get("id") or item.get("model") or item.get("name")
        if isinstance(mid, str) and mid:
            ids.append(mid)
    return ids


def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
    """Probe ``<base>/models`` for an OpenAI-shaped response.

    Returns ``(ok, models)``. ``ok`` is True iff the endpoint accepted
    us as an OpenAI-style caller (200 OK + OpenAI-shaped JSON body).
    """
    base_url = base_url.rstrip("/")

    # Azure OpenAI v1: {resource}.openai.azure.com/openai/v1 — no
    # api-version required for GA paths, so probe without first.
    candidates = [f"{base_url}/models"]
    # Fallback: explicit api-version for pre-v1 resources
    for v in _AZURE_OPENAI_PROBE_API_VERSIONS:
        candidates.append(f"{base_url}/models?api-version={v}")

    for url in candidates:
        status, body = _http_get_json(url, api_key)
        if status == 200 and body is not None:
            ids = _extract_model_ids(body)
            if ids:
                logger.info(
                    "azure_detect: /models probe OK at %s (%d models)",
                    url, len(ids),
                )
                return True, ids
            # 200 + empty list still counts as "OpenAI shape, no models
            # listed" — let the user proceed with manual entry.
            if isinstance(body, dict) and "data" in body:
                return True, []
    return False, []


def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
    """Send a zero-token request to ``<base>/v1/messages`` and check
    whether the endpoint at least *recognises* the Anthropic Messages
    shape (any 4xx that mentions ``messages`` or ``model``, or a 400
    ``invalid_request`` with an Anthropic error shape). Never completes
    a real chat.
    """
    base = _strip_trailing_v1(base_url)
    url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
    payload = json.dumps({
        "model": "probe",
        "max_tokens": 1,
        "messages": [{"role": "user", "content": "ping"}],
    }).encode("utf-8")
    req = urllib_request.Request(url, method="POST", data=payload)
    req.add_header("api-key", api_key)
    req.add_header("Authorization", f"Bearer {api_key}")
    req.add_header("anthropic-version", "2023-06-01")
    req.add_header("content-type", "application/json")
    req.add_header("User-Agent", "hermes-agent/azure-detect")
    try:
        with urllib_request.urlopen(req, timeout=6.0) as resp:
            # Should never 200 — "probe" isn't a real deployment. But
            # if it does, the endpoint definitely speaks Anthropic.
            return resp.status < 500
    except HTTPError as exc:
        # 4xx with an Anthropic-shaped error body = Anthropic endpoint.
        try:
            body = exc.read().decode("utf-8", errors="replace")
            lowered = body.lower()
            if "anthropic" in lowered or ('"type"' in lowered and '"error"' in lowered):
                return True
            # Pre-Azure-v1 Azure Foundry returns a plain 404 for
            # Anthropic-style calls on non-Anthropic deployments. A
            # 400 "model not found" IS Anthropic though.
            if exc.code == 400 and ("messages" in lowered or "model" in lowered):
                return True
            return False
        except Exception:
            return False
    except (URLError, TimeoutError, OSError):
        return False
    except Exception:  # pragma: no cover
        return False


def detect(base_url: str, api_key: str) -> DetectionResult:
    """Inspect an Azure endpoint and describe its transport + models.

    Call this from the wizard before asking the user to pick an API
    mode manually. The caller should treat the returned
    :class:`DetectionResult` as *advisory* — if ``api_mode`` is None,
    fall back to asking the user.
    """
    result = DetectionResult()

    try:
        parsed = urlparse(base_url)
        result.hostname = (parsed.hostname or "").lower()
    except Exception:
        result.hostname = ""

    # 1. Path sniff. Azure Foundry exposes Anthropic-style deployments
    #    under a dedicated ``/anthropic`` path.
    if _looks_like_anthropic_path(base_url):
        result.is_anthropic = True
        result.api_mode = "anthropic_messages"
        result.reason = "URL path ends in /anthropic → Anthropic Messages API"
        return result

    # 2. Try the OpenAI-style /models probe. If this works, the
    #    endpoint definitely speaks OpenAI wire.
    ok, models = _probe_openai_models(base_url, api_key)
    if ok:
        result.models_probe_ok = True
        result.models = models
        result.api_mode = "chat_completions"
        result.reason = (
            f"GET /models returned {len(models)} model(s) — OpenAI-style endpoint"
            if models
            else "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
        )
        return result

    # 3. Fallback: probe the Anthropic Messages shape. Slower and more
    #    intrusive than /models, so only run it when the OpenAI probe
    #    failed.
    if _probe_anthropic_messages(base_url, api_key):
        result.is_anthropic = True
        result.api_mode = "anthropic_messages"
        result.reason = "Endpoint accepts Anthropic Messages shape"
        return result

    # Nothing matched. Caller falls back to manual selection.
    result.reason = (
        "Could not probe endpoint (private network, missing model list, or "
        "non-standard path) — falling back to manual API-mode selection"
    )
    return result


def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
    """Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
    that returns ``None`` when only the fallback default (128k) would
    fire, so the wizard can distinguish "we actually know this" from
    "we guessed"."""
    try:
        from agent.model_metadata import (
            DEFAULT_FALLBACK_CONTEXT,
            get_model_context_length,
        )
    except Exception:
        return None

    try:
        n = get_model_context_length(model, base_url=base_url, api_key=api_key)
    except Exception as exc:
        logger.debug("azure_detect: context length lookup failed: %s", exc)
        return None

    if isinstance(n, int) and n > 0 and n != DEFAULT_FALLBACK_CONTEXT:
        return n
    return None


__all__ = ["DetectionResult", "detect", "lookup_context_length"]
@@ -2935,19 +2935,30 @@ def _save_custom_provider(
def _model_flow_azure_foundry(config, current_model=""):
    """Azure Foundry provider: configure endpoint, API mode, API key, and model.

    Azure Foundry supports both OpenAI-style (/v1/chat/completions) and
    Anthropic-style (/v1/messages) endpoints. The user must select which
    API format their endpoint uses.
    Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and
    Anthropic-style (``/v1/messages``) endpoints. The wizard auto-detects
    the transport and available models when possible:

    * URLs ending in ``/anthropic`` → Anthropic Messages API.
    * Successful ``GET <base>/models`` probe → OpenAI-style + populates
      a picker with the returned deployment / model IDs.
    * Anthropic Messages probe fallback when ``/models`` fails.
    * Manual entry when every probe fails (private endpoints, etc.).

    Context lengths for the chosen model are resolved via the standard
    :func:`agent.model_metadata.get_model_context_length` chain
    (models.dev, provider metadata, hardcoded family fallbacks).
    """
    from hermes_cli.auth import _save_model_choice, deactivate_provider
    from hermes_cli.auth import _save_model_choice, deactivate_provider  # noqa: F401
    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
    from hermes_cli import azure_detect
    import getpass

    # Load current Azure Foundry configuration
    # ── Load current Azure Foundry configuration ─────────────────────
    model_cfg = config.get("model", {})
    if isinstance(model_cfg, dict):
        current_base_url = model_cfg.get("base_url", "") if model_cfg.get("provider") == "azure-foundry" else ""
        current_api_mode = model_cfg.get("api_mode", "") if model_cfg.get("provider") == "azure-foundry" else ""
    if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry":
        current_base_url = str(model_cfg.get("base_url", "") or "")
        current_api_mode = str(model_cfg.get("api_mode", "") or "")
    else:
        current_base_url = ""
        current_api_mode = ""
@@ -2959,64 +2970,43 @@ def _model_flow_azure_foundry(config, current_model=""):
    print("=" * 50)
    print()
    print("Azure Foundry can host models with either OpenAI-style or")
    print("Anthropic-style API endpoints. Configure your endpoint below.")
    print("Anthropic-style API endpoints. Hermes will probe your")
    print("endpoint to auto-detect the transport and the deployed")
    print("models when possible.")
    print()

    if current_base_url:
        print(f" Current endpoint: {current_base_url}")
    if current_api_mode:
        mode_label = "OpenAI-style" if current_api_mode == "chat_completions" else "Anthropic-style"
        print(f" Current API mode: {mode_label}")
        _lbl = "OpenAI-style" if current_api_mode == "chat_completions" else "Anthropic-style"
        print(f" Current API mode: {_lbl}")
    if current_api_key:
        print(f" Current API key: {current_api_key[:8]}...")
    print()

    # Step 1: Get the endpoint URL
    # ── Step 1: endpoint URL ─────────────────────────────────────────
    try:
        base_url = input(f"API endpoint URL [{current_base_url or 'e.g. https://your-model.azure.com/v1'}]: ").strip()
        base_url = input(
            f"API endpoint URL [{current_base_url or 'e.g. https://your-resource.openai.azure.com/openai/v1'}]: "
        ).strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return

    effective_url = base_url or current_base_url
    effective_url = (base_url or current_base_url).rstrip("/")
    if not effective_url:
        print("No endpoint URL provided. Cancelled.")
        return

    # Validate URL format
    if not effective_url.startswith(("http://", "https://")):
        print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
        return

    # Step 2: Select API mode (OpenAI or Anthropic style)
    print()
    print("Select the API format your Azure Foundry endpoint uses:")
    print()
    print(" 1. OpenAI-style (POST /v1/chat/completions)")
    print(" For: GPT models, Llama, Mistral, and most open models")
    print()
    print(" 2. Anthropic-style (POST /v1/messages)")
    print(" For: Claude models deployed via Anthropic API format")
    print()

    try:
        default_choice = "1" if current_api_mode != "anthropic_messages" else "2"
        mode_choice = input(f"API format [1/2] ({default_choice}): ").strip() or default_choice
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return

    if mode_choice == "2":
        api_mode = "anthropic_messages"
        print(" → Using Anthropic-style API format")
    else:
        api_mode = "chat_completions"
        print(" → Using OpenAI-style API format")

    # Step 3: Get the API key
    # ── Step 2: API key ──────────────────────────────────────────────
    print()
    try:
        api_key = getpass.getpass(f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: ").strip()
        api_key = getpass.getpass(
            f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
        ).strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return
@@ -3026,24 +3016,82 @@ def _model_flow_azure_foundry(config, current_model=""):
        print("No API key provided. Cancelled.")
        return

    # Step 4: Get the model name
    # ── Step 3: auto-detect transport + models ───────────────────────
    print()
    try:
        model_name = input(f"Model name [{current_model or 'e.g. gpt-4, claude-3-5-sonnet'}]: ").strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return
    print("◐ Probing endpoint to auto-detect transport and models...")
    detection = azure_detect.detect(effective_url, effective_key)

    discovered_models: list[str] = list(detection.models)
    api_mode: str = detection.api_mode or ""

    if api_mode:
        mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
        print(f"✓ Detected API transport: {mode_label}")
        if detection.reason:
            print(f" ({detection.reason})")
        if discovered_models:
            print(f"✓ Found {len(discovered_models)} deployed model(s) on this endpoint")
    else:
        print(f"⚠ Auto-detection incomplete: {detection.reason}")
        print()
        print("Select the API format your Azure Foundry endpoint uses:")
        print(" 1. OpenAI-style (POST /v1/chat/completions)")
        print(" For: GPT models, Llama, Mistral, and most open models")
        print(" 2. Anthropic-style (POST /v1/messages)")
        print(" For: Claude models deployed via Anthropic API format")
        try:
            default_choice = "2" if current_api_mode == "anthropic_messages" else "1"
            mode_choice = input(f"API format [1/2] ({default_choice}): ").strip() or default_choice
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            return
        api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions"

    # ── Step 4: model name ───────────────────────────────────────────
    print()
    effective_model = ""
    if discovered_models:
        print("Available models on this endpoint:")
        for i, mid in enumerate(discovered_models[:30], start=1):
            print(f" {i:>2}. {mid}")
        if len(discovered_models) > 30:
            print(f" ... and {len(discovered_models) - 30} more (type name manually if not shown)")
        print()
        try:
            pick = input(
                f"Pick by number, or type a deployment name [{current_model or discovered_models[0]}]: "
            ).strip()
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            return
        if not pick:
            effective_model = current_model or discovered_models[0]
        elif pick.isdigit() and 1 <= int(pick) <= min(len(discovered_models), 30):
            effective_model = discovered_models[int(pick) - 1]
        else:
            effective_model = pick
    else:
        try:
            model_name = input(
                f"Model / deployment name [{current_model or 'e.g. gpt-5.4, claude-sonnet-4-6'}]: "
            ).strip()
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            return
        effective_model = model_name or current_model

    effective_model = model_name or current_model
    if not effective_model:
        print("No model name provided. Cancelled.")
        return

    # Step 5: Save configuration
    # Save API key to .env
    # ── Step 5: context-length lookup ────────────────────────────────
    ctx_len = azure_detect.lookup_context_length(
        effective_model, effective_url, effective_key,
    )

    # ── Step 6: persist ──────────────────────────────────────────────
    save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)

    # Update config.yaml
    cfg = load_config()
    model = cfg.get("model")
    if not isinstance(model, dict):
@@ -3051,19 +3099,18 @@ def _model_flow_azure_foundry(config, current_model=""):
        cfg["model"] = model

    model["provider"] = "azure-foundry"
    model["base_url"] = effective_url.rstrip("/")
    model["base_url"] = effective_url
    model["api_mode"] = api_mode
    model["default"] = effective_model
    if ctx_len:
        model["context_length"] = ctx_len

    save_config(cfg)

    # Deactivate any OAuth provider
    deactivate_provider()

    # Update caller's config dict
    config["model"] = dict(model)

    # Clear any conflicting env vars
    # Clear any conflicting env vars so auxiliary clients don't poison
    # themselves with a stale OpenAI base URL / key.
    if get_env_value("OPENAI_BASE_URL"):
        save_env_value("OPENAI_BASE_URL", "")
    if get_env_value("OPENAI_API_KEY"):
@@ -3071,10 +3118,14 @@ def _model_flow_azure_foundry(config, current_model=""):

    mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
    print()
    print(f"✓ Azure Foundry configured:")
    print(f" Endpoint: {effective_url}")
    print(f" API mode: {mode_label}")
    print(f" Model: {effective_model}")
    print("✓ Azure Foundry configured:")
    print(f" Endpoint: {effective_url}")
    print(f" API mode: {mode_label}")
    print(f" Model: {effective_model}")
    if ctx_len:
        print(f" Context length: {ctx_len:,} tokens")
    else:
        print(" Context length: not auto-detected (will fall back at runtime)")
    print()
@@ -602,6 +602,71 @@ def _resolve_openrouter_runtime(
    }


def _resolve_azure_foundry_runtime(
    *,
    requested_provider: str,
    model_cfg: Dict[str, Any],
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Dict[str, Any]:
    """Resolve an Azure Foundry runtime entry.

    Reads ``model.base_url`` + ``model.api_mode`` from config.yaml (or
    explicit overrides), pulls the API key from ``.env`` / env var, and
    strips a trailing ``/v1`` for Anthropic-style endpoints because the
    Anthropic SDK appends ``/v1/messages`` internally.

    Raises :class:`AuthError` when required values are missing.
    """
    explicit_api_key = str(explicit_api_key or "").strip()
    explicit_base_url_clean = str(explicit_base_url or "").strip().rstrip("/")

    cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
    cfg_base_url = ""
    cfg_api_mode = "chat_completions"
    if cfg_provider == "azure-foundry":
        cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
        cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"

    env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
    base_url = explicit_base_url_clean or cfg_base_url or env_base_url
    if not base_url:
        raise AuthError(
            "Azure Foundry requires a base URL. Set it via 'hermes model' or "
            "the AZURE_FOUNDRY_BASE_URL environment variable."
        )

    api_key = explicit_api_key
    if not api_key:
        try:
            from hermes_cli.config import get_env_value
            api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
        except Exception:
            api_key = ""
    if not api_key:
        api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
    if not api_key:
        raise AuthError(
            "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
            "~/.hermes/.env or run 'hermes model' to configure."
        )

    # Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
    # we inherited from the configured base_url to avoid double-/v1 paths.
    if cfg_api_mode == "anthropic_messages":
        base_url = re.sub(r"/v1/?$", "", base_url)

    source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
    return {
        "provider": "azure-foundry",
        "api_mode": cfg_api_mode,
        "base_url": base_url,
        "api_key": api_key,
        "source": source,
        "requested_provider": requested_provider,
    }


def _resolve_explicit_runtime(
    *,
    provider: str,
@@ -693,44 +758,12 @@ def _resolve_explicit_runtime(

    # Azure Foundry: user-configured endpoint with selectable API mode
    if provider == "azure-foundry":
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = ""
        cfg_api_mode = "chat_completions"
        if cfg_provider == "azure-foundry":
            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
            cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"

        env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
        base_url = explicit_base_url or cfg_base_url or env_base_url
        if not base_url:
            raise AuthError(
                "Azure Foundry requires a base URL. Set it via 'hermes model' or "
                "the AZURE_FOUNDRY_BASE_URL environment variable."
            )

        api_key = explicit_api_key
        if not api_key:
            from hermes_cli.config import get_env_value
            api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or os.getenv("AZURE_FOUNDRY_API_KEY", "")
        if not api_key:
            raise AuthError(
                "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
                "~/.hermes/.env or run 'hermes model' to configure."
            )

        # For Anthropic-style endpoints, strip /v1 suffix since the Anthropic SDK
        # appends /v1/messages internally
        if cfg_api_mode == "anthropic_messages":
            base_url = re.sub(r"/v1/?$", "", base_url)

        return {
            "provider": "azure-foundry",
            "api_mode": cfg_api_mode,
            "base_url": base_url,
            "api_key": api_key,
            "source": "explicit",
            "requested_provider": requested_provider,
        }
        return _resolve_azure_foundry_runtime(
            requested_provider=requested_provider,
            model_cfg=model_cfg,
            explicit_api_key=explicit_api_key,
            explicit_base_url=explicit_base_url,
        )

    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
@@ -820,6 +853,20 @@ def resolve_runtime_provider(
            "requested_provider": requested_provider,
        }

    # Azure Foundry: user-configured endpoint with selectable API mode
    # (OpenAI-style chat_completions or Anthropic-style anthropic_messages).
    # Resolve before the custom-runtime / pool / generic paths so Azure
    # config is always picked up from model.base_url + model.api_mode,
    # regardless of whether the caller passed explicit_* args.
    if requested_provider == "azure-foundry":
        azure_runtime = _resolve_azure_foundry_runtime(
            requested_provider=requested_provider,
            model_cfg=_get_model_config(),
            explicit_api_key=explicit_api_key,
            explicit_base_url=explicit_base_url,
        )
        return azure_runtime

    custom_runtime = _resolve_named_custom_runtime(
        requested_provider=requested_provider,
        explicit_api_key=explicit_api_key,
tests/hermes_cli/test_azure_detect.py (new file, 237 lines)
@@ -0,0 +1,237 @@
"""Tests for hermes_cli.azure_detect — transport & model auto-detection."""

from __future__ import annotations

import json
from unittest.mock import MagicMock, patch

import pytest

from hermes_cli import azure_detect


# ----------------------------------------------------------------------
# Helpers
# ----------------------------------------------------------------------

class _FakeHTTPResponse:
    """Minimal stand-in for urllib.request.urlopen's context manager."""

    def __init__(self, status: int, body: bytes):
        self.status = status
        self._body = body

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        return False

    def read(self) -> bytes:
        return self._body


def _openai_models_body(*ids: str) -> bytes:
    return json.dumps({
        "object": "list",
        "data": [{"id": i, "object": "model"} for i in ids],
    }).encode()


def _anthropic_error_body(msg: str = "model not found") -> bytes:
    return json.dumps({
        "type": "error",
        "error": {"type": "invalid_request_error", "message": msg},
    }).encode()


# ----------------------------------------------------------------------
# _looks_like_anthropic_path
# ----------------------------------------------------------------------

@pytest.mark.parametrize("url, expected", [
    ("https://foo.services.ai.azure.com/anthropic", True),
    ("https://foo.services.ai.azure.com/anthropic/", True),
    ("https://foo.services.ai.azure.com/anthropic/v1", True),
    ("https://foo.openai.azure.com/openai/v1", False),
    ("https://foo.openai.azure.com/", False),
    ("https://openrouter.ai/api/v1", False),
])
def test_looks_like_anthropic_path(url, expected):
    assert azure_detect._looks_like_anthropic_path(url) is expected


# ----------------------------------------------------------------------
# _extract_model_ids
# ----------------------------------------------------------------------

def test_extract_model_ids_openai_shape():
    body = {
        "object": "list",
        "data": [
            {"id": "gpt-4.1-mini", "object": "model"},
            {"id": "claude-sonnet-4-6", "object": "model"},
        ],
    }
    assert azure_detect._extract_model_ids(body) == ["gpt-4.1-mini", "claude-sonnet-4-6"]


def test_extract_model_ids_bad_shape_returns_empty():
    assert azure_detect._extract_model_ids({}) == []
    assert azure_detect._extract_model_ids({"data": "not-a-list"}) == []
    assert azure_detect._extract_model_ids({"data": [{"no-id": True}]}) == []


# ----------------------------------------------------------------------
# detect() integration
# ----------------------------------------------------------------------

def test_detect_anthropic_path_wins_without_http():
    """URL path sniff short-circuits — no HTTP call happens."""
    with patch.object(azure_detect, "_http_get_json") as fake_get, \
         patch.object(azure_detect, "_probe_anthropic_messages") as fake_probe:
        result = azure_detect.detect(
            "https://foo.services.ai.azure.com/anthropic", "key-abc",
        )
    assert result.api_mode == "anthropic_messages"
    assert result.is_anthropic is True
    assert "path" in result.reason.lower()
    fake_get.assert_not_called()
    fake_probe.assert_not_called()


def test_detect_openai_models_probe_success():
    """/models probe returning a model list → chat_completions."""
    def _fake_get(url, api_key, timeout=6.0):
        assert "key-abc" == api_key
        return 200, json.loads(_openai_models_body("gpt-5.4", "claude-opus-4-6"))

    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
        result = azure_detect.detect(
            "https://my.openai.azure.com/openai/v1", "key-abc",
        )
    assert result.api_mode == "chat_completions"
    assert result.models_probe_ok is True
    assert result.models == ["gpt-5.4", "claude-opus-4-6"]
    assert "/models" in result.reason


def test_detect_openai_models_probe_empty_list_still_counts():
    """Endpoint returned OpenAI shape but no models → still chat_completions."""
    def _fake_get(url, api_key, timeout=6.0):
        return 200, {"object": "list", "data": []}

    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
        result = azure_detect.detect(
            "https://my.openai.azure.com/openai/v1", "key-abc",
        )
    assert result.api_mode == "chat_completions"
    assert result.models == []
    assert result.models_probe_ok is True


def test_detect_falls_back_to_anthropic_probe():
    """/models fails but Anthropic Messages probe succeeds."""
    def _fake_get(url, api_key, timeout=6.0):
        return 401, None  # /models forbidden

    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get), \
         patch.object(azure_detect, "_probe_anthropic_messages", return_value=True):
        result = azure_detect.detect(
            "https://my.services.ai.azure.com/v1", "key-abc",
        )
    assert result.api_mode == "anthropic_messages"
    assert result.is_anthropic is True


def test_detect_all_probes_fail_returns_none():
    """Every probe fails → api_mode is None and caller falls back to manual."""
    with patch.object(azure_detect, "_http_get_json", return_value=(500, None)), \
         patch.object(azure_detect, "_probe_anthropic_messages", return_value=False):
        result = azure_detect.detect(
            "https://some-private.example.com/", "key-abc",
        )
    assert result.api_mode is None
    assert result.models == []
    assert "manual" in result.reason.lower()


# ----------------------------------------------------------------------
# _probe_openai_models URL list (Azure vs v1 api-version)
# ----------------------------------------------------------------------

def test_probe_openai_models_tries_multiple_api_versions():
    """First call (no api-version) fails, api-version fallback succeeds."""
    calls = []

    def _fake_get(url, api_key, timeout=6.0):
        calls.append(url)
        if "api-version" not in url:
            return 404, None
        return 200, json.loads(_openai_models_body("gpt-4.1"))

    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
        ok, models = azure_detect._probe_openai_models(
            "https://my.openai.azure.com/openai/v1", "k",
        )
    assert ok is True
    assert models == ["gpt-4.1"]
    # Should have tried without api-version first, then with at least one
    assert any("api-version" not in u for u in calls)
    assert any("api-version" in u for u in calls)


# ----------------------------------------------------------------------
# _http_get_json error handling
# ----------------------------------------------------------------------

def test_http_get_json_on_urlerror_returns_zero_none():
    """Network failure returns (0, None), never raises."""
    import urllib.error
    with patch("hermes_cli.azure_detect.urllib_request.urlopen",
               side_effect=urllib.error.URLError("dns fail")):
        status, body = azure_detect._http_get_json("https://bad.example/", "k")
    assert status == 0
    assert body is None


def test_http_get_json_on_http_error_returns_code_none():
    """HTTP 4xx/5xx returns (code, None)."""
    import urllib.error
    err = urllib.error.HTTPError("https://x/", 403, "Forbidden", {}, None)
    with patch("hermes_cli.azure_detect.urllib_request.urlopen", side_effect=err):
        status, body = azure_detect._http_get_json("https://x/", "k")
    assert status == 403
    assert body is None


# ----------------------------------------------------------------------
# lookup_context_length
# ----------------------------------------------------------------------

def test_lookup_context_length_returns_known():
    """When model_metadata returns a non-fallback value, we pass it through."""
    fake = MagicMock(return_value=400000)
    with patch("agent.model_metadata.get_model_context_length", fake), \
         patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000):
        n = azure_detect.lookup_context_length(
            "gpt-5.4", "https://x.openai.azure.com/openai/v1", "k",
        )
    assert n == 400000


def test_lookup_context_length_returns_none_on_fallback():
    """When resolver falls through to DEFAULT_FALLBACK_CONTEXT, we return None."""
    with patch("agent.model_metadata.get_model_context_length", return_value=128000), \
         patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000):
        n = azure_detect.lookup_context_length(
            "totally-unknown-model", "https://x.openai.azure.com/openai/v1", "k",
        )
    assert n is None


def test_lookup_context_length_swallows_exceptions():
    """Resolver raising must not crash the wizard."""
    with patch("agent.model_metadata.get_model_context_length",
               side_effect=RuntimeError("boom")):
        assert azure_detect.lookup_context_length("m", "https://x/", "k") is None
@@ -1,3 +1,5 @@
import pytest

from hermes_cli import runtime_provider as rp


@@ -1565,3 +1567,79 @@ class TestOllamaUrlSubstringLeak:
        resolved = rp.resolve_runtime_provider(requested="custom")

        assert resolved["api_key"] == "ol-legit-key"


# =============================================================================
# Azure Foundry — both OpenAI-style and Anthropic-style endpoints
# =============================================================================

class TestAzureFoundryResolution:
    """Verify Azure Foundry resolves correctly for both API modes."""

    def _make_cfg(self, base_url: str, api_mode: str = "chat_completions"):
        return {
            "provider": "azure-foundry",
            "base_url": base_url,
            "api_mode": api_mode,
            "default": "gpt-5.4",
        }

    def test_azure_foundry_openai_style_explicit(self, monkeypatch):
        """OpenAI-style Azure Foundry → chat_completions, keeps base_url as-is."""
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-openai")
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
            "https://my-resource.openai.azure.com/openai/v1",
            "chat_completions",
        ))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)

        resolved = rp.resolve_runtime_provider(requested="azure-foundry")

        assert resolved["provider"] == "azure-foundry"
        assert resolved["api_mode"] == "chat_completions"
        assert resolved["base_url"] == "https://my-resource.openai.azure.com/openai/v1"
        assert resolved["api_key"] == "az-key-openai"

    def test_azure_foundry_anthropic_style_strips_v1_suffix(self, monkeypatch):
        """Anthropic-style Azure Foundry → anthropic_messages, /v1 stripped
        because the Anthropic SDK appends /v1/messages itself."""
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-ant")
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
            "https://my-resource.services.ai.azure.com/anthropic/v1",
            "anthropic_messages",
        ))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)

        resolved = rp.resolve_runtime_provider(requested="azure-foundry")

        assert resolved["provider"] == "azure-foundry"
        assert resolved["api_mode"] == "anthropic_messages"
        # /v1 stripped so SDK can append /v1/messages cleanly
        assert resolved["base_url"] == "https://my-resource.services.ai.azure.com/anthropic"

    def test_azure_foundry_missing_base_url_raises(self, monkeypatch):
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key")
        monkeypatch.delenv("AZURE_FOUNDRY_BASE_URL", raising=False)
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
        monkeypatch.setattr(rp, "_get_model_config", lambda: {})
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)

        with pytest.raises(rp.AuthError, match="base URL"):
            rp.resolve_runtime_provider(requested="azure-foundry")

    def test_azure_foundry_missing_api_key_raises(self, monkeypatch):
        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
        # `get_env_value` reads from ~/.hermes/.env — mock it to return None
        # so the resolver can't find a key there either.
        import hermes_cli.config as cfg_mod
        monkeypatch.setattr(cfg_mod, "get_env_value", lambda k: None)
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
            "https://my-resource.openai.azure.com/openai/v1"
        ))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)

        with pytest.raises(rp.AuthError, match="API key"):
            rp.resolve_runtime_provider(requested="azure-foundry")