From 731e1ef8cb69837c6419272bd0d3f05a9179a476 Mon Sep 17 00:00:00 2001 From: Teknium Date: Sat, 25 Apr 2026 18:38:38 -0700 Subject: [PATCH] feat(azure-foundry): auto-detect transport, models, context length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The azure-foundry wizard now probes the endpoint before asking the user to pick anything by hand: 1. URL path sniff — endpoints ending in /anthropic are Azure Foundry Claude routes and skip to anthropic_messages. 2. GET /models probe — if the endpoint returns an OpenAI-shaped model list, we switch to chat_completions and prefill the picker with the returned deployment/model IDs. 3. Anthropic Messages probe — fallback for endpoints that don't expose /models but do speak the Anthropic Messages shape. 4. Manual fallback — private endpoints / custom routes still work; the user picks API mode + types a deployment name. Context length for the selected model is resolved through the existing agent.model_metadata.get_model_context_length chain (models.dev, provider metadata, hardcoded family fallbacks) and stored in model.context_length when a non-default value is found. Also refactors runtime_provider so Azure Foundry resolution is reused between the explicit-credentials path and the default top-level path — previously the /v1 strip for Anthropic-style Azure only ran when the caller passed explicit_* args, which meant config-driven sessions hit a double-/v1 URL. New module hermes_cli/azure_detect.py with 19 unit tests covering: - path sniff, model ID extraction, probe fallbacks - HTTP error handling (URLError, HTTPError) - context-length lookup passthrough - DEFAULT_FALLBACK_CONTEXT rejection New runtime tests cover: - OpenAI-style Azure Foundry - Anthropic-style Azure Foundry with /v1 stripping - Missing base_url / API key raising AuthError Rationale: Microsoft confirms there's no pure-API-key endpoint to list Azure deployments (that requires ARM management auth). 
The v1 Azure OpenAI endpoint does expose /models with the resource's available model catalog, which is good enough for picker prefill in the common case. Users on private/gated endpoints fall through to manual entry. --- hermes_cli/azure_detect.py | 300 ++++++++++++++++++ hermes_cli/main.py | 177 +++++++---- hermes_cli/runtime_provider.py | 123 ++++--- tests/hermes_cli/test_azure_detect.py | 237 ++++++++++++++ .../test_runtime_provider_resolution.py | 78 +++++ 5 files changed, 814 insertions(+), 101 deletions(-) create mode 100644 hermes_cli/azure_detect.py create mode 100644 tests/hermes_cli/test_azure_detect.py diff --git a/hermes_cli/azure_detect.py b/hermes_cli/azure_detect.py new file mode 100644 index 0000000000..4ed4c1d0b7 --- /dev/null +++ b/hermes_cli/azure_detect.py @@ -0,0 +1,300 @@ +"""Azure Foundry endpoint auto-detection. + +Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine: + - API transport (OpenAI-style ``chat_completions`` vs + Anthropic-style ``anthropic_messages``) + - Available models (best effort — Azure does not expose a deployment + listing via the inference API key, but Azure OpenAI v1 endpoints + return the resource's model catalog via ``GET /models``) + - Context length for each discovered/entered model, via the existing + :func:`agent.model_metadata.get_model_context_length` resolver. + +Rationale: + +Azure has no pure-API-key deployment-listing endpoint — per Microsoft, +deployment enumeration requires ARM management-plane auth. Azure +OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return +a ``/models`` list, but it reflects the resource's *available* models +rather than the user's *deployed* deployment names. In practice it is +still a useful hint — the user picks a familiar model name and we look +up its context length from the catalog. + +The detector never crashes on errors (every HTTP call is wrapped in a +broad try/except). 
Callers get a :class:`DetectionResult` with whatever +information could be gathered, and fall back to manual entry for the +rest. +""" + +from __future__ import annotations + +import json +import logging +import re +from dataclasses import dataclass, field +from typing import Optional +from urllib import request as urllib_request +from urllib.error import HTTPError, URLError +from urllib.parse import urlparse, urlunparse + +logger = logging.getLogger(__name__) + + +# Default Azure OpenAI ``api-version`` to probe with. The v1 GA endpoint +# accepts requests without ``api-version`` entirely, so this is only used +# as a fallback for pre-v1 resources that still require it. +_AZURE_OPENAI_PROBE_API_VERSIONS = ( + "2025-04-01-preview", + "2024-10-21", # oldest GA that supports /models +) + +# Default Azure Anthropic ``api-version``. Matches the value used by +# ``agent/anthropic_adapter.py`` when building the Anthropic client. +_AZURE_ANTHROPIC_API_VERSION = "2025-04-15" + + +@dataclass +class DetectionResult: + """Everything auto-detection could gather from a base URL + API key.""" + + #: Detected API transport: ``"chat_completions"``, + #: ``"anthropic_messages"``, or ``None`` when detection failed. + api_mode: Optional[str] = None + + #: Deployment / model IDs returned by ``/models`` (best effort). + #: Empty when the endpoint doesn't expose the list with an API key. + models: list[str] = field(default_factory=list) + + #: Lowercased host from the base URL (used for display messages). + hostname: str = "" + + #: Human-readable reason the detector chose ``api_mode``. Useful + #: for explaining auto-detection to the user in the wizard. + reason: str = "" + + #: ``True`` when ``/models`` returned a valid OpenAI-shaped payload. + models_probe_ok: bool = False + + #: ``True`` when the URL was determined to be an Anthropic-style + #: endpoint (from path suffix or live probe). 
+ is_anthropic: bool = False + + +def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]: + """GET a URL with ``api-key`` + ``Authorization`` headers. Return + ``(status_code, parsed_json_or_None)``. Never raises.""" + req = urllib_request.Request(url, method="GET") + # Azure OpenAI uses ``api-key``. Some Azure deployments (and + # Anthropic-style routes) use ``Authorization: Bearer``. Send both + # so we probe once per URL rather than twice. + req.add_header("api-key", api_key) + req.add_header("Authorization", f"Bearer {api_key}") + req.add_header("User-Agent", "hermes-agent/azure-detect") + try: + with urllib_request.urlopen(req, timeout=timeout) as resp: + body = resp.read() + try: + return resp.status, json.loads(body.decode("utf-8", errors="replace")) + except Exception: + return resp.status, None + except HTTPError as exc: + return exc.code, None + except (URLError, TimeoutError, OSError) as exc: + logger.debug("azure_detect: GET %s failed: %s", url, exc) + return 0, None + except Exception as exc: # pragma: no cover — defensive + logger.debug("azure_detect: GET %s unexpected error: %s", url, exc) + return 0, None + + +def _strip_trailing_v1(url: str) -> str: + """Strip trailing ``/v1`` or ``/v1/`` so we can construct sub-paths.""" + return re.sub(r"/v1/?$", "", url.rstrip("/")) + + +def _looks_like_anthropic_path(url: str) -> bool: + """Return True when the URL's path ends in ``/anthropic`` or + contains a ``/anthropic/`` segment. Used by Azure Foundry + resources that route Claude traffic through a dedicated path.""" + try: + parsed = urlparse(url) + path = (parsed.path or "").lower().rstrip("/") + return path.endswith("/anthropic") or "/anthropic/" in path + "/" + except Exception: + return False + + +def _extract_model_ids(payload: dict) -> list[str]: + """Extract a list of model IDs from an OpenAI-shaped ``/models`` + response. 
Returns ``[]`` on any shape mismatch.""" + data = payload.get("data") if isinstance(payload, dict) else None + if not isinstance(data, list): + return [] + ids: list[str] = [] + for item in data: + if not isinstance(item, dict): + continue + # OpenAI shape: {"id": "gpt-5.4", "object": "model", ...} + mid = item.get("id") or item.get("model") or item.get("name") + if isinstance(mid, str) and mid: + ids.append(mid) + return ids + + +def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]: + """Probe ``/models`` for an OpenAI-shaped response. + + Returns ``(ok, models)``. ``ok`` is True iff the endpoint accepted + us as an OpenAI-style caller (200 OK + OpenAI-shaped JSON body). + """ + base_url = base_url.rstrip("/") + + # Azure OpenAI v1: {resource}.openai.azure.com/openai/v1 — no + # api-version required for GA paths, so probe without first. + candidates = [f"{base_url}/models"] + # Fallback: explicit api-version for pre-v1 resources + for v in _AZURE_OPENAI_PROBE_API_VERSIONS: + candidates.append(f"{base_url}/models?api-version={v}") + + for url in candidates: + status, body = _http_get_json(url, api_key) + if status == 200 and body is not None: + ids = _extract_model_ids(body) + if ids: + logger.info( + "azure_detect: /models probe OK at %s (%d models)", + url, len(ids), + ) + return True, ids + # 200 + empty list still counts as "OpenAI shape, no models + # listed" — let the user proceed with manual entry. + if isinstance(body, dict) and "data" in body: + return True, [] + return False, [] + + +def _probe_anthropic_messages(base_url: str, api_key: str) -> bool: + """Send a zero-token request to ``/v1/messages`` and check + whether the endpoint at least *recognises* the Anthropic Messages + shape (any 4xx that mentions ``messages`` or ``model``, or a 400 + ``invalid_request`` with an Anthropic error shape). Never completes + a real chat. 
+ """ + base = _strip_trailing_v1(base_url) + url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}" + payload = json.dumps({ + "model": "probe", + "max_tokens": 1, + "messages": [{"role": "user", "content": "ping"}], + }).encode("utf-8") + req = urllib_request.Request(url, method="POST", data=payload) + req.add_header("api-key", api_key) + req.add_header("Authorization", f"Bearer {api_key}") + req.add_header("anthropic-version", "2023-06-01") + req.add_header("content-type", "application/json") + req.add_header("User-Agent", "hermes-agent/azure-detect") + try: + with urllib_request.urlopen(req, timeout=6.0) as resp: + # Should never 200 — "probe" isn't a real deployment. But + # if it does, the endpoint definitely speaks Anthropic. + return resp.status < 500 + except HTTPError as exc: + # 4xx with an Anthropic-shaped error body = Anthropic endpoint. + try: + body = exc.read().decode("utf-8", errors="replace") + lowered = body.lower() + if "anthropic" in lowered or '"type"' in lowered and '"error"' in lowered: + return True + # Pre-Azure-v1 Azure Foundry returns a plain 404 for + # Anthropic-style calls on non-Anthropic deployments. A + # 400 "model not found" IS Anthropic though. + if exc.code == 400 and ("messages" in lowered or "model" in lowered): + return True + return False + except Exception: + return False + except (URLError, TimeoutError, OSError): + return False + except Exception: # pragma: no cover + return False + + +def detect(base_url: str, api_key: str) -> DetectionResult: + """Inspect an Azure endpoint and describe its transport + models. + + Call this from the wizard before asking the user to pick an API + mode manually. The caller should treat the returned + :class:`DetectionResult` as *advisory* — if ``api_mode`` is None, + fall back to asking the user. + """ + result = DetectionResult() + + try: + parsed = urlparse(base_url) + result.hostname = (parsed.hostname or "").lower() + except Exception: + result.hostname = "" + + # 1. 
Path sniff. Azure Foundry exposes Anthropic-style deployments + # under a dedicated ``/anthropic`` path. + if _looks_like_anthropic_path(base_url): + result.is_anthropic = True + result.api_mode = "anthropic_messages" + result.reason = "URL path ends in /anthropic → Anthropic Messages API" + return result + + # 2. Try the OpenAI-style /models probe. If this works, the + # endpoint definitely speaks OpenAI wire. + ok, models = _probe_openai_models(base_url, api_key) + if ok: + result.models_probe_ok = True + result.models = models + result.api_mode = "chat_completions" + result.reason = ( + f"GET /models returned {len(models)} model(s) — OpenAI-style endpoint" + if models + else "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint" + ) + return result + + # 3. Fallback: probe the Anthropic Messages shape. Slower and more + # intrusive than /models, so only run it when the OpenAI probe + # failed. + if _probe_anthropic_messages(base_url, api_key): + result.is_anthropic = True + result.api_mode = "anthropic_messages" + result.reason = "Endpoint accepts Anthropic Messages shape" + return result + + # Nothing matched. Caller falls back to manual selection. 
+ result.reason = ( + "Could not probe endpoint (private network, missing model list, or " + "non-standard path) — falling back to manual API-mode selection" + ) + return result + + +def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]: + """Thin wrapper around :func:`agent.model_metadata.get_model_context_length` + that returns ``None`` when only the fallback default (128k) would + fire, so the wizard can distinguish "we actually know this" from + "we guessed.""" + try: + from agent.model_metadata import ( + DEFAULT_FALLBACK_CONTEXT, + get_model_context_length, + ) + except Exception: + return None + + try: + n = get_model_context_length(model, base_url=base_url, api_key=api_key) + except Exception as exc: + logger.debug("azure_detect: context length lookup failed: %s", exc) + return None + + if isinstance(n, int) and n > 0 and n != DEFAULT_FALLBACK_CONTEXT: + return n + return None + + +__all__ = ["DetectionResult", "detect", "lookup_context_length"] diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 8a4557e2bb..a8d4c3200b 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2935,19 +2935,30 @@ def _save_custom_provider( def _model_flow_azure_foundry(config, current_model=""): """Azure Foundry provider: configure endpoint, API mode, API key, and model. - Azure Foundry supports both OpenAI-style (/v1/chat/completions) and - Anthropic-style (/v1/messages) endpoints. The user must select which - API format their endpoint uses. + Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and + Anthropic-style (``/v1/messages``) endpoints. The wizard auto-detects + the transport and available models when possible: + + * URLs ending in ``/anthropic`` → Anthropic Messages API. + * Successful ``GET /models`` probe → OpenAI-style + populates + a picker with the returned deployment / model IDs. + * Anthropic Messages probe fallback when ``/models`` fails. 
+ * Manual entry when every probe fails (private endpoints, etc.). + + Context lengths for the chosen model are resolved via the standard + :func:`agent.model_metadata.get_model_context_length` chain + (models.dev, provider metadata, hardcoded family fallbacks). """ - from hermes_cli.auth import _save_model_choice, deactivate_provider + from hermes_cli.auth import _save_model_choice, deactivate_provider # noqa: F401 from hermes_cli.config import get_env_value, save_env_value, load_config, save_config + from hermes_cli import azure_detect import getpass - # Load current Azure Foundry configuration + # ── Load current Azure Foundry configuration ───────────────────── model_cfg = config.get("model", {}) - if isinstance(model_cfg, dict): - current_base_url = model_cfg.get("base_url", "") if model_cfg.get("provider") == "azure-foundry" else "" - current_api_mode = model_cfg.get("api_mode", "") if model_cfg.get("provider") == "azure-foundry" else "" + if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry": + current_base_url = str(model_cfg.get("base_url", "") or "") + current_api_mode = str(model_cfg.get("api_mode", "") or "") else: current_base_url = "" current_api_mode = "" @@ -2959,64 +2970,43 @@ def _model_flow_azure_foundry(config, current_model=""): print("=" * 50) print() print("Azure Foundry can host models with either OpenAI-style or") - print("Anthropic-style API endpoints. Configure your endpoint below.") + print("Anthropic-style API endpoints. 
Hermes will probe your") + print("endpoint to auto-detect the transport and the deployed") + print("models when possible.") print() if current_base_url: print(f" Current endpoint: {current_base_url}") if current_api_mode: - mode_label = "OpenAI-style" if current_api_mode == "chat_completions" else "Anthropic-style" - print(f" Current API mode: {mode_label}") + _lbl = "OpenAI-style" if current_api_mode == "chat_completions" else "Anthropic-style" + print(f" Current API mode: {_lbl}") if current_api_key: print(f" Current API key: {current_api_key[:8]}...") print() - # Step 1: Get the endpoint URL + # ── Step 1: endpoint URL ───────────────────────────────────────── try: - base_url = input(f"API endpoint URL [{current_base_url or 'e.g. https://your-model.azure.com/v1'}]: ").strip() + base_url = input( + f"API endpoint URL [{current_base_url or 'e.g. https://your-resource.openai.azure.com/openai/v1'}]: " + ).strip() except (KeyboardInterrupt, EOFError): print("\nCancelled.") return - effective_url = base_url or current_base_url + effective_url = (base_url or current_base_url).rstrip("/") if not effective_url: print("No endpoint URL provided. Cancelled.") return - - # Validate URL format if not effective_url.startswith(("http://", "https://")): print(f"Invalid URL: {effective_url} (must start with http:// or https://)") return - # Step 2: Select API mode (OpenAI or Anthropic style) - print() - print("Select the API format your Azure Foundry endpoint uses:") - print() - print(" 1. OpenAI-style (POST /v1/chat/completions)") - print(" For: GPT models, Llama, Mistral, and most open models") - print() - print(" 2. 
Anthropic-style (POST /v1/messages)") - print(" For: Claude models deployed via Anthropic API format") - print() - - try: - default_choice = "1" if current_api_mode != "anthropic_messages" else "2" - mode_choice = input(f"API format [1/2] ({default_choice}): ").strip() or default_choice - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - - if mode_choice == "2": - api_mode = "anthropic_messages" - print(" → Using Anthropic-style API format") - else: - api_mode = "chat_completions" - print(" → Using OpenAI-style API format") - - # Step 3: Get the API key + # ── Step 2: API key ────────────────────────────────────────────── print() try: - api_key = getpass.getpass(f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: ").strip() + api_key = getpass.getpass( + f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: " + ).strip() except (KeyboardInterrupt, EOFError): print("\nCancelled.") return @@ -3026,24 +3016,82 @@ def _model_flow_azure_foundry(config, current_model=""): print("No API key provided. Cancelled.") return - # Step 4: Get the model name + # ── Step 3: auto-detect transport + models ─────────────────────── print() - try: - model_name = input(f"Model name [{current_model or 'e.g. 
gpt-4, claude-3-5-sonnet'}]: ").strip() - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return + print("◐ Probing endpoint to auto-detect transport and models...") + detection = azure_detect.detect(effective_url, effective_key) + + discovered_models: list[str] = list(detection.models) + api_mode: str = detection.api_mode or "" + + if api_mode: + mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style" + print(f"✓ Detected API transport: {mode_label}") + if detection.reason: + print(f" ({detection.reason})") + if discovered_models: + print(f"✓ Found {len(discovered_models)} deployed model(s) on this endpoint") + else: + print(f"⚠ Auto-detection incomplete: {detection.reason}") + print() + print("Select the API format your Azure Foundry endpoint uses:") + print(" 1. OpenAI-style (POST /v1/chat/completions)") + print(" For: GPT models, Llama, Mistral, and most open models") + print(" 2. Anthropic-style (POST /v1/messages)") + print(" For: Claude models deployed via Anthropic API format") + try: + default_choice = "2" if current_api_mode == "anthropic_messages" else "1" + mode_choice = input(f"API format [1/2] ({default_choice}): ").strip() or default_choice + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions" + + # ── Step 4: model name ─────────────────────────────────────────── + print() + effective_model = "" + if discovered_models: + print("Available models on this endpoint:") + for i, mid in enumerate(discovered_models[:30], start=1): + print(f" {i:>2}. {mid}") + if len(discovered_models) > 30: + print(f" ... 
and {len(discovered_models) - 30} more (type name manually if not shown)") + print() + try: + pick = input( + f"Pick by number, or type a deployment name [{current_model or discovered_models[0]}]: " + ).strip() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + if not pick: + effective_model = current_model or discovered_models[0] + elif pick.isdigit() and 1 <= int(pick) <= min(len(discovered_models), 30): + effective_model = discovered_models[int(pick) - 1] + else: + effective_model = pick + else: + try: + model_name = input( + f"Model / deployment name [{current_model or 'e.g. gpt-5.4, claude-sonnet-4-6'}]: " + ).strip() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + effective_model = model_name or current_model - effective_model = model_name or current_model if not effective_model: print("No model name provided. Cancelled.") return - # Step 5: Save configuration - # Save API key to .env + # ── Step 5: context-length lookup ──────────────────────────────── + ctx_len = azure_detect.lookup_context_length( + effective_model, effective_url, effective_key, + ) + + # ── Step 6: persist ────────────────────────────────────────────── save_env_value("AZURE_FOUNDRY_API_KEY", effective_key) - # Update config.yaml cfg = load_config() model = cfg.get("model") if not isinstance(model, dict): @@ -3051,19 +3099,18 @@ def _model_flow_azure_foundry(config, current_model=""): cfg["model"] = model model["provider"] = "azure-foundry" - model["base_url"] = effective_url.rstrip("/") + model["base_url"] = effective_url model["api_mode"] = api_mode model["default"] = effective_model + if ctx_len: + model["context_length"] = ctx_len save_config(cfg) - - # Deactivate any OAuth provider deactivate_provider() - - # Update caller's config dict config["model"] = dict(model) - # Clear any conflicting env vars + # Clear any conflicting env vars so auxiliary clients don't poison + # themselves with a stale OpenAI base URL / key. 
if get_env_value("OPENAI_BASE_URL"): save_env_value("OPENAI_BASE_URL", "") if get_env_value("OPENAI_API_KEY"): @@ -3071,10 +3118,14 @@ def _model_flow_azure_foundry(config, current_model=""): mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style" print() - print(f"✓ Azure Foundry configured:") - print(f" Endpoint: {effective_url}") - print(f" API mode: {mode_label}") - print(f" Model: {effective_model}") + print("✓ Azure Foundry configured:") + print(f" Endpoint: {effective_url}") + print(f" API mode: {mode_label}") + print(f" Model: {effective_model}") + if ctx_len: + print(f" Context length: {ctx_len:,} tokens") + else: + print(" Context length: not auto-detected (will fall back at runtime)") print() diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 5c9375ab70..d77154df54 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -602,6 +602,71 @@ def _resolve_openrouter_runtime( } +def _resolve_azure_foundry_runtime( + *, + requested_provider: str, + model_cfg: Dict[str, Any], + explicit_api_key: Optional[str] = None, + explicit_base_url: Optional[str] = None, +) -> Dict[str, Any]: + """Resolve an Azure Foundry runtime entry. + + Reads ``model.base_url`` + ``model.api_mode`` from config.yaml (or + explicit overrides), pulls the API key from ``.env`` / env var, and + strips a trailing ``/v1`` for Anthropic-style endpoints because the + Anthropic SDK appends ``/v1/messages`` internally. + + Raises :class:`AuthError` when required values are missing. 
+ """ + explicit_api_key = str(explicit_api_key or "").strip() + explicit_base_url_clean = str(explicit_base_url or "").strip().rstrip("/") + + cfg_provider = str(model_cfg.get("provider") or "").strip().lower() + cfg_base_url = "" + cfg_api_mode = "chat_completions" + if cfg_provider == "azure-foundry": + cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/") + cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions" + + env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/") + base_url = explicit_base_url_clean or cfg_base_url or env_base_url + if not base_url: + raise AuthError( + "Azure Foundry requires a base URL. Set it via 'hermes model' or " + "the AZURE_FOUNDRY_BASE_URL environment variable." + ) + + api_key = explicit_api_key + if not api_key: + try: + from hermes_cli.config import get_env_value + api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or "" + except Exception: + api_key = "" + if not api_key: + api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip() + if not api_key: + raise AuthError( + "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in " + "~/.hermes/.env or run 'hermes model' to configure." + ) + + # Anthropic SDK appends /v1/messages itself, so strip any trailing /v1 + # we inherited from the configured base_url to avoid double-/v1 paths. 
+ if cfg_api_mode == "anthropic_messages": + base_url = re.sub(r"/v1/?$", "", base_url) + + source = "explicit" if (explicit_api_key or explicit_base_url) else "config" + return { + "provider": "azure-foundry", + "api_mode": cfg_api_mode, + "base_url": base_url, + "api_key": api_key, + "source": source, + "requested_provider": requested_provider, + } + + def _resolve_explicit_runtime( *, provider: str, @@ -693,44 +758,12 @@ def _resolve_explicit_runtime( # Azure Foundry: user-configured endpoint with selectable API mode if provider == "azure-foundry": - cfg_provider = str(model_cfg.get("provider") or "").strip().lower() - cfg_base_url = "" - cfg_api_mode = "chat_completions" - if cfg_provider == "azure-foundry": - cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/") - cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions" - - env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/") - base_url = explicit_base_url or cfg_base_url or env_base_url - if not base_url: - raise AuthError( - "Azure Foundry requires a base URL. Set it via 'hermes model' or " - "the AZURE_FOUNDRY_BASE_URL environment variable." - ) - - api_key = explicit_api_key - if not api_key: - from hermes_cli.config import get_env_value - api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or os.getenv("AZURE_FOUNDRY_API_KEY", "") - if not api_key: - raise AuthError( - "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in " - "~/.hermes/.env or run 'hermes model' to configure." 
- ) - - # For Anthropic-style endpoints, strip /v1 suffix since the Anthropic SDK - # appends /v1/messages internally - if cfg_api_mode == "anthropic_messages": - base_url = re.sub(r"/v1/?$", "", base_url) - - return { - "provider": "azure-foundry", - "api_mode": cfg_api_mode, - "base_url": base_url, - "api_key": api_key, - "source": "explicit", - "requested_provider": requested_provider, - } + return _resolve_azure_foundry_runtime( + requested_provider=requested_provider, + model_cfg=model_cfg, + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + ) pconfig = PROVIDER_REGISTRY.get(provider) if pconfig and pconfig.auth_type == "api_key": @@ -820,6 +853,20 @@ def resolve_runtime_provider( "requested_provider": requested_provider, } + # Azure Foundry: user-configured endpoint with selectable API mode + # (OpenAI-style chat_completions or Anthropic-style anthropic_messages). + # Resolve before the custom-runtime / pool / generic paths so Azure + # config is always picked up from model.base_url + model.api_mode, + # regardless of whether the caller passed explicit_* args. 
+ if requested_provider == "azure-foundry": + azure_runtime = _resolve_azure_foundry_runtime( + requested_provider=requested_provider, + model_cfg=_get_model_config(), + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + ) + return azure_runtime + custom_runtime = _resolve_named_custom_runtime( requested_provider=requested_provider, explicit_api_key=explicit_api_key, diff --git a/tests/hermes_cli/test_azure_detect.py b/tests/hermes_cli/test_azure_detect.py new file mode 100644 index 0000000000..45eaa86e73 --- /dev/null +++ b/tests/hermes_cli/test_azure_detect.py @@ -0,0 +1,237 @@ +"""Tests for hermes_cli.azure_detect — transport & model auto-detection.""" + +from __future__ import annotations + +import json +from unittest.mock import MagicMock, patch + +import pytest + +from hermes_cli import azure_detect + + +# ---------------------------------------------------------------------- +# Helpers +# ---------------------------------------------------------------------- + +class _FakeHTTPResponse: + """Minimal stand-in for urllib.request.urlopen's context manager.""" + + def __init__(self, status: int, body: bytes): + self.status = status + self._body = body + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def read(self) -> bytes: + return self._body + + +def _openai_models_body(*ids: str) -> bytes: + return json.dumps({ + "object": "list", + "data": [{"id": i, "object": "model"} for i in ids], + }).encode() + + +def _anthropic_error_body(msg: str = "model not found") -> bytes: + return json.dumps({ + "type": "error", + "error": {"type": "invalid_request_error", "message": msg}, + }).encode() + + +# ---------------------------------------------------------------------- +# _looks_like_anthropic_path +# ---------------------------------------------------------------------- + +@pytest.mark.parametrize("url, expected", [ + ("https://foo.services.ai.azure.com/anthropic", True), + 
("https://foo.services.ai.azure.com/anthropic/", True), + ("https://foo.services.ai.azure.com/anthropic/v1", True), + ("https://foo.openai.azure.com/openai/v1", False), + ("https://foo.openai.azure.com/", False), + ("https://openrouter.ai/api/v1", False), +]) +def test_looks_like_anthropic_path(url, expected): + assert azure_detect._looks_like_anthropic_path(url) is expected + + +# ---------------------------------------------------------------------- +# _extract_model_ids +# ---------------------------------------------------------------------- + +def test_extract_model_ids_openai_shape(): + body = { + "object": "list", + "data": [ + {"id": "gpt-4.1-mini", "object": "model"}, + {"id": "claude-sonnet-4-6", "object": "model"}, + ], + } + assert azure_detect._extract_model_ids(body) == ["gpt-4.1-mini", "claude-sonnet-4-6"] + + +def test_extract_model_ids_bad_shape_returns_empty(): + assert azure_detect._extract_model_ids({}) == [] + assert azure_detect._extract_model_ids({"data": "not-a-list"}) == [] + assert azure_detect._extract_model_ids({"data": [{"no-id": True}]}) == [] + + +# ---------------------------------------------------------------------- +# detect() integration +# ---------------------------------------------------------------------- + +def test_detect_anthropic_path_wins_without_http(): + """URL path sniff short-circuits — no HTTP call happens.""" + with patch.object(azure_detect, "_http_get_json") as fake_get, \ + patch.object(azure_detect, "_probe_anthropic_messages") as fake_probe: + result = azure_detect.detect( + "https://foo.services.ai.azure.com/anthropic", "key-abc", + ) + assert result.api_mode == "anthropic_messages" + assert result.is_anthropic is True + assert "path" in result.reason.lower() + fake_get.assert_not_called() + fake_probe.assert_not_called() + + +def test_detect_openai_models_probe_success(): + """/models probe returning a model list → chat_completions.""" + def _fake_get(url, api_key, timeout=6.0): + assert "key-abc" == 
api_key + return 200, json.loads(_openai_models_body("gpt-5.4", "claude-opus-4-6")) + + with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get): + result = azure_detect.detect( + "https://my.openai.azure.com/openai/v1", "key-abc", + ) + assert result.api_mode == "chat_completions" + assert result.models_probe_ok is True + assert result.models == ["gpt-5.4", "claude-opus-4-6"] + assert "/models" in result.reason + + +def test_detect_openai_models_probe_empty_list_still_counts(): + """Endpoint returned OpenAI shape but no models → still chat_completions.""" + def _fake_get(url, api_key, timeout=6.0): + return 200, {"object": "list", "data": []} + + with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get): + result = azure_detect.detect( + "https://my.openai.azure.com/openai/v1", "key-abc", + ) + assert result.api_mode == "chat_completions" + assert result.models == [] + assert result.models_probe_ok is True + + +def test_detect_falls_back_to_anthropic_probe(): + """/models fails but Anthropic Messages probe succeeds.""" + def _fake_get(url, api_key, timeout=6.0): + return 401, None # /models forbidden + + with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get), \ + patch.object(azure_detect, "_probe_anthropic_messages", return_value=True): + result = azure_detect.detect( + "https://my.services.ai.azure.com/v1", "key-abc", + ) + assert result.api_mode == "anthropic_messages" + assert result.is_anthropic is True + + +def test_detect_all_probes_fail_returns_none(): + """Every probe fails → api_mode is None and caller falls back to manual.""" + with patch.object(azure_detect, "_http_get_json", return_value=(500, None)), \ + patch.object(azure_detect, "_probe_anthropic_messages", return_value=False): + result = azure_detect.detect( + "https://some-private.example.com/", "key-abc", + ) + assert result.api_mode is None + assert result.models == [] + assert "manual" in result.reason.lower() + + +# 
# ----------------------------------------------------------------------
# _probe_openai_models URL list (Azure vs v1 api-version)
# ----------------------------------------------------------------------

def test_probe_openai_models_tries_multiple_api_versions():
    """First call (no api-version) fails, api-version fallback succeeds."""
    attempted_urls = []

    def fake_get(url, api_key, timeout=6.0):
        attempted_urls.append(url)
        if "api-version" in url:
            return 200, json.loads(_openai_models_body("gpt-4.1"))
        return 404, None

    with patch.object(azure_detect, "_http_get_json", side_effect=fake_get):
        ok, models = azure_detect._probe_openai_models(
            "https://my.openai.azure.com/openai/v1", "k",
        )
    assert ok is True
    assert models == ["gpt-4.1"]
    # Both URL flavors must have been attempted: bare first, then with an
    # explicit api-version query parameter.
    assert any("api-version" not in u for u in attempted_urls)
    assert any("api-version" in u for u in attempted_urls)


# ----------------------------------------------------------------------
# _http_get_json error handling
# ----------------------------------------------------------------------

def test_http_get_json_on_urlerror_returns_zero_none():
    """Network failure returns (0, None), never raises."""
    import urllib.error

    network_failure = urllib.error.URLError("dns fail")
    with patch("hermes_cli.azure_detect.urllib_request.urlopen",
               side_effect=network_failure):
        status, body = azure_detect._http_get_json("https://bad.example/", "k")
    assert status == 0
    assert body is None


def test_http_get_json_on_http_error_returns_code_none():
    """HTTP 4xx/5xx returns (code, None)."""
    import urllib.error

    forbidden = urllib.error.HTTPError("https://x/", 403, "Forbidden", {}, None)
    with patch("hermes_cli.azure_detect.urllib_request.urlopen",
               side_effect=forbidden):
        status, body = azure_detect._http_get_json("https://x/", "k")
    assert status == 403
    assert body is None
# ----------------------------------------------------------------------
# lookup_context_length
# ----------------------------------------------------------------------

def test_lookup_context_length_returns_known():
    """When model_metadata returns a non-fallback value, we pass it through."""
    resolver = MagicMock(return_value=400000)
    with patch("agent.model_metadata.get_model_context_length", resolver):
        with patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000):
            found = azure_detect.lookup_context_length(
                "gpt-5.4", "https://x.openai.azure.com/openai/v1", "k",
            )
    assert found == 400000


def test_lookup_context_length_returns_none_on_fallback():
    """When resolver falls through to DEFAULT_FALLBACK_CONTEXT, we return None."""
    with patch("agent.model_metadata.get_model_context_length", return_value=128000):
        with patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000):
            found = azure_detect.lookup_context_length(
                "totally-unknown-model", "https://x.openai.azure.com/openai/v1", "k",
            )
    assert found is None


def test_lookup_context_length_swallows_exceptions():
    """Resolver raising must not crash the wizard."""
    with patch("agent.model_metadata.get_model_context_length",
               side_effect=RuntimeError("boom")):
        assert azure_detect.lookup_context_length("m", "https://x/", "k") is None


# =============================================================================
# Azure Foundry — both OpenAI-style and Anthropic-style endpoints
# =============================================================================

class TestAzureFoundryResolution:
    """Verify Azure Foundry resolves correctly for both API modes."""

    def _make_cfg(self, base_url: str, api_mode: str = "chat_completions"):
        # Shape mirrors the persisted model-config for azure-foundry sessions.
        return {
            "provider": "azure-foundry",
            "base_url": base_url,
            "api_mode": api_mode,
            "default": "gpt-5.4",
        }

    @staticmethod
    def _stub_resolution(monkeypatch, cfg):
        # Route provider resolution, model config, and key-pool lookups to
        # fixed stubs so only the azure-foundry branch is exercised.
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
        monkeypatch.setattr(rp, "_get_model_config", lambda: cfg)
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)

    def test_azure_foundry_openai_style_explicit(self, monkeypatch):
        """OpenAI-style Azure Foundry → chat_completions, keeps base_url as-is."""
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-openai")
        self._stub_resolution(monkeypatch, self._make_cfg(
            "https://my-resource.openai.azure.com/openai/v1",
            "chat_completions",
        ))

        resolved = rp.resolve_runtime_provider(requested="azure-foundry")

        assert resolved["provider"] == "azure-foundry"
        assert resolved["api_mode"] == "chat_completions"
        assert resolved["base_url"] == "https://my-resource.openai.azure.com/openai/v1"
        assert resolved["api_key"] == "az-key-openai"

    def test_azure_foundry_anthropic_style_strips_v1_suffix(self, monkeypatch):
        """Anthropic-style Azure Foundry → anthropic_messages, /v1 stripped
        because the Anthropic SDK appends /v1/messages itself."""
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-ant")
        self._stub_resolution(monkeypatch, self._make_cfg(
            "https://my-resource.services.ai.azure.com/anthropic/v1",
            "anthropic_messages",
        ))

        resolved = rp.resolve_runtime_provider(requested="azure-foundry")

        assert resolved["provider"] == "azure-foundry"
        assert resolved["api_mode"] == "anthropic_messages"
        # /v1 stripped so SDK can append /v1/messages cleanly
        assert resolved["base_url"] == "https://my-resource.services.ai.azure.com/anthropic"

    def test_azure_foundry_missing_base_url_raises(self, monkeypatch):
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key")
        monkeypatch.delenv("AZURE_FOUNDRY_BASE_URL", raising=False)
        self._stub_resolution(monkeypatch, {})

        with pytest.raises(rp.AuthError, match="base URL"):
            rp.resolve_runtime_provider(requested="azure-foundry")

    def test_azure_foundry_missing_api_key_raises(self, monkeypatch):
        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
        # `get_env_value` reads from ~/.hermes/.env — mock it to return None
        # so the resolver can't find a key there either.
        import hermes_cli.config as cfg_mod
        monkeypatch.setattr(cfg_mod, "get_env_value", lambda k: None)
        self._stub_resolution(monkeypatch, self._make_cfg(
            "https://my-resource.openai.azure.com/openai/v1"
        ))

        with pytest.raises(rp.AuthError, match="API key"):
            rp.resolve_runtime_provider(requested="azure-foundry")