feat(azure-foundry): auto-detect transport, models, context length

The azure-foundry wizard now probes the endpoint before asking the user to pick anything by hand:

1. URL path sniff — endpoints ending in /anthropic are Azure Foundry Claude routes and skip to anthropic_messages.
2. GET <base>/models probe — if the endpoint returns an OpenAI-shaped model list, we switch to chat_completions and prefill the picker with the returned deployment/model IDs.
3. Anthropic Messages probe — fallback for endpoints that don't expose /models but do speak the Anthropic Messages shape.
4. Manual fallback — private endpoints / custom routes still work; the user picks API mode + types a deployment name.

Context length for the selected model is resolved through the existing agent.model_metadata.get_model_context_length chain (models.dev, provider metadata, hardcoded family fallbacks) and stored in model.context_length when a non-default value is found.

Also refactors runtime_provider so Azure Foundry resolution is reused between the explicit-credentials path and the default top-level path — previously the /v1 strip for Anthropic-style Azure only ran when the caller passed explicit_* args, which meant config-driven sessions hit a double-/v1 URL.

New module hermes_cli/azure_detect.py with 19 unit tests covering:
- path sniff, model ID extraction, probe fallbacks
- HTTP error handling (URLError, HTTPError)
- context-length lookup passthrough
- DEFAULT_FALLBACK_CONTEXT rejection

New runtime tests cover:
- OpenAI-style Azure Foundry
- Anthropic-style Azure Foundry with /v1 stripping
- Missing base_url / API key raising AuthError

Rationale: Microsoft confirms there's no pure-API-key endpoint to list Azure deployments (that requires ARM management auth). The v1 Azure OpenAI endpoint does expose /models with the resource's available model catalog, which is good enough for picker prefill in the common case. Users on private/gated endpoints fall through to manual entry.
2026-04-28 06:51:16 +08:00 · 2026-04-25 18:38:38 -07:00
parent ac57114284
commit 731e1ef8cb
5 changed files with 814 additions and 101 deletions
--- a/hermes_cli/azure_detect.py
+++ b/hermes_cli/azure_detect.py
@@ -0,0 +1,300 @@
+"""Azure Foundry endpoint auto-detection.
+
+Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
+  - API transport (OpenAI-style ``chat_completions`` vs
+    Anthropic-style ``anthropic_messages``)
+  - Available models (best effort — Azure does not expose a deployment
+    listing via the inference API key, but Azure OpenAI v1 endpoints
+    return the resource's model catalog via ``GET /models``)
+  - Context length for each discovered/entered model, via the existing
+    :func:`agent.model_metadata.get_model_context_length` resolver.
+
+Rationale:
+
+Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
+deployment enumeration requires ARM management-plane auth.  Azure
+OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
+a ``/models`` list, but it reflects the resource's *available* models
+rather than the user's *deployed* deployment names.  In practice it is
+still a useful hint — the user picks a familiar model name and we look
+up its context length from the catalog.
+
+The detector never crashes on errors (every HTTP call is wrapped in a
+broad try/except).  Callers get a :class:`DetectionResult` with whatever
+information could be gathered, and fall back to manual entry for the
+rest.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from dataclasses import dataclass, field
+from typing import Optional
+from urllib import request as urllib_request
+from urllib.error import HTTPError, URLError
+from urllib.parse import urlparse, urlunparse
+
+logger = logging.getLogger(__name__)
+
+
+# Azure OpenAI ``api-version`` values to probe with, tried in order after
+# an unversioned ``GET /models``.  The v1 GA endpoint accepts requests
+# without ``api-version``, so these only matter for pre-v1 resources.
+_AZURE_OPENAI_PROBE_API_VERSIONS = (
+    "2025-04-01-preview",
+    "2024-10-21",  # oldest GA that supports /models
+)
+
+# ``api-version`` sent with the Anthropic Messages probe.  Mirrors the
+# value used by ``agent/anthropic_adapter.py``; keep the two in sync.
+_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"
+
+
+@dataclass
+class DetectionResult:
+    """Advisory outcome of probing an endpoint; unset fields keep defaults."""
+
+    #: Detected API transport: ``"chat_completions"``,
+    #: ``"anthropic_messages"``, or ``None`` when no probe matched.
+    api_mode: Optional[str] = None
+
+    #: Model IDs returned by ``GET /models`` (best effort; these may be
+    #: catalog names rather than deployment names).  Empty otherwise.
+    models: list[str] = field(default_factory=list)
+
+    #: Lowercased host parsed from the base URL; ``""`` if unparseable.
+    hostname: str = ""
+
+    #: Human-readable explanation of the outcome, including the failure
+    #: case, so the wizard can show it to the user.
+    reason: str = ""
+
+    #: ``True`` when ``/models`` returned a valid OpenAI-shaped payload.
+    models_probe_ok: bool = False
+
+    #: ``True`` when the endpoint was classified as Anthropic-style
+    #: (from the URL path or the live Messages probe).
+    is_anthropic: bool = False
+
+
+def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
+    """GET *url* with ``api-key`` + ``Authorization`` headers.  Return ``(status,
+    body)``; ``body`` is a JSON object or ``None``, ``status`` 0 = no reply.  Never raises."""
+    req = urllib_request.Request(url, method="GET")
+    # Azure OpenAI wants ``api-key``; some Azure deployments (and Anthropic-style
+    # routes) want ``Authorization: Bearer``.  Send both: one probe per URL.
+    req.add_header("api-key", api_key)
+    req.add_header("Authorization", f"Bearer {api_key}")
+    req.add_header("User-Agent", "hermes-agent/azure-detect")
+    try:
+        with urllib_request.urlopen(req, timeout=timeout) as resp:
+            status, raw = resp.status, resp.read()
+    except HTTPError as exc:
+        return exc.code, None
+    except (URLError, TimeoutError, OSError) as exc:
+        logger.debug("azure_detect: GET %s failed: %s", url, exc)
+        return 0, None
+    except Exception as exc:  # pragma: no cover — defensive
+        logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
+        return 0, None
+    try:
+        parsed = json.loads(raw.decode("utf-8", errors="replace"))
+    except Exception:  # best effort: an unparseable body is simply "no body"
+        return status, None
+    return status, parsed if isinstance(parsed, dict) else None  # arrays/scalars → None
+
+
+def _strip_trailing_v1(url: str) -> str:
+    """Drop one trailing ``/v1`` segment (slash-terminated or not) from *url*."""
+    return re.compile(r"/v1/?$").sub("", url.rstrip("/"))
+
+
+def _looks_like_anthropic_path(url: str) -> bool:
+    """Report whether any path segment of *url* is ``anthropic``
+    (case-insensitive), e.g. ``/anthropic`` or ``/anthropic/v1``.  Azure
+    Foundry routes Claude traffic through such a dedicated path."""
+    try:
+        segments = (urlparse(url).path or "").lower().rstrip("/").split("/")
+        # Index 0 is the text before the first "/", not a real segment.
+        return "anthropic" in segments[1:]
+    except Exception:
+        return False
+
+
+def _extract_model_ids(payload: dict) -> list[str]:
+    """Pull model IDs out of an OpenAI-shaped ``/models`` payload.
+
+    Shape mismatches are skipped rather than raised; worst case is ``[]``.
+    """
+    if not isinstance(payload, dict) or not isinstance(payload.get("data"), list):
+        return []
+    # OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}; "model" and
+    # "name" are accepted as alternative identifier keys.
+    candidates = (
+        entry.get("id") or entry.get("model") or entry.get("name")
+        for entry in payload["data"]
+        if isinstance(entry, dict)
+    )
+    return [mid for mid in candidates if isinstance(mid, str) and mid]
+
+
+def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
+    """Probe ``<base>/models`` for an OpenAI-shaped response.
+
+    Returns ``(ok, models)``.  ``ok`` is True iff some candidate URL
+    answered 200 with a JSON object that has a ``data`` key.
+    """
+    base_url = base_url.rstrip("/")
+
+    # Azure OpenAI v1 ({resource}.openai.azure.com/openai/v1) needs no
+    # api-version, so probe bare first; versioned URLs cover pre-v1.
+    candidates = [f"{base_url}/models"] + [
+        f"{base_url}/models?api-version={v}" for v in _AZURE_OPENAI_PROBE_API_VERSIONS
+    ]
+
+    for url in candidates:
+        status, body = _http_get_json(url, api_key)
+        if status == 0:
+            # Unreachable (DNS/refused/timeout): other api-versions won't help.
+            break
+        if status != 200 or body is None:
+            continue
+        ids = _extract_model_ids(body)
+        if ids:
+            logger.info("azure_detect: /models probe OK at %s (%d models)", url, len(ids))
+            return True, ids
+        # 200 + ``data`` but no IDs still counts as OpenAI-shaped; the
+        # caller lets the user proceed with manual entry.
+        if isinstance(body, dict) and "data" in body:
+            return True, []
+    return False, []
+
+
+def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
+    """POST a minimal request (fake model, ``max_tokens=1``) to
+    ``<base>/v1/messages`` and report whether the endpoint *recognises*
+    the Anthropic Messages shape: a non-error response, an error body
+    mentioning ``anthropic`` or shaped ``{"type": "error", ...}``, or a
+    400 mentioning ``messages`` / ``model``.  Never raises.
+    """
+    base = _strip_trailing_v1(base_url)
+    url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
+    payload = json.dumps({
+        "model": "probe",
+        "max_tokens": 1,
+        "messages": [{"role": "user", "content": "ping"}],
+    }).encode("utf-8")
+    req = urllib_request.Request(url, method="POST", data=payload)
+    req.add_header("api-key", api_key)
+    req.add_header("Authorization", f"Bearer {api_key}")
+    req.add_header("anthropic-version", "2023-06-01")
+    req.add_header("content-type", "application/json")
+    req.add_header("User-Agent", "hermes-agent/azure-detect")
+    try:
+        with urllib_request.urlopen(req, timeout=6.0) as resp:
+            # "probe" isn't a real deployment, but any success means Anthropic.
+            return resp.status < 500
+    except HTTPError as exc:
+        try:
+            lowered = exc.read().decode("utf-8", errors="replace").lower()
+        except Exception:
+            return False
+        if "anthropic" in lowered:
+            return True
+        # Anthropic errors are {"type": "error", "error": {...}}.  OpenAI
+        # errors nest "type" under "error", so a substring test for both
+        # keys would misclassify OpenAI-style endpoints as Anthropic.
+        try:
+            parsed = json.loads(lowered)
+        except Exception:
+            parsed = None
+        if isinstance(parsed, dict) and parsed.get("type") == "error":
+            return True
+        # A 400 "model not found" IS Anthropic; a plain 404 is not.
+        return exc.code == 400 and ("messages" in lowered or "model" in lowered)
+    except Exception:  # URLError, timeouts, OSError, malformed URL
+        return False
+
+
+def detect(base_url: str, api_key: str) -> DetectionResult:
+    """Classify an Azure endpoint's transport and list its models.
+
+    Intended for the wizard, ahead of any manual API-mode prompt.  The
+    outcome is *advisory*: when ``api_mode`` comes back ``None`` the
+    caller should ask the user instead.  Steps run cheapest-first and
+    the first match wins.
+    """
+    try:
+        host = (urlparse(base_url).hostname or "").lower()
+    except Exception:
+        host = ""
+
+    # 1. Path sniff — no network.  Azure Foundry serves Anthropic-style
+    #    deployments under a dedicated ``/anthropic`` path.
+    if _looks_like_anthropic_path(base_url):
+        return DetectionResult(
+            api_mode="anthropic_messages",
+            hostname=host,
+            reason="URL path ends in /anthropic → Anthropic Messages API",
+            is_anthropic=True,
+        )
+
+    # 2. OpenAI-style ``/models`` probe; success means OpenAI wire format.
+    ok, models = _probe_openai_models(base_url, api_key)
+    if ok:
+        if models:
+            why = f"GET /models returned {len(models)} model(s) — OpenAI-style endpoint"
+        else:
+            why = "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
+        return DetectionResult(
+            api_mode="chat_completions", models=models, hostname=host,
+            reason=why, models_probe_ok=True,
+        )
+
+    # 3. Anthropic Messages probe.  Slower and more intrusive than
+    #    ``/models``, hence only attempted once the OpenAI probe failed.
+    if _probe_anthropic_messages(base_url, api_key):
+        return DetectionResult(
+            api_mode="anthropic_messages",
+            hostname=host,
+            reason="Endpoint accepts Anthropic Messages shape",
+            is_anthropic=True,
+        )
+
+    # 4. Nothing matched — the caller falls back to manual selection.
+    return DetectionResult(
+        hostname=host,
+        reason=(
+            "Could not probe endpoint (private network, missing model list, or "
+            "non-standard path) — falling back to manual API-mode selection"
+        ),
+    )
+
+
+def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
+    """Resolve *model*'s context length through
+    :func:`agent.model_metadata.get_model_context_length`, mapping the
+    resolver's generic fallback default (and any failure) to ``None`` so
+    the wizard can tell a known value from a guess."""
+    try:
+        from agent.model_metadata import DEFAULT_FALLBACK_CONTEXT as fallback
+        from agent.model_metadata import get_model_context_length as resolve
+    except Exception:
+        return None
+
+    try:
+        length = resolve(model, base_url=base_url, api_key=api_key)
+    except Exception as exc:
+        logger.debug("azure_detect: context length lookup failed: %s", exc)
+        return None
+
+    # Only a positive int that differs from the generic fallback counts
+    # as a value we actually know.
+    is_known = isinstance(length, int) and length > 0 and length != fallback
+    return length if is_known else None
+
+
+
+__all__ = ["DetectionResult", "detect", "lookup_context_length"]
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -2935,19 +2935,30 @@ def _save_custom_provider(
 def _model_flow_azure_foundry(config, current_model=""):
    """Azure Foundry provider: configure endpoint, API mode, API key, and model.

-    Azure Foundry supports both OpenAI-style (/v1/chat/completions) and
-    Anthropic-style (/v1/messages) endpoints. The user must select which
-    API format their endpoint uses.
+    Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and
+    Anthropic-style (``/v1/messages``) endpoints.  The wizard auto-detects
+    the transport and available models when possible:
+
+    * URLs ending in ``/anthropic`` → Anthropic Messages API.
+    * Successful ``GET <base>/models`` probe → OpenAI-style + populates
+      a picker with the returned deployment / model IDs.
+    * Anthropic Messages probe fallback when ``/models`` fails.
+    * Manual entry when every probe fails (private endpoints, etc.).
+
+    Context lengths for the chosen model are resolved via the standard
+    :func:`agent.model_metadata.get_model_context_length` chain
+    (models.dev, provider metadata, hardcoded family fallbacks).
    """
-    from hermes_cli.auth import _save_model_choice, deactivate_provider
+    from hermes_cli.auth import _save_model_choice, deactivate_provider  # noqa: F401
    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
+    from hermes_cli import azure_detect
    import getpass

-    # Load current Azure Foundry configuration
+    # ── Load current Azure Foundry configuration ─────────────────────
    model_cfg = config.get("model", {})
-    if isinstance(model_cfg, dict):
-        current_base_url = model_cfg.get("base_url", "") if model_cfg.get("provider") == "azure-foundry" else ""
-        current_api_mode = model_cfg.get("api_mode", "") if model_cfg.get("provider") == "azure-foundry" else ""
+    if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry":
+        current_base_url = str(model_cfg.get("base_url", "") or "")
+        current_api_mode = str(model_cfg.get("api_mode", "") or "")
    else:
        current_base_url = ""
        current_api_mode = ""
@@ -2959,64 +2970,43 @@ def _model_flow_azure_foundry(config, current_model=""):
    print("=" * 50)
    print()
    print("Azure Foundry can host models with either OpenAI-style or")
-    print("Anthropic-style API endpoints. Configure your endpoint below.")
+    print("Anthropic-style API endpoints.  Hermes will probe your")
+    print("endpoint to auto-detect the transport and the deployed")
+    print("models when possible.")
    print()

    if current_base_url:
        print(f"  Current endpoint: {current_base_url}")
    if current_api_mode:
-        mode_label = "OpenAI-style" if current_api_mode == "chat_completions" else "Anthropic-style"
-        print(f"  Current API mode: {mode_label}")
+        _lbl = "OpenAI-style" if current_api_mode == "chat_completions" else "Anthropic-style"
+        print(f"  Current API mode: {_lbl}")
    if current_api_key:
        print(f"  Current API key:  {current_api_key[:8]}...")
    print()

-    # Step 1: Get the endpoint URL
+    # ── Step 1: endpoint URL ─────────────────────────────────────────
    try:
-        base_url = input(f"API endpoint URL [{current_base_url or 'e.g. https://your-model.azure.com/v1'}]: ").strip()
+        base_url = input(
+            f"API endpoint URL [{current_base_url or 'e.g. https://your-resource.openai.azure.com/openai/v1'}]: "
+        ).strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return

-    effective_url = base_url or current_base_url
+    effective_url = (base_url or current_base_url).rstrip("/")
    if not effective_url:
        print("No endpoint URL provided. Cancelled.")
        return
-
-    # Validate URL format
    if not effective_url.startswith(("http://", "https://")):
        print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
        return

-    # Step 2: Select API mode (OpenAI or Anthropic style)
-    print()
-    print("Select the API format your Azure Foundry endpoint uses:")
-    print()
-    print("  1. OpenAI-style  (POST /v1/chat/completions)")
-    print("     For: GPT models, Llama, Mistral, and most open models")
-    print()
-    print("  2. Anthropic-style  (POST /v1/messages)")
-    print("     For: Claude models deployed via Anthropic API format")
-    print()
-
-    try:
-        default_choice = "1" if current_api_mode != "anthropic_messages" else "2"
-        mode_choice = input(f"API format [1/2] ({default_choice}): ").strip() or default_choice
-    except (KeyboardInterrupt, EOFError):
-        print("\nCancelled.")
-        return
-
-    if mode_choice == "2":
-        api_mode = "anthropic_messages"
-        print("  → Using Anthropic-style API format")
-    else:
-        api_mode = "chat_completions"
-        print("  → Using OpenAI-style API format")
-
-    # Step 3: Get the API key
+    # ── Step 2: API key ──────────────────────────────────────────────
    print()
    try:
-        api_key = getpass.getpass(f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: ").strip()
+        api_key = getpass.getpass(
+            f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
+        ).strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return
@@ -3026,24 +3016,82 @@ def _model_flow_azure_foundry(config, current_model=""):
        print("No API key provided. Cancelled.")
        return

-    # Step 4: Get the model name
+    # ── Step 3: auto-detect transport + models ───────────────────────
    print()
-    try:
-        model_name = input(f"Model name [{current_model or 'e.g. gpt-4, claude-3-5-sonnet'}]: ").strip()
-    except (KeyboardInterrupt, EOFError):
-        print("\nCancelled.")
-        return
+    print("◐ Probing endpoint to auto-detect transport and models...")
+    detection = azure_detect.detect(effective_url, effective_key)
+
+    discovered_models: list[str] = list(detection.models)
+    api_mode: str = detection.api_mode or ""
+
+    if api_mode:
+        mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
+        print(f"✓ Detected API transport: {mode_label}")
+        if detection.reason:
+            print(f"    ({detection.reason})")
+        if discovered_models:
+            print(f"✓ Found {len(discovered_models)} deployed model(s) on this endpoint")
+    else:
+        print(f"⚠ Auto-detection incomplete: {detection.reason}")
+        print()
+        print("Select the API format your Azure Foundry endpoint uses:")
+        print("  1. OpenAI-style  (POST /v1/chat/completions)")
+        print("     For: GPT models, Llama, Mistral, and most open models")
+        print("  2. Anthropic-style  (POST /v1/messages)")
+        print("     For: Claude models deployed via Anthropic API format")
+        try:
+            default_choice = "2" if current_api_mode == "anthropic_messages" else "1"
+            mode_choice = input(f"API format [1/2] ({default_choice}): ").strip() or default_choice
+        except (KeyboardInterrupt, EOFError):
+            print("\nCancelled.")
+            return
+        api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions"
+
+    # ── Step 4: model name ───────────────────────────────────────────
+    print()
+    effective_model = ""
+    if discovered_models:
+        print("Available models on this endpoint:")
+        for i, mid in enumerate(discovered_models[:30], start=1):
+            print(f"  {i:>2}. {mid}")
+        if len(discovered_models) > 30:
+            print(f"  ... and {len(discovered_models) - 30} more (type name manually if not shown)")
+        print()
+        try:
+            pick = input(
+                f"Pick by number, or type a deployment name [{current_model or discovered_models[0]}]: "
+            ).strip()
+        except (KeyboardInterrupt, EOFError):
+            print("\nCancelled.")
+            return
+        if not pick:
+            effective_model = current_model or discovered_models[0]
+        elif pick.isdigit() and 1 <= int(pick) <= min(len(discovered_models), 30):
+            effective_model = discovered_models[int(pick) - 1]
+        else:
+            effective_model = pick
+    else:
+        try:
+            model_name = input(
+                f"Model / deployment name [{current_model or 'e.g. gpt-5.4, claude-sonnet-4-6'}]: "
+            ).strip()
+        except (KeyboardInterrupt, EOFError):
+            print("\nCancelled.")
+            return
+        effective_model = model_name or current_model

-    effective_model = model_name or current_model
    if not effective_model:
        print("No model name provided. Cancelled.")
        return

-    # Step 5: Save configuration
-    # Save API key to .env
+    # ── Step 5: context-length lookup ────────────────────────────────
+    ctx_len = azure_detect.lookup_context_length(
+        effective_model, effective_url, effective_key,
+    )
+
+    # ── Step 6: persist ──────────────────────────────────────────────
    save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)

-    # Update config.yaml
    cfg = load_config()
    model = cfg.get("model")
    if not isinstance(model, dict):
@@ -3051,19 +3099,18 @@ def _model_flow_azure_foundry(config, current_model=""):
        cfg["model"] = model

    model["provider"] = "azure-foundry"
-    model["base_url"] = effective_url.rstrip("/")
+    model["base_url"] = effective_url
    model["api_mode"] = api_mode
    model["default"] = effective_model
+    if ctx_len:
+        model["context_length"] = ctx_len

    save_config(cfg)
-
-    # Deactivate any OAuth provider
    deactivate_provider()
-
-    # Update caller's config dict
    config["model"] = dict(model)

-    # Clear any conflicting env vars
+    # Clear any conflicting env vars so auxiliary clients don't poison
+    # themselves with a stale OpenAI base URL / key.
    if get_env_value("OPENAI_BASE_URL"):
        save_env_value("OPENAI_BASE_URL", "")
    if get_env_value("OPENAI_API_KEY"):
@@ -3071,10 +3118,14 @@ def _model_flow_azure_foundry(config, current_model=""):

    mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
    print()
-    print(f"✓ Azure Foundry configured:")
-    print(f"    Endpoint:  {effective_url}")
-    print(f"    API mode:  {mode_label}")
-    print(f"    Model:     {effective_model}")
+    print("✓ Azure Foundry configured:")
+    print(f"    Endpoint:       {effective_url}")
+    print(f"    API mode:       {mode_label}")
+    print(f"    Model:          {effective_model}")
+    if ctx_len:
+        print(f"    Context length: {ctx_len:,} tokens")
+    else:
+        print("    Context length: not auto-detected (will fall back at runtime)")
    print()


--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -602,6 +602,71 @@ def _resolve_openrouter_runtime(
    }


+def _resolve_azure_foundry_runtime(
+    *,
+    requested_provider: str,
+    model_cfg: Dict[str, Any],
+    explicit_api_key: Optional[str] = None,
+    explicit_base_url: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Resolve an Azure Foundry runtime entry.
+
+    Base URL precedence: explicit override, ``model.base_url`` (only when
+    ``model.provider`` is ``azure-foundry``), then ``AZURE_FOUNDRY_BASE_URL``.
+    API key precedence: explicit override, ``.env``, then the process env.
+    ``source`` is ``"explicit"`` only when a non-blank override was given.
+
+    Raises :class:`AuthError` when the base URL or API key is missing.
+    """
+    explicit_api_key = str(explicit_api_key or "").strip()
+    explicit_base_url_clean = str(explicit_base_url or "").strip().rstrip("/")
+
+    cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+    cfg_base_url = ""
+    cfg_api_mode = "chat_completions"
+    if cfg_provider == "azure-foundry":
+        cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
+        cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
+
+    env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
+    base_url = explicit_base_url_clean or cfg_base_url or env_base_url
+    if not base_url:
+        raise AuthError(
+            "Azure Foundry requires a base URL. Set it via 'hermes model' or "
+            "the AZURE_FOUNDRY_BASE_URL environment variable."
+        )
+
+    api_key = explicit_api_key
+    if not api_key:
+        try:
+            from hermes_cli.config import get_env_value
+            api_key = str(get_env_value("AZURE_FOUNDRY_API_KEY") or "").strip()
+        except Exception:
+            api_key = ""
+    if not api_key:
+        api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
+    if not api_key:
+        raise AuthError(
+            "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
+            "~/.hermes/.env or run 'hermes model' to configure."
+        )
+
+    # Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
+    # we inherited from the configured base_url to avoid double-/v1 paths.
+    if cfg_api_mode == "anthropic_messages":
+        base_url = re.sub(r"/v1/?$", "", base_url)
+
+    source = "explicit" if (explicit_api_key or explicit_base_url_clean) else "config"
+    return {
+        "provider": "azure-foundry",
+        "api_mode": cfg_api_mode,
+        "base_url": base_url,
+        "api_key": api_key,
+        "source": source,
+        "requested_provider": requested_provider,
+    }
+
+
 def _resolve_explicit_runtime(
    *,
    provider: str,
@@ -693,44 +758,12 @@ def _resolve_explicit_runtime(

    # Azure Foundry: user-configured endpoint with selectable API mode
    if provider == "azure-foundry":
-        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
-        cfg_base_url = ""
-        cfg_api_mode = "chat_completions"
-        if cfg_provider == "azure-foundry":
-            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
-            cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
-
-        env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
-        base_url = explicit_base_url or cfg_base_url or env_base_url
-        if not base_url:
-            raise AuthError(
-                "Azure Foundry requires a base URL. Set it via 'hermes model' or "
-                "the AZURE_FOUNDRY_BASE_URL environment variable."
-            )
-
-        api_key = explicit_api_key
-        if not api_key:
-            from hermes_cli.config import get_env_value
-            api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or os.getenv("AZURE_FOUNDRY_API_KEY", "")
-        if not api_key:
-            raise AuthError(
-                "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
-                "~/.hermes/.env or run 'hermes model' to configure."
-            )
-
-        # For Anthropic-style endpoints, strip /v1 suffix since the Anthropic SDK
-        # appends /v1/messages internally
-        if cfg_api_mode == "anthropic_messages":
-            base_url = re.sub(r"/v1/?$", "", base_url)
-
-        return {
-            "provider": "azure-foundry",
-            "api_mode": cfg_api_mode,
-            "base_url": base_url,
-            "api_key": api_key,
-            "source": "explicit",
-            "requested_provider": requested_provider,
-        }
+        return _resolve_azure_foundry_runtime(
+            requested_provider=requested_provider,
+            model_cfg=model_cfg,
+            explicit_api_key=explicit_api_key,
+            explicit_base_url=explicit_base_url,
+        )

    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
@@ -820,6 +853,20 @@ def resolve_runtime_provider(
            "requested_provider": requested_provider,
        }

+    # Azure Foundry: user-configured endpoint with selectable API mode
+    # (OpenAI-style chat_completions or Anthropic-style anthropic_messages).
+    # Resolve before the custom-runtime / pool / generic paths so Azure
+    # config is always picked up from model.base_url + model.api_mode,
+    # regardless of whether the caller passed explicit_* args.
+    if requested_provider == "azure-foundry":
+        azure_runtime = _resolve_azure_foundry_runtime(
+            requested_provider=requested_provider,
+            model_cfg=_get_model_config(),
+            explicit_api_key=explicit_api_key,
+            explicit_base_url=explicit_base_url,
+        )
+        return azure_runtime
+
    custom_runtime = _resolve_named_custom_runtime(
        requested_provider=requested_provider,
        explicit_api_key=explicit_api_key,
--- a/tests/hermes_cli/test_azure_detect.py
+++ b/tests/hermes_cli/test_azure_detect.py
@@ -0,0 +1,237 @@
+"""Tests for hermes_cli.azure_detect — transport & model auto-detection."""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from hermes_cli import azure_detect
+
+
+# ----------------------------------------------------------------------
+# Helpers
+# ----------------------------------------------------------------------
+
+class _FakeHTTPResponse:
+    """Canned response object that can replace ``urlopen(...)`` in a ``with``."""
+
+    def __init__(self, status: int, body: bytes):
+        # ``status`` is public; the payload is only exposed through read().
+        self.status, self._body = status, body
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        return False  # falsy: exceptions raised in the ``with`` body propagate
+
+    def read(self) -> bytes:
+        return self._body
+
+
+def _openai_models_body(*ids: str) -> bytes:
+    """Serialize an OpenAI-style model-list payload containing the given IDs."""
+    entries = [{"id": model_id, "object": "model"} for model_id in ids]
+    payload = {"object": "list", "data": entries}
+    return json.dumps(payload).encode()
+
+
+def _anthropic_error_body(msg: str = "model not found") -> bytes:
+    """Serialize an Anthropic-style error envelope whose message is ``msg``."""
+    detail = {"type": "invalid_request_error", "message": msg}
+    envelope = {"type": "error", "error": detail}
+    return json.dumps(envelope).encode()
+
+
+# ----------------------------------------------------------------------
+# _looks_like_anthropic_path
+# ----------------------------------------------------------------------
+
+@pytest.mark.parametrize("url, expected", [
+    ("https://foo.services.ai.azure.com/anthropic", True),  # bare /anthropic route
+    ("https://foo.services.ai.azure.com/anthropic/", True),  # trailing slash
+    ("https://foo.services.ai.azure.com/anthropic/v1", True),  # /anthropic followed by /v1
+    ("https://foo.openai.azure.com/openai/v1", False),  # OpenAI-style /openai/v1 path
+    ("https://foo.openai.azure.com/", False),  # root path only
+    ("https://openrouter.ai/api/v1", False),  # different host, path ends in /v1
+])
+def test_looks_like_anthropic_path(url, expected):
+    assert azure_detect._looks_like_anthropic_path(url) is expected  # ``is``: must be a real bool
+
+
+# ----------------------------------------------------------------------
+# _extract_model_ids
+# ----------------------------------------------------------------------
+
+def test_extract_model_ids_openai_shape():
+    """IDs are read from each ``data`` entry and returned in payload order."""
+    wanted = ["gpt-4.1-mini", "claude-sonnet-4-6"]
+    payload = {
+        "object": "list",
+        "data": [{"id": model_id, "object": "model"} for model_id in wanted],
+    }
+    extracted = azure_detect._extract_model_ids(payload)
+    assert extracted == wanted
+
+
+def test_extract_model_ids_bad_shape_returns_empty():
+    """Missing ``data``, non-list ``data`` and entries without ``id`` all yield ``[]``."""
+    for malformed in ({}, {"data": "not-a-list"}, {"data": [{"no-id": True}]}):
+        assert azure_detect._extract_model_ids(malformed) == []
+
+
+# ----------------------------------------------------------------------
+# detect() integration
+# ----------------------------------------------------------------------
+
+def test_detect_anthropic_path_wins_without_http():
+    """An ``/anthropic`` URL is classified from its path alone; no probe is issued."""
+    endpoint = "https://foo.services.ai.azure.com/anthropic"
+    with patch.object(azure_detect, "_http_get_json") as models_get, \
+         patch.object(azure_detect, "_probe_anthropic_messages") as messages_probe:
+        outcome = azure_detect.detect(endpoint, "key-abc")
+    # Both network helpers were mocked; neither may have been touched.
+    models_get.assert_not_called()
+    messages_probe.assert_not_called()
+    assert outcome.api_mode == "anthropic_messages"
+    assert outcome.is_anthropic is True
+    assert "path" in outcome.reason.lower()
+
+
+def test_detect_openai_models_probe_success():
+    """A 200 from /models with an OpenAI-shaped list selects chat_completions."""
+    listed = ["gpt-5.4", "claude-opus-4-6"]
+
+    def _models_ok(url, api_key, timeout=6.0):
+        assert api_key == "key-abc"  # the key given to detect() must reach the probe
+        return 200, json.loads(_openai_models_body(*listed))
+
+    with patch.object(azure_detect, "_http_get_json", side_effect=_models_ok):
+        outcome = azure_detect.detect("https://my.openai.azure.com/openai/v1", "key-abc")
+    assert outcome.api_mode == "chat_completions"
+    assert outcome.models_probe_ok is True
+    assert outcome.models == listed
+    assert "/models" in outcome.reason
+
+
+def test_detect_openai_models_probe_empty_list_still_counts():
+    """An OpenAI-shaped but empty /models list is still a successful probe."""
+    def _empty_listing(url, api_key, timeout=6.0):
+        empty = {"object": "list", "data": []}
+        return 200, empty
+
+    with patch.object(azure_detect, "_http_get_json", side_effect=_empty_listing):
+        outcome = azure_detect.detect("https://my.openai.azure.com/openai/v1", "key-abc")
+    # The probe counts as OK even though no model IDs came back.
+    assert outcome.models_probe_ok is True
+    assert outcome.api_mode == "chat_completions"
+    assert outcome.models == []
+
+
+def test_detect_falls_back_to_anthropic_probe():
+    """When /models is rejected, a passing Anthropic Messages probe decides the mode."""
+    def _models_rejected(url, api_key, timeout=6.0):
+        return 401, None  # HTTP 401 Unauthorized, no JSON body
+
+    get_patch = patch.object(azure_detect, "_http_get_json", side_effect=_models_rejected)
+    probe_patch = patch.object(azure_detect, "_probe_anthropic_messages", return_value=True)
+    with get_patch, probe_patch:
+        outcome = azure_detect.detect("https://my.services.ai.azure.com/v1", "key-abc")
+
+    assert outcome.api_mode == "anthropic_messages"
+    assert outcome.is_anthropic is True
+
+
+def test_detect_all_probes_fail_returns_none():
+    """With both probes failing, detect() reports no mode and mentions manual setup."""
+    failing_get = patch.object(azure_detect, "_http_get_json", return_value=(500, None))
+    failing_probe = patch.object(azure_detect, "_probe_anthropic_messages", return_value=False)
+    with failing_get, failing_probe:
+        outcome = azure_detect.detect("https://some-private.example.com/", "key-abc")
+
+    assert outcome.api_mode is None
+    assert outcome.models == []
+    assert "manual" in outcome.reason.lower()
+
+
+# ----------------------------------------------------------------------
+# _probe_openai_models URL list (Azure vs v1 api-version)
+# ----------------------------------------------------------------------
+
+def test_probe_openai_models_tries_multiple_api_versions():
+    """A 404 on the plain /models URL is retried with an ``api-version`` query."""
+    requested_urls = []
+
+    def _versioned_only(url, api_key, timeout=6.0):
+        requested_urls.append(url)
+        if "api-version" in url:
+            return 200, json.loads(_openai_models_body("gpt-4.1"))
+        return 404, None
+
+    base_url = "https://my.openai.azure.com/openai/v1"
+    with patch.object(azure_detect, "_http_get_json", side_effect=_versioned_only):
+        probe_ok, model_ids = azure_detect._probe_openai_models(base_url, "k")
+    assert probe_ok is True
+    assert model_ids == ["gpt-4.1"]
+    # Both URL flavours must have been requested; their relative order is not checked.
+    versioned = ["api-version" in u for u in requested_urls]
+    assert not all(versioned)
+    assert any(versioned)
+
+
+# ----------------------------------------------------------------------
+# _http_get_json error handling
+# ----------------------------------------------------------------------
+
+def test_http_get_json_on_urlerror_returns_zero_none():
+    """A ``URLError`` from ``urlopen`` is reported as ``(0, None)`` instead of raising."""
+    from urllib.error import URLError
+    unreachable = URLError("dns fail")
+    with patch("hermes_cli.azure_detect.urllib_request.urlopen", side_effect=unreachable):
+        status, body = azure_detect._http_get_json("https://bad.example/", "k")
+    assert status == 0
+    assert body is None
+
+
+def test_http_get_json_on_http_error_returns_code_none():
+    """An ``HTTPError`` from ``urlopen`` surfaces as ``(status code, None)``."""
+    from urllib.error import HTTPError
+    forbidden = HTTPError(url="https://x/", code=403, msg="Forbidden", hdrs={}, fp=None)
+    with patch("hermes_cli.azure_detect.urllib_request.urlopen", side_effect=forbidden):
+        status, body = azure_detect._http_get_json("https://x/", "k")
+    assert body is None
+    assert status == 403
+
+
+# ----------------------------------------------------------------------
+# lookup_context_length
+# ----------------------------------------------------------------------
+
+def test_lookup_context_length_returns_known():
+    """A resolver answer different from the default fallback is returned unchanged."""
+    resolver = MagicMock(return_value=400000)
+    resolver_patch = patch("agent.model_metadata.get_model_context_length", resolver)
+    fallback_patch = patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000)
+    endpoint = "https://x.openai.azure.com/openai/v1"
+    with resolver_patch, fallback_patch:
+        context_length = azure_detect.lookup_context_length("gpt-5.4", endpoint, "k")
+    assert context_length == 400000
+
+
+def test_lookup_context_length_returns_none_on_fallback():
+    """A resolver answer equal to DEFAULT_FALLBACK_CONTEXT is reported as None."""
+    fallback = 128000  # same value for the resolver result and the patched constant
+    resolver_patch = patch("agent.model_metadata.get_model_context_length", return_value=fallback)
+    fallback_patch = patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", fallback)
+    endpoint = "https://x.openai.azure.com/openai/v1"
+    with resolver_patch, fallback_patch:
+        assert azure_detect.lookup_context_length("totally-unknown-model", endpoint, "k") is None
+
+
+def test_lookup_context_length_swallows_exceptions():
+    """A resolver that raises makes the lookup return None instead of propagating."""
+    boom = RuntimeError("boom")
+    with patch("agent.model_metadata.get_model_context_length", side_effect=boom):
+        assert azure_detect.lookup_context_length("m", "https://x/", "k") is None
--- a/tests/hermes_cli/test_runtime_provider_resolution.py
+++ b/tests/hermes_cli/test_runtime_provider_resolution.py
@@ -1,3 +1,5 @@
+import pytest
+
 from hermes_cli import runtime_provider as rp


@@ -1565,3 +1567,79 @@ class TestOllamaUrlSubstringLeak:
        resolved = rp.resolve_runtime_provider(requested="custom")

        assert resolved["api_key"] == "ol-legit-key"
+
+
+# =============================================================================
+# Azure Foundry — both OpenAI-style and Anthropic-style endpoints
+# =============================================================================
+
+class TestAzureFoundryResolution:
+    """resolve_runtime_provider() behaviour for the ``azure-foundry`` provider."""
+
+    def _make_cfg(self, base_url: str, api_mode: str = "chat_completions"):
+        """Build a model config dict that selects Azure Foundry."""
+        endpoint = {"base_url": base_url, "api_mode": api_mode}
+        return {"provider": "azure-foundry", **endpoint, "default": "gpt-5.4"}
+
+    def test_azure_foundry_openai_style_explicit(self, monkeypatch):
+        """chat_completions config: the configured base_url is returned untouched."""
+        endpoint = "https://my-resource.openai.azure.com/openai/v1"
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-openai")
+        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
+        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
+        monkeypatch.setattr(
+            rp, "_get_model_config", lambda: self._make_cfg(endpoint, "chat_completions"),
+        )
+
+        resolved = rp.resolve_runtime_provider(requested="azure-foundry")
+
+        assert resolved["provider"] == "azure-foundry"
+        assert resolved["api_mode"] == "chat_completions"
+        assert resolved["base_url"] == endpoint
+        assert resolved["api_key"] == "az-key-openai"
+
+    def test_azure_foundry_anthropic_style_strips_v1_suffix(self, monkeypatch):
+        """anthropic_messages config: a trailing /v1 is removed from base_url.
+
+        The Anthropic SDK appends /v1/messages itself, so keeping the suffix
+        would produce a doubled /v1 in the request URL.
+        """
+        configured = "https://my-resource.services.ai.azure.com/anthropic/v1"
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-ant")
+        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
+        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
+        monkeypatch.setattr(
+            rp, "_get_model_config", lambda: self._make_cfg(configured, "anthropic_messages"),
+        )
+
+        resolved = rp.resolve_runtime_provider(requested="azure-foundry")
+
+        assert resolved["provider"] == "azure-foundry"
+        assert resolved["api_mode"] == "anthropic_messages"
+        assert resolved["base_url"] == "https://my-resource.services.ai.azure.com/anthropic"
+
+    def test_azure_foundry_missing_base_url_raises(self, monkeypatch):
+        """No base_url in config and none in the environment raises AuthError."""
+        monkeypatch.delenv("AZURE_FOUNDRY_BASE_URL", raising=False)
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key")
+        monkeypatch.setattr(rp, "_get_model_config", lambda: {})
+        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
+        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
+
+        with pytest.raises(rp.AuthError, match="base URL"):
+            rp.resolve_runtime_provider(requested="azure-foundry")
+
+    def test_azure_foundry_missing_api_key_raises(self, monkeypatch):
+        """A configured endpoint with no API key available raises AuthError."""
+        endpoint = "https://my-resource.openai.azure.com/openai/v1"
+        # Remove the key from the process environment and stub the
+        # ~/.hermes/.env reader so no key can be found there either.
+        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
+        import hermes_cli.config as cfg_mod
+        monkeypatch.setattr(cfg_mod, "get_env_value", lambda k: None)
+        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
+        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(endpoint))
+        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
+
+        with pytest.raises(rp.AuthError, match="API key"):
+            rp.resolve_runtime_provider(requested="azure-foundry")