feat(azure-foundry): auto-detect transport, models, context length

The azure-foundry wizard now probes the endpoint before asking the user to pick anything by hand:

1. URL path sniff — endpoints whose path ends in /anthropic (with or without a trailing / or /v1) are Azure Foundry Claude routes and skip to anthropic_messages.
2. GET <base>/models probe — if the endpoint returns an OpenAI-shaped model list, we switch to chat_completions and prefill the picker with the returned deployment/model IDs.
3. Anthropic Messages probe — fallback for endpoints that don't expose /models but do speak the Anthropic Messages shape.
4. Manual fallback — private endpoints / custom routes still work; the user picks API mode + types a deployment name.

Context length for the selected model is resolved through the existing agent.model_metadata.get_model_context_length chain (models.dev, provider metadata, hardcoded family fallbacks) and stored in model.context_length when a non-default value is found.

Also refactors runtime_provider so Azure Foundry resolution is reused between the explicit-credentials path and the default top-level path — previously the /v1 strip for Anthropic-style Azure only ran when the caller passed explicit_* args, which meant config-driven sessions hit a double-/v1 URL.

New module hermes_cli/azure_detect.py with 19 unit tests covering:
- path sniff, model ID extraction, probe fallbacks
- HTTP error handling (URLError, HTTPError)
- context-length lookup passthrough
- DEFAULT_FALLBACK_CONTEXT rejection

New runtime tests cover:
- OpenAI-style Azure Foundry
- Anthropic-style Azure Foundry with /v1 stripping
- Missing base_url / API key raising AuthError

Rationale: Microsoft confirms there's no pure-API-key endpoint to list Azure deployments (that requires ARM management auth). The v1 Azure OpenAI endpoint does expose /models with the resource's available model catalog, which is good enough for picker prefill in the common case. Users on private/gated endpoints fall through to manual entry.
2026-04-28 06:51:16 +08:00 · 2026-04-25 18:38:38 -07:00
parent ac57114284
commit 731e1ef8cb
5 changed files with 814 additions and 101 deletions
--- a/tests/hermes_cli/test_azure_detect.py
+++ b/tests/hermes_cli/test_azure_detect.py
@@ -0,0 +1,237 @@
+"""Tests for hermes_cli.azure_detect — transport & model auto-detection."""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from hermes_cli import azure_detect
+
+
+# ----------------------------------------------------------------------
+# Helpers
+# ----------------------------------------------------------------------
+
+class _FakeHTTPResponse:  # NOTE(review): not referenced by any test in this file
+    """Minimal stand-in for urllib.request.urlopen's context manager."""
+
+    def __init__(self, status: int, body: bytes):
+        self.status = status  # HTTP status code exposed as a plain attribute
+        self._body = body  # raw payload handed back verbatim by read()
+
+    def __enter__(self):
+        return self  # the fake response acts as its own context manager
+
+    def __exit__(self, exc_type, exc, tb):
+        return False  # falsy: exceptions raised inside the with-block propagate
+
+    def read(self) -> bytes:
+        return self._body
+
+
+def _openai_models_body(*ids: str) -> bytes:
+    return json.dumps({
+        "object": "list",
+        "data": [{"id": i, "object": "model"} for i in ids],
+    }).encode()
+
+
+def _anthropic_error_body(msg: str = "model not found") -> bytes:  # NOTE(review): unused in this file
+    return json.dumps({
+        "type": "error",
+        "error": {"type": "invalid_request_error", "message": msg},
+    }).encode()
+
+
+# ----------------------------------------------------------------------
+# _looks_like_anthropic_path
+# ----------------------------------------------------------------------
+
+@pytest.mark.parametrize("url, expected", [
+    ("https://foo.services.ai.azure.com/anthropic", True),
+    ("https://foo.services.ai.azure.com/anthropic/", True),
+    ("https://foo.services.ai.azure.com/anthropic/v1", True),
+    ("https://foo.openai.azure.com/openai/v1", False),
+    ("https://foo.openai.azure.com/", False),
+    ("https://openrouter.ai/api/v1", False),
+])
+def test_looks_like_anthropic_path(url, expected):
+    assert azure_detect._looks_like_anthropic_path(url) is expected
+
+
+# ----------------------------------------------------------------------
+# _extract_model_ids
+# ----------------------------------------------------------------------
+
+def test_extract_model_ids_openai_shape():
+    body = {
+        "object": "list",
+        "data": [
+            {"id": "gpt-4.1-mini", "object": "model"},
+            {"id": "claude-sonnet-4-6", "object": "model"},
+        ],
+    }
+    assert azure_detect._extract_model_ids(body) == ["gpt-4.1-mini", "claude-sonnet-4-6"]
+
+
+def test_extract_model_ids_bad_shape_returns_empty():
+    assert azure_detect._extract_model_ids({}) == []
+    assert azure_detect._extract_model_ids({"data": "not-a-list"}) == []
+    assert azure_detect._extract_model_ids({"data": [{"no-id": True}]}) == []
+
+
+# ----------------------------------------------------------------------
+# detect() integration
+# ----------------------------------------------------------------------
+
+def test_detect_anthropic_path_wins_without_http():
+    """URL path sniff short-circuits — no HTTP call happens."""
+    with patch.object(azure_detect, "_http_get_json") as fake_get, \
+         patch.object(azure_detect, "_probe_anthropic_messages") as fake_probe:
+        result = azure_detect.detect(
+            "https://foo.services.ai.azure.com/anthropic", "key-abc",
+        )
+        assert result.api_mode == "anthropic_messages"
+        assert result.is_anthropic is True
+        assert "path" in result.reason.lower()
+        fake_get.assert_not_called()
+        fake_probe.assert_not_called()
+
+
+def test_detect_openai_models_probe_success():
+    """/models probe returning a model list → chat_completions."""
+    def _fake_get(url, api_key, timeout=6.0):
+        assert "key-abc" == api_key
+        return 200, json.loads(_openai_models_body("gpt-5.4", "claude-opus-4-6"))
+
+    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
+        result = azure_detect.detect(
+            "https://my.openai.azure.com/openai/v1", "key-abc",
+        )
+    assert result.api_mode == "chat_completions"
+    assert result.models_probe_ok is True
+    assert result.models == ["gpt-5.4", "claude-opus-4-6"]
+    assert "/models" in result.reason
+
+
+def test_detect_openai_models_probe_empty_list_still_counts():
+    """Endpoint returned OpenAI shape but no models → still chat_completions."""
+    def _fake_get(url, api_key, timeout=6.0):
+        return 200, {"object": "list", "data": []}
+
+    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
+        result = azure_detect.detect(
+            "https://my.openai.azure.com/openai/v1", "key-abc",
+        )
+    assert result.api_mode == "chat_completions"
+    assert result.models == []
+    assert result.models_probe_ok is True
+
+
+def test_detect_falls_back_to_anthropic_probe():
+    """/models fails but Anthropic Messages probe succeeds."""
+    def _fake_get(url, api_key, timeout=6.0):
+        return 401, None  # /models rejected with 401 Unauthorized, no JSON body
+
+    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get), \
+         patch.object(azure_detect, "_probe_anthropic_messages", return_value=True):
+        result = azure_detect.detect(
+            "https://my.services.ai.azure.com/v1", "key-abc",
+        )
+    assert result.api_mode == "anthropic_messages"
+    assert result.is_anthropic is True
+
+
+def test_detect_all_probes_fail_returns_none():
+    """Every probe fails → api_mode is None and caller falls back to manual."""
+    with patch.object(azure_detect, "_http_get_json", return_value=(500, None)), \
+         patch.object(azure_detect, "_probe_anthropic_messages", return_value=False):
+        result = azure_detect.detect(
+            "https://some-private.example.com/", "key-abc",
+        )
+    assert result.api_mode is None
+    assert result.models == []
+    assert "manual" in result.reason.lower()
+
+
+# ----------------------------------------------------------------------
+# _probe_openai_models URL list (Azure vs v1 api-version)
+# ----------------------------------------------------------------------
+
+def test_probe_openai_models_tries_multiple_api_versions():
+    """First call (no api-version) fails, api-version fallback succeeds."""
+    calls = []
+
+    def _fake_get(url, api_key, timeout=6.0):
+        calls.append(url)
+        if "api-version" not in url:
+            return 404, None
+        return 200, json.loads(_openai_models_body("gpt-4.1"))
+
+    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
+        ok, models = azure_detect._probe_openai_models(
+            "https://my.openai.azure.com/openai/v1", "k",
+        )
+    assert ok is True
+    assert models == ["gpt-4.1"]
+    # Should have tried without api-version first, then with at least one
+    assert any("api-version" not in u for u in calls)
+    assert any("api-version" in u for u in calls)
+
+
+# ----------------------------------------------------------------------
+# _http_get_json error handling
+# ----------------------------------------------------------------------
+
+def test_http_get_json_on_urlerror_returns_zero_none():
+    """Network failure returns (0, None), never raises."""
+    import urllib.error
+    with patch("hermes_cli.azure_detect.urllib_request.urlopen",
+               side_effect=urllib.error.URLError("dns fail")):
+        status, body = azure_detect._http_get_json("https://bad.example/", "k")
+    assert status == 0
+    assert body is None
+
+
+def test_http_get_json_on_http_error_returns_code_none():
+    """HTTP 4xx/5xx returns (code, None)."""
+    import urllib.error
+    err = urllib.error.HTTPError("https://x/", 403, "Forbidden", {}, None)
+    with patch("hermes_cli.azure_detect.urllib_request.urlopen", side_effect=err):
+        status, body = azure_detect._http_get_json("https://x/", "k")
+    assert status == 403
+    assert body is None
+
+
+# ----------------------------------------------------------------------
+# lookup_context_length
+# ----------------------------------------------------------------------
+
+def test_lookup_context_length_returns_known():
+    """When model_metadata returns a non-fallback value, we pass it through."""
+    fake = MagicMock(return_value=400000)
+    with patch("agent.model_metadata.get_model_context_length", fake), \
+         patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000):
+        n = azure_detect.lookup_context_length(
+            "gpt-5.4", "https://x.openai.azure.com/openai/v1", "k",
+        )
+    assert n == 400000
+
+
+def test_lookup_context_length_returns_none_on_fallback():
+    """When resolver falls through to DEFAULT_FALLBACK_CONTEXT, we return None."""
+    with patch("agent.model_metadata.get_model_context_length", return_value=128000), \
+         patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000):
+        n = azure_detect.lookup_context_length(
+            "totally-unknown-model", "https://x.openai.azure.com/openai/v1", "k",
+        )
+    assert n is None
+
+
+def test_lookup_context_length_swallows_exceptions():
+    """Resolver raising must not crash the wizard."""
+    with patch("agent.model_metadata.get_model_context_length",
+               side_effect=RuntimeError("boom")):
+        assert azure_detect.lookup_context_length("m", "https://x/", "k") is None
--- a/tests/hermes_cli/test_runtime_provider_resolution.py
+++ b/tests/hermes_cli/test_runtime_provider_resolution.py
@@ -1,3 +1,5 @@
+import pytest
+
 from hermes_cli import runtime_provider as rp


@@ -1565,3 +1567,79 @@ class TestOllamaUrlSubstringLeak:
        resolved = rp.resolve_runtime_provider(requested="custom")

        assert resolved["api_key"] == "ol-legit-key"
+
+
+# =============================================================================
+# Azure Foundry — both OpenAI-style and Anthropic-style endpoints
+# =============================================================================
+
+class TestAzureFoundryResolution:
+    """Verify Azure Foundry resolves correctly for both API modes."""
+
+    def _make_cfg(self, base_url: str, api_mode: str = "chat_completions"):
+        return {
+            "provider": "azure-foundry",
+            "base_url": base_url,
+            "api_mode": api_mode,
+            "default": "gpt-5.4",
+        }
+
+    def test_azure_foundry_openai_style_explicit(self, monkeypatch):
+        """OpenAI-style Azure Foundry → chat_completions, keeps base_url as-is."""
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-openai")
+        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
+        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
+            "https://my-resource.openai.azure.com/openai/v1",
+            "chat_completions",
+        ))
+        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
+
+        resolved = rp.resolve_runtime_provider(requested="azure-foundry")
+
+        assert resolved["provider"] == "azure-foundry"
+        assert resolved["api_mode"] == "chat_completions"
+        assert resolved["base_url"] == "https://my-resource.openai.azure.com/openai/v1"
+        assert resolved["api_key"] == "az-key-openai"
+
+    def test_azure_foundry_anthropic_style_strips_v1_suffix(self, monkeypatch):
+        """Anthropic-style Azure Foundry → anthropic_messages, /v1 stripped
+        because the Anthropic SDK appends /v1/messages itself."""
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-ant")
+        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
+        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
+            "https://my-resource.services.ai.azure.com/anthropic/v1",
+            "anthropic_messages",
+        ))
+        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
+
+        resolved = rp.resolve_runtime_provider(requested="azure-foundry")
+
+        assert resolved["provider"] == "azure-foundry"
+        assert resolved["api_mode"] == "anthropic_messages"
+        # /v1 stripped so SDK can append /v1/messages cleanly
+        assert resolved["base_url"] == "https://my-resource.services.ai.azure.com/anthropic"
+
+    def test_azure_foundry_missing_base_url_raises(self, monkeypatch):
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key")
+        monkeypatch.delenv("AZURE_FOUNDRY_BASE_URL", raising=False)
+        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
+        monkeypatch.setattr(rp, "_get_model_config", lambda: {})
+        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
+
+        with pytest.raises(rp.AuthError, match="base URL"):
+            rp.resolve_runtime_provider(requested="azure-foundry")
+
+    def test_azure_foundry_missing_api_key_raises(self, monkeypatch):
+        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
+        # `get_env_value` reads from ~/.hermes/.env — stub it to return None so no key leaks in.
+        # NOTE(review): only effective if rp looks it up on hermes_cli.config at call time — confirm.
+        import hermes_cli.config as cfg_mod
+        monkeypatch.setattr(cfg_mod, "get_env_value", lambda k: None)
+        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
+        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
+            "https://my-resource.openai.azure.com/openai/v1"
+        ))
+        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
+
+        with pytest.raises(rp.AuthError, match="API key"):
+            rp.resolve_runtime_provider(requested="azure-foundry")