Mirror of https://github.com/NousResearch/hermes-agent.git, synced 2026-04-28 06:51:16 +08:00.
fix(copilot): mark native image requests as vision
Co-authored-by: dhabibi <9087935+dhabibi@users.noreply.github.com>
This commit is contained in:
committed by
Teknium
parent
8402ba150e
commit
aa53fb661a
52
run_agent.py
52
run_agent.py
@@ -5258,7 +5258,39 @@ class AIAgent:
|
||||
logger.debug("Dead connection check error: %s", exc)
|
||||
return False
|
||||
|
||||
def _create_request_openai_client(self, *, reason: str) -> Any:
|
||||
@staticmethod
|
||||
def _api_kwargs_have_image_parts(api_kwargs: dict) -> bool:
|
||||
"""Return True when the outbound request still contains native image parts."""
|
||||
if not isinstance(api_kwargs, dict):
|
||||
return False
|
||||
candidates = []
|
||||
messages = api_kwargs.get("messages")
|
||||
if isinstance(messages, list):
|
||||
candidates.extend(messages)
|
||||
# Responses API payloads use `input`; after conversion, image parts can
|
||||
# still be present there instead of in `messages`.
|
||||
response_input = api_kwargs.get("input")
|
||||
if isinstance(response_input, list):
|
||||
candidates.extend(response_input)
|
||||
|
||||
def _contains_image(value: Any) -> bool:
|
||||
if isinstance(value, dict):
|
||||
ptype = value.get("type")
|
||||
if ptype in {"image_url", "input_image"}:
|
||||
return True
|
||||
return any(_contains_image(v) for v in value.values())
|
||||
if isinstance(value, list):
|
||||
return any(_contains_image(v) for v in value)
|
||||
return False
|
||||
|
||||
return any(_contains_image(item) for item in candidates)
|
||||
|
||||
def _copilot_headers_for_request(self, *, is_vision: bool) -> dict:
    """Build Copilot headers for an agent-turn request, tagging vision when asked.

    Imported lazily so that non-Copilot configurations never pay for (or
    require) the `hermes_cli.copilot_auth` module.
    """
    from hermes_cli.copilot_auth import copilot_request_headers

    headers = copilot_request_headers(is_agent_turn=True, is_vision=is_vision)
    return headers
|
||||
|
||||
def _create_request_openai_client(self, *, reason: str, api_kwargs: Optional[dict] = None) -> Any:
|
||||
from unittest.mock import Mock
|
||||
|
||||
primary_client = self._ensure_primary_openai_client(reason=reason)
|
||||
@@ -5266,6 +5298,11 @@ class AIAgent:
|
||||
return primary_client
|
||||
with self._openai_client_lock():
|
||||
request_kwargs = dict(self._client_kwargs)
|
||||
if (
|
||||
base_url_host_matches(str(request_kwargs.get("base_url", "")), "api.githubcopilot.com")
|
||||
and self._api_kwargs_have_image_parts(api_kwargs or {})
|
||||
):
|
||||
request_kwargs["default_headers"] = self._copilot_headers_for_request(is_vision=True)
|
||||
return self._create_openai_client(request_kwargs, reason=reason, shared=False)
|
||||
|
||||
def _close_request_openai_client(self, client: Any, *, reason: str) -> None:
|
||||
@@ -5808,7 +5845,10 @@ class AIAgent:
|
||||
def _call():
|
||||
try:
|
||||
if self.api_mode == "codex_responses":
|
||||
request_client_holder["client"] = self._create_request_openai_client(reason="codex_stream_request")
|
||||
request_client_holder["client"] = self._create_request_openai_client(
|
||||
reason="codex_stream_request",
|
||||
api_kwargs=api_kwargs,
|
||||
)
|
||||
result["response"] = self._run_codex_stream(
|
||||
api_kwargs,
|
||||
client=request_client_holder["client"],
|
||||
@@ -5840,7 +5880,10 @@ class AIAgent:
|
||||
raise
|
||||
result["response"] = normalize_converse_response(raw_response)
|
||||
else:
|
||||
request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
|
||||
request_client_holder["client"] = self._create_request_openai_client(
|
||||
reason="chat_completion_request",
|
||||
api_kwargs=api_kwargs,
|
||||
)
|
||||
result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs)
|
||||
except Exception as e:
|
||||
result["error"] = e
|
||||
@@ -6183,7 +6226,8 @@ class AIAgent:
|
||||
),
|
||||
}
|
||||
request_client_holder["client"] = self._create_request_openai_client(
|
||||
reason="chat_completion_stream_request"
|
||||
reason="chat_completion_stream_request",
|
||||
api_kwargs=stream_kwargs,
|
||||
)
|
||||
# Reset stale-stream timer so the detector measures from this
|
||||
# attempt's start, not a previous attempt's last chunk.
|
||||
|
||||
96
tests/run_agent/test_copilot_native_vision_headers.py
Normal file
96
tests/run_agent/test_copilot_native_vision_headers.py
Normal file
@@ -0,0 +1,96 @@
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
def _make_copilot_agent():
    """Build an AIAgent configured for Copilot with the OpenAI client mocked out."""
    copilot_settings = dict(
        api_key="gh-token",
        base_url="https://api.githubcopilot.com",
        provider="copilot",
        model="gpt-5.4",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
    with patch("run_agent.OpenAI") as openai_cls:
        openai_cls.return_value = MagicMock()
        return AIAgent(**copilot_settings)
|
||||
|
||||
|
||||
def test_request_client_adds_copilot_vision_header_for_native_image_payload():
    """A Copilot request whose messages still carry native image parts must be
    built with the Copilot vision header set to "true"."""
    agent = _make_copilot_agent()
    captured = []

    def record_client_kwargs(kwargs, *, reason, shared):
        captured.append(dict(kwargs))
        return MagicMock()

    vision_payload = {
        "model": "gpt-5.4",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
                ],
            }
        ],
    }

    agent.client = object()
    closed_check = patch.object(agent, "_is_openai_client_closed", return_value=False)
    client_factory = patch.object(agent, "_create_openai_client", side_effect=record_client_kwargs)
    with closed_check, client_factory:
        agent._create_request_openai_client(reason="test", api_kwargs=vision_payload)

    assert captured[-1]["default_headers"]["Copilot-Vision-Request"] == "true"
|
||||
|
||||
|
||||
def test_request_client_leaves_copilot_text_requests_without_vision_header():
    """A plain-text Copilot request must not be tagged as a vision request."""
    agent = _make_copilot_agent()
    captured = []

    def record_client_kwargs(kwargs, *, reason, shared):
        captured.append(dict(kwargs))
        return MagicMock()

    text_payload = {"model": "gpt-5.4", "messages": [{"role": "user", "content": "hello"}]}

    agent.client = object()
    closed_check = patch.object(agent, "_is_openai_client_closed", return_value=False)
    client_factory = patch.object(agent, "_create_openai_client", side_effect=record_client_kwargs)
    with closed_check, client_factory:
        agent._create_request_openai_client(reason="test", api_kwargs=text_payload)

    assert "Copilot-Vision-Request" not in captured[-1]["default_headers"]
|
||||
|
||||
|
||||
def test_request_client_does_not_add_vision_header_after_non_vision_fallback():
    """After image parts have been flattened to text for a non-vision model,
    the Copilot vision route must not be selected."""
    agent = _make_copilot_agent()
    captured = []

    def record_client_kwargs(kwargs, *, reason, shared):
        captured.append(dict(kwargs))
        return MagicMock()

    # Shape produced by _prepare_messages_for_non_vision_model: image parts
    # replaced with a text placeholder, so no native image parts remain.
    fallback_payload = {
        "model": "gpt-5.4",
        "messages": [
            {"role": "user", "content": "[user image: a dog]\n\nWhat is in this image?"}
        ],
    }

    agent.client = object()
    closed_check = patch.object(agent, "_is_openai_client_closed", return_value=False)
    client_factory = patch.object(agent, "_create_openai_client", side_effect=record_client_kwargs)
    with closed_check, client_factory:
        agent._create_request_openai_client(reason="test", api_kwargs=fallback_payload)

    assert "Copilot-Vision-Request" not in captured[-1]["default_headers"]
|
||||
Reference in New Issue
Block a user