diff --git a/run_agent.py b/run_agent.py
index 8f508c0a90..9801952352 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5258,7 +5258,47 @@ class AIAgent:
             logger.debug("Dead connection check error: %s", exc)
         return False
 
-    def _create_request_openai_client(self, *, reason: str) -> Any:
+    @staticmethod
+    def _api_kwargs_have_image_parts(api_kwargs: dict) -> bool:
+        """Return True when the outbound request still contains native image parts.
+
+        Checks ``messages`` (Chat Completions) and ``input`` (Responses API),
+        recursing through nested dicts and lists for any dict whose ``type``
+        is ``image_url`` or ``input_image``. Non-dict payloads yield False.
+        """
+        if not isinstance(api_kwargs, dict):
+            return False
+        candidates = []
+        messages = api_kwargs.get("messages")
+        if isinstance(messages, list):
+            candidates.extend(messages)
+        # Responses API payloads use `input`; after conversion, image parts can
+        # still be present there instead of in `messages`.
+        response_input = api_kwargs.get("input")
+        if isinstance(response_input, list):
+            candidates.extend(response_input)
+
+        def _contains_image(value: Any) -> bool:
+            if isinstance(value, dict):
+                ptype = value.get("type")
+                # Tuple membership compares with ==, so an unhashable `type`
+                # value (list/dict) cannot raise TypeError the way a set would.
+                if ptype in ("image_url", "input_image"):
+                    return True
+                return any(_contains_image(v) for v in value.values())
+            if isinstance(value, list):
+                return any(_contains_image(v) for v in value)
+            return False
+
+        return any(_contains_image(item) for item in candidates)
+
+    def _copilot_headers_for_request(self, *, is_vision: bool) -> dict:
+        """Return Copilot request headers for an agent turn; ``is_vision`` is passed through."""
+        from hermes_cli.copilot_auth import copilot_request_headers
+
+        return copilot_request_headers(is_agent_turn=True, is_vision=is_vision)
+
+    def _create_request_openai_client(self, *, reason: str, api_kwargs: Optional[dict] = None) -> Any:
         from unittest.mock import Mock
 
         primary_client = self._ensure_primary_openai_client(reason=reason)
@@ -5266,6 +5306,17 @@ class AIAgent:
             return primary_client
         with self._openai_client_lock():
             request_kwargs = dict(self._client_kwargs)
+            if (
+                base_url_host_matches(str(request_kwargs.get("base_url", "")), "api.githubcopilot.com")
+                and self._api_kwargs_have_image_parts(api_kwargs or {})
+            ):
+                # Merge rather than replace so headers already configured on
+                # the client survive; the vision-aware Copilot headers win on
+                # key collisions.
+                request_kwargs["default_headers"] = {
+                    **(request_kwargs.get("default_headers") or {}),
+                    **self._copilot_headers_for_request(is_vision=True),
+                }
         return self._create_openai_client(request_kwargs, reason=reason, shared=False)
 
     def _close_request_openai_client(self, client: Any, *, reason: str) -> None:
@@ -5808,7 +5859,10 @@ class AIAgent:
         def _call():
             try:
                 if self.api_mode == "codex_responses":
-                    request_client_holder["client"] = self._create_request_openai_client(reason="codex_stream_request")
+                    request_client_holder["client"] = self._create_request_openai_client(
+                        reason="codex_stream_request",
+                        api_kwargs=api_kwargs,
+                    )
                     result["response"] = self._run_codex_stream(
                         api_kwargs,
                         client=request_client_holder["client"],
@@ -5840,7 +5894,10 @@ class AIAgent:
                         raise
                     result["response"] = normalize_converse_response(raw_response)
                 else:
-                    request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
+                    request_client_holder["client"] = self._create_request_openai_client(
+                        reason="chat_completion_request",
+                        api_kwargs=api_kwargs,
+                    )
                     result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs)
             except Exception as e:
                 result["error"] = e
@@ -6183,7 +6240,8 @@ class AIAgent:
                         ),
                     }
                 request_client_holder["client"] = self._create_request_openai_client(
-                    reason="chat_completion_stream_request"
+                    reason="chat_completion_stream_request",
+                    api_kwargs=stream_kwargs,
                 )
                 # Reset stale-stream timer so the detector measures from this
                 # attempt's start, not a previous attempt's last chunk.
diff --git a/tests/run_agent/test_copilot_native_vision_headers.py b/tests/run_agent/test_copilot_native_vision_headers.py
new file mode 100644
index 0000000000..85190e0078
--- /dev/null
+++ b/tests/run_agent/test_copilot_native_vision_headers.py
@@ -0,0 +1,101 @@
+"""Tests for the Copilot vision header on per-request OpenAI clients."""
+
+from unittest.mock import MagicMock, patch
+
+from run_agent import AIAgent
+
+
+def _make_copilot_agent():
+    """Build an AIAgent pointed at the Copilot API with the OpenAI SDK mocked out."""
+    with patch("run_agent.OpenAI") as mock_openai:
+        mock_openai.return_value = MagicMock()
+        agent = AIAgent(
+            api_key="gh-token",
+            base_url="https://api.githubcopilot.com",
+            provider="copilot",
+            model="gpt-5.4",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+    return agent
+
+
+def _request_client_headers(api_kwargs):
+    """Return the default headers used to build a request client for *api_kwargs*."""
+    agent = _make_copilot_agent()
+    built_kwargs = []
+
+    def fake_create(kwargs, *, reason, shared):
+        built_kwargs.append(dict(kwargs))
+        return MagicMock()
+
+    agent.client = object()
+    with patch.object(agent, "_is_openai_client_closed", return_value=False), patch.object(
+        agent, "_create_openai_client", side_effect=fake_create
+    ):
+        agent._create_request_openai_client(reason="test", api_kwargs=api_kwargs)
+
+    return built_kwargs[-1]["default_headers"]
+
+
+def test_request_client_adds_copilot_vision_header_for_native_image_payload():
+    headers = _request_client_headers(
+        {
+            "model": "gpt-5.4",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "What is in this image?"},
+                        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
+                    ],
+                }
+            ],
+        }
+    )
+
+    assert headers["Copilot-Vision-Request"] == "true"
+
+
+def test_request_client_adds_copilot_vision_header_for_responses_input_image():
+    # Responses API payloads carry image parts under `input`, not `messages`.
+    headers = _request_client_headers(
+        {
+            "model": "gpt-5.4",
+            "input": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "input_text", "text": "What is in this image?"},
+                        {"type": "input_image", "image_url": "data:image/png;base64,abc"},
+                    ],
+                }
+            ],
+        }
+    )
+
+    assert headers["Copilot-Vision-Request"] == "true"
+
+
+def test_request_client_leaves_copilot_text_requests_without_vision_header():
+    headers = _request_client_headers(
+        {"model": "gpt-5.4", "messages": [{"role": "user", "content": "hello"}]}
+    )
+
+    assert "Copilot-Vision-Request" not in headers
+
+
+def test_request_client_does_not_add_vision_header_after_non_vision_fallback():
+    # This is the shape after _prepare_messages_for_non_vision_model has
+    # replaced image parts with text, so Copilot should not get the vision route.
+    headers = _request_client_headers(
+        {
+            "model": "gpt-5.4",
+            "messages": [
+                {"role": "user", "content": "[user image: a dog]\n\nWhat is in this image?"}
+            ],
+        }
+    )
+
+    assert "Copilot-Vision-Request" not in headers