mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 15:01:34 +08:00
171 lines
6.7 KiB
Python
171 lines
6.7 KiB
Python
|
|
"""Tests for the vision-aware image preprocessing in run_agent.py.
|
||
|
|
|
||
|
|
Covers:
|
||
|
|
|
||
|
|
* ``_prepare_anthropic_messages_for_api`` — passes image parts through
|
||
|
|
unchanged when the active model reports ``supports_vision=True`` (the
|
||
|
|
adapter handles them natively), and falls back to text-description
|
||
|
|
replacement when the model lacks vision.
|
||
|
|
|
||
|
|
* ``_prepare_messages_for_non_vision_model`` — the mirror method for the
|
||
|
|
chat.completions / codex_responses paths. Same contract.
|
||
|
|
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
from unittest.mock import MagicMock, patch
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
|
||
|
|
from run_agent import AIAgent
|
||
|
|
|
||
|
|
|
||
|
|
def _make_agent() -> AIAgent:
    """Build a bare-bones AIAgent instance without running __init__.

    Avoids the heavy provider/credential setup for these pure-method tests.

    Returns:
        An ``AIAgent`` allocated with ``object.__new__`` on which only
        ``provider``, ``model`` and ``_anthropic_image_fallback_cache``
        have been set. Tests override ``provider`` / ``model`` as needed.
    """
    # object.__new__ allocates the instance while skipping AIAgent.__init__.
    agent = object.__new__(AIAgent)
    agent.provider = "anthropic"
    agent.model = "claude-sonnet-4"
    # Start every test with an empty image-description fallback cache.
    agent._anthropic_image_fallback_cache = {}
    return agent
|
||
|
|
|
||
|
|
|
||
|
|
# User message in multi-part form: one text part followed by one inline
# base64 ``image_url`` part.
IMG_PARTS_USER_MSG = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What's in this image?"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}},
    ],
}

# User message whose content is a plain string with no image parts.
PLAIN_USER_MSG = {"role": "user", "content": "hello, no images here"}
|
||
|
|
|
||
|
|
|
||
|
|
# ─── _prepare_anthropic_messages_for_api ─────────────────────────────────────
|
||
|
|
|
||
|
|
|
||
|
|
class TestPrepareAnthropicMessages:
    """Tests for ``AIAgent._prepare_anthropic_messages_for_api``."""

    def test_no_images_passes_through(self):
        """A message list without image parts is returned as the same object."""
        agent = _make_agent()
        msgs = [PLAIN_USER_MSG]
        out = agent._prepare_anthropic_messages_for_api(msgs)
        assert out is msgs  # unchanged reference

    def test_vision_capable_passes_images_through(self):
        """The Anthropic adapter handles image_url/input_image natively."""
        agent = _make_agent()
        with patch.object(agent, "_model_supports_vision", return_value=True):
            out = agent._prepare_anthropic_messages_for_api([IMG_PARTS_USER_MSG])
        # Passes through unchanged — image_url parts still present.
        assert out[0]["content"][1]["type"] == "image_url"

    def test_non_vision_replaces_images_with_text(self):
        """Without vision support, image parts become a text description."""
        agent = _make_agent()
        # Stub the describer so the test controls the replacement text.
        with patch.object(agent, "_model_supports_vision", return_value=False), \
                patch.object(
                    agent,
                    "_describe_image_for_anthropic_fallback",
                    return_value="[Image description: a cat]",
                ):
            out = agent._prepare_anthropic_messages_for_api([IMG_PARTS_USER_MSG])
        # Content collapsed to a string containing the description + user text.
        content = out[0]["content"]
        assert isinstance(content, str)
        assert "[Image description: a cat]" in content
        assert "What's in this image?" in content
        # No more image parts.
        assert "image_url" not in content
|
||
|
|
|
||
|
|
|
||
|
|
# ─── _prepare_messages_for_non_vision_model ──────────────────────────────────
|
||
|
|
|
||
|
|
|
||
|
|
class TestPrepareMessagesForNonVision:
    """Tests for ``AIAgent._prepare_messages_for_non_vision_model``."""

    def test_no_images_passes_through(self):
        """A message list without image parts is returned as the same object."""
        agent = _make_agent()
        msgs = [PLAIN_USER_MSG]
        out = agent._prepare_messages_for_non_vision_model(msgs)
        assert out is msgs

    def test_vision_capable_passes_through(self):
        """For vision-capable models on chat.completions path, provider handles pixels."""
        agent = _make_agent()
        agent.provider = "openrouter"
        agent.model = "anthropic/claude-sonnet-4"
        with patch.object(agent, "_model_supports_vision", return_value=True):
            out = agent._prepare_messages_for_non_vision_model([IMG_PARTS_USER_MSG])
        assert out[0]["content"][1]["type"] == "image_url"

    def test_non_vision_strips_images(self):
        """Without vision support, image parts collapse into a text description."""
        agent = _make_agent()
        agent.provider = "openrouter"
        agent.model = "qwen/qwen3-235b-a22b"
        # Stub the describer so the test controls the replacement text.
        with patch.object(agent, "_model_supports_vision", return_value=False), \
                patch.object(
                    agent,
                    "_describe_image_for_anthropic_fallback",
                    return_value="[Image description: a dog]",
                ):
            out = agent._prepare_messages_for_non_vision_model([IMG_PARTS_USER_MSG])
        content = out[0]["content"]
        assert isinstance(content, str)
        assert "[Image description: a dog]" in content
        assert "image_url" not in content

    def test_multiple_messages_with_mixed_content(self):
        """Only the message carrying image parts is rewritten; others are kept."""
        agent = _make_agent()
        agent.model = "qwen/qwen3-235b"
        msgs = [
            {"role": "user", "content": "first turn"},
            {"role": "assistant", "content": "ack"},
            IMG_PARTS_USER_MSG,
        ]
        with patch.object(agent, "_model_supports_vision", return_value=False), \
                patch.object(
                    agent,
                    "_describe_image_for_anthropic_fallback",
                    return_value="[Image: thing]",
                ):
            out = agent._prepare_messages_for_non_vision_model(msgs)
        # First two messages unchanged (no images), third stripped.
        assert out[0]["content"] == "first turn"
        assert out[1]["content"] == "ack"
        assert isinstance(out[2]["content"], str)
        assert "[Image: thing]" in out[2]["content"]
|
||
|
|
|
||
|
|
|
||
|
|
# ─── _model_supports_vision ──────────────────────────────────────────────────
|
||
|
|
|
||
|
|
|
||
|
|
class TestModelSupportsVision:
    """Tests for ``AIAgent._model_supports_vision``."""

    def test_missing_provider_or_model_returns_false(self):
        """An empty provider or an empty model yields ``False``."""
        agent = _make_agent()
        agent.provider = ""
        agent.model = "claude-sonnet-4"
        assert agent._model_supports_vision() is False
        agent.provider = "anthropic"
        agent.model = ""
        assert agent._model_supports_vision() is False

    def test_uses_get_model_capabilities(self):
        """The result follows ``supports_vision`` on the patched capabilities."""
        agent = _make_agent()
        fake_caps = MagicMock()
        fake_caps.supports_vision = True
        with patch("agent.models_dev.get_model_capabilities", return_value=fake_caps):
            assert agent._model_supports_vision() is True
        # Same stub object, flag flipped, to exercise the negative branch.
        fake_caps.supports_vision = False
        with patch("agent.models_dev.get_model_capabilities", return_value=fake_caps):
            assert agent._model_supports_vision() is False

    def test_none_caps_returns_false(self):
        """A ``None`` capabilities lookup yields ``False``."""
        agent = _make_agent()
        with patch("agent.models_dev.get_model_capabilities", return_value=None):
            assert agent._model_supports_vision() is False

    def test_exception_returns_false(self):
        """An exception raised by the capabilities lookup yields ``False``."""
        agent = _make_agent()
        with patch("agent.models_dev.get_model_capabilities", side_effect=RuntimeError("boom")):
            assert agent._model_supports_vision() is False
|