mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 07:21:37 +08:00
Compare commits
1 Commits
fix/plugin
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
73a4c7ad4a |
@@ -216,6 +216,23 @@ def get_image_cache_dir() -> Path:
|
||||
return IMAGE_CACHE_DIR
|
||||
|
||||
|
||||
def _looks_like_image(data: bytes) -> bool:
|
||||
"""Return True if *data* starts with a known image magic-byte sequence."""
|
||||
if len(data) < 4:
|
||||
return False
|
||||
if data[:8] == b"\x89PNG\r\n\x1a\n":
|
||||
return True
|
||||
if data[:3] == b"\xff\xd8\xff":
|
||||
return True
|
||||
if data[:6] in (b"GIF87a", b"GIF89a"):
|
||||
return True
|
||||
if data[:2] == b"BM":
|
||||
return True
|
||||
if data[:4] == b"RIFF" and len(data) >= 12 and data[8:12] == b"WEBP":
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
|
||||
"""
|
||||
Save raw image bytes to the cache and return the absolute file path.
|
||||
@@ -226,7 +243,17 @@ def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached image file as a string.
|
||||
|
||||
Raises:
|
||||
ValueError: If *data* does not look like a valid image (e.g. an HTML
|
||||
error page returned by the upstream server).
|
||||
"""
|
||||
if not _looks_like_image(data):
|
||||
snippet = data[:80].decode("utf-8", errors="replace")
|
||||
raise ValueError(
|
||||
f"Refusing to cache non-image data as {ext} "
|
||||
f"(starts with: {snippet!r})"
|
||||
)
|
||||
cache_dir = get_image_cache_dir()
|
||||
filename = f"img_{uuid.uuid4().hex[:12]}{ext}"
|
||||
filepath = cache_dir / filename
|
||||
|
||||
@@ -195,7 +195,11 @@ def _extract_attachments(
|
||||
|
||||
ext = Path(filename).suffix.lower()
|
||||
if ext in _IMAGE_EXTS:
|
||||
cached_path = cache_image_from_bytes(payload, ext)
|
||||
try:
|
||||
cached_path = cache_image_from_bytes(payload, ext)
|
||||
except ValueError:
|
||||
logger.debug("Skipping non-image attachment %s (invalid magic bytes)", filename)
|
||||
continue
|
||||
attachments.append({
|
||||
"path": cached_path,
|
||||
"filename": filename,
|
||||
|
||||
@@ -1596,6 +1596,18 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Slack may return an HTML sign-in/redirect page
|
||||
# instead of actual media bytes (e.g. expired token,
|
||||
# restricted file access). Detect this early so we
|
||||
# don't cache bogus data and confuse downstream tools.
|
||||
ct = response.headers.get("content-type", "")
|
||||
if "text/html" in ct:
|
||||
raise ValueError(
|
||||
"Slack returned HTML instead of media "
|
||||
f"(content-type: {ct}); "
|
||||
"check bot token scopes and file permissions"
|
||||
)
|
||||
|
||||
if audio:
|
||||
from gateway.platforms.base import cache_audio_from_bytes
|
||||
return cache_audio_from_bytes(response.content, ext)
|
||||
|
||||
@@ -696,7 +696,11 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
|
||||
if kind == "image":
|
||||
ext = self._detect_image_ext(raw)
|
||||
return cache_image_from_bytes(raw, ext), self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
try:
|
||||
return cache_image_from_bytes(raw, ext), self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
except ValueError as exc:
|
||||
logger.warning("[%s] Rejected non-image bytes: %s", self.name, exc)
|
||||
return None
|
||||
|
||||
filename = str(media.get("filename") or media.get("name") or "wecom_file")
|
||||
return cache_document_from_bytes(raw, filename), mimetypes.guess_type(filename)[0] or "application/octet-stream"
|
||||
@@ -722,7 +726,11 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
content_type = str(headers.get("content-type") or "").split(";", 1)[0].strip() or "application/octet-stream"
|
||||
if kind == "image":
|
||||
ext = self._guess_extension(url, content_type, fallback=self._detect_image_ext(raw))
|
||||
return cache_image_from_bytes(raw, ext), content_type or self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
try:
|
||||
return cache_image_from_bytes(raw, ext), content_type or self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
except ValueError as exc:
|
||||
logger.warning("[%s] Rejected non-image bytes from %s: %s", self.name, url, exc)
|
||||
return None
|
||||
|
||||
filename = self._guess_filename(url, headers.get("content-disposition"), content_type)
|
||||
return cache_document_from_bytes(raw, filename), content_type
|
||||
|
||||
@@ -34,6 +34,45 @@ def _make_timeout_error() -> httpx.TimeoutException:
|
||||
return httpx.TimeoutException("timed out")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cache_image_from_bytes (base.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCacheImageFromBytes:
|
||||
"""Tests for gateway.platforms.base.cache_image_from_bytes"""
|
||||
|
||||
def test_caches_valid_jpeg(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
from gateway.platforms.base import cache_image_from_bytes
|
||||
path = cache_image_from_bytes(b"\xff\xd8\xff fake jpeg data", ".jpg")
|
||||
assert path.endswith(".jpg")
|
||||
|
||||
def test_caches_valid_png(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
from gateway.platforms.base import cache_image_from_bytes
|
||||
path = cache_image_from_bytes(b"\x89PNG\r\n\x1a\n fake png data", ".png")
|
||||
assert path.endswith(".png")
|
||||
|
||||
def test_rejects_html_content(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
from gateway.platforms.base import cache_image_from_bytes
|
||||
with pytest.raises(ValueError, match="non-image data"):
|
||||
cache_image_from_bytes(b"<!DOCTYPE html><html><title>Slack</title></html>", ".png")
|
||||
|
||||
def test_rejects_empty_data(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
from gateway.platforms.base import cache_image_from_bytes
|
||||
with pytest.raises(ValueError, match="non-image data"):
|
||||
cache_image_from_bytes(b"", ".jpg")
|
||||
|
||||
def test_rejects_plain_text(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
from gateway.platforms.base import cache_image_from_bytes
|
||||
with pytest.raises(ValueError, match="non-image data"):
|
||||
cache_image_from_bytes(b"just some text, not an image", ".jpg")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cache_image_from_url (base.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -71,7 +110,7 @@ class TestCacheImageFromUrl:
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
fake_response = MagicMock()
|
||||
fake_response.content = b"image data"
|
||||
fake_response.content = b"\xff\xd8\xff image data"
|
||||
fake_response.raise_for_status = MagicMock()
|
||||
|
||||
mock_client = AsyncMock()
|
||||
@@ -101,7 +140,7 @@ class TestCacheImageFromUrl:
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
ok_response = MagicMock()
|
||||
ok_response.content = b"image data"
|
||||
ok_response.content = b"\xff\xd8\xff image data"
|
||||
ok_response.raise_for_status = MagicMock()
|
||||
|
||||
mock_client = AsyncMock()
|
||||
@@ -395,8 +434,9 @@ class TestSlackDownloadSlackFile:
|
||||
adapter = _make_slack_adapter()
|
||||
|
||||
fake_response = MagicMock()
|
||||
fake_response.content = b"fake image bytes"
|
||||
fake_response.content = b"\x89PNG\r\n\x1a\n fake png"
|
||||
fake_response.raise_for_status = MagicMock()
|
||||
fake_response.headers = {"content-type": "image/png"}
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.get = AsyncMock(return_value=fake_response)
|
||||
@@ -413,14 +453,44 @@ class TestSlackDownloadSlackFile:
|
||||
assert path.endswith(".jpg")
|
||||
mock_client.get.assert_called_once()
|
||||
|
||||
def test_rejects_html_response(self, tmp_path, monkeypatch):
|
||||
"""An HTML sign-in page from Slack is rejected, not cached as image."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
adapter = _make_slack_adapter()
|
||||
|
||||
fake_response = MagicMock()
|
||||
fake_response.content = b"<!DOCTYPE html><html><title>Slack</title></html>"
|
||||
fake_response.raise_for_status = MagicMock()
|
||||
fake_response.headers = {"content-type": "text/html; charset=utf-8"}
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.get = AsyncMock(return_value=fake_response)
|
||||
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
||||
mock_client.__aexit__ = AsyncMock(return_value=False)
|
||||
|
||||
async def run():
|
||||
with patch("httpx.AsyncClient", return_value=mock_client):
|
||||
await adapter._download_slack_file(
|
||||
"https://files.slack.com/img.jpg", ext=".jpg"
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="HTML instead of media"):
|
||||
asyncio.run(run())
|
||||
|
||||
# Verify nothing was cached
|
||||
img_dir = tmp_path / "img"
|
||||
if img_dir.exists():
|
||||
assert list(img_dir.iterdir()) == []
|
||||
|
||||
def test_retries_on_timeout_then_succeeds(self, tmp_path, monkeypatch):
|
||||
"""Timeout on first attempt triggers retry; success on second."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
adapter = _make_slack_adapter()
|
||||
|
||||
fake_response = MagicMock()
|
||||
fake_response.content = b"image bytes"
|
||||
fake_response.content = b"\x89PNG\r\n\x1a\n image bytes"
|
||||
fake_response.raise_for_status = MagicMock()
|
||||
fake_response.headers = {"content-type": "image/png"}
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.get = AsyncMock(
|
||||
|
||||
Reference in New Issue
Block a user