mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 15:31:38 +08:00
Compare commits
1 Commits
fix/plugin
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
73a4c7ad4a |
@@ -216,6 +216,23 @@ def get_image_cache_dir() -> Path:
|
|||||||
return IMAGE_CACHE_DIR
|
return IMAGE_CACHE_DIR
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_image(data: bytes) -> bool:
|
||||||
|
"""Return True if *data* starts with a known image magic-byte sequence."""
|
||||||
|
if len(data) < 4:
|
||||||
|
return False
|
||||||
|
if data[:8] == b"\x89PNG\r\n\x1a\n":
|
||||||
|
return True
|
||||||
|
if data[:3] == b"\xff\xd8\xff":
|
||||||
|
return True
|
||||||
|
if data[:6] in (b"GIF87a", b"GIF89a"):
|
||||||
|
return True
|
||||||
|
if data[:2] == b"BM":
|
||||||
|
return True
|
||||||
|
if data[:4] == b"RIFF" and len(data) >= 12 and data[8:12] == b"WEBP":
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
|
def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
|
||||||
"""
|
"""
|
||||||
Save raw image bytes to the cache and return the absolute file path.
|
Save raw image bytes to the cache and return the absolute file path.
|
||||||
@@ -226,7 +243,17 @@ def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Absolute path to the cached image file as a string.
|
Absolute path to the cached image file as a string.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If *data* does not look like a valid image (e.g. an HTML
|
||||||
|
error page returned by the upstream server).
|
||||||
"""
|
"""
|
||||||
|
if not _looks_like_image(data):
|
||||||
|
snippet = data[:80].decode("utf-8", errors="replace")
|
||||||
|
raise ValueError(
|
||||||
|
f"Refusing to cache non-image data as {ext} "
|
||||||
|
f"(starts with: {snippet!r})"
|
||||||
|
)
|
||||||
cache_dir = get_image_cache_dir()
|
cache_dir = get_image_cache_dir()
|
||||||
filename = f"img_{uuid.uuid4().hex[:12]}{ext}"
|
filename = f"img_{uuid.uuid4().hex[:12]}{ext}"
|
||||||
filepath = cache_dir / filename
|
filepath = cache_dir / filename
|
||||||
|
|||||||
@@ -195,7 +195,11 @@ def _extract_attachments(
|
|||||||
|
|
||||||
ext = Path(filename).suffix.lower()
|
ext = Path(filename).suffix.lower()
|
||||||
if ext in _IMAGE_EXTS:
|
if ext in _IMAGE_EXTS:
|
||||||
cached_path = cache_image_from_bytes(payload, ext)
|
try:
|
||||||
|
cached_path = cache_image_from_bytes(payload, ext)
|
||||||
|
except ValueError:
|
||||||
|
logger.debug("Skipping non-image attachment %s (invalid magic bytes)", filename)
|
||||||
|
continue
|
||||||
attachments.append({
|
attachments.append({
|
||||||
"path": cached_path,
|
"path": cached_path,
|
||||||
"filename": filename,
|
"filename": filename,
|
||||||
|
|||||||
@@ -1596,6 +1596,18 @@ class SlackAdapter(BasePlatformAdapter):
|
|||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
|
# Slack may return an HTML sign-in/redirect page
|
||||||
|
# instead of actual media bytes (e.g. expired token,
|
||||||
|
# restricted file access). Detect this early so we
|
||||||
|
# don't cache bogus data and confuse downstream tools.
|
||||||
|
ct = response.headers.get("content-type", "")
|
||||||
|
if "text/html" in ct:
|
||||||
|
raise ValueError(
|
||||||
|
"Slack returned HTML instead of media "
|
||||||
|
f"(content-type: {ct}); "
|
||||||
|
"check bot token scopes and file permissions"
|
||||||
|
)
|
||||||
|
|
||||||
if audio:
|
if audio:
|
||||||
from gateway.platforms.base import cache_audio_from_bytes
|
from gateway.platforms.base import cache_audio_from_bytes
|
||||||
return cache_audio_from_bytes(response.content, ext)
|
return cache_audio_from_bytes(response.content, ext)
|
||||||
|
|||||||
@@ -696,7 +696,11 @@ class WeComAdapter(BasePlatformAdapter):
|
|||||||
|
|
||||||
if kind == "image":
|
if kind == "image":
|
||||||
ext = self._detect_image_ext(raw)
|
ext = self._detect_image_ext(raw)
|
||||||
return cache_image_from_bytes(raw, ext), self._mime_for_ext(ext, fallback="image/jpeg")
|
try:
|
||||||
|
return cache_image_from_bytes(raw, ext), self._mime_for_ext(ext, fallback="image/jpeg")
|
||||||
|
except ValueError as exc:
|
||||||
|
logger.warning("[%s] Rejected non-image bytes: %s", self.name, exc)
|
||||||
|
return None
|
||||||
|
|
||||||
filename = str(media.get("filename") or media.get("name") or "wecom_file")
|
filename = str(media.get("filename") or media.get("name") or "wecom_file")
|
||||||
return cache_document_from_bytes(raw, filename), mimetypes.guess_type(filename)[0] or "application/octet-stream"
|
return cache_document_from_bytes(raw, filename), mimetypes.guess_type(filename)[0] or "application/octet-stream"
|
||||||
@@ -722,7 +726,11 @@ class WeComAdapter(BasePlatformAdapter):
|
|||||||
content_type = str(headers.get("content-type") or "").split(";", 1)[0].strip() or "application/octet-stream"
|
content_type = str(headers.get("content-type") or "").split(";", 1)[0].strip() or "application/octet-stream"
|
||||||
if kind == "image":
|
if kind == "image":
|
||||||
ext = self._guess_extension(url, content_type, fallback=self._detect_image_ext(raw))
|
ext = self._guess_extension(url, content_type, fallback=self._detect_image_ext(raw))
|
||||||
return cache_image_from_bytes(raw, ext), content_type or self._mime_for_ext(ext, fallback="image/jpeg")
|
try:
|
||||||
|
return cache_image_from_bytes(raw, ext), content_type or self._mime_for_ext(ext, fallback="image/jpeg")
|
||||||
|
except ValueError as exc:
|
||||||
|
logger.warning("[%s] Rejected non-image bytes from %s: %s", self.name, url, exc)
|
||||||
|
return None
|
||||||
|
|
||||||
filename = self._guess_filename(url, headers.get("content-disposition"), content_type)
|
filename = self._guess_filename(url, headers.get("content-disposition"), content_type)
|
||||||
return cache_document_from_bytes(raw, filename), content_type
|
return cache_document_from_bytes(raw, filename), content_type
|
||||||
|
|||||||
@@ -34,6 +34,45 @@ def _make_timeout_error() -> httpx.TimeoutException:
|
|||||||
return httpx.TimeoutException("timed out")
|
return httpx.TimeoutException("timed out")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# cache_image_from_bytes (base.py)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCacheImageFromBytes:
|
||||||
|
"""Tests for gateway.platforms.base.cache_image_from_bytes"""
|
||||||
|
|
||||||
|
def test_caches_valid_jpeg(self, tmp_path, monkeypatch):
|
||||||
|
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||||
|
from gateway.platforms.base import cache_image_from_bytes
|
||||||
|
path = cache_image_from_bytes(b"\xff\xd8\xff fake jpeg data", ".jpg")
|
||||||
|
assert path.endswith(".jpg")
|
||||||
|
|
||||||
|
def test_caches_valid_png(self, tmp_path, monkeypatch):
|
||||||
|
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||||
|
from gateway.platforms.base import cache_image_from_bytes
|
||||||
|
path = cache_image_from_bytes(b"\x89PNG\r\n\x1a\n fake png data", ".png")
|
||||||
|
assert path.endswith(".png")
|
||||||
|
|
||||||
|
def test_rejects_html_content(self, tmp_path, monkeypatch):
|
||||||
|
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||||
|
from gateway.platforms.base import cache_image_from_bytes
|
||||||
|
with pytest.raises(ValueError, match="non-image data"):
|
||||||
|
cache_image_from_bytes(b"<!DOCTYPE html><html><title>Slack</title></html>", ".png")
|
||||||
|
|
||||||
|
def test_rejects_empty_data(self, tmp_path, monkeypatch):
|
||||||
|
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||||
|
from gateway.platforms.base import cache_image_from_bytes
|
||||||
|
with pytest.raises(ValueError, match="non-image data"):
|
||||||
|
cache_image_from_bytes(b"", ".jpg")
|
||||||
|
|
||||||
|
def test_rejects_plain_text(self, tmp_path, monkeypatch):
|
||||||
|
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||||
|
from gateway.platforms.base import cache_image_from_bytes
|
||||||
|
with pytest.raises(ValueError, match="non-image data"):
|
||||||
|
cache_image_from_bytes(b"just some text, not an image", ".jpg")
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# cache_image_from_url (base.py)
|
# cache_image_from_url (base.py)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -71,7 +110,7 @@ class TestCacheImageFromUrl:
|
|||||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||||
|
|
||||||
fake_response = MagicMock()
|
fake_response = MagicMock()
|
||||||
fake_response.content = b"image data"
|
fake_response.content = b"\xff\xd8\xff image data"
|
||||||
fake_response.raise_for_status = MagicMock()
|
fake_response.raise_for_status = MagicMock()
|
||||||
|
|
||||||
mock_client = AsyncMock()
|
mock_client = AsyncMock()
|
||||||
@@ -101,7 +140,7 @@ class TestCacheImageFromUrl:
|
|||||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||||
|
|
||||||
ok_response = MagicMock()
|
ok_response = MagicMock()
|
||||||
ok_response.content = b"image data"
|
ok_response.content = b"\xff\xd8\xff image data"
|
||||||
ok_response.raise_for_status = MagicMock()
|
ok_response.raise_for_status = MagicMock()
|
||||||
|
|
||||||
mock_client = AsyncMock()
|
mock_client = AsyncMock()
|
||||||
@@ -395,8 +434,9 @@ class TestSlackDownloadSlackFile:
|
|||||||
adapter = _make_slack_adapter()
|
adapter = _make_slack_adapter()
|
||||||
|
|
||||||
fake_response = MagicMock()
|
fake_response = MagicMock()
|
||||||
fake_response.content = b"fake image bytes"
|
fake_response.content = b"\x89PNG\r\n\x1a\n fake png"
|
||||||
fake_response.raise_for_status = MagicMock()
|
fake_response.raise_for_status = MagicMock()
|
||||||
|
fake_response.headers = {"content-type": "image/png"}
|
||||||
|
|
||||||
mock_client = AsyncMock()
|
mock_client = AsyncMock()
|
||||||
mock_client.get = AsyncMock(return_value=fake_response)
|
mock_client.get = AsyncMock(return_value=fake_response)
|
||||||
@@ -413,14 +453,44 @@ class TestSlackDownloadSlackFile:
|
|||||||
assert path.endswith(".jpg")
|
assert path.endswith(".jpg")
|
||||||
mock_client.get.assert_called_once()
|
mock_client.get.assert_called_once()
|
||||||
|
|
||||||
|
def test_rejects_html_response(self, tmp_path, monkeypatch):
|
||||||
|
"""An HTML sign-in page from Slack is rejected, not cached as image."""
|
||||||
|
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||||
|
adapter = _make_slack_adapter()
|
||||||
|
|
||||||
|
fake_response = MagicMock()
|
||||||
|
fake_response.content = b"<!DOCTYPE html><html><title>Slack</title></html>"
|
||||||
|
fake_response.raise_for_status = MagicMock()
|
||||||
|
fake_response.headers = {"content-type": "text/html; charset=utf-8"}
|
||||||
|
|
||||||
|
mock_client = AsyncMock()
|
||||||
|
mock_client.get = AsyncMock(return_value=fake_response)
|
||||||
|
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
||||||
|
mock_client.__aexit__ = AsyncMock(return_value=False)
|
||||||
|
|
||||||
|
async def run():
|
||||||
|
with patch("httpx.AsyncClient", return_value=mock_client):
|
||||||
|
await adapter._download_slack_file(
|
||||||
|
"https://files.slack.com/img.jpg", ext=".jpg"
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="HTML instead of media"):
|
||||||
|
asyncio.run(run())
|
||||||
|
|
||||||
|
# Verify nothing was cached
|
||||||
|
img_dir = tmp_path / "img"
|
||||||
|
if img_dir.exists():
|
||||||
|
assert list(img_dir.iterdir()) == []
|
||||||
|
|
||||||
def test_retries_on_timeout_then_succeeds(self, tmp_path, monkeypatch):
|
def test_retries_on_timeout_then_succeeds(self, tmp_path, monkeypatch):
|
||||||
"""Timeout on first attempt triggers retry; success on second."""
|
"""Timeout on first attempt triggers retry; success on second."""
|
||||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||||
adapter = _make_slack_adapter()
|
adapter = _make_slack_adapter()
|
||||||
|
|
||||||
fake_response = MagicMock()
|
fake_response = MagicMock()
|
||||||
fake_response.content = b"image bytes"
|
fake_response.content = b"\x89PNG\r\n\x1a\n image bytes"
|
||||||
fake_response.raise_for_status = MagicMock()
|
fake_response.raise_for_status = MagicMock()
|
||||||
|
fake_response.headers = {"content-type": "image/png"}
|
||||||
|
|
||||||
mock_client = AsyncMock()
|
mock_client = AsyncMock()
|
||||||
mock_client.get = AsyncMock(
|
mock_client.get = AsyncMock(
|
||||||
|
|||||||
Reference in New Issue
Block a user