mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 23:11:37 +08:00
Compare commits
1 Commits
skill/gith
...
fix/multim
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1b8a1c7d5e |
@@ -5,6 +5,7 @@ Uses Gemini Flash (cheap/fast) to summarize middle turns while
|
|||||||
protecting head and tail context.
|
protecting head and tail context.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
@@ -82,6 +83,41 @@ class ContextCompressor:
|
|||||||
"compression_count": self.compression_count,
|
"compression_count": self.compression_count,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@staticmethod
def _content_to_text(content: Any) -> str:
    """Convert message content to plain text for summarization.

    Handles:
    - str → returned as-is
    - None → empty string
    - list (multimodal) → parts joined with newlines; ``text`` parts
      contribute their text, ``image_url`` parts become ``[image]``,
      any other typed part becomes ``[<type>]``
    - other → JSON serialization, falling back to ``str()``

    Args:
        content: The ``content`` value of a chat message.

    Returns:
        A plain-text rendering of ``content``.
    """
    if isinstance(content, str):
        return content
    if content is None:
        return ""
    if isinstance(content, list):
        parts = []
        for item in content:
            if not isinstance(item, dict):
                parts.append(str(item))
                continue
            item_type = item.get("type")
            if item_type == "text":
                text = item.get("text")
                # A malformed part may carry a non-string ``text`` value;
                # coerce it so the join below cannot raise TypeError.
                if text is not None:
                    parts.append(text if isinstance(text, str) else str(text))
            elif item_type == "image_url":
                # Never forward the (possibly base64) image payload.
                parts.append("[image]")
            elif item_type:
                parts.append(f"[{item_type}]")
            else:
                parts.append(str(item))
        # Empty parts are dropped so they do not produce blank lines.
        return "\n".join(part for part in parts if part)
    try:
        return json.dumps(content, ensure_ascii=False, sort_keys=True)
    except (TypeError, ValueError):
        # TypeError: unserializable value or unsortable mixed-type keys.
        # ValueError: circular reference detected by the encoder.
        return str(content)
|
||||||
|
|
||||||
def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]:
|
def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]:
|
||||||
"""Generate a concise summary of conversation turns.
|
"""Generate a concise summary of conversation turns.
|
||||||
|
|
||||||
@@ -93,7 +129,7 @@ class ContextCompressor:
|
|||||||
parts = []
|
parts = []
|
||||||
for msg in turns_to_summarize:
|
for msg in turns_to_summarize:
|
||||||
role = msg.get("role", "unknown")
|
role = msg.get("role", "unknown")
|
||||||
content = msg.get("content") or ""
|
content = self._content_to_text(msg.get("content"))
|
||||||
if len(content) > 2000:
|
if len(content) > 2000:
|
||||||
content = content[:1000] + "\n...[truncated]...\n" + content[-500:]
|
content = content[:1000] + "\n...[truncated]...\n" + content[-500:]
|
||||||
tool_calls = msg.get("tool_calls", [])
|
tool_calls = msg.get("tool_calls", [])
|
||||||
|
|||||||
@@ -115,6 +115,70 @@ class TestCompress:
|
|||||||
assert result[-2]["content"] == msgs[-2]["content"]
|
assert result[-2]["content"] == msgs[-2]["content"]
|
||||||
|
|
||||||
|
|
||||||
|
class TestContentToText:
    """Check that ``_content_to_text`` copes with every content shape."""

    def test_string_passthrough(self, compressor):
        # Plain strings must come back untouched.
        rendered = compressor._content_to_text("hello")
        assert rendered == "hello"

    def test_none_returns_empty(self, compressor):
        # A missing content value is rendered as an empty string.
        rendered = compressor._content_to_text(None)
        assert rendered == ""

    def test_multimodal_text_parts(self, compressor):
        # Text parts are kept; image parts collapse to a placeholder.
        text_part = {"type": "text", "text": "describe this image"}
        image_part = {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}}
        rendered = compressor._content_to_text([text_part, image_part])
        for expected in ("describe this image", "[image]"):
            assert expected in rendered

    def test_multimodal_mixed_types(self, compressor):
        # Unknown typed parts are rendered as a ``[<type>]`` placeholder.
        rendered = compressor._content_to_text(
            [
                {"type": "text", "text": "first part"},
                {"type": "audio", "audio": {"data": "..."}},
                {"type": "text", "text": "second part"},
            ]
        )
        for expected in ("first part", "[audio]", "second part"):
            assert expected in rendered

    def test_dict_content_json_serialized(self, compressor):
        # Non-list, non-string content still yields its keys and values.
        rendered = compressor._content_to_text({"key": "value"})
        assert "key" in rendered
        assert "value" in rendered

    def test_multimodal_in_generate_summary(self):
        """Multimodal user messages should not crash _generate_summary."""
        choice = MagicMock()
        choice.message.content = "[CONTEXT SUMMARY]: image was discussed"
        fake_response = MagicMock()
        fake_response.choices = [choice]
        fake_client = MagicMock()
        fake_client.chat.completions.create.return_value = fake_response

        conversation = [
            {"role": "user", "content": [
                {"type": "text", "text": "What is in this image?"},
                {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}},
            ]},
            {"role": "assistant", "content": "I see a cat."},
            {"role": "user", "content": "thanks"},
        ]

        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
            with patch(
                "agent.context_compressor.get_text_auxiliary_client",
                return_value=(fake_client, "test-model"),
            ):
                instance = ContextCompressor(model="test", quiet_mode=True)
                summary = instance._generate_summary(conversation)

        assert isinstance(summary, str)
        # The prompt sent to the model should contain the text, not raw list
        sent_kwargs = fake_client.chat.completions.create.call_args.kwargs
        prompt = sent_kwargs["messages"][0]["content"]
        assert "What is in this image?" in prompt
        assert "[image]" in prompt
|
||||||
|
|
||||||
|
|
||||||
class TestGenerateSummaryNoneContent:
|
class TestGenerateSummaryNoneContent:
|
||||||
"""Regression: content=None (from tool-call-only assistant messages) must not crash."""
|
"""Regression: content=None (from tool-call-only assistant messages) must not crash."""
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user