mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-05 02:07:34 +08:00
Compare commits
1 Commit
fix/plugin
...
openclaw-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
72d53e14ae |
@@ -31,6 +31,7 @@ from agent.model_metadata import (
|
||||
get_model_context_length,
|
||||
estimate_messages_tokens_rough,
|
||||
)
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -593,7 +594,13 @@ class ContextCompressor(ContextEngine):
|
||||
content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
|
||||
parts.append(f"[{role.upper()}]: {content}")
|
||||
|
||||
return "\n\n".join(parts)
|
||||
# Scrub credential-like values before sending to the summarizer.
|
||||
# The summarizer is instructed to preserve "specific values" so raw
|
||||
# API keys, bearer tokens, or env-var assignments that leak in via
|
||||
# tool output (terminal, file_read, curl -v) would otherwise be
|
||||
# copied verbatim into the persistent summary and re-injected on
|
||||
# every subsequent compaction. Ported from openclaw/openclaw#67801.
|
||||
return redact_sensitive_text("\n\n".join(parts))
|
||||
|
||||
def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topic: str = None) -> Optional[str]:
|
||||
"""Generate a structured summary of conversation turns.
|
||||
@@ -699,13 +706,17 @@ Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command out
|
||||
Write only the summary body. Do not include any preamble or prefix."""
|
||||
|
||||
if self._previous_summary:
|
||||
# Iterative update: preserve existing info, add new progress
|
||||
# Iterative update: preserve existing info, add new progress.
|
||||
# Re-scrub the previous summary in case it was produced before
|
||||
# output-side redaction was added or restored from older session
|
||||
# state. (Idempotent on already-clean text.)
|
||||
previous_summary_clean = redact_sensitive_text(self._previous_summary)
|
||||
prompt = f"""{_summarizer_preamble}
|
||||
|
||||
You are updating a context compaction summary. A previous compaction produced the summary below. New conversation turns have occurred since then and need to be incorporated.
|
||||
|
||||
PREVIOUS SUMMARY:
|
||||
{self._previous_summary}
|
||||
{previous_summary_clean}
|
||||
|
||||
NEW TURNS TO INCORPORATE:
|
||||
{content_to_summarize}
|
||||
@@ -756,6 +767,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
if not isinstance(content, str):
|
||||
content = str(content) if content else ""
|
||||
summary = content.strip()
|
||||
# Defense-in-depth: scrub any credential-like values that the
|
||||
# summarizer may have echoed back from the input. Input is already
|
||||
# scrubbed in _serialize_for_summary, but a poorly-behaved
|
||||
# summarizer model could paraphrase a secret ("the API key was
|
||||
# sk-..."). Ported from openclaw/openclaw#67801.
|
||||
summary = redact_sensitive_text(summary)
|
||||
# Store for iterative updates on next compaction
|
||||
self._previous_summary = summary
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
|
||||
@@ -905,3 +905,102 @@ class TestTruncateToolCallArgsJson:
|
||||
parsed = _json.loads(shrunk)
|
||||
assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
|
||||
assert parsed["content"].endswith("...[truncated]")
|
||||
|
||||
|
||||
class TestSerializationRedaction:
    """Regression coverage for the redaction port of openclaw/openclaw#67801.

    Because the summarizer is told to keep specific values, any
    credential-looking string that shows up in tool output (echoing env vars,
    ``curl -v``, reading a ``.env`` file) has to be masked before it lands in
    the summary prompt. If it is not, the value is copied verbatim into the
    persistent summary and re-injected on each later compaction.
    """

    def test_api_key_prefix_redacted_from_tool_result(self, compressor):
        """A bare ``sk-proj-...`` key returned by a tool must not be serialized."""
        api_key = "sk-proj-abc123DEADBEEFdef456GHIJKL789mnop0123QRSTUVwxYZ"
        terminal_call = {
            "id": "c1",
            "type": "function",
            "function": {
                "name": "terminal",
                "arguments": '{"command": "echo $OPENAI_API_KEY"}',
            },
        }
        conversation = [
            {"role": "user", "content": "show me the openai key"},
            {"role": "assistant", "content": None, "tool_calls": [terminal_call]},
            {"role": "tool", "tool_call_id": "c1", "content": api_key},
            {"role": "user", "content": "thanks"},
        ]

        rendered = compressor._serialize_for_summary(conversation)

        assert api_key not in rendered
        # The serialized text is expected to stay non-empty once the secret
        # has been scrubbed from it.
        assert len(rendered) > 0

    def test_env_assignment_redacted(self, compressor):
        """The value of an env-var assignment is hidden; the variable name stays."""
        api_key = "sk-verysecretvalue123456789abcdef"
        tool_message = {
            "role": "tool",
            "tool_call_id": "c1",
            "content": f"OPENAI_API_KEY={api_key}\nOTHER_VAR=harmless",
        }

        rendered = compressor._serialize_for_summary([tool_message])

        assert api_key not in rendered
        assert "OPENAI_API_KEY=" in rendered

    def test_authorization_header_redacted(self, compressor):
        """A bearer token inside a ``curl -H`` command line must not be serialized."""
        token = "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZ123456"
        tool_message = {
            "role": "tool",
            "tool_call_id": "c1",
            "content": f"curl -H 'Authorization: Bearer {token}' https://api.github.com",
        }

        rendered = compressor._serialize_for_summary([tool_message])

        assert token not in rendered

    def test_json_api_key_field_redacted(self, compressor):
        """A secret stored under a JSON ``apiKey`` field must not be serialized."""
        token = "xoxb-11111-22222-deadbeefcafebabefeed"
        tool_message = {
            "role": "tool",
            "tool_call_id": "c1",
            "content": f'{{"apiKey": "{token}"}}',
        }

        rendered = compressor._serialize_for_summary([tool_message])

        assert token not in rendered

    def test_non_secret_content_preserved(self, compressor):
        """Ordinary content must come through untouched.

        File paths, UUIDs, host:port pairs and error messages are not secrets
        and have to survive serialization verbatim.
        """
        conversation = [
            {"role": "user", "content": "fix the bug at /home/user/repo/src/main.py:42"},
            {
                "role": "assistant",
                "content": (
                    "Fixed. The UUID 550e8400-e29b-41d4-a716-446655440000 is now "
                    "correctly handled. Server listens on 127.0.0.1:8080."
                ),
            },
            {
                "role": "tool",
                "tool_call_id": "c1",
                "content": "ImportError: No module named 'foo'",
            },
        ]

        rendered = compressor._serialize_for_summary(conversation)

        must_survive = (
            "/home/user/repo/src/main.py:42",
            "550e8400-e29b-41d4-a716-446655440000",
            "127.0.0.1:8080",
            "ImportError: No module named 'foo'",
        )
        for fragment in must_survive:
            assert fragment in rendered

    def test_stored_summary_is_redacted(self, compressor):
        """A secret echoed back by the summarizer must be scrubbed.

        Both the returned summary and the copy kept in ``_previous_summary``
        are checked.
        """
        leaked = "sk-leakedfromsummarizer9876543210"

        # Build a canned chat-completion response whose text contains the secret.
        choice = MagicMock()
        choice.message = MagicMock()
        choice.message.content = (
            f"The user set OPENAI_API_KEY={leaked} and ran the script."
        )
        canned_response = MagicMock()
        canned_response.choices = [choice]
        canned_response.usage = None

        stub_client = MagicMock()
        stub_client.chat.completions.create.return_value = canned_response
        compressor.client = stub_client

        conversation = [
            {"role": "user", "content": "set up the key"},
            {"role": "assistant", "content": "done"},
        ]

        result = compressor._generate_summary(conversation)

        assert result is not None
        assert leaked not in result
        assert leaked not in (compressor._previous_summary or "")
|
||||
|
||||
Reference in New Issue
Block a user