mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-30 16:01:49 +08:00
Compare commits
1 Commits
fix/plugin
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ba93a142b4 |
@@ -476,12 +476,7 @@ class GatewayRunner:
|
|||||||
self._honcho_managers: Dict[str, Any] = {}
|
self._honcho_managers: Dict[str, Any] = {}
|
||||||
self._honcho_configs: Dict[str, Any] = {}
|
self._honcho_configs: Dict[str, Any] = {}
|
||||||
|
|
||||||
# Rate-limit compression warning messages sent to users.
|
|
||||||
# Keyed by chat_id — value is the timestamp of the last warning sent.
|
|
||||||
# Prevents the warning from firing on every message when a session
|
|
||||||
# remains above the threshold after compression.
|
|
||||||
self._compression_warn_sent: Dict[str, float] = {}
|
|
||||||
self._compression_warn_cooldown: int = 3600 # seconds (1 hour)
|
|
||||||
|
|
||||||
# Ensure tirith security scanner is available (downloads if needed)
|
# Ensure tirith security scanner is available (downloads if needed)
|
||||||
try:
|
try:
|
||||||
@@ -2354,18 +2349,7 @@ class GatewayRunner:
|
|||||||
f"{_compress_token_threshold:,}",
|
f"{_compress_token_threshold:,}",
|
||||||
)
|
)
|
||||||
|
|
||||||
_hyg_adapter = self.adapters.get(source.platform)
|
|
||||||
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
|
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
|
||||||
if _hyg_adapter:
|
|
||||||
try:
|
|
||||||
await _hyg_adapter.send(
|
|
||||||
source.chat_id,
|
|
||||||
f"🗜️ Session is large ({_msg_count} messages, "
|
|
||||||
f"~{_approx_tokens:,} tokens). Auto-compressing...",
|
|
||||||
metadata=_hyg_meta,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from run_agent import AIAgent
|
from run_agent import AIAgent
|
||||||
@@ -2426,70 +2410,17 @@ class GatewayRunner:
|
|||||||
f"{_approx_tokens:,}", f"{_new_tokens:,}",
|
f"{_approx_tokens:,}", f"{_new_tokens:,}",
|
||||||
)
|
)
|
||||||
|
|
||||||
if _hyg_adapter:
|
|
||||||
try:
|
|
||||||
await _hyg_adapter.send(
|
|
||||||
source.chat_id,
|
|
||||||
f"🗜️ Compressed: {_msg_count} → "
|
|
||||||
f"{_new_count} messages, "
|
|
||||||
f"~{_approx_tokens:,} → "
|
|
||||||
f"~{_new_tokens:,} tokens",
|
|
||||||
metadata=_hyg_meta,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Still too large after compression — warn user
|
|
||||||
# Rate-limited to once per cooldown period per
|
|
||||||
# chat to avoid spamming on every message.
|
|
||||||
if _new_tokens >= _warn_token_threshold:
|
if _new_tokens >= _warn_token_threshold:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Session hygiene: still ~%s tokens after "
|
"Session hygiene: still ~%s tokens after "
|
||||||
"compression — suggesting /reset",
|
"compression",
|
||||||
f"{_new_tokens:,}",
|
f"{_new_tokens:,}",
|
||||||
)
|
)
|
||||||
_now = time.time()
|
|
||||||
_last_warn = self._compression_warn_sent.get(source.chat_id, 0)
|
|
||||||
if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
|
|
||||||
self._compression_warn_sent[source.chat_id] = _now
|
|
||||||
try:
|
|
||||||
await _hyg_adapter.send(
|
|
||||||
source.chat_id,
|
|
||||||
"⚠️ Session is still very large "
|
|
||||||
"after compression "
|
|
||||||
f"(~{_new_tokens:,} tokens). "
|
|
||||||
"Consider using /reset to start "
|
|
||||||
"fresh if you experience issues.",
|
|
||||||
metadata=_hyg_meta,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Session hygiene auto-compress failed: %s", e
|
"Session hygiene auto-compress failed: %s", e
|
||||||
)
|
)
|
||||||
# Compression failed and session is dangerously large
|
|
||||||
if _approx_tokens >= _warn_token_threshold:
|
|
||||||
_hyg_adapter = self.adapters.get(source.platform)
|
|
||||||
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
|
|
||||||
_now = time.time()
|
|
||||||
_last_warn = self._compression_warn_sent.get(source.chat_id, 0)
|
|
||||||
if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
|
|
||||||
self._compression_warn_sent[source.chat_id] = _now
|
|
||||||
try:
|
|
||||||
await _hyg_adapter.send(
|
|
||||||
source.chat_id,
|
|
||||||
f"⚠️ Session is very large "
|
|
||||||
f"({_msg_count} messages, "
|
|
||||||
f"~{_approx_tokens:,} tokens) and "
|
|
||||||
"auto-compression failed. Consider "
|
|
||||||
"using /compress or /reset to avoid "
|
|
||||||
"issues.",
|
|
||||||
metadata=_hyg_meta,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# First-message onboarding -- only on the very first interaction ever
|
# First-message onboarding -- only on the very first interaction ever
|
||||||
if not history and not self.session_store.has_any_sessions():
|
if not history and not self.session_store.has_any_sessions():
|
||||||
|
|||||||
@@ -212,47 +212,7 @@ class TestSessionHygieneWarnThreshold:
|
|||||||
assert post_compress_tokens < warn_threshold
|
assert post_compress_tokens < warn_threshold
|
||||||
|
|
||||||
|
|
||||||
class TestCompressionWarnRateLimit:
|
|
||||||
"""Compression warning messages must be rate-limited per chat_id."""
|
|
||||||
|
|
||||||
def _make_runner(self):
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
with patch("gateway.run.load_gateway_config"), \
|
|
||||||
patch("gateway.run.SessionStore"), \
|
|
||||||
patch("gateway.run.DeliveryRouter"):
|
|
||||||
from gateway.run import GatewayRunner
|
|
||||||
runner = GatewayRunner.__new__(GatewayRunner)
|
|
||||||
runner._compression_warn_sent = {}
|
|
||||||
runner._compression_warn_cooldown = 3600
|
|
||||||
return runner
|
|
||||||
|
|
||||||
def test_first_warn_is_sent(self):
|
|
||||||
runner = self._make_runner()
|
|
||||||
now = 1_000_000.0
|
|
||||||
last = runner._compression_warn_sent.get("chat:1", 0)
|
|
||||||
assert now - last >= runner._compression_warn_cooldown
|
|
||||||
|
|
||||||
def test_second_warn_suppressed_within_cooldown(self):
|
|
||||||
runner = self._make_runner()
|
|
||||||
now = 1_000_000.0
|
|
||||||
runner._compression_warn_sent["chat:1"] = now - 60 # 1 minute ago
|
|
||||||
last = runner._compression_warn_sent.get("chat:1", 0)
|
|
||||||
assert now - last < runner._compression_warn_cooldown
|
|
||||||
|
|
||||||
def test_warn_allowed_after_cooldown(self):
|
|
||||||
runner = self._make_runner()
|
|
||||||
now = 1_000_000.0
|
|
||||||
runner._compression_warn_sent["chat:1"] = now - 3601 # just past cooldown
|
|
||||||
last = runner._compression_warn_sent.get("chat:1", 0)
|
|
||||||
assert now - last >= runner._compression_warn_cooldown
|
|
||||||
|
|
||||||
def test_rate_limit_is_per_chat(self):
|
|
||||||
"""Rate-limiting one chat must not suppress warnings for another."""
|
|
||||||
runner = self._make_runner()
|
|
||||||
now = 1_000_000.0
|
|
||||||
runner._compression_warn_sent["chat:1"] = now - 60 # suppressed
|
|
||||||
last_other = runner._compression_warn_sent.get("chat:2", 0)
|
|
||||||
assert now - last_other >= runner._compression_warn_cooldown
|
|
||||||
|
|
||||||
|
|
||||||
class TestEstimatedTokenThreshold:
|
class TestEstimatedTokenThreshold:
|
||||||
@@ -421,10 +381,6 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t
|
|||||||
result = await runner._handle_message(event)
|
result = await runner._handle_message(event)
|
||||||
|
|
||||||
assert result == "ok"
|
assert result == "ok"
|
||||||
assert len(adapter.sent) == 2
|
# Compression warnings are no longer sent to users — compression
|
||||||
assert adapter.sent[0]["chat_id"] == "-1001"
|
# happens silently with server-side logging only.
|
||||||
assert "Session is large" in adapter.sent[0]["content"]
|
assert len(adapter.sent) == 0
|
||||||
assert adapter.sent[0]["metadata"] == {"thread_id": "17585"}
|
|
||||||
assert adapter.sent[1]["chat_id"] == "-1001"
|
|
||||||
assert "Compressed:" in adapter.sent[1]["content"]
|
|
||||||
assert adapter.sent[1]["metadata"] == {"thread_id": "17585"}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user