mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 23:11:37 +08:00
The AIAgent.flush_memories pre-compression save, the gateway _flush_memories_for_session, and everything feeding them are obsolete now that the background memory/skill review handles persistent memory extraction. Problems with flush_memories: - Pre-dates the background review loop. It was the only memory-save path when introduced; the background review now fires every 10 user turns on CLI and gateway alike, which is far more frequent than compression or session reset ever triggered flush. - Blocking and synchronous. Pre-compression flush ran on the live agent before compression, blocking the user-visible response. - Cache-breaking. Flush built a temporary conversation prefix (system prompt + memory-only tool list) that diverged from the live conversation's cached prefix, invalidating prompt caching. The gateway variant spawned a fresh AIAgent with its own clean prompt for each finalized session — still cache-breaking, just in a different process. - Redundant. Background review runs in the live conversation's session context, gets the same content, writes to the same memory store, and doesn't break the cache. Everything flush_memories claimed to preserve is already covered. 
What this removes: - AIAgent.flush_memories() method (~248 LOC in run_agent.py) - Pre-compression flush call in _compress_context - flush_memories call sites in cli.py (/new + exit) - GatewayRunner._flush_memories_for_session + _async_flush_memories (and the 3 call sites: session expiry watcher, /new, /resume) - 'flush_memories' entry from DEFAULT_CONFIG auxiliary tasks, hermes tools UI task list, auxiliary_client docstrings - _memory_flush_min_turns config + init - #15631's headroom-deduction math in _check_compression_model_feasibility (headroom was only needed because flush dragged the full main-agent system prompt along; the compression summariser sends a single user-role prompt so new_threshold = aux_context is safe again) - The dedicated test files and assertions that exercised flush-specific paths What this renames (with read-time backcompat on sessions.json): - SessionEntry.memory_flushed -> SessionEntry.expiry_finalized. The session-expiry watcher still uses the flag to avoid re-running finalize/eviction on the same expired session; the new name reflects what it now actually gates. from_dict() reads 'expiry_finalized' first, falls back to the legacy 'memory_flushed' key so existing sessions.json files upgrade seamlessly. Supersedes #15631 and #15638. Tested: 383 targeted tests pass across run_agent/, agent/, cli/, and gateway/ session-boundary suites. No behavior regressions — background memory review continues to handle persistent memory extraction on both CLI and gateway.
76 lines
2.7 KiB
Python
76 lines
2.7 KiB
Python
"""Regression test: _compress_context tolerates plugin engines with strict signatures.
|
|
|
|
The kwarg added to the ``ContextEngine.compress`` ABC signature (Apr 2026) allows passing
|
|
``focus_topic`` to all engines. Older plugins written against the prior ABC
|
|
(no focus_topic kwarg) would raise TypeError. _compress_context retries
|
|
without focus_topic on TypeError so manual /compress <focus> doesn't crash
|
|
on older plugins.
|
|
"""
|
|
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
|
|
from run_agent import AIAgent
|
|
|
|
|
|
def _make_agent_with_engine(engine):
    """Return a bare ``AIAgent`` whose ``context_compressor`` is *engine*.

    The instance is created with ``object.__new__`` so ``AIAgent.__init__``
    never runs; only the attributes assigned below are set explicitly.

    Args:
        engine: Object to install as ``agent.context_compressor``.

    Returns:
        The partially initialised ``AIAgent`` instance.
    """

    def _noop(*args, **kwargs):
        # Accept anything, do nothing.
        return None

    def _fake_system_prompt(*args, **kwargs):
        return "new-system-prompt"

    todo_store = MagicMock()
    todo_store.format_for_injection.return_value = ""

    # Insertion order mirrors the order in which the attributes are applied.
    attributes = {
        "context_compressor": engine,
        "session_id": "sess-1",
        "model": "test-model",
        "platform": "cli",
        "logs_dir": MagicMock(),
        "quiet_mode": True,
        "_todo_store": todo_store,
        "_memory_manager": None,
        "_session_db": None,
        "_cached_system_prompt": None,
        "log_prefix": "",
        "_vprint": _noop,
        "_last_flushed_db_idx": 0,
        # Stub the few AIAgent methods _compress_context uses.
        "_invalidate_system_prompt": _noop,
        "_build_system_prompt": _fake_system_prompt,
        "commit_memory_session": _noop,
    }

    stub_agent = object.__new__(AIAgent)
    for attr_name, attr_value in attributes.items():
        setattr(stub_agent, attr_name, attr_value)
    return stub_agent
|
|
|
|
|
|
def test_compress_context_falls_back_when_engine_rejects_focus_topic():
    """A strict pre-focus_topic ``compress()`` signature must not crash the caller.

    The engine below rejects ``focus_topic`` with ``TypeError``; the test
    replays the retry-without-``focus_topic`` pattern and checks that the
    engine body ran exactly once, with only ``current_tokens``.
    """
    captured_kwargs = []

    class _StrictOldPluginEngine:
        """Stand-in for a plugin written before ``focus_topic`` existed."""

        compression_count = 0

        def compress(self, messages, current_tokens=None):
            # Deliberately no focus_topic parameter: passing one raises
            # TypeError before this body ever runs.
            captured_kwargs.append({"current_tokens": current_tokens})
            return [messages[0], messages[-1]]

    legacy_engine = _StrictOldPluginEngine()
    agent = _make_agent_with_engine(legacy_engine)

    roles = ("user", "assistant", "user", "assistant")
    contents = ("one", "two", "three", "four")
    history = [
        {"role": role, "content": content}
        for role, content in zip(roles, contents)
    ]

    # Replay the compression call site: first attempt with focus_topic,
    # then retry without it once the strict signature raises TypeError.
    call_kwargs = {"current_tokens": 100, "focus_topic": "foo"}
    try:
        result = legacy_engine.compress(history, **call_kwargs)
    except TypeError:
        del call_kwargs["focus_topic"]
        result = legacy_engine.compress(history, **call_kwargs)

    # Only the retry reached the engine body, and it saw no focus_topic.
    first_message, *_, last_message = history
    assert result == [first_message, last_message]
    assert captured_kwargs == [{"current_tokens": 100}]
    # Keeps `agent` referenced so linters do not flag it as unused.
    assert agent.context_compressor is legacy_engine
|