Files
hermes-agent/tests/cli/test_cli_new_session.py
Teknium ea01bdcebe refactor(memory): remove flush_memories entirely (#15696)
The AIAgent.flush_memories pre-compression save, the gateway
_flush_memories_for_session, and everything feeding them are
obsolete now that the background memory/skill review handles
persistent memory extraction.

Problems with flush_memories:

- Pre-dates the background review loop.  It was the only memory-save
  path when introduced; the background review now fires every 10 user
  turns on CLI and gateway alike, which is far more frequent than
  compression or session reset ever triggered flush.
- Blocking and synchronous.  Pre-compression flush ran on the live agent
  before compression, blocking the user-visible response.
- Cache-breaking.  Flush built a temporary conversation prefix
  (system prompt + memory-only tool list) that diverged from the live
  conversation's cached prefix, invalidating prompt caching.  The
  gateway variant spawned a fresh AIAgent with its own clean prompt
  for each finalized session — still cache-breaking, just in a
  different process.
- Redundant.  Background review runs in the live conversation's
  session context, gets the same content, writes to the same memory
  store, and doesn't break the cache.  Everything flush_memories
  claimed to preserve is already covered.

What this removes:

- AIAgent.flush_memories() method (~248 LOC in run_agent.py)
- Pre-compression flush call in _compress_context
- flush_memories call sites in cli.py (/new + exit)
- GatewayRunner._flush_memories_for_session + _async_flush_memories
  (and the 3 call sites: session expiry watcher, /new, /resume)
- 'flush_memories' entry from DEFAULT_CONFIG auxiliary tasks,
  hermes tools UI task list, auxiliary_client docstrings
- _memory_flush_min_turns config + init
- #15631's headroom-deduction math in
  _check_compression_model_feasibility (headroom was only needed
  because flush dragged the full main-agent system prompt along;
  the compression summariser sends a single user-role prompt so
  new_threshold = aux_context is safe again)
- The dedicated test files and assertions that exercised
  flush-specific paths

What this renames (with read-time backcompat on sessions.json):

- SessionEntry.memory_flushed -> SessionEntry.expiry_finalized.
  The session-expiry watcher still uses the flag to avoid re-running
  finalize/eviction on the same expired session; the new name
  reflects what it now actually gates.  from_dict() reads
  'expiry_finalized' first, falls back to the legacy 'memory_flushed'
  key so existing sessions.json files upgrade seamlessly.

Supersedes #15631 and #15638.

Tested: 383 targeted tests pass across run_agent/, agent/, cli/,
and gateway/ session-boundary suites.  No behavior regressions —
background memory review continues to handle persistent memory
extraction on both CLI and gateway.
2026-04-25 08:21:14 -07:00

222 lines
8.1 KiB
Python

"""Regression tests for CLI fresh-session commands."""
from __future__ import annotations
import importlib
import os
import sys
from datetime import timedelta
from unittest.mock import MagicMock, patch
from hermes_state import SessionDB
from tools.todo_tool import TodoStore
class _FakeCompressor:
"""Minimal stand-in for ContextCompressor."""
def __init__(self):
self.last_prompt_tokens = 500
self.last_completion_tokens = 200
self.last_total_tokens = 700
self.compression_count = 3
self._context_probed = True
class _FakeAgent:
def __init__(self, session_id: str, session_start):
self.session_id = session_id
self.session_start = session_start
self.model = "anthropic/claude-opus-4.6"
self._last_flushed_db_idx = 7
self._todo_store = TodoStore()
self._todo_store.write(
[{"id": "t1", "content": "unfinished task", "status": "in_progress"}]
)
self.commit_memory_session = MagicMock()
self._invalidate_system_prompt = MagicMock()
# Token counters (non-zero to verify reset)
self.session_total_tokens = 1000
self.session_input_tokens = 600
self.session_output_tokens = 400
self.session_prompt_tokens = 550
self.session_completion_tokens = 350
self.session_cache_read_tokens = 100
self.session_cache_write_tokens = 50
self.session_reasoning_tokens = 80
self.session_api_calls = 5
self.session_estimated_cost_usd = 0.42
self.session_cost_status = "estimated"
self.session_cost_source = "openrouter"
self.context_compressor = _FakeCompressor()
def reset_session_state(self):
"""Mirror the real AIAgent.reset_session_state()."""
self.session_total_tokens = 0
self.session_input_tokens = 0
self.session_output_tokens = 0
self.session_prompt_tokens = 0
self.session_completion_tokens = 0
self.session_cache_read_tokens = 0
self.session_cache_write_tokens = 0
self.session_reasoning_tokens = 0
self.session_api_calls = 0
self.session_estimated_cost_usd = 0.0
self.session_cost_status = "unknown"
self.session_cost_source = "none"
if hasattr(self, "context_compressor") and self.context_compressor:
self.context_compressor.last_prompt_tokens = 0
self.context_compressor.last_completion_tokens = 0
self.context_compressor.last_total_tokens = 0
self.context_compressor.compression_count = 0
self.context_compressor._context_probed = False
def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
"""Create a HermesCLI instance with minimal mocking."""
_clean_config = {
"model": {
"default": "anthropic/claude-opus-4.6",
"base_url": "https://openrouter.ai/api/v1",
"provider": "auto",
},
"display": {"compact": False, "tool_progress": "all"},
"agent": {},
"terminal": {"env_type": "local"},
}
if config_overrides:
_clean_config.update(config_overrides)
clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
if env_overrides:
clean_env.update(env_overrides)
prompt_toolkit_stubs = {
"prompt_toolkit": MagicMock(),
"prompt_toolkit.history": MagicMock(),
"prompt_toolkit.styles": MagicMock(),
"prompt_toolkit.patch_stdout": MagicMock(),
"prompt_toolkit.application": MagicMock(),
"prompt_toolkit.layout": MagicMock(),
"prompt_toolkit.layout.processors": MagicMock(),
"prompt_toolkit.filters": MagicMock(),
"prompt_toolkit.layout.dimension": MagicMock(),
"prompt_toolkit.layout.menus": MagicMock(),
"prompt_toolkit.widgets": MagicMock(),
"prompt_toolkit.key_binding": MagicMock(),
"prompt_toolkit.completion": MagicMock(),
"prompt_toolkit.formatted_text": MagicMock(),
"prompt_toolkit.auto_suggest": MagicMock(),
}
with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict(
"os.environ", clean_env, clear=False
):
import cli as _cli_mod
_cli_mod = importlib.reload(_cli_mod)
with patch.object(_cli_mod, "get_tool_definitions", return_value=[]), patch.dict(
_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}
):
return _cli_mod.HermesCLI(**kwargs)
def _prepare_cli_with_active_session(tmp_path):
cli = _make_cli()
cli._session_db = SessionDB(db_path=tmp_path / "state.db")
cli._session_db.create_session(session_id=cli.session_id, source="cli", model=cli.model)
cli.agent = _FakeAgent(cli.session_id, cli.session_start)
cli.conversation_history = [{"role": "user", "content": "hello"}]
old_session_start = cli.session_start - timedelta(seconds=1)
cli.session_start = old_session_start
cli.agent.session_start = old_session_start
return cli
def test_new_command_creates_real_fresh_session_and_resets_agent_state(tmp_path):
cli = _prepare_cli_with_active_session(tmp_path)
old_session_id = cli.session_id
old_session_start = cli.session_start
cli.process_command("/new")
assert cli.session_id != old_session_id
old_session = cli._session_db.get_session(old_session_id)
assert old_session is not None
assert old_session["end_reason"] == "new_session"
new_session = cli._session_db.get_session(cli.session_id)
assert new_session is not None
cli._session_db.append_message(cli.session_id, role="user", content="next turn")
assert cli.agent.session_id == cli.session_id
assert cli.agent._last_flushed_db_idx == 0
assert cli.agent._todo_store.read() == []
assert cli.session_start > old_session_start
assert cli.agent.session_start == cli.session_start
cli.agent._invalidate_system_prompt.assert_called_once()
def test_reset_command_is_alias_for_new_session(tmp_path):
cli = _prepare_cli_with_active_session(tmp_path)
old_session_id = cli.session_id
cli.process_command("/reset")
assert cli.session_id != old_session_id
assert cli._session_db.get_session(old_session_id)["end_reason"] == "new_session"
assert cli._session_db.get_session(cli.session_id) is not None
def test_clear_command_starts_new_session_before_redrawing(tmp_path):
cli = _prepare_cli_with_active_session(tmp_path)
cli.console = MagicMock()
cli.show_banner = MagicMock()
old_session_id = cli.session_id
cli.process_command("/clear")
assert cli.session_id != old_session_id
assert cli._session_db.get_session(old_session_id)["end_reason"] == "new_session"
assert cli._session_db.get_session(cli.session_id) is not None
cli.console.clear.assert_called_once()
cli.show_banner.assert_called_once()
assert cli.conversation_history == []
def test_new_session_resets_token_counters(tmp_path):
"""Regression test for #2099: /new must zero all token counters."""
cli = _prepare_cli_with_active_session(tmp_path)
# Verify counters are non-zero before reset
agent = cli.agent
assert agent.session_total_tokens > 0
assert agent.session_api_calls > 0
assert agent.context_compressor.compression_count > 0
cli.process_command("/new")
# All agent token counters must be zero
assert agent.session_total_tokens == 0
assert agent.session_input_tokens == 0
assert agent.session_output_tokens == 0
assert agent.session_prompt_tokens == 0
assert agent.session_completion_tokens == 0
assert agent.session_cache_read_tokens == 0
assert agent.session_cache_write_tokens == 0
assert agent.session_reasoning_tokens == 0
assert agent.session_api_calls == 0
assert agent.session_estimated_cost_usd == 0.0
assert agent.session_cost_status == "unknown"
assert agent.session_cost_source == "none"
# Context compressor counters must also be zero
comp = agent.context_compressor
assert comp.last_prompt_tokens == 0
assert comp.last_completion_tokens == 0
assert comp.last_total_tokens == 0
assert comp.compression_count == 0
assert comp._context_probed is False