Files
hermes-agent/tests/gateway/test_session_boundary_security_state.py

217 lines
8.0 KiB
Python
Raw Normal View History

"""Regression tests for approval-state cleanup on session boundaries."""
from datetime import datetime
refactor(memory): remove flush_memories entirely (#15696) The AIAgent.flush_memories pre-compression save, the gateway _flush_memories_for_session, and everything feeding them are obsolete now that the background memory/skill review handles persistent memory extraction. Problems with flush_memories: - Pre-dates the background review loop. It was the only memory-save path when introduced; the background review now fires every 10 user turns on CLI and gateway alike, which is far more frequent than compression or session reset ever triggered flush. - Blocking and synchronous. Pre-compression flush ran on the live agent before compression, blocking the user-visible response. - Cache-breaking. Flush built a temporary conversation prefix (system prompt + memory-only tool list) that diverged from the live conversation's cached prefix, invalidating prompt caching. The gateway variant spawned a fresh AIAgent with its own clean prompt for each finalized session — still cache-breaking, just in a different process. - Redundant. Background review runs in the live conversation's session context, gets the same content, writes to the same memory store, and doesn't break the cache. Everything flush_memories claimed to preserve is already covered. What this removes: - AIAgent.flush_memories() method (~248 LOC in run_agent.py) - Pre-compression flush call in _compress_context - flush_memories call sites in cli.py (/new + exit) - GatewayRunner._flush_memories_for_session + _async_flush_memories (and the 3 call sites: session expiry watcher, /new, /resume) - 'flush_memories' entry from DEFAULT_CONFIG auxiliary tasks, hermes tools UI task list, auxiliary_client docstrings - _memory_flush_min_turns config + init - #15631's headroom-deduction math in _check_compression_model_feasibility (headroom was only needed because flush dragged the full main-agent system prompt along; the compression summariser sends a single user-role prompt so new_threshold = aux_context is safe again) - The dedicated test files and assertions that exercised flush-specific paths What this renames (with read-time backcompat on sessions.json): - SessionEntry.memory_flushed -> SessionEntry.expiry_finalized. The session-expiry watcher still uses the flag to avoid re-running finalize/eviction on the same expired session; the new name reflects what it now actually gates. from_dict() reads 'expiry_finalized' first, falls back to the legacy 'memory_flushed' key so existing sessions.json files upgrade seamlessly. Supersedes #15631 and #15638. Tested: 383 targeted tests pass across run_agent/, agent/, cli/, and gateway/ session-boundary suites. No behavior regressions — background memory review continues to handle persistent memory extraction on both CLI and gateway.
2026-04-25 08:21:14 -07:00
from unittest.mock import MagicMock
import pytest
from gateway.config import Platform
from gateway.platforms.base import MessageEvent
from gateway.session import SessionEntry, SessionSource, build_session_key
from tools import approval as approval_mod
from tools.approval import (
approve_session,
enable_session_yolo,
is_approved,
is_session_yolo_enabled,
)
@pytest.fixture(autouse=True)
def _clear_approval_state():
approval_mod._gateway_queues.clear()
approval_mod._gateway_notify_cbs.clear()
approval_mod._session_approved.clear()
approval_mod._session_yolo.clear()
approval_mod._permanent_approved.clear()
approval_mod._pending.clear()
yield
approval_mod._gateway_queues.clear()
approval_mod._gateway_notify_cbs.clear()
approval_mod._session_approved.clear()
approval_mod._session_yolo.clear()
approval_mod._permanent_approved.clear()
approval_mod._pending.clear()
def _make_source() -> SessionSource:
return SessionSource(
platform=Platform.TELEGRAM,
user_id="u1",
chat_id="c1",
user_name="tester",
chat_type="dm",
)
def _make_event(text: str) -> MessageEvent:
return MessageEvent(text=text, source=_make_source(), message_id="m1")
def _make_entry(session_id: str, source: SessionSource | None = None) -> SessionEntry:
source = source or _make_source()
return SessionEntry(
session_key=build_session_key(source),
session_id=session_id,
created_at=datetime.now(),
updated_at=datetime.now(),
origin=source,
platform=source.platform,
chat_type=source.chat_type,
)
def _make_resume_runner():
from gateway.run import GatewayRunner
source = _make_source()
session_key = build_session_key(source)
current_entry = _make_entry("current-session", source)
resumed_entry = _make_entry("resumed-session", source)
runner = object.__new__(GatewayRunner)
runner.adapters = {}
runner._background_tasks = set()
runner._running_agents = {}
runner._running_agents_ts = {}
runner._busy_ack_ts = {}
runner._pending_approvals = {}
runner._update_prompt_pending = {}
runner._agent_cache_lock = None
runner.session_store = MagicMock()
runner.session_store.get_or_create_session.return_value = current_entry
runner.session_store.switch_session.return_value = resumed_entry
runner.session_store.load_transcript.return_value = []
runner._session_db = MagicMock()
runner._session_db.resolve_session_by_title.return_value = "resumed-session"
runner._session_db.get_session_title.return_value = "Resumed Work"
return runner, session_key
def _make_branch_runner():
from gateway.run import GatewayRunner
source = _make_source()
session_key = build_session_key(source)
current_entry = _make_entry("current-session", source)
branched_entry = _make_entry("branched-session", source)
runner = object.__new__(GatewayRunner)
runner.adapters = {}
runner.config = {}
runner._running_agents = {}
runner._running_agents_ts = {}
runner._busy_ack_ts = {}
runner._pending_approvals = {}
runner._update_prompt_pending = {}
runner._agent_cache_lock = None
runner.session_store = MagicMock()
runner.session_store.get_or_create_session.return_value = current_entry
runner.session_store.load_transcript.return_value = [
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "world"},
]
runner.session_store.switch_session.return_value = branched_entry
runner._session_db = MagicMock()
runner._session_db.get_session_title.return_value = "Current Work"
runner._session_db.get_next_title_in_lineage.return_value = "Current Work #2"
return runner, session_key
@pytest.mark.asyncio
async def test_resume_clears_session_scoped_approval_and_yolo_state():
runner, session_key = _make_resume_runner()
other_key = "agent:main:telegram:dm:other-chat"
approve_session(session_key, "recursive delete")
approve_session(other_key, "recursive delete")
enable_session_yolo(session_key)
enable_session_yolo(other_key)
runner._pending_approvals[session_key] = {"command": "rm -rf /tmp/demo"}
runner._pending_approvals[other_key] = {"command": "rm -rf /tmp/other"}
runner._update_prompt_pending[session_key] = True
runner._update_prompt_pending[other_key] = True
result = await runner._handle_resume_command(_make_event("/resume Resumed Work"))
assert "Resumed session" in result
assert is_approved(session_key, "recursive delete") is False
assert is_session_yolo_enabled(session_key) is False
assert session_key not in runner._pending_approvals
assert session_key not in runner._update_prompt_pending
assert is_approved(other_key, "recursive delete") is True
assert is_session_yolo_enabled(other_key) is True
assert other_key in runner._pending_approvals
assert other_key in runner._update_prompt_pending
@pytest.mark.asyncio
async def test_branch_clears_session_scoped_approval_and_yolo_state():
runner, session_key = _make_branch_runner()
other_key = "agent:main:telegram:dm:other-chat"
approve_session(session_key, "recursive delete")
approve_session(other_key, "recursive delete")
enable_session_yolo(session_key)
enable_session_yolo(other_key)
runner._pending_approvals[session_key] = {"command": "rm -rf /tmp/demo"}
runner._pending_approvals[other_key] = {"command": "rm -rf /tmp/other"}
runner._update_prompt_pending[session_key] = True
runner._update_prompt_pending[other_key] = True
result = await runner._handle_branch_command(_make_event("/branch"))
assert "Branched to" in result
assert is_approved(session_key, "recursive delete") is False
assert is_session_yolo_enabled(session_key) is False
assert session_key not in runner._pending_approvals
assert session_key not in runner._update_prompt_pending
assert is_approved(other_key, "recursive delete") is True
assert is_session_yolo_enabled(other_key) is True
assert other_key in runner._pending_approvals
assert other_key in runner._update_prompt_pending
def test_clear_session_boundary_security_state_is_scoped():
"""The helper must wipe only the target session's approval/yolo state.
Also exercises the /new reset path indirectly: /new calls this helper,
so if the helper is scoped correctly, /new's clearing is correct too.
"""
from gateway.run import GatewayRunner
runner = object.__new__(GatewayRunner)
runner._pending_approvals = {}
runner._update_prompt_pending = {}
source = _make_source()
session_key = build_session_key(source)
other_key = "agent:main:telegram:dm:other-chat"
approve_session(session_key, "recursive delete")
approve_session(other_key, "recursive delete")
enable_session_yolo(session_key)
enable_session_yolo(other_key)
runner._pending_approvals[session_key] = {"command": "rm -rf /tmp/demo"}
runner._pending_approvals[other_key] = {"command": "rm -rf /tmp/other"}
runner._update_prompt_pending[session_key] = True
runner._update_prompt_pending[other_key] = True
runner._clear_session_boundary_security_state(session_key)
# Target session cleared
assert is_approved(session_key, "recursive delete") is False
assert is_session_yolo_enabled(session_key) is False
assert session_key not in runner._pending_approvals
assert session_key not in runner._update_prompt_pending
# Other session untouched
assert is_approved(other_key, "recursive delete") is True
assert is_session_yolo_enabled(other_key) is True
assert other_key in runner._pending_approvals
assert other_key in runner._update_prompt_pending
# Empty session_key is a no-op
runner._clear_session_boundary_security_state("")
assert is_approved(other_key, "recursive delete") is True
assert other_key in runner._update_prompt_pending