fix(gateway): prevent stale memory overwrites by flush agent (#2670)

The gateway memory flush agent reviews old conversation history on session reset/expiry and writes to memory. It had no awareness of memory changes made after that conversation ended (by the live agent, cron jobs, or other sessions), causing silent overwrites of newer entries. Two fixes: 1. Skip memory flush entirely for cron sessions (session IDs starting with 'cron_'). Cron sessions are headless with no meaningful user conversation to extract memories from. 2. Inject the current live memory state (MEMORY.md + USER.md) directly into the flush prompt. The flush agent can now see what's already saved and make informed decisions — only adding genuinely new information rather than blindly overwriting entries that may have been updated since the conversation ended. Addresses the root cause identified in #2670: the flush agent was making memory decisions blind to the current state of memory, causing stale context to overwrite newer entries on gateway restarts and session resets. Co-authored-by: devorun <devorun@users.noreply.github.com> Co-authored-by: dlkakbs <dlkakbs@users.noreply.github.com>
2026-05-05 02:07:34 +08:00 · 2026-03-23 16:05:35 -07:00
parent f9c2565ab4
commit bc5a67dbf2
2 changed files with 204 additions and 0 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -525,6 +525,12 @@ class GatewayRunner:
        Synchronous worker — meant to be called via run_in_executor from
        an async context so it doesn't block the event loop.
        """
+        # Skip cron sessions — they run headless with no meaningful user
+        # conversation to extract memories from.
+        if old_session_id and old_session_id.startswith("cron_"):
+            logger.debug("Skipping memory flush for cron session: %s", old_session_id)
+            return
+
        try:
            history = self.session_store.load_transcript(old_session_id)
            if not history or len(history) < 4:
@@ -557,6 +563,23 @@ class GatewayRunner:
                if m.get("role") in ("user", "assistant") and m.get("content")
            ]

+            # Read live memory state from disk so the flush agent can see
+            # what's already saved and avoid overwriting newer entries.
+            _current_memory = ""
+            try:
+                from tools.memory_tool import MEMORY_DIR
+                for fname, label in [
+                    ("MEMORY.md", "MEMORY (your personal notes)"),
+                    ("USER.md", "USER PROFILE (who the user is)"),
+                ]:
+                    fpath = MEMORY_DIR / fname
+                    if fpath.exists():
+                        content = fpath.read_text(encoding="utf-8").strip()
+                        if content:
+                            _current_memory += f"\n\n## Current {label}:\n{content}"
+            except Exception:
+                pass  # Non-fatal — flush still works, just without the guard
+
            # Give the agent a real turn to think about what to save
            flush_prompt = (
                "[System: This session is about to be automatically reset due to "
@@ -568,6 +591,20 @@ class GatewayRunner:
                "2. If you discovered a reusable workflow or solved a non-trivial "
                "problem, consider saving it as a skill.\n"
                "3. If nothing is worth saving, that's fine — just skip.\n\n"
+            )
+
+            if _current_memory:
+                flush_prompt += (
+                    "IMPORTANT — here is the current live state of memory. Other "
+                    "sessions, cron jobs, or the user may have updated it since this "
+                    "conversation ended. Do NOT overwrite or remove entries unless "
+                    "the conversation above reveals something that genuinely "
+                    "supersedes them. Only add new information that is not already "
+                    "captured below."
+                    f"{_current_memory}\n\n"
+                )
+
+            flush_prompt += (
                "Do NOT respond to the user. Just use the memory and skill_manage "
                "tools if needed, then stop.]"
            )