mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fixup! fix(gateway): preserve inactivity clock on interrupt-recursive cached-agent turns (#15654)
Address Copilot review findings: 1. Gate _last_activity_desc on interrupt_depth == 0 alongside _last_activity_ts. Both fields are semantically paired — desc describes the activity *at* ts. Updating desc without ts made get_activity_summary() report "starting new turn (cached)" for 20+ minutes while the timestamp showed the true stale duration, producing misleading diagnostic output. 2. Monkeypatch gateway.run.time.time to a fixed epoch in tests that assert on _last_activity_ts values. Real time.time() comparisons were latently flaky under slow CI or NTP adjustments. _FAKE_NOW = 10_000.0 is used as the reference; assertions are now exact equality rather than >=. 3. Add test_fresh_turn_resets_desc and test_interrupt_turn_preserves_desc to directly cover the gated desc behaviour introduced by (1). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8726,16 +8726,19 @@ class GatewayRunner:
|
||||
def _init_cached_agent_for_turn(agent: Any, interrupt_depth: int) -> None:
|
||||
"""Reset per-turn state on a cached agent before a new turn starts.
|
||||
|
||||
_last_activity_ts is only reset for fresh external turns (depth 0).
|
||||
For interrupt-recursive turns the timestamp is preserved so the
|
||||
inactivity watchdog can accumulate stuck-turn idle time and fire
|
||||
the 30-min timeout (#15654). The depth-0 reset is still needed:
|
||||
a session idle for 29 min would otherwise trip the watchdog before
|
||||
the new turn makes its first API call (#9051).
|
||||
Both _last_activity_ts and _last_activity_desc are only reset for
|
||||
fresh external turns (depth 0); they are semantically paired —
|
||||
desc describes the activity *at* ts, so updating one without the
|
||||
other would make get_activity_summary() misleading.
|
||||
For interrupt-recursive turns both are preserved so the inactivity
|
||||
watchdog can accumulate stuck-turn idle time and fire the 30-min
|
||||
timeout (#15654). The depth-0 reset is still needed: a session
|
||||
idle for 29 min would otherwise trip the watchdog before the new
|
||||
turn makes its first API call (#9051).
|
||||
"""
|
||||
if interrupt_depth == 0:
|
||||
agent._last_activity_ts = time.time()
|
||||
agent._last_activity_desc = "starting new turn (cached)"
|
||||
agent._last_activity_desc = "starting new turn (cached)"
|
||||
agent._api_call_count = 0
|
||||
|
||||
def _release_evicted_agent_soft(self, agent: Any) -> None:
|
||||
|
||||
@@ -1045,6 +1045,9 @@ class TestAgentCacheIdleResume:
|
||||
pass
|
||||
|
||||
|
||||
_FAKE_NOW = 10_000.0 # Fixed epoch for deterministic time assertions
|
||||
|
||||
|
||||
class TestCachedAgentInactivityReset:
|
||||
"""Inactivity-clock reset must be gated on _interrupt_depth == 0.
|
||||
|
||||
@@ -1057,9 +1060,8 @@ class TestCachedAgentInactivityReset:
|
||||
"""
|
||||
|
||||
def _fake_agent(self, stale_seconds: float = 1800.0):
|
||||
import time as _t
|
||||
m = MagicMock()
|
||||
m._last_activity_ts = _t.time() - stale_seconds
|
||||
m._last_activity_ts = _FAKE_NOW - stale_seconds
|
||||
m._api_call_count = 10
|
||||
m._last_activity_desc = "previous turn activity"
|
||||
return m
|
||||
@@ -1067,22 +1069,34 @@ class TestCachedAgentInactivityReset:
|
||||
def test_fresh_turn_resets_idle_clock(self):
|
||||
"""interrupt_depth=0: clock resets so a post-idle turn gets a
|
||||
fresh 30-min inactivity window (guard for #9051)."""
|
||||
import time as _t
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
agent = self._fake_agent(stale_seconds=1800.0)
|
||||
old_ts = agent._last_activity_ts
|
||||
before = _t.time()
|
||||
|
||||
GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=0)
|
||||
with patch("gateway.run.time") as mock_time:
|
||||
mock_time.time.return_value = _FAKE_NOW
|
||||
GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=0)
|
||||
|
||||
assert agent._last_activity_ts >= before, (
|
||||
assert agent._last_activity_ts == _FAKE_NOW, (
|
||||
"_last_activity_ts was not reset on a fresh turn (interrupt_depth=0)"
|
||||
)
|
||||
assert agent._last_activity_ts > old_ts, (
|
||||
"Stale idle time should be cleared so the new turn gets a fresh window"
|
||||
)
|
||||
|
||||
def test_fresh_turn_resets_desc(self):
|
||||
"""interrupt_depth=0: description is updated to reflect the new turn."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
agent = self._fake_agent()
|
||||
|
||||
with patch("gateway.run.time") as mock_time:
|
||||
mock_time.time.return_value = _FAKE_NOW
|
||||
GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=0)
|
||||
|
||||
assert agent._last_activity_desc == "starting new turn (cached)"
|
||||
|
||||
def test_interrupt_turn_preserves_idle_clock(self):
|
||||
"""interrupt_depth=1: clock preserved so accumulated stuck-turn
|
||||
idle time is not discarded by an interrupt-recursive re-entry (#15654)."""
|
||||
@@ -1098,6 +1112,19 @@ class TestCachedAgentInactivityReset:
|
||||
"(interrupt_depth>0) — the watchdog needs the accumulated idle time"
|
||||
)
|
||||
|
||||
def test_interrupt_turn_preserves_desc(self):
|
||||
"""interrupt_depth=1: desc preserved — it is semantically paired with ts."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
agent = self._fake_agent(stale_seconds=1200.0)
|
||||
|
||||
GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1)
|
||||
|
||||
assert agent._last_activity_desc == "previous turn activity", (
|
||||
"_last_activity_desc must not change on interrupt-recursive turns; "
|
||||
"it describes the activity *at* _last_activity_ts"
|
||||
)
|
||||
|
||||
def test_deep_interrupt_recursion_preserves_idle_clock(self):
|
||||
"""interrupt_depth=MAX-1: clock still preserved at any non-zero depth."""
|
||||
from gateway.run import GatewayRunner
|
||||
@@ -1116,7 +1143,9 @@ class TestCachedAgentInactivityReset:
|
||||
agent_fresh = self._fake_agent()
|
||||
agent_interrupted = self._fake_agent()
|
||||
|
||||
GatewayRunner._init_cached_agent_for_turn(agent_fresh, interrupt_depth=0)
|
||||
with patch("gateway.run.time") as mock_time:
|
||||
mock_time.time.return_value = _FAKE_NOW
|
||||
GatewayRunner._init_cached_agent_for_turn(agent_fresh, interrupt_depth=0)
|
||||
GatewayRunner._init_cached_agent_for_turn(agent_interrupted, interrupt_depth=1)
|
||||
|
||||
assert agent_fresh._api_call_count == 0
|
||||
@@ -1128,7 +1157,6 @@ class TestCachedAgentInactivityReset:
|
||||
The idle time seen by the watchdog must reflect the full stuck
|
||||
duration, not restart from zero on the recursive re-entry.
|
||||
"""
|
||||
import time as _t
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
STUCK_FOR = 1750.0
|
||||
@@ -1139,7 +1167,7 @@ class TestCachedAgentInactivityReset:
|
||||
GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1)
|
||||
|
||||
# Watchdog sees time.time() - _last_activity_ts ≥ STUCK_FOR.
|
||||
idle_secs = _t.time() - agent._last_activity_ts
|
||||
idle_secs = _FAKE_NOW - agent._last_activity_ts
|
||||
assert idle_secs >= STUCK_FOR - 1.0, (
|
||||
f"Watchdog would see {idle_secs:.0f}s idle, expected ~{STUCK_FOR}s. "
|
||||
"Inactivity timeout could not fire for a stuck interrupted turn."
|
||||
|
||||
Reference in New Issue
Block a user