feat(honcho): dialectic liveness — stale-thread watchdog, stale-result discard, empty-streak backoff

Hardens the dialectic lifecycle against three failure modes that could
leave the prefetch pipeline stuck or injecting stale content:

- Stale-thread watchdog: _thread_is_live() treats any prefetch thread
  older than timeout × 2.0 as dead. A hung Honcho call can no longer
  block subsequent fires indefinitely.

- Stale-result discard: pending _prefetch_result is tagged with its
  fire turn. prefetch() discards the result if more than cadence × 2
  turns passed before a consumer read it (e.g. a run of trivial-prompt
  turns between fire and read).

- Empty-streak backoff: consecutive empty dialectic returns widen the
  effective cadence (dialectic_cadence + streak, capped at cadence × 8).
  A healthy fire resets the streak. Prevents the plugin from hammering
  the backend every turn when the peer graph is cold.

- liveness_snapshot() on the provider exposes current turn, last fire,
  pending fire-at, empty streak, effective cadence, and thread status
  for in-process diagnostics.

- system_prompt_block: nudge the model that honcho_reasoning accepts
  reasoning_level minimal/low/medium/high/max per call.

- hermes honcho status: surface base reasoning level, cap, and heuristic
  toggle so config drift is visible at a glance.

Tests: 550 passed.
- TestDialecticLiveness (8 tests): stale-thread recovery, stale-result
  discard, fresh-result retention, backoff widening, backoff ceiling,
  streak reset on success, streak increment on empty, snapshot shape.
- Existing TestDialecticCadenceAdvancesOnSuccess::test_in_flight_thread_is_not_stacked
  updated to set _prefetch_thread_started_at so it tests the
  fresh-thread-blocks branch (stale path covered separately).
- test_cli TestCmdStatus fake updated with the new config attrs surfaced
  in the status block.
This commit is contained in:
Erosika
2026-04-18 13:07:09 -04:00
committed by kshitij
parent 098efde848
commit c630dfcdac
4 changed files with 266 additions and 16 deletions

View File

@@ -19,6 +19,7 @@ import json
import logging import logging
import re import re
import threading import threading
import time
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from agent.memory_provider import MemoryProvider from agent.memory_provider import MemoryProvider
@@ -214,6 +215,11 @@ class HonchoMemoryProvider(MemoryProvider):
self._last_context_turn = -999 self._last_context_turn = -999
self._last_dialectic_turn = -999 self._last_dialectic_turn = -999
# Liveness + observability state
self._prefetch_thread_started_at: float = 0.0 # monotonic ts of current thread
self._prefetch_result_fired_at: int = -999 # turn the pending result was fired at
self._dialectic_empty_streak: int = 0 # consecutive empty returns
# Port #1957: lazy session init for tools-only mode # Port #1957: lazy session init for tools-only mode
self._session_initialized = False self._session_initialized = False
self._lazy_init_kwargs: Optional[dict] = None self._lazy_init_kwargs: Optional[dict] = None
@@ -413,13 +419,19 @@ class HonchoMemoryProvider(MemoryProvider):
r = self._run_dialectic_depth(_prewarm_query) r = self._run_dialectic_depth(_prewarm_query)
except Exception as exc: except Exception as exc:
logger.debug("Honcho dialectic prewarm failed: %s", exc) logger.debug("Honcho dialectic prewarm failed: %s", exc)
self._dialectic_empty_streak += 1
return return
if r and r.strip(): if r and r.strip():
with self._prefetch_lock: with self._prefetch_lock:
self._prefetch_result = r self._prefetch_result = r
self._prefetch_result_fired_at = 0
# Treat prewarm as turn 0 so cadence gating starts clean. # Treat prewarm as turn 0 so cadence gating starts clean.
self._last_dialectic_turn = 0 self._last_dialectic_turn = 0
self._dialectic_empty_streak = 0
else:
self._dialectic_empty_streak += 1
self._prefetch_thread_started_at = time.monotonic()
self._prefetch_thread = threading.Thread( self._prefetch_thread = threading.Thread(
target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic" target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic"
) )
@@ -513,7 +525,8 @@ class HonchoMemoryProvider(MemoryProvider):
"# Honcho Memory\n" "# Honcho Memory\n"
"Active (tools-only mode). Use honcho_profile for a quick factual snapshot, " "Active (tools-only mode). Use honcho_profile for a quick factual snapshot, "
"honcho_search for raw excerpts, honcho_context for raw peer context, " "honcho_search for raw excerpts, honcho_context for raw peer context, "
"honcho_reasoning for synthesized answers, " "honcho_reasoning for synthesized answers (pass reasoning_level "
"minimal/low/medium/high/max — you pick the depth per call), "
"honcho_conclude to save facts about the user. " "honcho_conclude to save facts about the user. "
"No automatic context injection — you must use tools to access memory." "No automatic context injection — you must use tools to access memory."
) )
@@ -523,7 +536,8 @@ class HonchoMemoryProvider(MemoryProvider):
"Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. " "Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. "
"Use honcho_profile for a quick factual snapshot, " "Use honcho_profile for a quick factual snapshot, "
"honcho_search for raw excerpts, honcho_context for raw peer context, " "honcho_search for raw excerpts, honcho_context for raw peer context, "
"honcho_reasoning for synthesized answers, " "honcho_reasoning for synthesized answers (pass reasoning_level "
"minimal/low/medium/high/max — you pick the depth per call), "
"honcho_conclude to save facts about the user." "honcho_conclude to save facts about the user."
) )
@@ -611,14 +625,20 @@ class HonchoMemoryProvider(MemoryProvider):
r = self._run_dialectic_depth(query) r = self._run_dialectic_depth(query)
except Exception as exc: except Exception as exc:
logger.debug("Honcho first-turn dialectic failed: %s", exc) logger.debug("Honcho first-turn dialectic failed: %s", exc)
self._dialectic_empty_streak += 1
return return
if r and r.strip(): if r and r.strip():
with self._prefetch_lock: with self._prefetch_lock:
self._prefetch_result = r self._prefetch_result = r
self._prefetch_result_fired_at = _fired_at
# Advance cadence only on a non-empty result so the next # Advance cadence only on a non-empty result so the next
# turn retries when the call returned nothing. # turn retries when the call returned nothing.
self._last_dialectic_turn = _fired_at self._last_dialectic_turn = _fired_at
self._dialectic_empty_streak = 0
else:
self._dialectic_empty_streak += 1
self._prefetch_thread_started_at = time.monotonic()
self._prefetch_thread = threading.Thread( self._prefetch_thread = threading.Thread(
target=_run_first_turn, daemon=True, name="honcho-prefetch-first" target=_run_first_turn, daemon=True, name="honcho-prefetch-first"
) )
@@ -635,7 +655,21 @@ class HonchoMemoryProvider(MemoryProvider):
self._prefetch_thread.join(timeout=3.0) self._prefetch_thread.join(timeout=3.0)
with self._prefetch_lock: with self._prefetch_lock:
dialectic_result = self._prefetch_result dialectic_result = self._prefetch_result
fired_at = self._prefetch_result_fired_at
self._prefetch_result = "" self._prefetch_result = ""
self._prefetch_result_fired_at = -999
# Discard stale pending results: if the fire happened more than
# cadence × multiplier turns ago (e.g. a run of trivial-prompt turns
# passed without consumption), the content likely no longer tracks
# the current conversational pivot.
stale_limit = self._dialectic_cadence * self._STALE_RESULT_MULTIPLIER
if dialectic_result and fired_at >= 0 and (self._turn_count - fired_at) > stale_limit:
logger.debug(
"Honcho pending dialectic discarded as stale: fired_at=%d, "
"turn=%d, limit=%d", fired_at, self._turn_count, stale_limit,
)
dialectic_result = ""
if dialectic_result and dialectic_result.strip(): if dialectic_result and dialectic_result.strip():
parts.append(dialectic_result) parts.append(dialectic_result)
@@ -693,18 +727,23 @@ class HonchoMemoryProvider(MemoryProvider):
logger.debug("Honcho context prefetch failed: %s", e) logger.debug("Honcho context prefetch failed: %s", e)
# ----- Dialectic prefetch (supplement layer) ----- # ----- Dialectic prefetch (supplement layer) -----
# Guard against thread pile-up: if a prior dialectic is still in flight, # Thread-alive guard with stale-thread recovery: a hung Honcho call
# let it finish instead of stacking races on _prefetch_result. # older than timeout × multiplier is treated as dead so it can't
if self._prefetch_thread and self._prefetch_thread.is_alive(): # block subsequent fires.
if self._thread_is_live():
logger.debug("Honcho dialectic prefetch skipped: prior thread still running") logger.debug("Honcho dialectic prefetch skipped: prior thread still running")
return return
# B5: cadence check — skip if too soon since last *successful* dialectic call. # Cadence gate, widened by the empty-streak backoff so a persistently
# The gate applies uniformly (including cadence=1): "every turn" means once # silent backend doesn't retry every turn forever.
# per turn, not twice on the same turn when first-turn sync already fired. effective = self._effective_cadence()
if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence: if (self._turn_count - self._last_dialectic_turn) < effective:
logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d", logger.debug(
self._dialectic_cadence, self._turn_count - self._last_dialectic_turn) "Honcho dialectic prefetch skipped: effective cadence %d "
"(base %d, empty streak %d), turns since last: %d",
effective, self._dialectic_cadence, self._dialectic_empty_streak,
self._turn_count - self._last_dialectic_turn,
)
return return
# Cadence advances only on a non-empty result so empty returns # Cadence advances only on a non-empty result so empty returns
@@ -716,12 +755,18 @@ class HonchoMemoryProvider(MemoryProvider):
result = self._run_dialectic_depth(query) result = self._run_dialectic_depth(query)
except Exception as e: except Exception as e:
logger.debug("Honcho prefetch failed: %s", e) logger.debug("Honcho prefetch failed: %s", e)
self._dialectic_empty_streak += 1
return return
if result and result.strip(): if result and result.strip():
with self._prefetch_lock: with self._prefetch_lock:
self._prefetch_result = result self._prefetch_result = result
self._prefetch_result_fired_at = _fired_at
self._last_dialectic_turn = _fired_at self._last_dialectic_turn = _fired_at
self._dialectic_empty_streak = 0
else:
self._dialectic_empty_streak += 1
self._prefetch_thread_started_at = time.monotonic()
self._prefetch_thread = threading.Thread( self._prefetch_thread = threading.Thread(
target=_run, daemon=True, name="honcho-prefetch" target=_run, daemon=True, name="honcho-prefetch"
) )
@@ -750,6 +795,59 @@ class HonchoMemoryProvider(MemoryProvider):
_HEURISTIC_LENGTH_MEDIUM = 120 _HEURISTIC_LENGTH_MEDIUM = 120
_HEURISTIC_LENGTH_HIGH = 400 _HEURISTIC_LENGTH_HIGH = 400
# Liveness constants. A thread older than timeout × multiplier is treated
# as dead so a hung Honcho call can't block future retries indefinitely.
_STALE_THREAD_MULTIPLIER = 2.0
# Pending result whose fire-turn is older than cadence × multiplier is
# discarded on read so we don't inject context for a stale conversational
# pivot after a gap of trivial-prompt turns.
_STALE_RESULT_MULTIPLIER = 2
# Cap on the empty-streak backoff so a persistently silent backend
# eventually settles on a ceiling instead of unbounded widening.
_BACKOFF_MAX = 8
def _thread_is_live(self) -> bool:
"""Thread-alive guard that treats threads older than the stale
threshold as dead, so a hung Honcho request can't block new fires."""
if not self._prefetch_thread or not self._prefetch_thread.is_alive():
return False
timeout = (self._config.timeout if self._config and self._config.timeout else 8.0)
age = time.monotonic() - self._prefetch_thread_started_at
if age > timeout * self._STALE_THREAD_MULTIPLIER:
logger.debug(
"Honcho prefetch thread age %.1fs exceeds stale threshold "
"%.1fs — treating as dead", age, timeout * self._STALE_THREAD_MULTIPLIER,
)
return False
return True
def _effective_cadence(self) -> int:
"""Cadence plus empty-streak backoff, capped at _BACKOFF_MAX × base."""
if self._dialectic_empty_streak <= 0:
return self._dialectic_cadence
widened = self._dialectic_cadence + self._dialectic_empty_streak
ceiling = self._dialectic_cadence * self._BACKOFF_MAX
return min(widened, ceiling)
def liveness_snapshot(self) -> dict:
"""In-process snapshot of dialectic liveness state for diagnostics.
Returns current turn, last successful dialectic turn, pending-result
fire turn, empty streak, effective cadence, and thread status.
"""
thread_age = None
if self._prefetch_thread and self._prefetch_thread.is_alive():
thread_age = time.monotonic() - self._prefetch_thread_started_at
return {
"turn_count": self._turn_count,
"last_dialectic_turn": self._last_dialectic_turn,
"pending_result_fired_at": self._prefetch_result_fired_at,
"empty_streak": self._dialectic_empty_streak,
"effective_cadence": self._effective_cadence(),
"thread_alive": thread_age is not None,
"thread_age_seconds": thread_age,
}
def _apply_reasoning_heuristic(self, base: str, query: str) -> str: def _apply_reasoning_heuristic(self, base: str, query: str) -> str:
"""Scale `base` up by query length, clamped at reasoning_level_cap. """Scale `base` up by query length, clamped at reasoning_level_cap.

View File

@@ -638,6 +638,9 @@ def cmd_status(args) -> None:
raw = getattr(hcfg, "raw", None) or {} raw = getattr(hcfg, "raw", None) or {}
dialectic_cadence = raw.get("dialecticCadence") or 1 dialectic_cadence = raw.get("dialecticCadence") or 1
print(f" Dialectic cad: every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}") print(f" Dialectic cad: every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}")
reasoning_cap = raw.get("reasoningLevelCap") or hcfg.reasoning_level_cap
heuristic_on = "on" if hcfg.reasoning_heuristic else "off"
print(f" Reasoning: base={hcfg.dialectic_reasoning_level}, cap={reasoning_cap}, heuristic={heuristic_on}")
print(f" Observation: user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})") print(f" Observation: user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})")
print(f" Write freq: {hcfg.write_frequency}") print(f" Write freq: {hcfg.write_frequency}")

View File

@@ -26,6 +26,9 @@ class TestCmdStatus:
write_frequency = "async" write_frequency = "async"
session_strategy = "per-session" session_strategy = "per-session"
context_tokens = 800 context_tokens = 800
dialectic_reasoning_level = "low"
reasoning_level_cap = "high"
reasoning_heuristic = True
def resolve_session_name(self): def resolve_session_name(self):
return "hermes" return "hermes"

View File

@@ -823,8 +823,11 @@ def _settle_prewarm(provider):
provider._prefetch_thread.join(timeout=3.0) provider._prefetch_thread.join(timeout=3.0)
with provider._prefetch_lock: with provider._prefetch_lock:
provider._prefetch_result = "" provider._prefetch_result = ""
provider._prefetch_result_fired_at = -999
provider._prefetch_thread = None provider._prefetch_thread = None
provider._prefetch_thread_started_at = 0.0
provider._last_dialectic_turn = -999 provider._last_dialectic_turn = -999
provider._dialectic_empty_streak = 0
if getattr(provider, "_manager", None) is not None: if getattr(provider, "_manager", None) is not None:
try: try:
provider._manager.dialectic_query.reset_mock() provider._manager.dialectic_query.reset_mock()
@@ -1227,26 +1230,28 @@ class TestDialecticCadenceAdvancesOnSuccess:
def test_in_flight_thread_is_not_stacked(self): def test_in_flight_thread_is_not_stacked(self):
import threading as _threading import threading as _threading
import time as _time
provider = self._make_provider() provider = self._make_provider()
provider._session_key = "test" provider._session_key = "test"
provider._turn_count = 10 provider._turn_count = 10
provider._last_dialectic_turn = 0 provider._last_dialectic_turn = 0
# Simulate a prior thread still running # Simulate a prior thread still running (fresh, not stale)
hold = _threading.Event() hold = _threading.Event()
def _block(): def _block():
hold.wait(timeout=5.0) hold.wait(timeout=5.0)
stale = _threading.Thread(target=_block, daemon=True) fresh = _threading.Thread(target=_block, daemon=True)
stale.start() fresh.start()
provider._prefetch_thread = stale provider._prefetch_thread = fresh
provider._prefetch_thread_started_at = _time.monotonic() # fresh start
provider.queue_prefetch("hello") provider.queue_prefetch("hello")
# Should have short-circuited — no new dialectic call # Should have short-circuited — no new dialectic call
assert provider._manager.dialectic_query.call_count == 0 assert provider._manager.dialectic_query.call_count == 0
hold.set() hold.set()
stale.join(timeout=2.0) fresh.join(timeout=2.0)
class TestSessionStartDialecticPrewarm: class TestSessionStartDialecticPrewarm:
@@ -1321,6 +1326,147 @@ class TestSessionStartDialecticPrewarm:
assert p._manager.dialectic_query.call_count == 1 assert p._manager.dialectic_query.call_count == 1
class TestDialecticLiveness:
"""Liveness + observability: stale-thread recovery, stale-result discard,
empty-streak backoff, and the snapshot method used for diagnostics."""
@staticmethod
def _make_provider(cfg_extra=None):
from unittest.mock import patch, MagicMock
from plugins.memory.honcho.client import HonchoClientConfig
defaults = dict(api_key="test-key", enabled=True, recall_mode="hybrid", timeout=2.0)
if cfg_extra:
defaults.update(cfg_extra)
cfg = HonchoClientConfig(**defaults)
provider = HonchoMemoryProvider()
mock_manager = MagicMock()
mock_manager.get_or_create.return_value = MagicMock(messages=[])
mock_manager.get_prefetch_context.return_value = None
mock_manager.pop_context_result.return_value = None
mock_manager.dialectic_query.return_value = "" # default: silent
with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
provider.initialize(session_id="test-liveness")
_settle_prewarm(provider)
return provider
def test_stale_thread_is_treated_as_dead(self):
"""A thread older than timeout × multiplier no longer blocks new fires."""
import threading as _threading
p = self._make_provider()
p._session_key = "test"
p._turn_count = 10
p._last_dialectic_turn = 0
p._manager.dialectic_query.return_value = "fresh synthesis"
# Plant an alive thread with an old timestamp (stale)
hold = _threading.Event()
stuck = _threading.Thread(target=lambda: hold.wait(timeout=10.0), daemon=True)
stuck.start()
p._prefetch_thread = stuck
# timeout=2.0, multiplier=2.0, so anything older than 4s is stale
p._prefetch_thread_started_at = 0.0 # very old (1970 monotonic baseline)
p.queue_prefetch("hello")
# New thread should have been spawned since stuck one is stale
assert p._prefetch_thread is not stuck, "stale thread must be recycled"
if p._prefetch_thread:
p._prefetch_thread.join(timeout=2.0)
assert p._manager.dialectic_query.call_count == 1
hold.set()
stuck.join(timeout=2.0)
def test_stale_pending_result_is_discarded_on_read(self):
"""A pending dialectic result from many turns ago is discarded
instead of injected against a fresh conversational pivot."""
p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 2}})
p._session_key = "test"
p._base_context_cache = "base ctx"
with p._prefetch_lock:
p._prefetch_result = "ancient synthesis"
p._prefetch_result_fired_at = 1
# cadence=2, multiplier=2 → stale after 4 turns since fire
p._turn_count = 10
p._last_dialectic_turn = 1 # prevents sync first-turn path
result = p.prefetch("what's new")
assert "ancient synthesis" not in result, "stale pending must be discarded"
# Cache slot cleared
with p._prefetch_lock:
assert p._prefetch_result == ""
assert p._prefetch_result_fired_at == -999
def test_fresh_pending_result_is_kept(self):
"""A pending result within the staleness window is injected normally."""
p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 3}})
p._session_key = "test"
p._base_context_cache = ""
with p._prefetch_lock:
p._prefetch_result = "recent synthesis"
p._prefetch_result_fired_at = 8
p._turn_count = 9 # 1 turn since fire, well within cadence × 2 = 6
p._last_dialectic_turn = 8
result = p.prefetch("what's new")
assert "recent synthesis" in result
def test_empty_streak_widens_effective_cadence(self):
"""After N empty returns, the gate waits cadence + N turns."""
p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}})
p._dialectic_empty_streak = 3
# cadence=1, streak=3 → effective = 4
assert p._effective_cadence() == 4
def test_backoff_is_capped(self):
"""Effective cadence is capped at cadence × _BACKOFF_MAX."""
p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 2}})
p._dialectic_empty_streak = 100
# cadence=2, ceiling = 2 × 8 = 16
assert p._effective_cadence() == 16
def test_success_resets_empty_streak(self):
"""A non-empty result zeroes the streak so healthy operation restores
the base cadence immediately."""
p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}})
p._session_key = "test"
p._dialectic_empty_streak = 5
p._turn_count = 10
p._last_dialectic_turn = 0
p._manager.dialectic_query.return_value = "real output"
p.queue_prefetch("hello")
if p._prefetch_thread:
p._prefetch_thread.join(timeout=2.0)
assert p._dialectic_empty_streak == 0
assert p._last_dialectic_turn == 10
def test_empty_result_increments_streak(self):
p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}})
p._session_key = "test"
p._turn_count = 5
p._last_dialectic_turn = 0
p._manager.dialectic_query.return_value = "" # empty
p.queue_prefetch("hello")
if p._prefetch_thread:
p._prefetch_thread.join(timeout=2.0)
assert p._dialectic_empty_streak == 1
assert p._last_dialectic_turn == 0 # cadence not advanced
def test_liveness_snapshot_shape(self):
p = self._make_provider()
snap = p.liveness_snapshot()
for key in (
"turn_count", "last_dialectic_turn", "pending_result_fired_at",
"empty_streak", "effective_cadence", "thread_alive", "thread_age_seconds",
):
assert key in snap
class TestDialecticLifecycleSmoke: class TestDialecticLifecycleSmoke:
"""End-to-end smoke walking a multi-turn session through prewarm, """End-to-end smoke walking a multi-turn session through prewarm,
turn 1 consume, trivial skip, cadence fire, empty-result retry, turn 1 consume, trivial skip, cadence fire, empty-result retry,