From 5f9907c11616f30a03356900b8831b1fc98e7d31 Mon Sep 17 00:00:00 2001
From: Erosika
Date: Sat, 18 Apr 2026 11:01:45 -0400
Subject: [PATCH] chore(honcho): drop docs from PR scope, scrub commentary

- Revert website/docs and SKILL.md changes; docs unification handled separately
- Scrub commit/PR refs and process narration from code comments and test docstrings (no behavior change)
---
 .../autonomous-ai-agents/honcho/SKILL.md      |  6 ++--
 plugins/memory/honcho/__init__.py             | 27 ++++++--------
 plugins/memory/honcho/cli.py                  |  2 +-
 plugins/memory/honcho/client.py               | 10 +++---
 tests/honcho_plugin/test_session.py           | 35 ++++++-------------
 website/docs/user-guide/features/honcho.md    |  4 +--
 .../user-guide/features/memory-providers.md   |  4 +--
 7 files changed, 33 insertions(+), 55 deletions(-)

diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
index 5d03a54985..c60d2c6356 100644
--- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md
+++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
@@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen.
 | Key | Default | Description |
 |-----|---------|-------------|
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `1` | Min turns between dialectic API calls |
+| `dialecticCadence` | `3` | Min turns between dialectic API calls |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection |
 
-Higher cadence values reduce API calls and cost. `dialecticCadence: 1` (default) fires every turn; set to `3` or higher to throttle for cost.
+Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn.
 
 ### Depth (how many)
 
@@ -368,7 +368,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso
 | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` |
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `1` | Min turns between dialectic LLM calls |
+| `dialecticCadence` | `3` | Min turns between dialectic LLM calls |
 
 The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions.
 
diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py
index ac0f60279a..51345b8e92 100644
--- a/plugins/memory/honcho/__init__.py
+++ b/plugins/memory/honcho/__init__.py
@@ -393,13 +393,10 @@ class HonchoMemoryProvider(MemoryProvider):
             logger.debug("Honcho memory file migration skipped: %s", e)
 
         # ----- B7: Pre-warming at init -----
-        # Context prewarm: warms peer.context() cache (base layer), consumed
-        # via pop_context_result() in prefetch().
-        # Dialectic prewarm: fires a depth-aware cycle against the plugin's
-        # own _prefetch_result so turn 1 can consume it directly. Without this
-        # the first-turn sync path pays for a duplicate .chat() — and at
-        # depth>1 a single-pass session-start dialectic often returns weak
-        # output that multi-pass audit/reconciliation is meant to catch.
+        # Context prewarm warms peer.context() (base layer), consumed via
+        # pop_context_result() in prefetch(). Dialectic prewarm runs the
+        # full configured depth and writes into _prefetch_result so turn 1
+        # consumes the result directly.
         if self._recall_mode in ("context", "hybrid"):
             try:
                 self._manager.prefetch_context(self._session_key)
@@ -555,8 +552,7 @@ class HonchoMemoryProvider(MemoryProvider):
         if self._injection_frequency == "first-turn" and self._turn_count > 1:
             return ""
 
-        # Skip trivial prompts — "ok", "yes", slash commands carry no semantic signal,
-        # so injecting user context there just burns tokens and can derail the reply.
+        # Trivial prompts ("ok", "yes", slash commands) carry no semantic signal.
         if self._is_trivial_prompt(query):
             return ""
 
@@ -619,8 +615,8 @@ class HonchoMemoryProvider(MemoryProvider):
             if r and r.strip():
                 with self._prefetch_lock:
                     self._prefetch_result = r
-                # Only advance cadence on a non-empty result so failures
-                # don't burn a 3-turn cooldown on nothing.
+                # Advance cadence only on a non-empty result so the next
+                # turn retries when the call returned nothing.
                 self._last_dialectic_turn = _fired_at
 
         self._prefetch_thread = threading.Thread(
@@ -711,9 +707,8 @@ class HonchoMemoryProvider(MemoryProvider):
                          self._dialectic_cadence, self._turn_count - self._last_dialectic_turn)
             return
 
-        # Advance cadence only on a non-empty result — otherwise a silent failure
-        # (empty dialectic, transient API error) would burn the full cadence window
-        # before the next retry, making it look like dialectic "never fires again".
+        # Cadence advances only on a non-empty result so empty returns
+        # (transient API error, sparse representation) retry next turn.
         _fired_at = self._turn_count
 
         def _run():
@@ -751,9 +746,7 @@ class HonchoMemoryProvider(MemoryProvider):
 
     _LEVEL_ORDER = ("minimal", "low", "medium", "high", "max")
 
-    # Reasoning-level heuristic thresholds (restored from pre-9a0ab34c behavior).
-    # Promoted to class constants so tests can override without widening the
-    # config surface. Bump to config fields only if real use shows they're needed.
+    # Char-count thresholds for the query-length reasoning heuristic.
     _HEURISTIC_LENGTH_MEDIUM = 120
     _HEURISTIC_LENGTH_HIGH = 400
 
diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py
index 478bf39d8a..5cd25bfbab 100644
--- a/plugins/memory/honcho/cli.py
+++ b/plugins/memory/honcho/cli.py
@@ -463,7 +463,7 @@ def cmd_setup(args) -> None:
     current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "1")
     print("\n  Dialectic cadence:")
    print("    How often Honcho rebuilds its user model (LLM call on Honcho backend).")
-    print("    1 = every turn (default), 3+ = sparse (cost-saving).")
+    print("    1 = every turn (default), 3+ = sparse.")
     new_dialectic = _prompt("Dialectic cadence", default=current_dialectic)
     try:
         val = int(new_dialectic)
diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py
index 136b1e60dc..346c2b76e6 100644
--- a/plugins/memory/honcho/client.py
+++ b/plugins/memory/honcho/client.py
@@ -251,13 +251,11 @@ class HonchoClientConfig:
     # matching dialectic_depth length. When None, uses proportional defaults
     # derived from dialectic_reasoning_level.
     dialectic_depth_levels: list[str] | None = None
-    # Reasoning-level heuristic for auto-injected dialectic calls. When true,
-    # scales the base level up on longer queries (restored from pre-#10619
-    # behavior; see plugins/memory/honcho/__init__.py for thresholds).
-    # Never auto-selects a level above reasoning_level_cap.
+    # When true, the auto-injected dialectic scales reasoning level up on
+    # longer queries. See HonchoMemoryProvider for thresholds.
     reasoning_heuristic: bool = True
-    # Ceiling for heuristic-selected reasoning level. "max" is reserved for
-    # explicit tool-path selection; default "high" matches the old behavior.
+    # Ceiling for the heuristic-selected reasoning level. "max" is reserved
+    # for explicit tool-path selection.
     reasoning_level_cap: str = "high"
     # Honcho API limits — configurable for self-hosted instances
     # Max chars per message sent via add_messages() (Honcho cloud: 25000)
diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py
index b0282b1969..83db3f24dc 100644
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@@ -862,9 +862,7 @@ class TestDialecticCadenceDefaults:
         return provider
 
     def test_default_is_1(self):
-        """Default dialectic_cadence should be 1 (every turn) — restored from
-        pre-#10619 behavior to avoid a silent regression on upgrade for users
-        who never set dialecticCadence explicitly."""
+        """Default dialectic_cadence is 1 — fires every turn unless overridden."""
         provider = self._make_provider()
         assert provider._dialectic_cadence == 1
 
@@ -1112,10 +1110,7 @@ class TestDialecticDepth:
 
 
 class TestTrivialPromptHeuristic:
-    """Trivial prompts ('ok', 'y', slash commands) must short-circuit injection.
-
-    Restored after accidental removal during the two-layer prefetch refactor.
-    """
+    """Trivial prompts ('ok', 'y', slash commands) must short-circuit injection."""
 
     @staticmethod
     def _make_provider():
@@ -1173,11 +1168,9 @@ class TestTrivialPromptHeuristic:
 
 
 class TestDialecticCadenceAdvancesOnSuccess:
-    """Cadence tracker must only advance when the dialectic call actually returned.
-
-    A silent failure (empty result, API blip) used to burn the full cadence window
-    before retrying — making it look like dialectic 'never fires again'.
-    """
+    """Cadence tracker advances only when the dialectic call returns a
+    non-empty result. Empty results (transient API error, sparse representation)
+    must retry on the next eligible turn instead of waiting the full cadence."""
 
     @staticmethod
     def _make_provider():
@@ -1329,13 +1322,9 @@ class TestSessionStartDialecticPrewarm:
 
 
 class TestDialecticLifecycleSmoke:
-    """End-to-end smoke: walks a realistic multi-turn session through every
-    behavior we care about — prewarm → turn 1 consume → trivial skip → cadence
-    fire → silent-failure retry → heuristic bump → session-end flush.
-
-    This is the 'velvet circuit' test: one provider, one flow, one set of
-    assertions. If the suite above lies about intent, this one catches it.
-    """
+    """End-to-end smoke walking a multi-turn session through prewarm,
+    turn 1 consume, trivial skip, cadence fire, empty-result retry,
+    heuristic bump, and session-end flush."""
 
     @staticmethod
     def _make_provider(cfg_extra=None):
@@ -1473,11 +1462,9 @@ class TestDialecticLifecycleSmoke:
 
 
 class TestReasoningHeuristic:
-    """Restored char-count heuristic for auto-injected dialectic reasoning level.
-
-    Pre-9a0ab34c behavior: scale base up by query length, capped at
-    reasoning_level_cap. 'max' is reserved for explicit tool-path selection.
-    """
+    """Char-count heuristic that scales the auto-injected reasoning level by
+    query length, clamped at reasoning_level_cap. 'max' is reserved for
+    explicit tool-path selection."""
 
     @staticmethod
     def _make_provider(cfg_extra=None):
diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md
index 906a7c030e..2040949d25 100644
--- a/website/docs/user-guide/features/honcho.md
+++ b/website/docs/user-guide/features/honcho.md
@@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs:
 | Knob | Controls | Default |
 |------|----------|---------|
 | `contextCadence` | Turns between `context()` API calls (base layer refresh) | `1` |
-| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` |
+| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `3` |
 | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` |
 
 These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes.
@@ -104,7 +104,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho
 |-----|---------|-------------|
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly |
+| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md
index 181f30f7fa..f571c7d48f 100644
--- a/website/docs/user-guide/features/memory-providers.md
+++ b/website/docs/user-guide/features/memory-providers.md
@@ -82,7 +82,7 @@ hermes memory setup  # select "honcho"
 | `workspace` | host key | Shared workspace ID |
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes |
+| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
@@ -181,7 +181,7 @@ This inherits settings from the default `hermes` host block and creates new AI p
     },
     "dialecticReasoningLevel": "low",
     "dialecticDynamic": true,
-    "dialecticCadence": 1,
+    "dialecticCadence": 3,
     "dialecticDepth": 1,
     "dialecticMaxChars": 600,
     "contextCadence": 1,
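-- 

Notes below the signature marker (ignored by git am, not part of the patch).

The advance-on-success rule that the rewritten comments and
TestDialecticCadenceAdvancesOnSuccess describe can be stated in isolation.
A minimal sketch with a hypothetical class and method names; only
_dialectic_cadence, _turn_count, and _last_dialectic_turn are attribute
names taken from the diff:

    class CadenceSketch:
        """Hypothetical stand-in for the provider's cadence bookkeeping."""

        def __init__(self, cadence: int = 1):
            self._dialectic_cadence = cadence   # min turns between dialectic calls
            self._turn_count = 0                # incremented once per user turn
            self._last_dialectic_turn = 0       # turn of the last non-empty result

        def should_fire(self) -> bool:
            # Eligible once at least `cadence` turns have passed since the
            # last successful dialectic call.
            return (self._turn_count - self._last_dialectic_turn) >= self._dialectic_cadence

        def record(self, result: str | None) -> None:
            # Advance only on a non-empty result, so an empty return
            # (transient API error, sparse representation) retries on the
            # next turn instead of waiting out the full cadence window.
            if result and result.strip():
                self._last_dialectic_turn = self._turn_count

With cadence 3 and a last success on turn 2, an empty result on turn 5 leaves
the tracker at 2, so turn 6 is eligible again; advancing on the empty call
would have pushed the next attempt to turn 8.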
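The char-count heuristic referenced in client.py and TestReasoningHeuristic
reads roughly as follows. The level order, the 120/400 thresholds, and the
"high" cap come straight from the diff; the function name and the exact bump
rule are assumptions, not the plugin's actual implementation:

    _LEVEL_ORDER = ("minimal", "low", "medium", "high", "max")

    # Thresholds as declared in plugins/memory/honcho/__init__.py.
    _HEURISTIC_LENGTH_MEDIUM = 120
    _HEURISTIC_LENGTH_HIGH = 400

    def heuristic_level(query: str, base: str = "low", cap: str = "high") -> str:
        """Hypothetical sketch: scale `base` up by query length, clamp at `cap`."""
        # Long queries raise the floor; the base level is never lowered.
        if len(query) >= _HEURISTIC_LENGTH_HIGH:
            floor = "high"
        elif len(query) >= _HEURISTIC_LENGTH_MEDIUM:
            floor = "medium"
        else:
            floor = base
        idx = max(_LEVEL_ORDER.index(base), _LEVEL_ORDER.index(floor))
        # "max" is reserved for explicit tool-path selection, so the heuristic
        # never selects above `cap` (default "high").
        return _LEVEL_ORDER[min(idx, _LEVEL_ORDER.index(cap))]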
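The contextTokens trimming order stated in the SKILL.md hunk (trim the summary
first, then the representation, preserving the card) amounts to filling the
budget from the most-protected part down. A sketch using len() as a stand-in
for real token counting; the function and its signature are hypothetical:

    def trim_context(summary: str, representation: str, card: str,
                     budget: int | None) -> str:
        """Hypothetical sketch of the contextTokens budget enforcement."""
        if budget is None:
            # contextTokens is opt-in; omitted means uncapped.
            return "\n\n".join(p for p in (summary, representation, card) if p)
        # The card is never trimmed; the representation gets whatever room
        # remains after the card; the summary absorbs the rest of the cut.
        remaining = budget - len(card)
        representation = representation[:max(remaining, 0)]
        remaining -= len(representation)
        summary = summary[:max(remaining, 0)]
        return "\n\n".join(p for p in (summary, representation, card) if p)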