mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
chore(honcho): drop docs from PR scope, scrub commentary
- Revert website/docs and SKILL.md changes; docs unification is handled separately.
- Scrub commit/PR refs and process narration from code comments and test docstrings (no behavior change).
This commit is contained in:
@@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen.
|
||||
| Key | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `contextCadence` | `1` | Min turns between context API calls |
|
||||
| `dialecticCadence` | `1` | Min turns between dialectic API calls |
|
||||
| `dialecticCadence` | `3` | Min turns between dialectic API calls |
|
||||
| `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection |
|
||||
|
||||
Higher cadence values reduce API calls and cost. `dialecticCadence: 1` (default) fires every turn; set to `3` or higher to throttle for cost.
|
||||
Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn.
|
||||
|
||||
### Depth (how many)
|
||||
|
||||
@@ -368,7 +368,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso
|
||||
| `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. |
|
||||
| `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` |
|
||||
| `contextCadence` | `1` | Min turns between context API calls |
|
||||
| `dialecticCadence` | `1` | Min turns between dialectic LLM calls |
|
||||
| `dialecticCadence` | `3` | Min turns between dialectic LLM calls |
|
||||
|
||||
The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions.
|
||||
|
||||
|
||||
@@ -393,13 +393,10 @@ class HonchoMemoryProvider(MemoryProvider):
|
||||
logger.debug("Honcho memory file migration skipped: %s", e)
|
||||
|
||||
# ----- B7: Pre-warming at init -----
|
||||
# Context prewarm: warms peer.context() cache (base layer), consumed
|
||||
# via pop_context_result() in prefetch().
|
||||
# Dialectic prewarm: fires a depth-aware cycle against the plugin's
|
||||
# own _prefetch_result so turn 1 can consume it directly. Without this
|
||||
# the first-turn sync path pays for a duplicate .chat() — and at
|
||||
# depth>1 a single-pass session-start dialectic often returns weak
|
||||
# output that multi-pass audit/reconciliation is meant to catch.
|
||||
# Context prewarm warms peer.context() (base layer), consumed via
|
||||
# pop_context_result() in prefetch(). Dialectic prewarm runs the
|
||||
# full configured depth and writes into _prefetch_result so turn 1
|
||||
# consumes the result directly.
|
||||
if self._recall_mode in ("context", "hybrid"):
|
||||
try:
|
||||
self._manager.prefetch_context(self._session_key)
|
||||
@@ -555,8 +552,7 @@ class HonchoMemoryProvider(MemoryProvider):
|
||||
if self._injection_frequency == "first-turn" and self._turn_count > 1:
|
||||
return ""
|
||||
|
||||
# Skip trivial prompts — "ok", "yes", slash commands carry no semantic signal,
|
||||
# so injecting user context there just burns tokens and can derail the reply.
|
||||
# Trivial prompts ("ok", "yes", slash commands) carry no semantic signal.
|
||||
if self._is_trivial_prompt(query):
|
||||
return ""
|
||||
|
||||
@@ -619,8 +615,8 @@ class HonchoMemoryProvider(MemoryProvider):
|
||||
if r and r.strip():
|
||||
with self._prefetch_lock:
|
||||
self._prefetch_result = r
|
||||
# Only advance cadence on a non-empty result so failures
|
||||
# don't burn a 3-turn cooldown on nothing.
|
||||
# Advance cadence only on a non-empty result so the next
|
||||
# turn retries when the call returned nothing.
|
||||
self._last_dialectic_turn = _fired_at
|
||||
|
||||
self._prefetch_thread = threading.Thread(
|
||||
@@ -711,9 +707,8 @@ class HonchoMemoryProvider(MemoryProvider):
|
||||
self._dialectic_cadence, self._turn_count - self._last_dialectic_turn)
|
||||
return
|
||||
|
||||
# Advance cadence only on a non-empty result — otherwise a silent failure
|
||||
# (empty dialectic, transient API error) would burn the full cadence window
|
||||
# before the next retry, making it look like dialectic "never fires again".
|
||||
# Cadence advances only on a non-empty result so empty returns
|
||||
# (transient API error, sparse representation) retry next turn.
|
||||
_fired_at = self._turn_count
|
||||
|
||||
def _run():
|
||||
@@ -751,9 +746,7 @@ class HonchoMemoryProvider(MemoryProvider):
|
||||
|
||||
_LEVEL_ORDER = ("minimal", "low", "medium", "high", "max")
|
||||
|
||||
# Reasoning-level heuristic thresholds (restored from pre-9a0ab34c behavior).
|
||||
# Promoted to class constants so tests can override without widening the
|
||||
# config surface. Bump to config fields only if real use shows they're needed.
|
||||
# Char-count thresholds for the query-length reasoning heuristic.
|
||||
_HEURISTIC_LENGTH_MEDIUM = 120
|
||||
_HEURISTIC_LENGTH_HIGH = 400
|
||||
|
||||
|
||||
@@ -463,7 +463,7 @@ def cmd_setup(args) -> None:
|
||||
current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "1")
|
||||
print("\n Dialectic cadence:")
|
||||
print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).")
|
||||
print(" 1 = every turn (default), 3+ = sparse (cost-saving).")
|
||||
print(" 1 = every turn (default), 3+ = sparse.")
|
||||
new_dialectic = _prompt("Dialectic cadence", default=current_dialectic)
|
||||
try:
|
||||
val = int(new_dialectic)
|
||||
|
||||
@@ -251,13 +251,11 @@ class HonchoClientConfig:
|
||||
# matching dialectic_depth length. When None, uses proportional defaults
|
||||
# derived from dialectic_reasoning_level.
|
||||
dialectic_depth_levels: list[str] | None = None
|
||||
# Reasoning-level heuristic for auto-injected dialectic calls. When true,
|
||||
# scales the base level up on longer queries (restored from pre-#10619
|
||||
# behavior; see plugins/memory/honcho/__init__.py for thresholds).
|
||||
# Never auto-selects a level above reasoning_level_cap.
|
||||
# When true, the auto-injected dialectic scales reasoning level up on
|
||||
# longer queries. See HonchoMemoryProvider for thresholds.
|
||||
reasoning_heuristic: bool = True
|
||||
# Ceiling for heuristic-selected reasoning level. "max" is reserved for
|
||||
# explicit tool-path selection; default "high" matches the old behavior.
|
||||
# Ceiling for the heuristic-selected reasoning level. "max" is reserved
|
||||
# for explicit tool-path selection.
|
||||
reasoning_level_cap: str = "high"
|
||||
# Honcho API limits — configurable for self-hosted instances
|
||||
# Max chars per message sent via add_messages() (Honcho cloud: 25000)
|
||||
|
||||
@@ -862,9 +862,7 @@ class TestDialecticCadenceDefaults:
|
||||
return provider
|
||||
|
||||
def test_default_is_1(self):
|
||||
"""Default dialectic_cadence should be 1 (every turn) — restored from
|
||||
pre-#10619 behavior to avoid a silent regression on upgrade for users
|
||||
who never set dialecticCadence explicitly."""
|
||||
"""Default dialectic_cadence is 1 — fires every turn unless overridden."""
|
||||
provider = self._make_provider()
|
||||
assert provider._dialectic_cadence == 1
|
||||
|
||||
@@ -1112,10 +1110,7 @@ class TestDialecticDepth:
|
||||
|
||||
|
||||
class TestTrivialPromptHeuristic:
|
||||
"""Trivial prompts ('ok', 'y', slash commands) must short-circuit injection.
|
||||
|
||||
Restored after accidental removal during the two-layer prefetch refactor.
|
||||
"""
|
||||
"""Trivial prompts ('ok', 'y', slash commands) must short-circuit injection."""
|
||||
|
||||
@staticmethod
|
||||
def _make_provider():
|
||||
@@ -1173,11 +1168,9 @@ class TestTrivialPromptHeuristic:
|
||||
|
||||
|
||||
class TestDialecticCadenceAdvancesOnSuccess:
|
||||
"""Cadence tracker must only advance when the dialectic call actually returned.
|
||||
|
||||
A silent failure (empty result, API blip) used to burn the full cadence window
|
||||
before retrying — making it look like dialectic 'never fires again'.
|
||||
"""
|
||||
"""Cadence tracker advances only when the dialectic call returns a
|
||||
non-empty result. Empty results (transient API error, sparse representation)
|
||||
must retry on the next eligible turn instead of waiting the full cadence."""
|
||||
|
||||
@staticmethod
|
||||
def _make_provider():
|
||||
@@ -1329,13 +1322,9 @@ class TestSessionStartDialecticPrewarm:
|
||||
|
||||
|
||||
class TestDialecticLifecycleSmoke:
|
||||
"""End-to-end smoke: walks a realistic multi-turn session through every
|
||||
behavior we care about — prewarm → turn 1 consume → trivial skip → cadence
|
||||
fire → silent-failure retry → heuristic bump → session-end flush.
|
||||
|
||||
This is the 'velvet circuit' test: one provider, one flow, one set of
|
||||
assertions. If the suite above lies about intent, this one catches it.
|
||||
"""
|
||||
"""End-to-end smoke walking a multi-turn session through prewarm,
|
||||
turn 1 consume, trivial skip, cadence fire, empty-result retry,
|
||||
heuristic bump, and session-end flush."""
|
||||
|
||||
@staticmethod
|
||||
def _make_provider(cfg_extra=None):
|
||||
@@ -1473,11 +1462,9 @@ class TestDialecticLifecycleSmoke:
|
||||
|
||||
|
||||
class TestReasoningHeuristic:
|
||||
"""Restored char-count heuristic for auto-injected dialectic reasoning level.
|
||||
|
||||
Pre-9a0ab34c behavior: scale base up by query length, capped at
|
||||
reasoning_level_cap. 'max' is reserved for explicit tool-path selection.
|
||||
"""
|
||||
"""Char-count heuristic that scales the auto-injected reasoning level by
|
||||
query length, clamped at reasoning_level_cap. 'max' is reserved for
|
||||
explicit tool-path selection."""
|
||||
|
||||
@staticmethod
|
||||
def _make_provider(cfg_extra=None):
|
||||
|
||||
@@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs:
|
||||
| Knob | Controls | Default |
|
||||
|------|----------|---------|
|
||||
| `contextCadence` | Turns between `context()` API calls (base layer refresh) | `1` |
|
||||
| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` |
|
||||
| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `3` |
|
||||
| `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` |
|
||||
|
||||
These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes.
|
||||
@@ -104,7 +104,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho
|
||||
|-----|---------|-------------|
|
||||
| `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries |
|
||||
| `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
|
||||
| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly |
|
||||
| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly |
|
||||
| `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 |
|
||||
| `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
|
||||
| `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
|
||||
|
||||
@@ -82,7 +82,7 @@ hermes memory setup # select "honcho"
|
||||
| `workspace` | host key | Shared workspace ID |
|
||||
| `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries |
|
||||
| `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
|
||||
| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes |
|
||||
| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes |
|
||||
| `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation |
|
||||
| `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
|
||||
| `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
|
||||
@@ -181,7 +181,7 @@ This inherits settings from the default `hermes` host block and creates new AI p
|
||||
},
|
||||
"dialecticReasoningLevel": "low",
|
||||
"dialecticDynamic": true,
|
||||
"dialecticCadence": 1,
|
||||
"dialecticCadence": 3,
|
||||
"dialecticDepth": 1,
|
||||
"dialecticMaxChars": 600,
|
||||
"contextCadence": 1,
|
||||
|
||||
Reference in New Issue
Block a user